## Workflow to set up and run ensemble simulations

Create several bash files that provide directions on where to look for the initial files and where to place the model output files.

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob

### Step 0: Setup directories

#### 0.1 Create directories to hold run files & simulation outputs
Repository structure: <br/>
- /home/disk/eos8/ach315/upscale/sims/opt/
- /home/disk/eos8/ach315/upscale/runs/opt/
    - years/ 
        - cultivars/
            - all sites

Code block only needs to be executed once - will throw error otherwise

In [2]:
# Top-level tree to populate. Switch to 'sims/' and re-run the cell to build
# the matching tree for the simulation outputs.
folder = 'runs/'
#folder = 'sims/'

# Cultivar sub-directory names: var_0 ... var_99
cultivars = ['var_' + str(i) for i in range(0, 100)]

# One directory per year (1961-2010), each holding one sub-directory per cultivar.
# os.makedirs creates the intermediate year directory on the way down, and
# exist_ok=True makes it a no-op for directories that are already there, so
# re-running this cell (or resuming after a partial run) no longer raises.
for i in range(1961, 2011):
    for j in cultivars:
        os.makedirs('/home/disk/eos8/ach315/upscale/' + folder + 'opt/' + str(i) + '/' + str(j),
                    exist_ok=True)

#### 0.2 Create directories to hold job scripts
Repository structure:
- /home/disk/eos8/ach315/upscale/jobs/opt/

All the job scripts are in one place and not subdivided by folders to make things easier to automate.

### Step 1. Create run.txt files

In [36]:
# Entries found under inits/con/ drive the run-file generation
# (alternative source kept for reference: the runs/opt/ tree).
#directories = glob.glob('/home/disk/eos8/ach315/upscale/runs/opt/*')
directories = glob.glob('/home/disk/eos8/ach315/upscale/inits/con/*')

# Both tables are read with every column as a string and the first column as the index.
siteyears, site_info = (
    pd.read_csv(csv_path, dtype=str, index_col=0)
    for csv_path in ('/home/disk/eos8/ach315/upscale/weadata/site_year_crithr1.csv',
                     '/home/disk/eos8/ach315/upscale/weadata/site_info.csv')
)

In [38]:
# Write one run file per (site-year, cultivar) combination.
# Each run file is a plain-text list of paths, one per line: the input files
# first, followed by the output files.
# NOTE(review): the line order is kept exactly as before; presumably the model
# reads the run file positionally -- confirm before reordering anything.

# fixed input locations shared by every run file
init_dirct_wea = '/home/disk/eos8/ach315/upscale/weadata/data/control/'
init_dirct_stand = '/home/disk/eos8/ach315/upscale/inits/standard/'

# the cultivar file list does not depend on the site-year, so glob it once
cultivars = glob.glob('/home/disk/eos8/ach315/upscale/inits/var/*')

for i in directories:
    # the last two '_'-separated pieces of the directory name are site and year
    name_parts = i.split('/')[-1].split('_')
    year = name_parts[-1]
    site = name_parts[-2]

    # site-year specific init files
    init_dirct_custom = '/home/disk/eos8/ach315/upscale/inits/con/' + site + '_' + year + '/'

    for j in cultivars:
        # cultivar name = the piece of the file name just before the extension
        var = j.split('/')[-1].split('.')[-2]
        output_dirct = '/home/disk/eos8/ach315/upscale/sims/opt/' + year + '/' + var + '/'
        run_tag = site + '_' + year + '_' + var

        # strings in run file
        strings = [
            init_dirct_wea + site + '_' + year + '.txt\n',
            init_dirct_custom + 'time.txt\n',
            init_dirct_stand + 'biology.txt\n',
            init_dirct_custom + 'climate.txt\n',
            init_dirct_stand + 'nitrogen.txt\n',
            init_dirct_stand + 'solute.txt\n',
            init_dirct_stand + 'soil.txt\n',
            init_dirct_custom + 'management.txt\n',
            init_dirct_stand + 'drip.txt\n',
            init_dirct_stand + 'water.txt\n',
            init_dirct_stand + 'waterbound.txt\n',
            init_dirct_custom + 'init.txt\n',
            j + '\n',  # looping through different cultivar files
            init_dirct_stand + 'grid.txt\n',
            init_dirct_stand + 'nod.txt\n',
            init_dirct_stand + 'massbl.txt\n',
            output_dirct + 'out1_' + run_tag + '.txt\n',
            output_dirct + 'out2_' + run_tag + '.txt\n',
            output_dirct + 'out3.txt\n',
            output_dirct + 'out4.txt\n',
            output_dirct + 'out5.txt\n',
            output_dirct + 'out6.txt\n',
            output_dirct + 'massbl.txt\n',
        ]

        # writing out run.txt file; the context manager closes the handle even
        # if the write fails part-way through
        with open('/home/disk/eos8/ach315/upscale/runs/opt/' + year + '/' + var +
                  '/run_' + run_tag + '.txt', 'w') as run:
            run.writelines(strings)

### Step 2. Create job files that execute a batch of run files

In [39]:
# Write one PBS job script per (year, cultivar). Each script loops over every
# run file in runs/opt/<year>/<cultivar>/ and passes it to maizsim, with a
# 3-minute timeout per run file.
dirct = '/home/disk/eos8/ach315/upscale/jobs/opt/'
cultivars = glob.glob('/home/disk/eos8/ach315/upscale/inits/var/*')
treatment = 'cont'

# lines that are identical in every job script
pbs_header = [
    '#!/bin/bash\n',
    '#PBS -l nodes=1:ppn=1\n',
    '#PBS -l walltime=08:00:00\n',
    '#PBS -m a\n',
    '#PBS -M ach315@uw.edu\n',
]
run_loop = [
    'for file in $FILES\n',
    'do\n',
    '    cd /home/disk/eos8/ach315/MAIZSIM\n',
    '    timeout 3m maizsim $file\n',
    'done\n',
]

# NOTE(review): only 1961-1990 get job scripts here, while the directory setup
# cell creates folders for 1961-2010 -- confirm this range is intentional.
for i in np.arange(1961,1991):
    for j in cultivars:
        # cultivar name = the piece of the file name just before the extension
        var = j.split('/')[-1].split('.')[-2]
        job_name = str(i) + '_' + str(var)

        strings = pbs_header + [
            '#PBS -N ' + treatment + '_' + job_name + '\n',
            '\n',
            'FILES=/home/disk/eos8/ach315/upscale/runs/opt/' + str(i)+ '/' + str(var) + '/*\n',
            '\n',
        ] + run_loop

        # os.path.join avoids the doubled '/' (dirct already ends with one);
        # the context manager closes the handle even if the write fails
        with open(os.path.join(dirct, job_name + '.job'), 'w') as jobs:
            jobs.writelines(strings)

### Step 3. Create script that automates qsub jobs

I wrote these files directly on the server. <br/>
See subjobs.job at '/home/disk/eos8/ach315/upscale/jobs' <br/>