# Process Growth Phases 

This looks at the SQLite Databases generated by Apsim X (Next Gen) for 109 Wheat varieties, 10 differing sow dates for  57,434 locations/sites across Australia.  
Each database file covers one (1) site.

In [1]:
#Import the required libraries
import sys
import os
import datetime
import sqlite3
import numpy as np
import pandas as pd
import math

In [2]:
# define the working directories
apsim_sourcedir = "/OSM/CBR/AG_WHEATTEMP/source"
apsim_outfiledir = "/OSM/CBR/AG_WHEATTEMP/work/output"
metfile_sourcedir = "/OSM/CBR/AG_WHEATTEMP/work/ApsimNG-test/APSIM_run/met"


In [3]:
dbfile_df = pd.DataFrame(columns=['filename'])
dbfile_df.filename = sorted(apsim_sourcedir+'/'+f for f in os.listdir(apsim_sourcedir) if f.endswith('.db'))
print(dbfile_df.head())
dbname = dbfile_df.filename[4]
print(dbname)

                                       filename
0  /OSM/CBR/AG_WHEATTEMP/source/113.60-28.30.db
1  /OSM/CBR/AG_WHEATTEMP/source/113.70-28.45.db
2  /OSM/CBR/AG_WHEATTEMP/source/113.70-28.50.db
3  /OSM/CBR/AG_WHEATTEMP/source/113.75-28.45.db
4  /OSM/CBR/AG_WHEATTEMP/source/113.75-28.50.db
/OSM/CBR/AG_WHEATTEMP/source/113.75-28.50.db


In [4]:
#these are required for calculations of thermal time
num3hr = int(24 / 3)
#print("num3hr: ", num3hr)    
t_range_fract = []

# pre calculate t_range_fract for speed reasons
for period in range(num3hr):
    calcValue = 0.92105 \
                + 0.1140 * period \
                - 0.0703 * math.pow(period, 2) \
                + 0.0053 * math.pow(period, 3)
    t_range_fract.append(calcValue)

#print("t_range_fract: ", t_range_fract)

In [5]:
def linint_3hrly_Temperature(tmax, tmin, xp, fp):
    '''
    Eight interpolations of the air temperature are calculated using 
    a three-hour correction factor.
    For each air three-hour air temperature, a value is calculated.
    The eight three-hour estimates are then averaged to obtain the daily value.
    '''

    #Local Variables
    tot = 0.0            #sum_of of 3 hr interpolations
    
    for period in range(1, num3hr):
        #get mean temperature for 3 hr period (oC)
        #tmean_3hour = temp_3hr(tmax, tmin, period)
        tmean_3hour = tmin + (t_range_fract[period-1] * (tmax - tmin))
        #tot = tot + ttFn.valueIndexed(tmean_3hour)
        tot = tot + np.interp(tmean_3hour, xp,fp)
        
        #print("tmean_3hour: ", tmean_3hour, " - tot: ", tot)

    return tot / num3hr;


In [14]:
def read_ApsimWeather(filename, lat):
    '''
    Reads an apsim weather ('.met') file, removes the header information,
    calculates and adds a date column (based on year and day), and the
    average temperature (based on maxt and mint).
    '''
    import math

    # these XYPairs are use when calculating Thermal Time 
    # and are specific to Wheat only
    xp = [0, 26, 37]
    fp = [0, 26, 0]
    
    lineNo = 0
    with open(filename, "r") as f:
        for line in f:
            lineNo = lineNo + 1
            if line.startswith('year'):
                break;

    # return the data using the starting line no (determined above)
    # original column names=['year','day', 'radn', 'maxt', 'mint', 'rain']
    metData = pd.read_table(filename, sep='\s+', header=None, skiprows=lineNo+1,
                            names=['year','dayofYear', 'radiation', 'maxTemp', 'minTemp', 'rain'])
    
    # add the calculated columns
    metData['runDate'] = pd.to_datetime(metData['year'].astype(str) + " " + metData['dayofYear'].astype(str), format="%Y %j")

    # this may need to be the thermal time, not just average temp
    metData['avgTemp'] = (metData['maxTemp'] + metData['minTemp']) / 2

    #convert the radiation from MJ/m2/day to Photosynthetically active radiation (PAR)
    metData['PARIO'] = metData['radiation'] * 0.47

    #convert the measurement unit for the radiation from MJ/m2/day to J/m2/day
    metData['radnJ'] = metData['radiation'] * 1000000

    metData['PQ'] = metData['PARIO'] / metData['avgTemp']

    # calculation the day length
    radians = math.pi/180
    lambdaRadians = float(latitude) * radians

    sinLAT = math.sin(lambdaRadians)
    cosLAT = math.cos(lambdaRadians)
    sinDMC = math.sin(radians * 23.45)

    #print("radians: ", radians)
    #print("lambdaRadians: ", lambdaRadians)
    #print("sinLAT: ", sinLAT)
    #print("cosLAT: ", cosLAT)
    #print("sinDMC: ", sinDMC)    
    
    metData['sinDEC'] = -sinDMC * np.cos(2 * math.pi * (metData['dayofYear'] + 10) / 365)
    metData['cosDEC'] = np.sqrt(1 - (metData['sinDEC'] * metData['sinDEC']))
    metData['a'] = sinLAT * metData['sinDEC']
    metData['b'] = cosLAT * metData['cosDEC']

    metData['daylength'] = 12 * (1 + (2 / math.pi) * np.arcsin(metData['a']/metData['b']))

    # calculate the Fraction Disfused Radiation (FDR)
    metData['hour'] = np.mod(metData['dayofYear'], 1) * 24
    metData['sinB'] = metData['a'] + metData['b'] * np.cos(2 * math.pi * (metData['hour'] - 12) / 24)
    metData['SC'] = 1367 * (1 + 0.033 * np.cos(2 * math.pi * (metData['dayofYear'] - 10) / 365))
    metData['sinINT'] = metData['a'] * metData['daylength'] + (24 * metData['b'] / math.pi) * \
                        np.cos((math.pi / 2) * ((metData['daylength'] / 12) - 1))

    metData['Ta'] = metData['radnJ'] / (metData['sinINT'] * 3600 * metData['SC'])
    metData['FDR'] = metData['Ta'] * -1.6 +1.32

    # calculate the Evapotranspiration
    metData['vpsl'] = 238.102 * 17.32491 * ((metData['minTemp'] + metData['maxTemp']) /2) / \
                      (((metData['minTemp'] + metData['maxTemp']) / 2) + 238.102) ** 2
    metData['ETpt'] = 1.26 * (metData['radnJ']  * (metData['vpsl'] / (metData['vpsl'] + 0.067))) / 2454000

    metData['ApsimTT'] = metData.apply(lambda x: linint_3hrly_Temperature(x['maxTemp'], x['minTemp'], xp, fp), axis=1)
    
    # sort the columns to be a little more logical
    #cols=['year', 'dayofYear', 'runDate', 'dayLength', 'maxTemp', 'minTemp', 'avgTemp', 'ApsimTT', 'rain', \
    #      'PARIO', 'fracDiffusedRadn', 'vpsl', 'ETpt']
    #metData = metData[cols]    
    
    return metData


In [15]:
def get_simulation_details(dbname):
    '''
    Opens the specified SQL Database and extracts the 'Name' details from the Simulation
    Table, and splits it to Simulation ID, Longitude, Latitude, Variety, SowDate, and
    returns ad dataframe.
    '''

    # connect to the Database
    con = sqlite3.connect(dbname)
    cur = con.cursor()

    # get contents of the _Simulation Table
    strSql = "SELECT ID as SimulationID, Name FROM _Simulations"
    dfSim = pd.read_sql_query(strSql, con, index_col = 'SimulationID')

    # split the 'Name' field into long, lat, variety and sowdate columns
    dfSim[['long','lat','variety','sowdate']] = \
    dfSim['Name'].str.extract("^(?P<long>\d+)_(?P<lat>-?\d+)_(?P<variety>\S+)_(?P<sowdate>\d+-\S+)$", expand=True)

    # format the columns
    pd.options.display.float_format = '{:,.2f}'.format
    dfSim['long'] = dfSim['long'].astype(float) / 100
    dfSim['lat'] = dfSim['lat'].astype(float) / 100

    # create a SimId column (as the original SimulationID is now an index column)
    dfSim['SimID'] = dfSim.index 

    return dfSim


In [16]:
def get_report_details(dbname):
    '''
    Opens the specified SQL Database and extracts the details from the Report
    Table, formats the columns correctly and returns a dataframe
    '''

    # connect to the Database
    con = sqlite3.connect(dbname)
    cur = con.cursor()

    # get contents of the Report Table
    #strSql = "SELECT SimulationID, substr([Clock.Today], 1, 10) as runDate, \
    #      [Wheat.Leaf.LAI] as LAI, [Wheat.AboveGround.Wt] as Biomass, \
    #      [Wheat.Grain.Wt] as Yield, [Wheat.Phenology.Zadok.Stage] as ZadokStage, \
    #      [Wheat.WaterSupplyDemandRatio] as WaterSupplyDemandRatio, \
    #      [Wheat.Root.NUptake] as RootNUptake, [Wheat.Leaf.Fn] as LeafFn \
    #      FROM Report \
    #      ORDER BY SimulationID, runDate"
    
    #Need to exclude consider CUTOFFS
    #Need to use DISTINCT AS THERE SEEMS TO BE SOME DUPLICATIONS IN TO DATA

    #do not need all of the columns, will cut this down from the get go
    strSql = "SELECT DISTINCT SimulationID, substr([Clock.Today], 1, 10) as runDate, \
          [Wheat.Leaf.LAI] as LAI, [Wheat.AboveGround.Wt] as Biomass, \
          [Wheat.Grain.Wt] as Yield, [Wheat.Phenology.Zadok.Stage] as ZadokStage, \
          [Wheat.WaterSupplyDemandRatio] as WaterSupplyDemandRatio \
          FROM Report \
          WHERE [Wheat.Phenology.Zadok.Stage] > 0 \
          ORDER BY SimulationID, runDate"    
    #print(datetime.datetime.now())
    dfReport = pd.read_sql_query(strSql, con, index_col="SimulationID" )
    #print(datetime.datetime.now())

    # format the date columns
    dfReport['runDate'] = pd.to_datetime(dfReport['runDate'], format="%Y-%m-%d")

    # create the SimId column
    dfReport['SimID'] = dfReport.index

    return dfReport


In [17]:
def get_filename(dbname):
    '''
    Takes the full path and filename for the database file, and creates the filename
    that is used for the weather file, and to save the output.

    Note:  cannot use the db filename as it doesn't have the long & lat that we require
           need to manipulate the filename to add the underscrore '_' char
           ONLY need to allow for negative (south) latitudes
    '''
    filename = os.path.basename(dbname)
    filename = os.path.splitext(filename)[0]
    nameparts = filename.split('-')
    filename = nameparts[0] + '_-' + nameparts[1]
    lat = '-' + nameparts[1]

    return filename, lat


In [18]:
def get_weather_details(filename, latitude):
    '''
    Retrieves the weather data for the location (long,lat) specified in the dbname,
    formats the data, and returns a dataframe
    '''

    fullfilename = metfile_sourcedir + "/c_" + filename + ".met"
    dfWeather = read_ApsimWeather(fullfilename, latitude)

    return dfWeather


## Process the data
The following forms a single function:  def process_Apsim_dbfile(dbname):  
#### NOTE:  dbname is defined at the top

In [19]:
print("processing file: ", dbname)
print("started at ", datetime.datetime.now())
filename, latitude = get_filename(dbname)

print("filename: ", filename)
print("latitude: ", latitude)


processing file:  /OSM/CBR/AG_WHEATTEMP/source/113.75-28.50.db
started at  2018-08-01 07:28:01.280039
filename:  113.75_-28.50
latitude:  -28.50


In [20]:
# retrieve the weather data from the weather '.met' file
dfWeather = get_weather_details(filename, latitude)
print(dfWeather.shape)
print(dfWeather.head(5))


(43038, 24)
   year  dayofYear  radiation  maxTemp  minTemp  rain    runDate  avgTemp  \
0  1900          1       32.0     32.4     13.2   0.0 1900-01-01     22.8   
1  1900          2       32.0     32.4     13.2   0.0 1900-01-02     22.8   
2  1900          3       32.0     32.4     13.2   0.0 1900-01-03     22.8   
3  1900          4       32.0     32.4     13.2   0.0 1900-01-04     22.8   
4  1900          5       32.0     32.4     13.2   0.0 1900-01-05     22.8   

   PARIO       radnJ    ...      daylength  hour      sinB           SC  \
0  15.04  32000000.0    ...      13.777211     0 -0.622426  1411.570690   
1  15.04  32000000.0    ...      13.769855     0 -0.623570  1411.683909   
2  15.04  32000000.0    ...      13.761885     0 -0.624811  1411.783888   
3  15.04  32000000.0    ...      13.753310     0 -0.626146  1411.870595   
4  15.04  32000000.0    ...      13.744134     0 -0.627575  1411.944007   

     sinINT        Ta       FDR      vpsl       ETpt    ApsimTT  
0  8.582

In [13]:
#this is so that we can check the calculations and see if everything is correct
#outfilename = apsim_outfiledir + "/" + filename + "_calcs.csv"
#dfWeather.to_csv(outfilename, encoding='utf-8', index=False)


In [21]:
#this will be part of the get_weather_details functionality, but is commented out so that I can check things
cols=['year', 'dayofYear', 'runDate', 'daylength', 'maxTemp', 'minTemp', 'avgTemp', 'ApsimTT', 'rain', \
      'PARIO', 'PQ', 'FDR', 'vpsl', 'ETpt']
dfWeather = dfWeather[cols] 
dfWeather

Unnamed: 0,year,dayofYear,runDate,daylength,maxTemp,minTemp,avgTemp,ApsimTT,rain,PARIO,FDR,vpsl,ETpt
0,1900,1,1900-01-01,13.777211,32.4,13.2,22.80,15.694127,0.0,15.040,0.146049,1.381701,15.670443
1,1900,2,1900-01-02,13.769855,32.4,13.2,22.80,15.694127,0.0,15.040,0.145442,1.381701,15.670443
2,1900,3,1900-01-03,13.761885,32.4,13.2,22.80,15.694127,0.0,15.040,0.144761,1.381701,15.670443
3,1900,4,1900-01-04,13.753310,32.4,13.2,22.80,15.694127,0.0,15.040,0.144007,1.381701,15.670443
4,1900,5,1900-01-05,13.744134,32.4,13.2,22.80,15.694127,0.0,15.040,0.143178,1.381701,15.670443
5,1900,6,1900-01-06,13.734367,32.4,13.2,22.80,15.694127,0.0,15.040,0.142274,1.381701,15.670443
6,1900,7,1900-01-07,13.724015,32.4,13.2,22.80,15.694127,0.0,15.040,0.141296,1.381701,15.670443
7,1900,8,1900-01-08,13.713087,32.4,13.2,22.80,15.694127,0.0,15.040,0.140241,1.381701,15.670443
8,1900,9,1900-01-09,13.701590,32.4,13.2,22.80,15.694127,0.0,15.040,0.139110,1.381701,15.670443
9,1900,10,1900-01-10,13.689535,32.4,13.2,22.80,15.694127,0.0,15.040,0.137902,1.381701,15.670443


In [22]:
# retrieve the Simulation Details from the DB._Sumulation table
dfSim = get_simulation_details(dbname) 
print(dfSim.shape)
print(dfSim.head(5))


(1071, 6)
                                      Name   long    lat     variety sowdate  \
SimulationID                                                                   
1                 11375_-2850_young_29-jul 113.75 -28.50       young  29-jul   
2             11375_-2850_agt_katana_1-apr 113.75 -28.50  agt_katana   1-apr   
3             11375_-2850_agt_scythe_1-apr 113.75 -28.50  agt_scythe   1-apr   
4               11375_-2850_annuello_1-apr 113.75 -28.50    annuello   1-apr   
5                 11375_-2850_aroona_1-apr 113.75 -28.50      aroona   1-apr   

              SimID  
SimulationID         
1                 1  
2                 2  
3                 3  
4                 4  
5                 5  


In [23]:
# retrieve the Details from the DB.Report table
dfReport = get_report_details(dbname)
print(dfReport.shape)
print(dfReport.head(5))


(19725106, 7)
                runDate  LAI  Biomass  Yield  ZadokStage  \
SimulationID                                               
1            1900-07-29 0.00     0.00   0.00        5.00   
1            1900-07-30 0.00     0.00   0.00        6.22   
1            1900-07-31 0.00     0.00   0.00        7.45   
1            1900-08-01 0.00     0.00   0.00        8.67   
1            1900-08-02 0.00     0.00   0.00        9.89   

              WaterSupplyDemandRatio  SimID  
SimulationID                                 
1                               1.00      1  
1                               1.00      1  
1                               1.00      1  
1                               1.00      1  
1                               1.00      1  


In [24]:
# combine the data with the Simulation details, so that we can get the sow date
# and filter it again
dfCombined = dfReport.merge(dfSim, on="SimID", how='left')
dfCombined

Unnamed: 0,runDate,LAI,Biomass,Yield,ZadokStage,WaterSupplyDemandRatio,SimID,Name,long,lat,variety,sowdate
0,1900-07-29,0.00,0.00,0.00,5.00,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
1,1900-07-30,0.00,0.00,0.00,6.22,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
2,1900-07-31,0.00,0.00,0.00,7.45,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
3,1900-08-01,0.00,0.00,0.00,8.67,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
4,1900-08-02,0.00,0.00,0.00,9.89,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
5,1900-08-03,0.05,2.14,0.00,30.30,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
6,1900-08-04,0.08,3.15,0.00,30.41,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
7,1900-08-05,0.12,4.70,0.00,30.50,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
8,1900-08-06,0.17,6.99,0.00,30.58,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul
9,1900-08-07,0.25,10.35,0.00,30.67,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul


In [25]:
# create a sowing date (with current year)
dfCombined['sowingdate'] = dfCombined['sowdate'] + '-' + dfCombined['runDate'].dt.year.map(str)
dfCombined['sowingdate'] = pd.to_datetime(dfCombined['sowingdate'], format="%d-%b-%Y")
dfCombined


Unnamed: 0,runDate,LAI,Biomass,Yield,ZadokStage,WaterSupplyDemandRatio,SimID,Name,long,lat,variety,sowdate,sowingdate
0,1900-07-29,0.00,0.00,0.00,5.00,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
1,1900-07-30,0.00,0.00,0.00,6.22,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
2,1900-07-31,0.00,0.00,0.00,7.45,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
3,1900-08-01,0.00,0.00,0.00,8.67,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
4,1900-08-02,0.00,0.00,0.00,9.89,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
5,1900-08-03,0.05,2.14,0.00,30.30,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
6,1900-08-04,0.08,3.15,0.00,30.41,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
7,1900-08-05,0.12,4.70,0.00,30.50,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
8,1900-08-06,0.17,6.99,0.00,30.58,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
9,1900-08-07,0.25,10.35,0.00,30.67,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29


In [26]:
# filter the data from the Report details that we are not going to be looking at
# ie, any data prior to our sow date for each year (should reduce size of data considerably)
dfCombined = dfCombined[(dfCombined['runDate'] >= dfCombined['sowingdate'])] 
dfCombined

Unnamed: 0,runDate,LAI,Biomass,Yield,ZadokStage,WaterSupplyDemandRatio,SimID,Name,long,lat,variety,sowdate,sowingdate
0,1900-07-29,0.00,0.00,0.00,5.00,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
1,1900-07-30,0.00,0.00,0.00,6.22,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
2,1900-07-31,0.00,0.00,0.00,7.45,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
3,1900-08-01,0.00,0.00,0.00,8.67,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
4,1900-08-02,0.00,0.00,0.00,9.89,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
5,1900-08-03,0.05,2.14,0.00,30.30,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
6,1900-08-04,0.08,3.15,0.00,30.41,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
7,1900-08-05,0.12,4.70,0.00,30.50,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
8,1900-08-06,0.17,6.99,0.00,30.58,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29
9,1900-08-07,0.25,10.35,0.00,30.67,1.00,1,11375_-2850_young_29-jul,113.75,-28.50,young,29-jul,1900-07-29


In [27]:
#re-order and filter columns
cols = ['SimID', 'long', 'lat', 'variety', 'sowdate', 'sowingdate', 'runDate', 'LAI', 'Biomass', 'Yield', \
        'ZadokStage', 'WaterSupplyDemandRatio']
dfCombined = dfCombined[cols] 
dfCombined


Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,LAI,Biomass,Yield,ZadokStage,WaterSupplyDemandRatio
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,0.00,0.00,0.00,5.00,1.00
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,0.00,0.00,0.00,6.22,1.00
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,0.00,0.00,0.00,7.45,1.00
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,0.00,0.00,0.00,8.67,1.00
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,0.00,0.00,0.00,9.89,1.00
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,0.05,2.14,0.00,30.30,1.00
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,0.08,3.15,0.00,30.41,1.00
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,0.12,4.70,0.00,30.50,1.00
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,0.17,6.99,0.00,30.58,1.00
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,0.25,10.35,0.00,30.67,1.00


In [28]:
# combine the report data with the weather data
dfCombined = dfCombined.merge(dfWeather, on='runDate', how='left')
dfCombined
# filter the data based on the information we want
#filterCols = ['SimID', 'runDate', 'ZadokStage', 'avgTemp']
#dfSubData = dfCombined[filterCols]


Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,LAI,Biomass,Yield,...,daylength,maxTemp,minTemp,avgTemp,ApsimTT,rain,PARIO,FDR,vpsl,ETpt
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,0.00,0.00,0.00,...,10.60,26.20,13.20,19.70,19.32,0.00,15.04,-0.99,1.22,15.58
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,0.00,0.00,0.00,...,10.62,26.20,13.20,19.70,19.32,0.00,15.04,-0.97,1.22,15.58
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,0.00,0.00,0.00,...,10.64,26.20,13.20,19.70,19.32,0.00,15.04,-0.96,1.22,15.58
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,0.00,0.00,0.00,...,10.66,26.20,13.20,19.70,19.32,0.00,15.04,-0.94,1.22,15.58
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,0.00,0.00,0.00,...,10.69,26.10,13.20,19.65,19.26,0.00,15.04,-0.93,1.22,15.58
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,0.05,2.14,0.00,...,10.71,26.10,13.20,19.65,19.26,0.00,15.04,-0.91,1.22,15.58
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,0.08,3.15,0.00,...,10.73,26.10,13.20,19.65,19.26,0.00,15.04,-0.90,1.22,15.58
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,0.12,4.70,0.00,...,10.75,26.10,13.20,19.65,19.26,0.00,15.04,-0.88,1.22,15.58
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,0.17,6.99,0.00,...,10.78,26.10,13.20,19.65,19.26,0.00,15.04,-0.86,1.22,15.58
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,0.25,10.35,0.00,...,10.80,26.10,13.20,19.65,19.26,0.00,15.04,-0.85,1.22,15.58


In [29]:
#create bins for the phases
bins = [0, 7, 10, 31, 39, 65, 71, 87, 89, 90]
group_names = ['01_Germinating', '02_Emerging', '03_Vegetative', '04_StemElongation',
               '05_EarlyReproductive', '06_GrainSet', '07_GrainFilling', '08_Maturing', '09_Ripening']
dfCombined['phases'] = pd.cut(dfCombined['ZadokStage'], bins, labels=group_names)
dfCombined

#might need to 

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,LAI,Biomass,Yield,...,maxTemp,minTemp,avgTemp,ApsimTT,rain,PARIO,FDR,vpsl,ETpt,phases
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,0.00,0.00,0.00,...,26.20,13.20,19.70,19.32,0.00,15.04,-0.99,1.22,15.58,01_Germinating
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,0.00,0.00,0.00,...,26.20,13.20,19.70,19.32,0.00,15.04,-0.97,1.22,15.58,01_Germinating
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,0.00,0.00,0.00,...,26.20,13.20,19.70,19.32,0.00,15.04,-0.96,1.22,15.58,02_Emerging
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,0.00,0.00,0.00,...,26.20,13.20,19.70,19.32,0.00,15.04,-0.94,1.22,15.58,02_Emerging
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,0.00,0.00,0.00,...,26.10,13.20,19.65,19.26,0.00,15.04,-0.93,1.22,15.58,02_Emerging
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,0.05,2.14,0.00,...,26.10,13.20,19.65,19.26,0.00,15.04,-0.91,1.22,15.58,03_Vegetative
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,0.08,3.15,0.00,...,26.10,13.20,19.65,19.26,0.00,15.04,-0.90,1.22,15.58,03_Vegetative
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,0.12,4.70,0.00,...,26.10,13.20,19.65,19.26,0.00,15.04,-0.88,1.22,15.58,03_Vegetative
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,0.17,6.99,0.00,...,26.10,13.20,19.65,19.26,0.00,15.04,-0.86,1.22,15.58,03_Vegetative
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,0.25,10.35,0.00,...,26.10,13.20,19.65,19.26,0.00,15.04,-0.85,1.22,15.58,03_Vegetative


In [30]:
# need to work out the Ripening phase
# starts where 90 is maturity (start of ripening stage)
dfCombined['cumApsimTT'] = dfCombined.groupby(by=['SimID','sowingdate'])['ApsimTT'].cumsum()
dfCombined

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,LAI,Biomass,Yield,...,minTemp,avgTemp,ApsimTT,rain,PARIO,FDR,vpsl,ETpt,phases,cumApsimTT
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,0.00,0.00,0.00,...,13.20,19.70,19.32,0.00,15.04,-0.99,1.22,15.58,01_Germinating,19.32
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,0.00,0.00,0.00,...,13.20,19.70,19.32,0.00,15.04,-0.97,1.22,15.58,01_Germinating,38.64
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,0.00,0.00,0.00,...,13.20,19.70,19.32,0.00,15.04,-0.96,1.22,15.58,02_Emerging,57.96
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,0.00,0.00,0.00,...,13.20,19.70,19.32,0.00,15.04,-0.94,1.22,15.58,02_Emerging,77.28
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,0.00,0.00,0.00,...,13.20,19.65,19.26,0.00,15.04,-0.93,1.22,15.58,02_Emerging,96.54
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,0.05,2.14,0.00,...,13.20,19.65,19.26,0.00,15.04,-0.91,1.22,15.58,03_Vegetative,115.80
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,0.08,3.15,0.00,...,13.20,19.65,19.26,0.00,15.04,-0.90,1.22,15.58,03_Vegetative,135.06
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,0.12,4.70,0.00,...,13.20,19.65,19.26,0.00,15.04,-0.88,1.22,15.58,03_Vegetative,154.32
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,0.17,6.99,0.00,...,13.20,19.65,19.26,0.00,15.04,-0.86,1.22,15.58,03_Vegetative,173.58
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,0.25,10.35,0.00,...,13.20,19.65,19.26,0.00,15.04,-0.85,1.22,15.58,03_Vegetative,192.84


In [31]:
#These are cumulative calculations for each phase
dfCombined['cumPhaseAvgTemp'] = dfCombined.groupby(by=['SimID','sowingdate', 'phases'])['avgTemp'].cumsum()
dfCombined['cumPhaseApsimTT'] = dfCombined.groupby(by=['SimID','sowingdate', 'phases'])['ApsimTT'].cumsum()
dfCombined['cumPhaseRain'] = dfCombined.groupby(by=['SimID','sowingdate', 'phases'])['rain'].cumsum()
dfCombined['cumPhaseETpt'] = dfCombined.groupby(by=['SimID','sowingdate', 'phases'])['ETpt'].cumsum()
dfCombined['cumPhasePARIO'] = dfCombined.groupby(by=['SimID','sowingdate', 'phases'])['PARIO'].cumsum()
dfCombined

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,LAI,Biomass,Yield,...,FDR,vpsl,ETpt,phases,cumApsimTT,cumPhaseAvgTemp,cumPhaseApsimTT,cumPhaseRain,cumPhaseETpt,cumPhasePARIO
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,0.00,0.00,0.00,...,-0.99,1.22,15.58,01_Germinating,19.32,19.70,19.32,0.00,15.58,15.04
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,0.00,0.00,0.00,...,-0.97,1.22,15.58,01_Germinating,38.64,39.40,38.64,0.00,31.15,30.08
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,0.00,0.00,0.00,...,-0.96,1.22,15.58,02_Emerging,57.96,19.70,19.32,0.00,15.58,15.04
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,0.00,0.00,0.00,...,-0.94,1.22,15.58,02_Emerging,77.28,39.40,38.64,0.00,31.15,30.08
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,0.00,0.00,0.00,...,-0.93,1.22,15.58,02_Emerging,96.54,59.05,57.90,0.00,46.73,45.12
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,0.05,2.14,0.00,...,-0.91,1.22,15.58,03_Vegetative,115.80,19.65,19.26,0.00,15.58,15.04
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,0.08,3.15,0.00,...,-0.90,1.22,15.58,03_Vegetative,135.06,39.30,38.52,0.00,31.15,30.08
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,0.12,4.70,0.00,...,-0.88,1.22,15.58,03_Vegetative,154.32,58.95,57.78,0.00,46.73,45.12
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,0.17,6.99,0.00,...,-0.86,1.22,15.58,03_Vegetative,173.58,78.60,77.04,0.00,62.30,60.16
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,0.25,10.35,0.00,...,-0.85,1.22,15.58,03_Vegetative,192.84,98.25,96.30,0.00,77.88,75.20


In [32]:
#output a subset so I can look at the data
cols = ['SimID', 'long', 'lat', 'variety', 'sowdate', 'sowingdate', 'runDate', 'year', 'dayofYear', \
        'daylength', 'ZadokStage', 'phases', 'maxTemp', 'minTemp', 'avgTemp', 'cumPhaseAvgTemp', \
        'ApsimTT', 'cumApsimTT', 'cumPhaseApsimTT', 'rain', 'cumPhaseRain', 'PARIO', 'cumPhasePARIO', \
        'FDR', 'vpsl', 'ETpt', 'cumPhaseETpt', 'LAI', 'Biomass', 'Yield','WaterSupplyDemandRatio']
dfCombined = dfCombined[cols]
dfCombined

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,year,dayofYear,daylength,...,PARIO,cumPhasePARIO,FDR,vpsl,ETpt,cumPhaseETpt,LAI,Biomass,Yield,WaterSupplyDemandRatio
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,1900,210,10.60,...,15.04,15.04,-0.99,1.22,15.58,15.58,0.00,0.00,0.00,1.00
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,1900,211,10.62,...,15.04,30.08,-0.97,1.22,15.58,31.15,0.00,0.00,0.00,1.00
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,1900,212,10.64,...,15.04,15.04,-0.96,1.22,15.58,15.58,0.00,0.00,0.00,1.00
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,1900,213,10.66,...,15.04,30.08,-0.94,1.22,15.58,31.15,0.00,0.00,0.00,1.00
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,1900,214,10.69,...,15.04,45.12,-0.93,1.22,15.58,46.73,0.00,0.00,0.00,1.00
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,1900,215,10.71,...,15.04,15.04,-0.91,1.22,15.58,15.58,0.05,2.14,0.00,1.00
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,1900,216,10.73,...,15.04,30.08,-0.90,1.22,15.58,31.15,0.08,3.15,0.00,1.00
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,1900,217,10.75,...,15.04,45.12,-0.88,1.22,15.58,46.73,0.12,4.70,0.00,1.00
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,1900,218,10.78,...,15.04,60.16,-0.86,1.22,15.58,62.30,0.17,6.99,0.00,1.00
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,1900,219,10.80,...,15.04,75.20,-0.85,1.22,15.58,77.88,0.25,10.35,0.00,1.00


In [None]:
subData = dfCombined.loc[dfCombined['SimID'].isin([2,3,4])]
outfilename = apsim_outfiledir + "/sim123_cum_" + filename + ".csv"
subData.to_csv(outfilename, encoding='utf-8', index=False)

In [35]:
#==============================================================
# NEED TO CONVERT THE phases column to string so that I can add stage 10
#==============================================================
#dfData = dfCombined
dfCombined['phases'].dtypes



CategoricalDtype(categories=['01_Germinating', '02_Emerging', '03_Vegetative',
                  '04_StemElongation', '05_EarlyReproductive', '06_GrainSet',
                  '07_GrainFilling', '08_Maturing', '09_Ripening'],
                 ordered=True)

In [36]:
dfCombined['phases'] = dfCombined['phases'].astype(str)
dfCombined['phases'].dtypes

dtype('O')

In [37]:
# filter the data 
#also where Zadok = 90 and cumPhaseApsimTT > 300 (except first record - need to keep this)
#first record to be labelled '10_Harvest'
dfCombined.loc[(dfCombined['ZadokStage'] == 90) & (dfCombined['cumPhaseApsimTT'] >= 300 ),'phases'] = '10_Harvest'
dfCombined

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,year,dayofYear,daylength,...,PARIO,cumPhasePARIO,FDR,vpsl,ETpt,cumPhaseETpt,LAI,Biomass,Yield,WaterSupplyDemandRatio
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,1900,210,10.60,...,15.04,15.04,-0.99,1.22,15.58,15.58,0.00,0.00,0.00,1.00
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,1900,211,10.62,...,15.04,30.08,-0.97,1.22,15.58,31.15,0.00,0.00,0.00,1.00
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,1900,212,10.64,...,15.04,15.04,-0.96,1.22,15.58,15.58,0.00,0.00,0.00,1.00
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,1900,213,10.66,...,15.04,30.08,-0.94,1.22,15.58,31.15,0.00,0.00,0.00,1.00
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,1900,214,10.69,...,15.04,45.12,-0.93,1.22,15.58,46.73,0.00,0.00,0.00,1.00
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,1900,215,10.71,...,15.04,15.04,-0.91,1.22,15.58,15.58,0.05,2.14,0.00,1.00
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,1900,216,10.73,...,15.04,30.08,-0.90,1.22,15.58,31.15,0.08,3.15,0.00,1.00
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,1900,217,10.75,...,15.04,45.12,-0.88,1.22,15.58,46.73,0.12,4.70,0.00,1.00
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,1900,218,10.78,...,15.04,60.16,-0.86,1.22,15.58,62.30,0.17,6.99,0.00,1.00
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,1900,219,10.80,...,15.04,75.20,-0.85,1.22,15.58,77.88,0.25,10.35,0.00,1.00


In [80]:
dfCombined.loc[dfCombined.groupby(['SimID','sowingdate','phases'])['cumPhaseApsimTT'].idxmin(),'firstHarvest'] = 1
dfCombined.loc[(dfCombined['phases'] != '10_Harvest' ),'firstHarvest'] = 1
dfCombined['firstHarvest'] = dfCombined.firstHarvest.fillna(0).astype(int)
dfCombined

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,year,dayofYear,daylength,...,cumPhasePARIO,FDR,vpsl,ETpt,cumPhaseETpt,LAI,Biomass,Yield,WaterSupplyDemandRatio,firstHarvest
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,1900,210,10.60,...,15.04,-0.99,1.22,15.58,15.58,0.00,0.00,0.00,1.00,1
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,1900,211,10.62,...,30.08,-0.97,1.22,15.58,31.15,0.00,0.00,0.00,1.00,1
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,1900,212,10.64,...,15.04,-0.96,1.22,15.58,15.58,0.00,0.00,0.00,1.00,1
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,1900,213,10.66,...,30.08,-0.94,1.22,15.58,31.15,0.00,0.00,0.00,1.00,1
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,1900,214,10.69,...,45.12,-0.93,1.22,15.58,46.73,0.00,0.00,0.00,1.00,1
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,1900,215,10.71,...,15.04,-0.91,1.22,15.58,15.58,0.05,2.14,0.00,1.00,1
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,1900,216,10.73,...,30.08,-0.90,1.22,15.58,31.15,0.08,3.15,0.00,1.00,1
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,1900,217,10.75,...,45.12,-0.88,1.22,15.58,46.73,0.12,4.70,0.00,1.00,1
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,1900,218,10.78,...,60.16,-0.86,1.22,15.58,62.30,0.17,6.99,0.00,1.00,1
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,1900,219,10.80,...,75.20,-0.85,1.22,15.58,77.88,0.25,10.35,0.00,1.00,1


In [81]:
dfCombined = dfCombined[(dfCombined['firstHarvest'] == 1)]
dfCombined

Unnamed: 0,SimID,long,lat,variety,sowdate,sowingdate,runDate,year,dayofYear,daylength,...,cumPhasePARIO,FDR,vpsl,ETpt,cumPhaseETpt,LAI,Biomass,Yield,WaterSupplyDemandRatio,firstHarvest
0,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-29,1900,210,10.60,...,15.04,-0.99,1.22,15.58,15.58,0.00,0.00,0.00,1.00,1
1,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-30,1900,211,10.62,...,30.08,-0.97,1.22,15.58,31.15,0.00,0.00,0.00,1.00,1
2,1,113.75,-28.50,young,29-jul,1900-07-29,1900-07-31,1900,212,10.64,...,15.04,-0.96,1.22,15.58,15.58,0.00,0.00,0.00,1.00,1
3,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-01,1900,213,10.66,...,30.08,-0.94,1.22,15.58,31.15,0.00,0.00,0.00,1.00,1
4,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-02,1900,214,10.69,...,45.12,-0.93,1.22,15.58,46.73,0.00,0.00,0.00,1.00,1
5,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-03,1900,215,10.71,...,15.04,-0.91,1.22,15.58,15.58,0.05,2.14,0.00,1.00,1
6,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-04,1900,216,10.73,...,30.08,-0.90,1.22,15.58,31.15,0.08,3.15,0.00,1.00,1
7,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-05,1900,217,10.75,...,45.12,-0.88,1.22,15.58,46.73,0.12,4.70,0.00,1.00,1
8,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-06,1900,218,10.78,...,60.16,-0.86,1.22,15.58,62.30,0.17,6.99,0.00,1.00,1
9,1,113.75,-28.50,young,29-jul,1900-07-29,1900-08-07,1900,219,10.80,...,75.20,-0.85,1.22,15.58,77.88,0.25,10.35,0.00,1.00,1


In [82]:
subData = dfCombined.loc[dfCombined['SimID'].isin([2,3,4])]
outfilename = apsim_outfiledir + "/sim123_phase10_" + filename + "b.csv"
subData.to_csv(outfilename, encoding='utf-8', index=False)

### Need to determine the phases: 
•	length (no of days in phase), minTemp, maxTemp, and avgTemp,  
•	the cumulativeAvgTemp at the end of each phase, (needs to be called TTAfterSowing)  
•	the counts for number of day where temperatures are below or above specified values (refer to SIP_Temp discussion.docs for further details)  
•	the average and cumulative rainfall   
•	average radiation, cumulative radiation  
•	average watersupplydemandratio


In [87]:
#should I add a column that can be a concatinated key
dfCombined['SimIDsowingdatephase'] = dfCombined['SimID'].map(str) + dfCombined['sowingdate'].map(str) + \
                                     dfCombined['phases']
#dfCombined.loc[(dfCombined['phases'] != '10_Harvest' ),'firstHarvest'] = 1

dfCombined = dfCombined.set_index('SimIDsowingdatephase')


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [136]:
#how many days in each of the phases
dfSummary = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSummary.columns = ['SimID', 'sowingdate', 'phases', 'dayCount']
#dfSummary

#what is the minimimum temperature for the phase
dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['minTemp'].min().reset_index()
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')
#dfSummary

#what is the maximum temperature for the phase
dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['maxTemp'].max().reset_index()
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')
#dfSummary

#what is the average of average temp for the phase
dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['avgTemp'].mean().reset_index()
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSummary


Unnamed: 0,SimID,sowingdate,phases,dayCount,minTemp,maxTemp,avgTemp
0,1,1900-07-29,01_Germinating,2,13.20,26.20,19.70
1,1,1900-07-29,02_Emerging,3,13.20,26.20,19.68
2,1,1900-07-29,03_Vegetative,8,13.20,26.10,19.63
3,1,1900-07-29,04_StemElongation,29,13.20,26.00,19.47
4,1,1900-07-29,05_EarlyReproductive,15,13.20,25.50,19.28
5,1,1900-07-29,06_GrainSet,7,13.20,25.20,19.19
6,1,1900-07-29,07_GrainFilling,35,13.20,25.10,19.00
7,1,1900-07-29,08_Maturing,2,13.20,24.50,18.85
8,1,1900-07-29,09_Ripening,16,13.20,24.50,18.77
9,1,1900-07-29,10_Harvest,1,13.20,24.20,18.70


In [137]:
dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['cumPhaseApsimTT'].max().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'cumApsimTT']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['cumPhaseRain'].min().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'cumRain']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['WaterSupplyDemandRatio'].mean().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'avgWaterSupplyDemandRatio']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['cumPhaseETpt'].max().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'cumETpt']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['cumPhasePARIO'].max().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'cumPARIO']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')


dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['PARIO'].mean().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'avgPARIO']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['FDR'].mean().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'avgFDR']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['PQ'].mean().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'avgPQ']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['daylength'].mean().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'avgdaylength']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].min().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'startDate']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].max().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'endDate']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['Biomass'].last().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'Biomass']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['LAI'].last().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'LAI']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined.groupby(by=['SimID', 'sowingdate', 'phases'])['Yield'].last().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'Yield']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSummary

Unnamed: 0,SimID,sowingdate,phases,dayCount,minTemp,maxTemp,avgTemp,cumApsimTT,cumRain,avgWaterSupplyDemandRatio,cumETpt,cumPARIO,avgPARIO,avgFDR,avgdaylength,startDate,endDate,Biomass,LAI,Yield
0,1,1900-07-29,01_Germinating,2,13.20,26.20,19.70,38.64,0.00,1.00,31.15,30.08,15.04,-0.98,10.61,1900-07-29,1900-07-30,0.00,0.00,0.00
1,1,1900-07-29,02_Emerging,3,13.20,26.20,19.68,57.90,0.00,1.00,46.73,45.12,15.04,-0.94,10.66,1900-07-31,1900-08-02,0.00,0.00,0.00
2,1,1900-07-29,03_Vegetative,8,13.20,26.10,19.63,153.90,0.00,1.00,124.59,120.32,15.04,-0.86,10.79,1900-08-03,1900-08-10,28.03,0.57,0.00
3,1,1900-07-29,04_StemElongation,29,13.20,26.00,19.47,552.44,0.00,1.00,451.50,436.16,15.04,-0.58,11.26,1900-08-11,1900-09-08,558.13,2.80,0.00
4,1,1900-07-29,05_EarlyReproductive,15,13.20,25.50,19.28,282.32,0.00,1.00,233.43,225.60,15.04,-0.30,11.87,1900-09-09,1900-09-23,972.46,2.80,0.00
5,1,1900-07-29,06_GrainSet,7,13.20,25.20,19.19,130.93,0.00,1.00,108.91,105.28,15.04,-0.19,12.18,1900-09-24,1900-09-30,1166.15,2.63,48.25
6,1,1900-07-29,07_GrainFilling,35,13.20,25.10,19.00,647.08,0.00,1.00,544.31,526.40,15.04,-0.04,12.75,1900-10-01,1900-11-04,1684.21,0.06,1027.91
7,1,1900-07-29,08_Maturing,2,13.20,24.50,18.85,36.61,0.00,1.00,31.09,30.08,15.04,0.06,13.22,1900-11-05,1900-11-06,1686.34,0.01,1027.91
8,1,1900-07-29,09_Ripening,16,13.20,24.50,18.77,291.24,0.00,1.00,248.68,240.64,15.04,0.09,13.41,1900-11-07,1900-11-22,1686.69,0.00,1027.91
9,1,1900-07-29,10_Harvest,1,13.20,24.20,18.70,309.37,0.00,1.00,264.22,255.68,15.04,0.11,13.57,1900-11-23,1900-11-23,1686.69,0.00,1027.91


In [138]:
#get the counts of various miniumum temperatures
dfSum = dfCombined[dfCombined['minTemp'] <= 0].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day<=0']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] <= -1].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day<=-1']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] <= -2].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day<=-2']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] <= -3].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day<=-3']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSummary


Unnamed: 0,SimID,sowingdate,phases,dayCount,minTemp,maxTemp,avgTemp,cumApsimTT,cumRain,avgWaterSupplyDemandRatio,...,avgdaylength,startDate,endDate,Biomass,LAI,Yield,day<=0,day<=-1,day<=-2,day<=-3
0,1,1900-07-29,01_Germinating,2,13.20,26.20,19.70,38.64,0.00,1.00,...,10.61,1900-07-29,1900-07-30,0.00,0.00,0.00,,,,
1,1,1900-07-29,02_Emerging,3,13.20,26.20,19.68,57.90,0.00,1.00,...,10.66,1900-07-31,1900-08-02,0.00,0.00,0.00,,,,
2,1,1900-07-29,03_Vegetative,8,13.20,26.10,19.63,153.90,0.00,1.00,...,10.79,1900-08-03,1900-08-10,28.03,0.57,0.00,,,,
3,1,1900-07-29,04_StemElongation,29,13.20,26.00,19.47,552.44,0.00,1.00,...,11.26,1900-08-11,1900-09-08,558.13,2.80,0.00,,,,
4,1,1900-07-29,05_EarlyReproductive,15,13.20,25.50,19.28,282.32,0.00,1.00,...,11.87,1900-09-09,1900-09-23,972.46,2.80,0.00,,,,
5,1,1900-07-29,06_GrainSet,7,13.20,25.20,19.19,130.93,0.00,1.00,...,12.18,1900-09-24,1900-09-30,1166.15,2.63,48.25,,,,
6,1,1900-07-29,07_GrainFilling,35,13.20,25.10,19.00,647.08,0.00,1.00,...,12.75,1900-10-01,1900-11-04,1684.21,0.06,1027.91,,,,
7,1,1900-07-29,08_Maturing,2,13.20,24.50,18.85,36.61,0.00,1.00,...,13.22,1900-11-05,1900-11-06,1686.34,0.01,1027.91,,,,
8,1,1900-07-29,09_Ripening,16,13.20,24.50,18.77,291.24,0.00,1.00,...,13.41,1900-11-07,1900-11-22,1686.69,0.00,1027.91,,,,
9,1,1900-07-29,10_Harvest,1,13.20,24.20,18.70,309.37,0.00,1.00,...,13.57,1900-11-23,1900-11-23,1686.69,0.00,1027.91,,,,


In [139]:
#get the counts of various maximum temperatures
dfSum = dfCombined[dfCombined['minTemp'] >= 30].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day>=30']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] >= 32].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day>=32']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] >= 34].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day>=34']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] >= 36].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day>=36']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] >= 38].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day>=38']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSum = dfCombined[dfCombined['minTemp'] >= 40].groupby(by=['SimID', 'sowingdate', 'phases'])['runDate'].count().reset_index()
dfSum.columns = ['SimID', 'sowingdate', 'phases', 'day>=40']
dfSummary = dfSummary.merge(dfSum, on=['SimID', 'sowingdate', 'phases'], how='left')

dfSummary


Unnamed: 0,SimID,sowingdate,phases,dayCount,minTemp,maxTemp,avgTemp,cumApsimTT,cumRain,avgWaterSupplyDemandRatio,...,day<=0,day<=-1,day<=-2,day<=-3,day>=30,day>=32,day>=34,day>=36,day>=38,day>=40
0,1,1900-07-29,01_Germinating,2,13.20,26.20,19.70,38.64,0.00,1.00,...,,,,,,,,,,
1,1,1900-07-29,02_Emerging,3,13.20,26.20,19.68,57.90,0.00,1.00,...,,,,,,,,,,
2,1,1900-07-29,03_Vegetative,8,13.20,26.10,19.63,153.90,0.00,1.00,...,,,,,,,,,,
3,1,1900-07-29,04_StemElongation,29,13.20,26.00,19.47,552.44,0.00,1.00,...,,,,,,,,,,
4,1,1900-07-29,05_EarlyReproductive,15,13.20,25.50,19.28,282.32,0.00,1.00,...,,,,,,,,,,
5,1,1900-07-29,06_GrainSet,7,13.20,25.20,19.19,130.93,0.00,1.00,...,,,,,,,,,,
6,1,1900-07-29,07_GrainFilling,35,13.20,25.10,19.00,647.08,0.00,1.00,...,,,,,,,,,,
7,1,1900-07-29,08_Maturing,2,13.20,24.50,18.85,36.61,0.00,1.00,...,,,,,,,,,,
8,1,1900-07-29,09_Ripening,16,13.20,24.50,18.77,291.24,0.00,1.00,...,,,,,,,,,,
9,1,1900-07-29,10_Harvest,1,13.20,24.20,18.70,309.37,0.00,1.00,...,,,,,,,,,,


In [142]:
subData = dfSummary.loc[dfSummary['SimID'].isin([2,3,4])]
outfilename = apsim_outfiledir + "/sim123_" + filename + "_summary.csv"
subData.to_csv(outfilename, encoding='utf-8', index=False)

In [140]:
outfilename = apsim_outfiledir + "/" + filename + "_summary.csv"
dfSummary.to_csv(outfilename, encoding='utf-8', index=False)
