## Working with pandas dataframes

I am trying to extract the name information using the columns (as a whole, if that makes sense).  I have a function works if I iterate through the rows and treat each 'Name' individually.

In [1]:
import numpy as np
import pandas as pd
import sqlite3
import os
import feather
import datetime

In [2]:
apsim_sourcedir = "/OSM/CBR/AG_WHEATTEMP/source"
apsim_outfiledir = "/OSM/CBR/AG_WHEATTEMP/work/output"
metfile_sourcedir = "/OSM/CBR/AG_WHEATTEMP/work/ApsimNG-test/APSIM_run/met"

In [3]:
dbfile_df = pd.DataFrame(columns=['filename'])
dbfile_df.filename = sorted(apsim_sourcedir+'/'+f for f in os.listdir(apsim_sourcedir) if f.endswith('.db'))
print(dbfile_df.head())

                                       filename
0  /OSM/CBR/AG_WHEATTEMP/source/113.60-28.30.db
1  /OSM/CBR/AG_WHEATTEMP/source/113.70-28.45.db
2  /OSM/CBR/AG_WHEATTEMP/source/113.70-28.50.db
3  /OSM/CBR/AG_WHEATTEMP/source/113.75-28.45.db
4  /OSM/CBR/AG_WHEATTEMP/source/113.75-28.50.db


In [4]:
#we are going to work with just the first filename
print(dbfile_df.filename[0])
con = sqlite3.connect(dbfile_df.filename[0])
cur = con.cursor()

/OSM/CBR/AG_WHEATTEMP/source/113.60-28.30.db


In [5]:
strSql = "SELECT ID as SimulationID, Name FROM _Simulations"
dfSim = pd.read_sql_query(strSql, con, index_col = 'SimulationID')
print(dfSim.shape)
dfSim

(1071, 1)


Unnamed: 0_level_0,Name
SimulationID,Unnamed: 1_level_1
1,11360_-2830_young_29-jul
2,11360_-2830_agt_katana_1-apr
3,11360_-2830_agt_scythe_1-apr
4,11360_-2830_annuello_1-apr
5,11360_-2830_aroona_1-apr
6,11360_-2830_arrino_1-apr
7,11360_-2830_attila_1-apr
8,11360_-2830_axe_1-apr
9,11360_-2830_banks_1-apr
10,11360_-2830_batavia_1-apr


In [6]:
#filename = apsim_outfiledir + "/simNames.csv"
#dfSim.to_csv(filename, encoding='utf-8', index=False)

While the above works nicely, it may not be the best way to achieve this, might need to do some testing when dealing with more data.  So I will use regular expressions instead.

In [7]:
#a more optimal way to achieve the above
dfSim[['long','lat','variety','sowdate']] = dfSim['Name'].str.extract("^(?P<long>\d+)_(?P<lat>-?\d+)_(?P<variety>\S+)_(?P<sowdate>\d+-\S+)$", expand=True)
dfSim

Unnamed: 0_level_0,Name,long,lat,variety,sowdate
SimulationID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,11360_-2830_young_29-jul,11360,-2830,young,29-jul
2,11360_-2830_agt_katana_1-apr,11360,-2830,agt_katana,1-apr
3,11360_-2830_agt_scythe_1-apr,11360,-2830,agt_scythe,1-apr
4,11360_-2830_annuello_1-apr,11360,-2830,annuello,1-apr
5,11360_-2830_aroona_1-apr,11360,-2830,aroona,1-apr
6,11360_-2830_arrino_1-apr,11360,-2830,arrino,1-apr
7,11360_-2830_attila_1-apr,11360,-2830,attila,1-apr
8,11360_-2830_axe_1-apr,11360,-2830,axe,1-apr
9,11360_-2830_banks_1-apr,11360,-2830,banks,1-apr
10,11360_-2830_batavia_1-apr,11360,-2830,batavia,1-apr


In [8]:
#now format the longitude and latitude columns correctly and
#add the SimID column (as we cannot work with the index column)
pd.options.display.float_format = '{:,.2f}'.format
dfSim['long'] = dfSim['long'].astype(float) / 100
dfSim['lat'] = dfSim['lat'].astype(float) / 100
dfSim['SimID'] = dfSim.index

In [9]:
print(dfSim.shape)
dfSim

(1071, 6)


Unnamed: 0_level_0,Name,long,lat,variety,sowdate,SimID
SimulationID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul,1
2,11360_-2830_agt_katana_1-apr,113.60,-28.30,agt_katana,1-apr,2
3,11360_-2830_agt_scythe_1-apr,113.60,-28.30,agt_scythe,1-apr,3
4,11360_-2830_annuello_1-apr,113.60,-28.30,annuello,1-apr,4
5,11360_-2830_aroona_1-apr,113.60,-28.30,aroona,1-apr,5
6,11360_-2830_arrino_1-apr,113.60,-28.30,arrino,1-apr,6
7,11360_-2830_attila_1-apr,113.60,-28.30,attila,1-apr,7
8,11360_-2830_axe_1-apr,113.60,-28.30,axe,1-apr,8
9,11360_-2830_banks_1-apr,113.60,-28.30,banks,1-apr,9
10,11360_-2830_batavia_1-apr,113.60,-28.30,batavia,1-apr,10


### Now retrieve the Report Data

We can use the simIds that were collected above

In [10]:
#This should return all of the columns
#strSql = "SELECT * FROM Report \
#          ORDER BY SimulationID"

strSql = "SELECT SimulationID, substr([Clock.Today], 1, 10) as runDate, [Wheat.Leaf.LAI] as LeafLAI, \
          [Wheat.AboveGround.Wt] as AboveGroundWeight, [Wheat.Grain.Wt] as GrainWeight, \
          [Wheat.Phenology.Zadok.Stage] as ZadokStage, [Wheat.WaterSupplyDemandRatio] as WaterSupplyDemandRatio, \
          [Wheat.Root.NUptake] as RootNUptake, [Wheat.Leaf.Fn] as LeafFn \
          FROM Report \
          ORDER BY SimulationID, runDate"
print(strSql)

SELECT SimulationID, substr([Clock.Today], 1, 10) as runDate, [Wheat.Leaf.LAI] as LeafLAI,           [Wheat.AboveGround.Wt] as AboveGroundWeight, [Wheat.Grain.Wt] as GrainWeight,           [Wheat.Phenology.Zadok.Stage] as ZadokStage, [Wheat.WaterSupplyDemandRatio] as WaterSupplyDemandRatio,           [Wheat.Root.NUptake] as RootNUptake, [Wheat.Leaf.Fn] as LeafFn           FROM Report           ORDER BY SimulationID, runDate


In [None]:
print(datetime.datetime.now())
dfReport = pd.read_sql_query(strSql, con, \
                       index_col="SimulationID"
                      )
print(datetime.datetime.now())
print(dfReport.shape)
dfReport

2018-07-17 14:09:51.067330


In [None]:
# Need to re-format the (string) runDate column to a 'date' column
# and create the SimID column, both of which are used to merge with other dataframes 
dfReport['runDate'] = pd.to_datetime(dfReport['runDate'], format="%Y-%m-%d")
dfReport['SimID'] = dfReport.index
dfReport

### Lets retrieve the weather data and format it as required

In [None]:
print(dbfile_df.filename[0])

In [None]:
def get_weather_filename(sourceDir, dbfilename):
    '''
    takes in the sourceDirectory for the met files, and the SQLite database filename
    and returns the fullpath/filename of the met file
    '''
    
    #apsim_outfiledir 
    #K:\ApsimNG-test\APSIM_run\met
    filename = os.path.basename(dbfilename)
    filename = os.path.splitext(filename)[0]
    nameparts = filename.split('-')
    #cannot use the db filename as it doesn't have the long & lat that we require
    #need to manipulate the filename to add the underscrore '_' char
    filename = nameparts[0] + '_-' + nameparts[1]
    #filename = "115.20_-29.35"
    filename = sourceDir + "/c_" + filename + ".met"
    print(filename)
    #print("/OSM/CBR/AG_WHEATTEMP/work/ApsimNG-test/APSIM_run/met/")
    
    return filename

In [None]:
def compose_date(years, months=1, days=1, weeks=None, hours=None, minutes=None,
                 seconds=None, milliseconds=None, microseconds=None, nanoseconds=None):
    '''
    builds a date based on a year and day of year
    '''
    
    years = np.asarray(years) - 1970
    months = np.asarray(months) - 1
    days = np.asarray(days) - 1
    types = ('<M8[Y]', '<m8[M]', '<m8[D]', '<m8[W]', '<m8[h]',
             '<m8[m]', '<m8[s]', '<m8[ms]', '<m8[us]', '<m8[ns]')
    vals = (years, months, days, weeks, hours, minutes, seconds,
            milliseconds, microseconds, nanoseconds)
    
    return sum(np.asarray(v, dtype=t) for t, v in zip(types, vals)
               if v is not None)

    #df = pd.DataFrame({'doy': [49, 65, 81, 97, 113, 129, 145, 161],
    #               'year': [2000, 2000, 2000, 2001, 2001, 2001, 2001, 2001]})

In [18]:
def read_ApsimWeather(filename):
    '''
    Reads an apsim weather ('.met') file, removes the header information,
    calculates and adds a date column (based on year and day), and the
    average temperature (based on maxt and mint).
    '''

    lineNo = 0
    with open(filename, "r") as f:
        for line in f:
            lineNo = lineNo + 1
            if line.startswith('year'):
                break;

    # return the data using the starting line no (determined above)
    # original column names=['year','day', 'radn', 'maxt', 'mint', 'rain']
    metData = pd.read_table(filename, sep='\s+', header=None, skiprows=lineNo+1,
                            names=['year','dayofYear', 'radiation', 'maxTemp', 'minTemp', 'rain'])

    # add the calculated columns
    metData['runDate'] = pd.to_datetime(metData['year'].astype(str) + " " + metData['dayofYear'].astype(str),  format="%Y %j")

    # this may need to be the thermal time, not just average temp
    metData['avgTemp'] = (metData['maxTemp'] + metData['minTemp']) / 2

    # sort the columns to be a little more logical
    cols=['year', 'dayofYear', 'runDate', 'maxTemp', 'minTemp', 'avgTemp', 'rain', 'radiation']
    metData = metData[cols]

    return metData


In [19]:
filename = get_weather_filename(metfile_sourcedir, dbfile_df.filename[0])
metData = read_ApsimWeather(filename)
print(metData.shape)
metData


/OSM/CBR/AG_WHEATTEMP/work/ApsimNG-test/APSIM_run/met/c_113.60_-28.30.met
(43038, 8)


Unnamed: 0,year,dayofYear,runDate,maxTemp,minTemp,avgTemp,rain,radiation
0,1900,1,1900-01-01,31.80,13.40,22.60,0.00,24.30
1,1900,2,1900-01-02,31.80,13.40,22.60,0.00,24.30
2,1900,3,1900-01-03,31.80,13.40,22.60,0.00,24.30
3,1900,4,1900-01-04,31.80,13.40,22.60,0.00,24.30
4,1900,5,1900-01-05,31.80,13.40,22.60,0.00,24.30
5,1900,6,1900-01-06,31.80,13.40,22.60,0.00,24.30
6,1900,7,1900-01-07,31.80,13.40,22.60,0.00,24.30
7,1900,8,1900-01-08,31.80,13.40,22.60,0.00,24.30
8,1900,9,1900-01-09,31.80,13.40,22.60,0.00,24.30
9,1900,10,1900-01-10,31.80,13.40,22.60,0.00,24.30


### Now need to merge the report data with the weather data

In [20]:
#dfReport.runDate = pd.to_datetime(dfReport.runDate, format="%Y-%m-%d")
dfCombined = dfReport.merge(metData, on='runDate', how='left')
print(dfCombined.shape)
dfCombined

(46093698, 16)


Unnamed: 0,runDate,LeafLAI,AboveGroundWeight,GrainWeight,ZadokStage,WaterSupplyDemandRatio,RootNUptake,LeafFn,SimID,year,dayofYear,maxTemp,minTemp,avgTemp,rain,radiation
0,1900-01-01,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,1,31.80,13.40,22.60,0.00,24.30
1,1900-01-01,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,1,31.80,13.40,22.60,0.00,24.30
2,1900-01-02,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,2,31.80,13.40,22.60,0.00,24.30
3,1900-01-02,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,2,31.80,13.40,22.60,0.00,24.30
4,1900-01-03,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,3,31.80,13.40,22.60,0.00,24.30
5,1900-01-03,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,3,31.80,13.40,22.60,0.00,24.30
6,1900-01-04,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,4,31.80,13.40,22.60,0.00,24.30
7,1900-01-04,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,4,31.80,13.40,22.60,0.00,24.30
8,1900-01-05,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,5,31.80,13.40,22.60,0.00,24.30
9,1900-01-05,0.00,0.00,0.00,0.00,1.00,0.00,0.00,1,1900,5,31.80,13.40,22.60,0.00,24.30


In [21]:
#dfCombined = pd.DataFrame(dfCombined)
filterCols = ['SimID', 'runDate', 'ZadokStage', 'avgTemp']
subData = dfCombined[filterCols]
subData

Unnamed: 0,SimID,runDate,ZadokStage,avgTemp
0,1,1900-01-01,0.00,22.60
1,1,1900-01-01,0.00,22.60
2,1,1900-01-02,0.00,22.60
3,1,1900-01-02,0.00,22.60
4,1,1900-01-03,0.00,22.60
5,1,1900-01-03,0.00,22.60
6,1,1900-01-04,0.00,22.60
7,1,1900-01-04,0.00,22.60
8,1,1900-01-05,0.00,22.60
9,1,1900-01-05,0.00,22.60


### Now add back the simulation details so that we can get sowdate for each simulation

In [22]:
#dfSim
subData = subData.merge(dfSim, on="SimID", how='left')
subData

Unnamed: 0,SimID,runDate,ZadokStage,avgTemp,Name,long,lat,variety,sowdate
0,1,1900-01-01,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
1,1,1900-01-01,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
2,1,1900-01-02,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
3,1,1900-01-02,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
4,1,1900-01-03,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
5,1,1900-01-03,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
6,1,1900-01-04,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
7,1,1900-01-04,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
8,1,1900-01-05,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul
9,1,1900-01-05,0.00,22.60,11360_-2830_young_29-jul,113.60,-28.30,young,29-jul


In [23]:
#get a subset of the data
filterCols = ['SimID', 'runDate', 'ZadokStage', 'avgTemp', 'sowdate']
subData = subData[filterCols]
subData

Unnamed: 0,SimID,runDate,ZadokStage,avgTemp,sowdate
0,1,1900-01-01,0.00,22.60,29-jul
1,1,1900-01-01,0.00,22.60,29-jul
2,1,1900-01-02,0.00,22.60,29-jul
3,1,1900-01-02,0.00,22.60,29-jul
4,1,1900-01-03,0.00,22.60,29-jul
5,1,1900-01-03,0.00,22.60,29-jul
6,1,1900-01-04,0.00,22.60,29-jul
7,1,1900-01-04,0.00,22.60,29-jul
8,1,1900-01-05,0.00,22.60,29-jul
9,1,1900-01-05,0.00,22.60,29-jul


In [24]:
#how to drop a single column
#subData.drop('date1', axis=1)

#not sure which is the quickest way to do this:
subData['sowingdate'] = subData['sowdate'] + '-' + subData['runDate'].dt.year.map(str)
subData['sowingdate'] = pd.to_datetime(subData['sowingdate'], format="%d-%b-%Y")
#or
#subData.runDate = pd.to_datetime(subData.runDate, format="%d/%m/%Y")
#subData['runDate'] = pd.to_datetime(subData['runDate']).apply(lambda x:x.strftime('%d/%m/%Y'))
#subData['sowingdate'] = pd.to_datetime(subData.sowdate.astype(str).str.cat(subData.runDate.dt.year.map(str), sep='-'), format="%d-%b-%Y")

#This is used if we want to format the date ddifferently
#subData['sowingdate'] = pd.to_datetime(subData['sowingdate']).apply(lambda x:x.strftime('%d/%m/%Y'))
subData


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,SimID,runDate,ZadokStage,avgTemp,sowdate,sowingdate
0,1,1900-01-01,0.00,22.60,29-jul,1900-07-29
1,1,1900-01-01,0.00,22.60,29-jul,1900-07-29
2,1,1900-01-02,0.00,22.60,29-jul,1900-07-29
3,1,1900-01-02,0.00,22.60,29-jul,1900-07-29
4,1,1900-01-03,0.00,22.60,29-jul,1900-07-29
5,1,1900-01-03,0.00,22.60,29-jul,1900-07-29
6,1,1900-01-04,0.00,22.60,29-jul,1900-07-29
7,1,1900-01-04,0.00,22.60,29-jul,1900-07-29
8,1,1900-01-05,0.00,22.60,29-jul,1900-07-29
9,1,1900-01-05,0.00,22.60,29-jul,1900-07-29


### now calculate the cumulative temp info for each simulation

For each SimId, cumulative add the avgt (average Temperature) values between the sowdate and the current date (where the date is > sowdate and the ZadokStage < 70


In [25]:
# Create a copy of the current dataframe so that it can be reset easily
dfSub2 = subData.copy()
dfSub2

Unnamed: 0,SimID,runDate,ZadokStage,avgTemp,sowdate,sowingdate
0,1,1900-01-01,0.00,22.60,29-jul,1900-07-29
1,1,1900-01-01,0.00,22.60,29-jul,1900-07-29
2,1,1900-01-02,0.00,22.60,29-jul,1900-07-29
3,1,1900-01-02,0.00,22.60,29-jul,1900-07-29
4,1,1900-01-03,0.00,22.60,29-jul,1900-07-29
5,1,1900-01-03,0.00,22.60,29-jul,1900-07-29
6,1,1900-01-04,0.00,22.60,29-jul,1900-07-29
7,1,1900-01-04,0.00,22.60,29-jul,1900-07-29
8,1,1900-01-05,0.00,22.60,29-jul,1900-07-29
9,1,1900-01-05,0.00,22.60,29-jul,1900-07-29


In [None]:
#save this to a csv, so we can view elsewhere
#outCols = ['SimID', 'sowdate', 'sowingdate', 'runDate', 'ZadokStage', 'avgt']
#outSub = dfSub2[outCols]
#filename = apsim_outfiledir + "/sim57_zadok.csv"
#outSub.to_csv(filename, encoding='utf-8', index=False)

In [26]:
dfSub2['tempavgTemp'] = dfSub2['avgTemp'].where((dfSub2['runDate'] >= dfSub2['sowingdate']) & (dfSub2['ZadokStage'] > 0) & (dfSub2['ZadokStage'] <= 70), 0)
dfSub2['cumAvgTemp'] = dfSub2.groupby(by=['SimID','sowingdate'])['tempavgTemp'].cumsum()
dfSub2

Unnamed: 0,SimID,runDate,ZadokStage,avgTemp,sowdate,sowingdate,tempavgTemp,cumAvgTemp
0,1,1900-01-01,0.00,22.60,29-jul,1900-07-29,0.00,0.00
1,1,1900-01-01,0.00,22.60,29-jul,1900-07-29,0.00,0.00
2,1,1900-01-02,0.00,22.60,29-jul,1900-07-29,0.00,0.00
3,1,1900-01-02,0.00,22.60,29-jul,1900-07-29,0.00,0.00
4,1,1900-01-03,0.00,22.60,29-jul,1900-07-29,0.00,0.00
5,1,1900-01-03,0.00,22.60,29-jul,1900-07-29,0.00,0.00
6,1,1900-01-04,0.00,22.60,29-jul,1900-07-29,0.00,0.00
7,1,1900-01-04,0.00,22.60,29-jul,1900-07-29,0.00,0.00
8,1,1900-01-05,0.00,22.60,29-jul,1900-07-29,0.00,0.00
9,1,1900-01-05,0.00,22.60,29-jul,1900-07-29,0.00,0.00


In [27]:
newData = dfSub2[dfSub2['tempavgTemp'] > 0]
newData

Unnamed: 0,SimID,runDate,ZadokStage,avgTemp,sowdate,sowingdate,tempavgTemp,cumAvgTemp
418,1,1900-07-29,5.00,19.90,29-jul,1900-07-29,19.90,19.90
419,1,1900-07-29,5.00,19.90,29-jul,1900-07-29,19.90,39.80
420,1,1900-07-30,6.23,19.85,29-jul,1900-07-29,19.85,59.65
421,1,1900-07-30,6.23,19.85,29-jul,1900-07-29,19.85,79.50
422,1,1900-07-31,7.47,19.85,29-jul,1900-07-29,19.85,99.35
423,1,1900-07-31,7.47,19.85,29-jul,1900-07-29,19.85,119.20
424,1,1900-08-01,8.70,19.85,29-jul,1900-07-29,19.85,139.05
425,1,1900-08-01,8.70,19.85,29-jul,1900-07-29,19.85,158.90
426,1,1900-08-02,9.93,19.85,29-jul,1900-07-29,19.85,178.75
427,1,1900-08-02,9.93,19.85,29-jul,1900-07-29,19.85,198.60


In [None]:
#newData3 = newData.groupby(['SimID','sowingdate'])['cumAvgTemp'].max()
#newData3

In [28]:
newData4 = newData.groupby(['SimID','sowdate'])['cumAvgTemp'].max().reset_index()
newData4
#newData4['sowingdate'] = newData4.sowdate.dt.strftime('%d-%b')
#newData4 


Unnamed: 0,SimID,sowdate,cumAvgTemp
0,1,29-jul,2914.80
1,2,1-apr,1341.75
2,3,1-apr,1697.30
3,4,1-apr,1540.35
4,5,1-apr,1414.75
5,6,1-apr,1500.30
6,7,1-apr,1612.40
7,8,1-apr,1268.65
8,9,1-apr,1447.95
9,10,1-apr,1810.85


In [29]:
newData5 = newData4.groupby(['SimID','sowdate'])['cumAvgTemp'].mean().reset_index()
newData5

Unnamed: 0,SimID,sowdate,cumAvgTemp
0,1,29-jul,2914.80
1,2,1-apr,1341.75
2,3,1-apr,1697.30
3,4,1-apr,1540.35
4,5,1-apr,1414.75
5,6,1-apr,1500.30
6,7,1-apr,1612.40
7,8,1-apr,1268.65
8,9,1-apr,1447.95
9,10,1-apr,1810.85


In [32]:
newData6 = newData5.merge(dfSim, on=['SimID', 'sowdate'], how='left')
filterCols = ['SimID', 'long', 'lat', 'variety', 'sowdate', 'cumAvgTemp']
newData6 = newData6[filterCols]

In [None]:
outfilename = apsim_outfiledir + "/" + filename + "_zadok.csv"
newData5.to_csv(filename, encoding='utf-8', index=False)

In [None]:
#to append to the file, need to use mode='a'
#newData5.to_csv(filename, header=False, mode='a')

In [33]:
newData6

Unnamed: 0,SimID,long,lat,variety,sowdate,cumAvgTemp
0,1,113.60,-28.30,young,29-jul,2914.80
1,2,113.60,-28.30,agt_katana,1-apr,1341.75
2,3,113.60,-28.30,agt_scythe,1-apr,1697.30
3,4,113.60,-28.30,annuello,1-apr,1540.35
4,5,113.60,-28.30,aroona,1-apr,1414.75
5,6,113.60,-28.30,arrino,1-apr,1500.30
6,7,113.60,-28.30,attila,1-apr,1612.40
7,8,113.60,-28.30,axe,1-apr,1268.65
8,9,113.60,-28.30,banks,1-apr,1447.95
9,10,113.60,-28.30,batavia,1-apr,1810.85
