In [54]:
import sys
import pathlib
import os
#sys.path.append("C:\\Users\\mnowatz\\Documents\\Dev\\aepe")
#print(sys.path)
import psycopg2
import requests
import geopandas as gpd
import pandas as pd
import numpy as np
import database as db
import re
from xml.etree.ElementTree import ElementTree, Element, SubElement
import io
import json
import openpyxl
import csv
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles.alignment import Alignment
import apsim.apsim.wrapper as apsim
from apsim.apsim.daymet import Weather
from apsim.apsim_input_writer import get_date, add_management_year
from apsim import run_apsim
import traceback

In [14]:
# Open the database connection configured by database.ini; later cells pass
# `dbconn` to every query helper.
dbconn = db.connect_to_db('database.ini')
# Schema-qualified table that the queries in this notebook read from.
raccoon_2018 = 'raccoon.raccoon_clu_ssurgo_2018'
# Earlier exploratory queries, left disabled:
#raccoon = pd.read_sql(f"SELECT * FROM {raccoon_2018} LIMIT 200;", dbconn)
#greene = pd.read_sql(f"SELECT * FROM {raccoon_2018} WHERE County='Greene';", dbconn)
#greene_soils = pd.read_sql(f"SELECT DISTINCT mukey FROM {raccoon_2018} where County='Greene';", dbconn)
#field_25 = pd.read_sql(f"SELECT * FROM {raccoon_2018} WHERE clukey=2515723;", dbconn)

In [15]:
def _load_crop_json(path):
    """Read one crop-management JSON file and return the parsed object."""
    # The context manager closes the handle as soon as parsing is done; the
    # previous open(...).read() never closed it explicitly.
    with open(path, 'r') as crop_file:
        return json.load(crop_file)


# Management task lists handed to add_management_year when the APSIM files
# are built further down.
spin_up_corn = _load_crop_json('crop_jsons/maize.json')
spin_up_soybean = _load_crop_json('crop_jsons/soybean.json')

### Step 1: Get the counties we are interested in.

In [None]:
def get_distinct(dbconn, table, id_column):
    '''
    Returns a list of all unique entries in a column.

    Args:
        dbconn {database connection} -- connection handed to pandas.read_sql
        table {str} -- table name; interpolated into the SQL text as-is
        id_column {str} -- column of interest; interpolated into the SQL text as-is
    Returns:
        list of all unique entries in the table column
    '''
    entries = pd.read_sql(f'SELECT DISTINCT {id_column} FROM {table};', dbconn)
    # The query selects exactly one column, so take it by position. Looking
    # it up by the caller's spelling raised KeyError whenever the database
    # reported the name differently (for example folded to lower case).
    return entries.iloc[:, 0].tolist()

In [None]:
def get_county(dbconn, table, fips, geom, limit=False, limit_num=100):
    '''Load the rows of one county as a geopandas dataframe.

    Example query: SELECT * FROM raccoon.raccoon_clu_ssurgo_2018 WHERE fips = 'IA021';

    Args:
        dbconn {database connection} -- connection to postgresql database
        table {str} -- name of the table holding the geometries
        fips {str} -- fips id of the desired county eg. 'IA021'
        geom {str} -- column name that contains shape geometry
        limit {bool} -- when True, append a LIMIT clause to the query
        limit_num {int} -- row cap used when limit is True
    Returns:
        geopandas dataframe with county info
    '''
    # Build the statement piece by piece so the optional LIMIT is the only
    # part that differs between the two variants.
    pieces = [f'SELECT * FROM {table} WHERE fips = \'{fips}\'']
    if limit:
        pieces.append(f' LIMIT {limit_num}')
    pieces.append(';')
    statement = ''.join(pieces)
    return gpd.read_postgis(statement, dbconn, geom_col=geom)

In [None]:
#buena_vista = get_county(dbconn, 'raccoon.raccoon_clu_ssurgo_2018', 'IA021', "wkb_geometry")

### Step 2: Get the county centroid for creating APSIM met files

In [None]:
def get_centroid(geodf, id, geometry):
    '''
    Locate the centroid of the shapes that share one id value.

    Args:
        geodf {dataframe} -- geopandas dataframe
        id {string} -- column to dissolve on (e.g., 'fips' for the county column)
        geometry {string} -- geopd column with geometries

    Returns:
        {np.array} -- two values taken from the centroid, ordered [y, x]
        (the original notes call this lat, long). When the dissolve yields
        several groups only the first one is returned.
    '''
    # Merge all shapes carrying the same id into one geometry per id value.
    merged = geodf[[id, geometry]].dissolve(by=id)
    points = merged[geometry].centroid
    # One (x, y) row per dissolved group.
    xy_pairs = np.column_stack((points.x, points.y))
    first_x, first_y = xy_pairs[0]
    # Callers expect the pair flipped to (y, x).
    return np.array([first_y, first_x])

In [None]:
#bv_centroid = get_centroid(buena_vista, 'fips', "wkb_geometry")

In [None]:
def create_met(lat, long, start_year, end_year, filename, path='apsim_files/met_files'):
    '''
    Get the weather for given centroid and write to a .met file

    Args:
        lat {float} -- latitude of centroid
        long {float} -- longitude of centroid
        start_year {int} -- starting year of weather data
        end_year {int} -- ending year of weather data
        filename {str} -- name to give the .met file (without extension)
        path {str} -- folder to write the met file into; created if missing

    Returns:
        None
    '''
    # Create the target folder on demand; previously the write failed on a
    # fresh checkout where the folder did not exist yet.
    os.makedirs(path, exist_ok=True)
    weather_obj = Weather().from_daymet(lat, long, start_year, end_year)
    weather_obj.write_met_file(f'{path}/{filename}.met')

In [None]:
# greene = get_county(dbconn, 'raccoon.raccoon_clu_ssurgo_2018', 'IA073', "wkb_geometry")
# greene_centroid = get_centroid(greene, 'fips', "wkb_geometry")
# create_met(greene_centroid[0], greene_centroid[1], 2012, 2019, 'greene')

In [16]:
def create_excel_met(lat, long, start_year, end_year, county_name):
    '''
    Fetch Daymet weather for a point and save it as an APSIM-style workbook.

    The sheet starts with the met header lines ([weather.met.weather],
    station name, latitude, longitude, tav, amp, a comment line), followed by
    the column names, a row of units and the daily records. It is saved to
    apsim_files/{county_name}/met_files/{county_name}.xlsx.

    Args:
        lat {float} -- latitude of the point
        long {float} -- longitude of the point
        start_year {int} -- first year of weather data to request
        end_year {int} -- last year of weather data to request
        county_name {str} -- used for the output folder and file name

    Returns:
        None
    '''
    # Honour the requested period; it used to be hard-coded to 1980-2019,
    # which silently ignored both year arguments.
    wth_obj = Weather().from_daymet(lat, long, start_year, end_year)
    wth_df = wth_obj.data
    # NOTE(review): both statistics are derived from 'maxt' alone (its mean
    # and its maximum). Confirm that this is the intended definition of the
    # tav / amp values expected in a met file.
    tav = round(wth_df['maxt'].mean(), 1)
    amp = round(wth_df['maxt'].max(), 1)

    header_lines = [
        '[weather.met.weather]',
        'stationname = Daymet weather',
        f'latitude = {lat} (DECIMAL DEGREES)',
        f'longitude = {long} (DECIMAL DEGREES)',
        f'tav = {tav}',
        f'amp = {amp}',
        '!Weather generated using C-CHANGE Foresite framework',
    ]
    units = ['()', '()', '(MJ/m2)', '(oC)', '(oC)', '(mm)', '(mm)', '(kPa)', '(hours)']

    wb = openpyxl.Workbook()
    ws = wb.active
    for record in dataframe_to_rows(wth_df, index=False, header=True):
        ws.append(record)
    # Left-align the column names and the data; one shared style object is enough.
    left = Alignment(horizontal="left")
    for row_cells in ws.iter_rows():
        for cell in row_cells:
            cell.alignment = left
    # Units go directly under the column names.
    ws.insert_rows(2)
    for col_idx, unit in enumerate(units, start=1):
        ws.cell(row=2, column=col_idx, value=unit)
    # Make room for the whole header at once instead of one row at a time.
    ws.insert_rows(1, amount=len(header_lines))
    for row_idx, line in enumerate(header_lines, start=1):
        ws.cell(row=row_idx, column=1, value=line)

    out_dir = f'apsim_files/{county_name}/met_files'
    # Saving failed when the county folder had not been created by the caller.
    os.makedirs(out_dir, exist_ok=True)
    wb.save(f'{out_dir}/{county_name}.xlsx')

In [None]:
"""
Create all met files for each county in a given geometry.

Args:
    dbconn {database connection} -- connection to postgresql database
    counties {array/list} -- every county to be run on in the geometry
    table {str} -- name of the table within the database to get geometries from
    id_column {str} -- table column that has unique ids (in this case fips) for each county.
    geo_col {str} -- table column that has geometry.
    name_col {str} -- column that has the name for each county - can just use fips columns if no county names in table.

Returns:
    Met file for each county in counties.
"""
def create_all_met(dbconn, counties, table, id_col='fips', geo_col='wkb_geometry', name_col='county'):
    """
    Create the weather workbook for every county in `counties`.

    This used to be a copy of create_all_excel_met that never created the
    county output folder, so the workbook save failed on a fresh run. It now
    forwards to create_all_excel_met, which creates the folder first.

    Args:
        dbconn {database connection} -- connection to postgresql database
        counties {array/list} -- id (fips) of every county to process
        table {str} -- name of the table within the database to get geometries from
        id_col {str} -- table column that has unique ids (in this case fips) for each county.
        geo_col {str} -- table column that has geometry.
        name_col {str} -- column that has the name for each county.

    Returns:
        None
    """
    create_all_excel_met(dbconn, counties, table, id_col=id_col, geo_col=geo_col, name_col=name_col)

In [18]:
def create_all_excel_met(dbconn, counties, table, id_col='fips', geo_col='wkb_geometry', name_col='county'):
    """
    Build the weather workbook of every county listed in `counties`.

    For each county id the county rows are loaded, the centroid is located,
    the folder apsim_files/{county}/met_files is created when absent and
    create_excel_met is called for the years 1980-2019.

    Args:
        dbconn {database connection} -- connection to postgresql database
        counties {array/list} -- county ids (fips) to process
        table {str} -- table to read the county geometries from
        id_col {str} -- column holding the county id
        geo_col {str} -- column holding the geometry
        name_col {str} -- column holding the county name

    Returns:
        None
    """
    for county_id in counties:
        county_gdf = get_county(dbconn, table, county_id, geo_col)
        # Spaces are replaced so the name can be used in folder and file names.
        county_name = county_gdf[name_col][0].replace(" ", "_")
        print(f'Geopandas table for {county_name} county created.')
        lat_lon = get_centroid(county_gdf, id_col, geo_col)
        print(f'Centroid located at {lat_lon}.')
        met_dir = f'apsim_files/{county_name}/met_files'
        if not os.path.exists(met_dir):
            os.makedirs(met_dir)
        latitude, longitude = lat_lon[0], lat_lon[1]
        create_excel_met(latitude, longitude, 1980, 2019, county_name)
        print(f"Met file for {county_name}/{county_id} at location {lat_lon} created.")

### Step 3: Get all distinct counties and create met files for all county geometry lat/lon

In [19]:
# Every distinct county id in the table; one weather workbook is written per id.
fips = get_distinct(dbconn, raccoon_2018, 'fips')
#create_all_met(dbconn, fips, raccoon_2018)
create_all_excel_met(dbconn, fips, raccoon_2018)

Geopandas table for Calhoun county created.
Centroid located at [ 42.38220278 -94.64373465].
Met file for Calhoun/IA025 at location [ 42.38220278 -94.64373465] created.
Geopandas table for Buena_Vista county created.
Centroid located at [ 42.72360279 -95.07483597].
Met file for Buena_Vista/IA021 at location [ 42.72360279 -95.07483597] created.
Geopandas table for Boone county created.
Centroid located at [ 41.90142452 -94.14423565].
Met file for Boone/IA015 at location [ 41.90142452 -94.14423565] created.
Geopandas table for Hamilton county created.
Centroid located at [ 42.40103778 -94.27051064].
Met file for Hamilton/IA079 at location [ 42.40103778 -94.27051064] created.
Geopandas table for Pocahontas county created.
Centroid located at [ 42.70625119 -94.81755961].
Met file for Pocahontas/IA151 at location [ 42.70625119 -94.81755961] created.
Geopandas table for Clay county created.
Centroid located at [ 42.9126454  -94.96637124].
Met file for Clay/IA041 at location [ 42.9126454  -94

### Step 4: Get clukey crop rotations

In [None]:
"""
Get the crop rotation for each clukey.

Args:
    df {obj} -- Dataframe that contains individual clukey information.
    crop_column {str} -- Column name that contains the label for what crop is growing for a given year.

Returns:
    Str of the rotation. e.g., 'cs' = corn-soy
"""
def get_rotation(df, crop_column):
    """
    Classify the crop history of one field as a rotation code.

    Args:
        df {obj} -- Dataframe holding the rows of a single field.
        crop_column {str} -- Column with the crop label of each row.

    Returns:
        'cs' when both 'Corn' and 'Soybean' occur, 'cc' when 'Corn' occurs
        without 'Soybean' (other crops may be present as well), otherwise
        'other'. An empty frame yields 'other'.
    """
    # Only the presence of each label matters, so test membership on the
    # column values. The former per-row df.loc[i, column] lookup raised on
    # frames whose index labels repeat.
    crops = set(df[crop_column])
    if 'Corn' not in crops:
        return 'other'
    return 'cs' if 'Soybean' in crops else 'cc'

In [None]:
#rots = pd.read_sql('SELECT * FROM raccoon.raccoon_rots', dbconn)
# Load the crop-history table; the next cell reads its clukey, years and crop columns.
rots = pd.read_sql('SELECT * FROM raccoon.raccoon_rots;', dbconn)
# Placeholder column that the next cell fills with each field's rotation code.
rots['rotation'] = ''

In [None]:
# Classify every field once, then broadcast the label to all of its rows.
# get_rotation only checks which crops are present, so sorting each group by
# year is not needed, and a single map() replaces the former boolean mask over
# the whole frame that was rebuilt for every clukey.
grouped = rots.groupby('clukey')
rotation_by_clukey = {
    clukey: get_rotation(field, 'crop')
    for clukey, field in grouped
}
rots['rotation'] = rots['clukey'].map(rotation_by_clukey)

### Step 5: Get the soil properties for each individual mukey

In [None]:
#field_25 = pd.read_sql(f"SELECT * FROM {raccoon_2018} WHERE clukey=2515723;", dbconn)
#greene = pd.read_sql(f"SELECT * FROM {raccoon_2018} WHERE County='Greene';", dbconn)
#greene_soils = pd.read_sql(f"SELECT DISTINCT mukey FROM {raccoon_2018} where County='Greene';", dbconn)

### Loop through all rows - don't run

In [None]:
def create_apsim_files(df, rotations_df, dbconn, field_key='clukey', soil_key='mukey', county_col='county', rotation_col='rotation', crop_col='crop', start_year=2016, end_year=2018):
    """
    Write one .apsim file per (field, soil) pair found in `df`.

    Fields whose rotation is neither 'cs' (corn-soybean) nor 'cc' (continuous
    corn) are skipped, as are soils for which the database returns no
    properties. Files are written to apsim_files/{county}_{field}_{soil}.apsim.
    A failure for one soil is reported with its traceback and the loop moves
    on to the next soil.

    Args:
        df {dataframe} -- field rows; needs the field_key, soil_key and county_col columns
        rotations_df {dataframe} -- crop history rows; needs the field_key and crop_col columns
        dbconn {database connection} -- connection passed to pandas.read_sql
        field_key {str} -- column identifying a field (e.g. 'clukey')
        soil_key {str} -- column identifying a soil (e.g. 'mukey')
        county_col {str} -- column holding the county name
        rotation_col {str} -- not used; kept so existing calls keep working
        crop_col {str} -- column of rotations_df holding the crop label
        start_year {int} -- first simulated year
        end_year {int} -- last simulated year

    Returns:
        None
    """
    os.makedirs('apsim_files', exist_ok=True)
    start_date = f'01/01/{start_year}'
    end_date = f'31/12/{end_year}'
    sim_years = range(start_year, end_year + 1)

    # Loop-invariant report definitions.
    outvars = [
        'dd/mm/yyyy as Date', 'day', 'year',
        'yield', 'biomass', 'fertiliser',
        'surfaceom_c', 'subsurface_drain',
        'subsurface_drain_no3', 'leach_no3',
        'corn_buac', 'soy_buac' ]
    graph_no3 = [
        'Cumulative subsurface_drain',
        'Cumulative subsurface_drain_no3',
        'Cumulative leach_no3'
    ]
    graph_yield = [
        'yield',
        'biomass',
        'corn_buac'
    ]
    graph_all = [
        'yield', 'biomass', 'fertiliser',
        'surfaceom_c', 'Cumulative subsurface_drain',
        'Cumulative subsurface_drain_no3',
        'Cumulative leach_no3', 'corn_buac',
        'soy_buac'
    ]

    sim_count = 0
    # A field can occupy several rows of df (one per soil), so visit each
    # field id once instead of once per row.
    for field_id in df[field_key].drop_duplicates():
        field = df.loc[df[field_key] == field_id]
        rotation_rows = rotations_df.loc[rotations_df[field_key] == field_id]
        rotation = get_rotation(rotation_rows, crop_col)
        # Pick the crop sequence up front so unsupported rotations are
        # skipped before any soil query or XML is built.
        if rotation == 'cs':
            crop_cycle = [spin_up_corn, spin_up_soybean]
        elif rotation == 'cc':
            crop_cycle = [spin_up_corn]
        else:
            continue
        county_name = field.iloc[0][county_col].replace(" ", "_")
        met_path = f"met_files/{county_name}.met"
        # One simulation file per distinct soil of the field.
        for soil_id in field[soil_key].drop_duplicates():
            try:
                soil_query = '''select * from api.get_soil_properties( array[{}]::text[] )'''.format( soil_id )
                soil_df = pd.read_sql( soil_query, dbconn )
                if soil_df.empty:
                    continue

                # root folder and simulation
                apsim_xml = Element( 'folder' )
                apsim_xml.set( 'version', '36' )
                apsim_xml.set( 'creator', 'C-CHANGE Foresite' )
                apsim_xml.set( 'name', county_name )
                sim = SubElement( apsim_xml, 'simulation' )
                sim.set( 'name', f'{county_name} {field_id}' )

                # met file reference
                metfile = SubElement( sim, 'metfile' )
                metfile.set( 'name', f'{county_name}' )
                filename = SubElement( metfile, 'filename' )
                filename.set( 'name', 'filename' )
                filename.set( 'input', 'yes' )
                filename.text = met_path

                # clock
                clock = SubElement( sim, 'clock' )
                clock_start = SubElement( clock, 'start_date' )
                clock_start.set( 'type', 'date' )
                clock_start.set( 'description', 'Enter the start date of the simulation' )
                clock_start.text = start_date
                clock_end = SubElement( clock, 'end_date' )
                clock_end.set( 'type', 'date' )
                clock_end.set( 'description', 'Enter the end date of the simulation' )
                clock_end.text = end_date
                SubElement( sim, 'summaryfile' )
                area = SubElement( sim, 'area' )
                area.set( 'name', 'paddock' )

                # soil, surface organic matter, fertiliser and crop modules
                soil = apsim.Soil( soil_df, SWIM = False, SaxtonRawls = False )
                area.append( soil.soil_xml() )
                area.append( apsim.init_surfaceOM( 'maize', 'maize', 3500, 65, 0.0 ) )
                SubElement( area, 'fertiliser' )
                SubElement( area, 'maize' )
                SubElement( area, 'soybean' )
                SubElement( area, 'wheat' )

                # output file and its graphs
                output_xml = apsim.set_output_variables( f'{county_name}_{field_id}_{soil_id}.out', outvars )
                area.append( output_xml )
                output_xml.append( apsim.add_xy_graph( 'Date', graph_no3, 'no3' ) )
                output_xml.append( apsim.add_xy_graph( 'Date', graph_yield, 'yield' ) )
                output_xml.append( apsim.add_xy_graph( 'Date', graph_all, 'all outputs' ) )

                # management: cycle through the rotation, one crop per
                # simulated year in chronological order (the years used to be
                # hard-coded to 2016-2018, out of order for 'cc').
                op_man = apsim.OpManager()
                op_man.add_empty_manager()
                for offset, year in enumerate( sim_years ):
                    add_management_year( op_man, crop_cycle[offset % len( crop_cycle )], year )
                area.append( op_man.man_xml )

                outfile = f'apsim_files/{county_name}_{field_id}_{soil_id}.apsim'
                ElementTree( apsim_xml ).write( outfile )
                sim_count += 1
                if (sim_count % 5 == 0):
                    print(f'Finished with {sim_count} files.')
            except Exception:
                # Best effort: report the failure with its cause and carry on
                # with the next soil. KeyboardInterrupt is no longer swallowed.
                print(f'File creation failed for APSIM run {sim_count}')
                traceback.print_exc()
                sim_count += 1
                continue

### Just loop through unique soils for a given county

In [102]:
# Distinct soil keys (mukey) recorded for Greene county.
greene_soils = pd.read_sql(f"SELECT DISTINCT mukey FROM {raccoon_2018} where County='Greene';", dbconn)
# Keep only the first five keys for the trial run below.
greene_soils_list = list(greene_soils['mukey'][0:5])
#greene_test = greene_soils_list[0:4]

In [104]:
greene_soils_list

['703831', '403511', '406211', '403455', '406372']

In [106]:
def county_mukey_runs(soils_list, dbconn, rotation, county_name, fips, start_year=2016, end_year=2018):
    """
    Write one .apsim file per soil key for a single county and rotation.

    Soils for which the database returns no properties are skipped. Files are
    written to apsim_files/{county_name}/{county_name}_{mukey}_{rotation}.apsim.
    A failure for one soil is reported with its traceback and the loop moves
    on to the next soil.

    Args:
        soils_list {list} -- soil keys (mukey) to create files for
        dbconn {database connection} -- connection passed to pandas.read_sql
        rotation {str} -- 'cfs' alternates corn and soybean starting with corn,
            'sfc' alternates starting with soybean, anything else is corn every year
        county_name {str} -- county name used in folder, file and simulation names
        fips {str} -- county id used in the simulation and output names
        start_year {int} -- first simulated year
        end_year {int} -- last simulated year

    Returns:
        None
    """
    out_dir = f'apsim_files/{county_name}'
    os.makedirs(out_dir, exist_ok=True)
    start_date = f'01/01/{start_year}'
    end_date = f'31/12/{end_year}'
    sim_years = range(start_year, end_year + 1)
    met_path = f"met_files/{county_name}.met"

    # Crop sequence that is repeated over the simulated years.
    if rotation == 'cfs':
        crop_cycle = [spin_up_corn, spin_up_soybean]
    elif rotation == 'sfc':
        crop_cycle = [spin_up_soybean, spin_up_corn]
    else:
        crop_cycle = [spin_up_corn]

    # Loop-invariant report definitions.
    outvars = [
        'dd/mm/yyyy as Date', 'day', 'year',
        'yield', 'biomass', 'fertiliser',
        'surfaceom_c', 'subsurface_drain',
        'subsurface_drain_no3', 'leach_no3',
        'corn_buac', 'soy_buac' ]
    graph_no3 = [
        'Cumulative subsurface_drain',
        'Cumulative subsurface_drain_no3',
        'Cumulative leach_no3'
    ]
    graph_yield = [
        'yield',
        'biomass',
        'corn_buac'
    ]
    graph_all = [
        'yield', 'biomass', 'fertiliser',
        'surfaceom_c', 'Cumulative subsurface_drain',
        'Cumulative subsurface_drain_no3',
        'Cumulative leach_no3', 'corn_buac',
        'soy_buac'
    ]

    sim_count = 0
    for soil_id in soils_list:
        try:
            soil_query = '''select * from api.get_soil_properties( array[{}]::text[] )'''.format( soil_id )
            soil_df = pd.read_sql( soil_query, dbconn )
            if soil_df.empty:
                continue

            # root folder and simulation
            apsim_xml = Element( 'folder' )
            apsim_xml.set( 'version', '36' )
            apsim_xml.set( 'creator', 'C-CHANGE Foresite' )
            apsim_xml.set( 'name', county_name )
            sim = SubElement( apsim_xml, 'simulation' )
            sim.set( 'name', f'County_{county_name}_{fips}_mukey_{soil_id}_rot_{rotation}' )

            # met file reference
            metfile = SubElement( sim, 'metfile' )
            metfile.set( 'name', f'{county_name}' )
            filename = SubElement( metfile, 'filename' )
            filename.set( 'name', 'filename' )
            filename.set( 'input', 'yes' )
            filename.text = met_path

            # clock
            clock = SubElement( sim, 'clock' )
            clock_start = SubElement( clock, 'start_date' )
            clock_start.set( 'type', 'date' )
            clock_start.set( 'description', 'Enter the start date of the simulation' )
            clock_start.text = start_date
            clock_end = SubElement( clock, 'end_date' )
            clock_end.set( 'type', 'date' )
            clock_end.set( 'description', 'Enter the end date of the simulation' )
            clock_end.text = end_date
            SubElement( sim, 'summaryfile' )
            area = SubElement( sim, 'area' )
            area.set( 'name', 'paddock' )

            # soil, surface organic matter, fertiliser and crop modules
            # (no wheat module is added here)
            soil = apsim.Soil( soil_df, SWIM = False, SaxtonRawls = False )
            area.append( soil.soil_xml() )
            area.append( apsim.init_surfaceOM( 'maize', 'maize', 3500, 65, 0.0 ) )
            SubElement( area, 'fertiliser' )
            SubElement( area, 'maize' )
            SubElement( area, 'soybean' )

            # output file and its graphs
            output_xml = apsim.set_output_variables( f'{county_name}_{fips}_mukey_{soil_id}.out', outvars )
            area.append( output_xml )
            output_xml.append( apsim.add_xy_graph( 'Date', graph_no3, 'no3' ) )
            output_xml.append( apsim.add_xy_graph( 'Date', graph_yield, 'yield' ) )
            output_xml.append( apsim.add_xy_graph( 'Date', graph_all, 'all outputs' ) )

            # management: one crop per simulated year, following the clock
            # period (the years used to be hard-coded to 2016-2018 even when
            # start_year / end_year said otherwise).
            op_man = apsim.OpManager()
            op_man.add_empty_manager()
            for offset, year in enumerate( sim_years ):
                add_management_year( op_man, crop_cycle[offset % len( crop_cycle )], year )
            area.append( op_man.man_xml )

            outfile = f'{out_dir}/{county_name}_{soil_id}_{rotation}.apsim'
            ElementTree( apsim_xml ).write( outfile )
            sim_count += 1
            if (sim_count % 5 == 0):
                print(f'Finished with {sim_count} files.')
        except Exception:
            # Best effort: report the failure with its cause and carry on with
            # the next soil. KeyboardInterrupt is no longer swallowed.
            print(f'File creation failed for APSIM run {sim_count} mukey {soil_id}')
            traceback.print_exc()
            sim_count += 1
            continue

In [107]:
# Trial run: 'cfs' (corn, soybean, corn) files for the Greene county soils selected above.
county_mukey_runs(greene_soils_list, dbconn, 'cfs', 'Greene', 'IA073')

In [156]:
# Read relative to the notebook folder (the same root the apsim_files/ writers
# use) instead of an absolute, user-specific path. sep=r'\s+' is the supported
# spelling of the deprecated delim_whitespace=True.
# NOTE(review): assumes the notebook runs from the folder that contains apsim_files/.
greene_403 = pd.read_csv( 'apsim_files/Greene/County_Greene_IA073_mukey_403455_rot_cfs.out', header = 3, sep = r'\s+' )

In [157]:
greene_403

Unnamed: 0,Date,day,year,paddock.soybean.yield,paddock.wheat.yield,paddock.maize.yield,paddock.soybean.biomass,paddock.wheat.biomass,paddock.maize.biomass,fertiliser,surfaceom_c,subsurface_drain,subsurface_drain_no3,leach_no3,corn_buac,soy_buac
0,(dd/mm/yyyy),(day),(year),(kg/ha),(kg/ha),(kg/ha),(kg/ha),(kg/ha),(kg/ha),(kg/ha),(kg/ha),(),(),(kg/ha),(),()
1,01/01/2016,1,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
2,02/01/2016,2,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
3,03/01/2016,3,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
4,04/01/2016,4,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,27/12/2018,361,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.41943,?,?,0.25412,0.00000,43.90780
1093,28/12/2018,362,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.37598,?,?,0.25424,0.00000,43.90780
1094,29/12/2018,363,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.37598,?,?,0.24779,0.00000,43.90780
1095,30/12/2018,364,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.37598,?,?,0.23665,0.00000,43.90780


In [158]:
# Read only the "Title = ..." line of the output file (header=2, no data rows)
# and keep it as text so the ids can be parsed out of it. The path is relative
# to the notebook folder instead of an absolute, user-specific location.
# NOTE(review): assumes the notebook runs from the folder that contains apsim_files/.
greene_403_header = str(pd.read_csv('apsim_files/Greene/County_Greene_IA073_mukey_403455_rot_cfs.out', header=2, nrows=0).columns.values)

In [159]:
greene_403_header

"['Title = County_Greene_IA073_mukey_403455_rot_cfs']"

In [160]:
# Patterns for pulling the ids out of the "Title = ..." header text.
mukey_pattern = "mukey_(.*?)_rot"
# NOTE(review): the county name 'Greene' is hard-coded, so this pattern only
# matches titles of that county.
fips_pattern = "Greene_(.*?)_mukey"

In [161]:
# re.search returns None when a pattern does not match; .group(1) then raises
# AttributeError.
mukey = int(re.search(mukey_pattern, greene_403_header).group(1))
# NOTE(review): this rebinds `fips`, which an earlier cell assigned the list of
# county ids.
fips = re.search(fips_pattern, greene_403_header).group(1)

In [162]:
mukey

403455

In [163]:
fips

'IA073'

In [164]:
# Tag every row with the ids parsed from the header. insert() modifies the
# frame in place and raises ValueError when the column already exists, so this
# cell cannot be run twice on the same frame.
greene_403.insert(0, 'fips', fips)
greene_403.insert(1, 'mukey', mukey)

In [165]:
# Renumber the rows from 0 and discard the previous index.
greene_403 = greene_403.reset_index(drop=True)

In [166]:
# Row 0 holds the units line of the output file ("(dd/mm/yyyy)", "(kg/ha)", ...),
# not data, so remove it.
greene_403 = greene_403.drop([0])

In [167]:
greene_403

Unnamed: 0,fips,mukey,Date,day,year,paddock.soybean.yield,paddock.wheat.yield,paddock.maize.yield,paddock.soybean.biomass,paddock.wheat.biomass,paddock.maize.biomass,fertiliser,surfaceom_c,subsurface_drain,subsurface_drain_no3,leach_no3,corn_buac,soy_buac
1,IA073,403455,01/01/2016,1,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
2,IA073,403455,02/01/2016,2,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
3,IA073,403455,03/01/2016,3,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
4,IA073,403455,04/01/2016,4,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
5,IA073,403455,05/01/2016,5,2016,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,0.00000,1400.00000,?,?,0.00000,0.00000,0.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1092,IA073,403455,27/12/2018,361,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.41943,?,?,0.25412,0.00000,43.90780
1093,IA073,403455,28/12/2018,362,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.37598,?,?,0.25424,0.00000,43.90780
1094,IA073,403455,29/12/2018,363,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.37598,?,?,0.24779,0.00000,43.90780
1095,IA073,403455,30/12/2018,364,2018,2563.74414,0.00000,0.00000,5954.30420,0.00000,0.00000,0.00000,2555.37598,?,?,0.23665,0.00000,43.90780


In [154]:
# The previous astype() mapping named columns that are not in the frame
# ('date', 'yield', 'biomass'; the real ones are 'Date', 'paddock.maize.yield',
# ...), which raised KeyError. Convert the columns that are actually present.
# Requires the units row to have been dropped already.
id_columns = ['fips', 'mukey', 'Date']
measure_columns = [col for col in greene_403.columns if col not in id_columns]
greene_403 = greene_403.assign(
    # Dates are written day first, e.g. 27/12/2018.
    Date=pd.to_datetime(greene_403['Date'], format='%d/%m/%Y'),
    # '?' marks a value the run did not report; coerce it to NaN.
    **{col: pd.to_numeric(greene_403[col], errors='coerce') for col in measure_columns}
).astype({'fips': 'string', 'mukey': 'int64', 'day': 'int64', 'year': 'int64'})

In [None]:
# NOTE(review): `file` is not defined in any cell shown here; it must hold the
# path of an APSIM .out file before this cell is run.
# Rows 3 and 4 (0-based) are read as a two-level header: names and units.
# sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
daily_df = pd.read_csv( file, header = [3,4], sep = r'\s+' )
# The title line looks like "Title = County_<county>_<fips>_mukey_<mukey>_rot_<rotation>".
df_header = str(pd.read_csv(file, header=2, nrows=0).columns.values)
mukey_pattern = "mukey_(.*?)_rot"
mukey = int(re.search(mukey_pattern, df_header).group(1))
# [^_]+ limits the capture to the single token right before "_mukey". The
# former lazy "_(.*?)_mukey" started at the first underscore of the title and
# therefore returned "<county>_<fips>" (e.g. 'Greene_IA073') instead of the fips.
fips_pattern = "_([^_]+)_mukey"
fips_str = re.search(fips_pattern, df_header).group(1)
daily_df.insert(1, 'fips', fips_str)
daily_df.insert(2, 'mukey', mukey)
daily_df = daily_df.reset_index( drop = True )