### Notebook for analyzing BioCentury Research Farm Accola field site - author @ Matt Nowatzke

In [8]:
import json
import pandas as pd
import geopandas as gpd
import numpy as np
import database as db
import gdal
import rasterio as rio
from rasterio.plot import plotting_extent
import analyses.run_apsim
import earthpy as et
import earthpy.plot as ep
import rasterstats as rs
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt
from rasterstats import zonal_stats
from analyses.munging import get_centroid
from apsim.daymet import create_excel_met
from apsim.apsim_input_writer import create_mukey_runs
from apsim.apsim_output_parser import parse_all_output_field, parse_summary_output_field
import matplotlib.pyplot as plt

### Load management data for 2018 soybeans following corn and 2019 corn following soybeans.

In [2]:
# Load the management data for the 2018 soybean and 2019 corn seasons.
# Each file is opened in a `with` block so the handle is closed as soon as
# the JSON has been parsed.
with open('crop_jsons/accola_sfc_2018.json', 'r') as mgmt_file:
    accola_soy_mgmt_2018 = json.load(mgmt_file)
with open('crop_jsons/accola_cfs_2019.json', 'r') as mgmt_file:
    accola_corn_mgmt_2019 = json.load(mgmt_file)

Create database connection and get a list of all the unique soils (mukeys) for the field.

In [3]:
# Open the project database connection described by 'database.ini'.
# The returned object is what later cells hand to gpd.read_postgis and
# DataFrame.to_sql. NOTE(review): presumably a SQLAlchemy engine/connection —
# confirm in database.connect_to_db.
dbconn = db.connect_to_db('database.ini')

In [4]:
# Read every row of the field's SSURGO table, using 'wkb_geometry' as the
# geometry column.
accola_soils = gpd.read_postgis(
    'SELECT * FROM biocent_farms.accola_ssurgo;',
    dbconn,
    geom_col='wkb_geometry',
)
# Sorted, de-duplicated map unit keys present in the table.
accola_mukeys = list(np.unique(accola_soils['mukey']))
accola_mukeys

['2765537', '2800480', '2835021', '2835194', '2922031', '411333']

### Get the centroid of the field and create met file.
This met file--if created on a Windows machine--is an Excel file that should afterwards be saved as a Formatted Text file (`*.prn`).  
See: https://www.apsim.info/support/apsim-training-manuals/creating-an-apsim-met-file-using-excel/  
This is a known problem with Pandas on Windows. If creating the met file on Mac or Linux, feel free to use the daymet.create_met function instead.

In [None]:
#accola_centroid = get_centroid(accola_soils, 'areasymbol', 'wkb_geometry')
#create_excel_met(accola_centroid[0], accola_centroid[1], 2012, 2019, 'accola')

Create two sets of runs: one using the default soil calculations and one using the Saxton-Rawls calculations (`saxton=True`).

In [5]:
# Build one set of run files per soil parameterisation. Both sets share the
# same mukeys, met file, years and management; only the output name and the
# `saxton` flag differ.
for run_name, extra_options in (
    ('AccolaDefaultOld', {}),
    ('AccolaSaxtonOld', {'saxton': True}),
):
    create_mukey_runs(
        accola_mukeys,
        dbconn,
        'cfs',
        'accola.met',
        run_name,
        start_year=2017,
        end_year=2019,
        sfc_mgmt=accola_soy_mgmt_2018,
        cfs_mgmt=accola_corn_mgmt_2019,
        **extra_options,
    )

Finished! All files created!
Finished! All files created!


Run simulations

In [6]:
# Run the simulations for both Saxton run folders.
# The module is reached through the `analyses` package because the notebook
# imports it as `import analyses.run_apsim`, which binds only the name
# `analyses`; a bare `run_apsim` is undefined on a fresh kernel.
# NOTE(review): only the "...Old" folders are created by create_mukey_runs in
# this notebook; 'AccolaSaxton' is assumed to already exist on disk.
analyses.run_apsim.run_all_simulations(apsim_files_path="apsim_files\\AccolaSaxton\\*.apsim", sim_files_path="apsim_files\\AccolaSaxton\\*.sim")
analyses.run_apsim.run_all_simulations(apsim_files_path="apsim_files\\AccolaSaxtonOld\\*.apsim", sim_files_path="apsim_files\\AccolaSaxtonOld\\*.sim")

Running on 62 cores
Converting 6 .apsim files to .sim files.
Running Apsim for 6 .sim files...
Runs completed.
Processing time: 2.1175 seconds
Running on 62 cores
Converting 6 .apsim files to .sim files.
Running Apsim for 6 .sim files...
Runs completed.
Processing time: 2.1165 seconds


In [7]:
# Run the simulations for both default-soil run folders.
# The module is reached through the `analyses` package because the notebook
# imports it as `import analyses.run_apsim`, which binds only the name
# `analyses`; a bare `run_apsim` is undefined on a fresh kernel.
# NOTE(review): only the "...Old" folders are created by create_mukey_runs in
# this notebook; 'AccolaDefault' is assumed to already exist on disk.
analyses.run_apsim.run_all_simulations(apsim_files_path="apsim_files\\AccolaDefault\\*.apsim", sim_files_path="apsim_files\\AccolaDefault\\*.sim")
analyses.run_apsim.run_all_simulations(apsim_files_path="apsim_files\\AccolaDefaultOld\\*.apsim", sim_files_path="apsim_files\\AccolaDefaultOld\\*.sim")

Running on 62 cores
Converting 6 .apsim files to .sim files.
Running Apsim for 6 .sim files...
Runs completed.
Processing time: 2.1084 seconds
Running on 62 cores
Converting 6 .apsim files to .sim files.
Running Apsim for 6 .sim files...
Runs completed.
Processing time: 2.1173 seconds


In [8]:
# Parse the 2019 summary output of the two Saxton run folders.
# Later cells select the 'mukey' and 'corn_buac' columns from these results.
# NOTE(review): the exact schema comes from parse_summary_output_field — confirm there.
accola_maize_output_2019_saxton = parse_summary_output_field("apsim_files/AccolaSaxton/", year=2019)
accola_maize_output_2019_saxton_old = parse_summary_output_field("apsim_files/AccolaSaxtonOld/", year=2019)

In [9]:
# Parse the 2019 summary output of the two default-soil run folders.
# Later cells select the 'mukey' and 'corn_buac' columns from these results.
# NOTE(review): the exact schema comes from parse_summary_output_field — confirm there.
accola_maize_output_2019_default = parse_summary_output_field("apsim_files/AccolaDefault/", year=2019)
accola_maize_output_2019_default_old = parse_summary_output_field("apsim_files/AccolaDefaultOld/", year=2019)

In [None]:
# accola_maize_output_2019_saxton.to_csv('accola_saxton_maize_2019.csv', index=False)
# accola_maize_output_2019_default.to_csv('accola_default_maize_2019.csv', index=False)

In [10]:
# Keep only the soil key and the corn yield column from the Saxton run.
accola_apsim_yield_2019_saxton = accola_maize_output_2019_saxton.loc[:, ['mukey', 'corn_buac']]
accola_apsim_yield_2019_saxton

Unnamed: 0,mukey,corn_buac
0,2765537,199.89569
1,2800480,201.67325
2,2835021,205.76308
3,2835194,207.9527
4,2922031,203.81483
5,411333,210.01071


In [11]:
# Keep only the soil key and the corn yield column from the "old" Saxton run.
accola_apsim_yield_2019_saxton_old = accola_maize_output_2019_saxton_old.loc[:, ['mukey', 'corn_buac']]
accola_apsim_yield_2019_saxton_old

Unnamed: 0,mukey,corn_buac
0,2765537,144.91823
1,2800480,134.41357
2,2835021,176.60591
3,2835194,188.40086
4,2922031,163.13495
5,411333,184.84686


In [12]:
# Keep only the soil key and the corn yield column from the default-soil run.
accola_apsim_yield_2019_default = accola_maize_output_2019_default.loc[:, ['mukey', 'corn_buac']]
accola_apsim_yield_2019_default

Unnamed: 0,mukey,corn_buac
0,2765537,206.29016
1,2800480,205.90283
2,2835021,205.93431
3,2835194,207.34471
4,2922031,205.66007
5,411333,200.76477


In [13]:
# Keep only the soil key and the corn yield column from the "old" default-soil run.
accola_apsim_yield_2019_default_old = accola_maize_output_2019_default_old.loc[:, ['mukey', 'corn_buac']]
accola_apsim_yield_2019_default_old

Unnamed: 0,mukey,corn_buac
0,2765537,173.01106
1,2800480,145.71753
2,2835021,169.79439
3,2835194,192.79851
4,2922031,164.62152
5,411333,147.62572


In [None]:
#accola_yield = gpd.read_file("yield_monitor/Accola2019YieldwGeom.dbf")
#accola_yield_clean = accola_yield[['Yld_Vol_Dr','geometry']]
#accola_yield_clean['Yld_Vol_Dr'].describe()

In [None]:
# Base directory of the Accola yield-monitor analysis files. This is a
# machine-specific absolute path; change it here to point the two inputs
# below at another location.
ACCOLA_ANALYSIS_DIR = "C:\\Users\\mjn\\Documents\\Foresite\\yield_monitor_data\\analyses\\accola"

#YM data with turn rows
#accola_ssurgo_geojson = 'C:/Users/mjn/Documents/Foresite/yield_monitor_data/analyses/accola/accola_ssurgo.geojson'
#accola_raster_yield = "C:\\Users\\mjn\\Documents\\Foresite\\yield_monitor_data\\analyses\\accola\\accola_maize_2019_raster_30.tif"

#YM data without turn rows
accola_ssurgo_geojson = ACCOLA_ANALYSIS_DIR + "\\ssurgo\\accola_ssurgo_noturnrows.geojson"
accola_raster_yield = ACCOLA_ANALYSIS_DIR + "\\yield_monitor\\no_turnrow\\accola_maize_noturn_ym_raster_2019.tif"

In [None]:
# Summarise the yield raster within each SSURGO polygon. geojson_out=True
# returns the input features with the requested statistics attached as
# properties.
stats = zonal_stats(
    accola_ssurgo_geojson,
    accola_raster_yield,
    geojson_out=True,
    stats=['min', 'max', 'median', 'mean', 'std', 'range'],
)

In [None]:
# Turn the GeoJSON-style features returned by zonal_stats into a GeoDataFrame
accola_maize_2019 = gpd.GeoDataFrame.from_features(stats)

In [None]:
# Tag every row with the field name
accola_maize_2019['Field'] = 'Accola'

In [None]:
# Keep the geometry, the identifying attributes and the zonal statistics,
# in this order.
accola_maize_2019 = accola_maize_2019[[
    'geometry',
    'Field',
    'objectid',
    'areasymbol',
    'musym',
    'mukey',
    'shape_area',
    'mean',
    'median',
    'min',
    'max',
    'std',
    'range',
]]

In [None]:
# Round every zonal statistic column to two decimals.
accola_maize_2019 = accola_maize_2019.round(
    dict.fromkeys(['mean', 'median', 'min', 'max', 'std', 'range'], 2)
)

In [None]:
# Write the zonal statistics to a GeoJSON file in the working directory
accola_maize_2019.to_file('accola_maize_2019_stats.geojson', driver='GeoJSON')

In [None]:
# Reload the statistics from the GeoJSON file; this rebinds accola_maize_2019
# to the on-disk version
accola_maize_2019 = gpd.read_file("accola_maize_2019_stats.geojson")

In [None]:
def wkb_hexer(line):
    """Return the ``wkb_hex`` attribute of ``line``.

    Used with ``Series.apply`` to replace geometry objects by their
    hex-encoded WKB text.
    """
    hex_wkb = line.wkb_hex
    return hex_wkb

In [None]:
# Replace each geometry object with its hex WKB string (see wkb_hexer).
# NOTE(review): this cell is not re-runnable — on a second run the column
# holds strings, which have no `wkb_hex` attribute.
accola_maize_2019['geometry'] = accola_maize_2019['geometry'].apply(wkb_hexer)

In [None]:
# Wrap the table as a plain pandas DataFrame for the to_sql call that follows
accola_maize_df = pd.DataFrame(accola_maize_2019)

In [None]:
# Target schema/table for the zonal statistics. An existing table with the
# same name is replaced.
db_schema = 'biocent_farms'
db_table = 'accola_maize_2019_yield_zonal_stats'
accola_maize_df.to_sql(
    name=db_table,
    con=dbconn,
    schema=db_schema,
    if_exists='replace',
    index=False,
    chunksize=1000,
    method='multi',
)

In [None]:
# Reduce to the polygon id, soil key and zonal mean yield used in the
# comparison cells below.
# NOTE: this rebinds accola_maize_2019 to a three-column subset.
accola_maize_2019 = accola_maize_2019[['objectid', 'mukey', 'mean']]
# Display only; the sorted result is not stored
accola_maize_2019.sort_values('mukey')

In [None]:
# Read band 1 of the yield raster as a masked array and flatten it to 1-D
# for the histogram.
with rio.open(accola_raster_yield) as src:
    accola_raster_array = src.read(1, masked=True).ravel()

In [None]:
# Histogram of the flattened yield raster values.
fig, ax = plt.subplots()
ax.hist(accola_raster_array, color='purple')
ax.set(
    xlabel='Maize yield',
    ylabel='Frequency',
    title='Accola 2019 Maize Yield',
)
plt.show()

![SSURGO soils with raster yield overlay](images/accola_ssurgo_rasteryield_2019.png "Accola SSURGO soils with yield overlay")  

In [None]:
# Join the observed zonal mean yield to the APSIM default-soil prediction by
# mukey. how='left' keeps every observed polygon even when no prediction
# matches. Columns are renamed by label rather than by position, so the
# result does not depend on the column order or count of either input.
pred_vs_obs_def = (
    accola_maize_2019
    .merge(accola_apsim_yield_2019_default, on='mukey', how='left')
    .rename(columns={'mean': 'observed', 'corn_buac': 'predicted'})
    # Residual is predicted minus observed: positive means APSIM over-predicts
    .assign(resid=lambda frame: frame['predicted'] - frame['observed'])
)
# Display only; the sorted result is not stored
pred_vs_obs_def.sort_values('mukey')

In [None]:
# Observed and predicted yields as plain lists rounded to two decimals.
yield_monitor = [round(value, 2) for value in pred_vs_obs_def['observed']]
apsim_predicted_def = [round(value, 2) for value in pred_vs_obs_def['predicted']]

In [None]:
# Measured (x) against predicted (y) yield.
fig, ax = plt.subplots()
ax.scatter(yield_monitor, apsim_predicted_def)
# Reference line from (100, 100) to (250, 250): points on it have
# predicted == measured.
ax.plot([100, 250], [100, 250])
ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()

In [None]:
# Root-mean-square error between observed and predicted yield.
# The square root is taken explicitly instead of passing `squared=False`,
# which newer scikit-learn releases deprecated and then removed; this form
# works on old and new versions alike.
rmse = sqrt(mean_squared_error(yield_monitor, apsim_predicted_def))
rmse

In [None]:
# Coefficient of determination, treating the yield-monitor values as truth and
# the APSIM values as predictions
r_sq = r2_score(yield_monitor, apsim_predicted_def)
r_sq

In [None]:
# 2x2 Pearson correlation matrix of observed vs predicted yield
corr_matrix = np.corrcoef(yield_monitor, apsim_predicted_def)
# Off-diagonal entry is the correlation between the two series
corr_xy = corr_matrix[0,1]
# Squared correlation; this need not equal the r2_score value computed in the
# previous cell
r_squared = corr_xy**2
r_squared

In [None]:
# Show the full correlation matrix
corr_matrix

In [None]:
#gdal.Grid('yield_avg.tif', 'accola_yield_basic.csv')

In [None]:
def reproject(vector_gpd, raster):
    """Reproject a vector layer into the coordinate reference system of a raster.

    Parameters
    ----------
    vector_gpd : object exposing ``.crs`` and ``.to_crs(...)``
        The vector layer to reproject (a GeoDataFrame in this notebook).
    raster : object exposing ``.crs.to_epsg()`` and ``.crs.to_wkt()``
        The raster whose CRS is the target (an open rasterio dataset in this
        notebook).

    Returns
    -------
    The reprojected layer returned by ``vector_gpd.to_crs``.

    The source and resulting CRS are printed for inspection.
    """
    target_crs = raster.crs.to_epsg()
    if target_crs is None:
        # to_epsg() returns None when the raster CRS has no matching EPSG
        # code; fall back to its WKT text so to_crs still gets a usable CRS.
        target_crs = raster.crs.to_wkt()
    print("Original vector layer projection: ", vector_gpd.crs)
    reproj = vector_gpd.to_crs(target_crs)
    print("New vector layer projection (PROJ4): ", reproj.crs)
    return reproj

In [None]:
# GeoJSON of 2019 yield points with an elevation attribute (turn rows removed,
# per the file name). NOTE: machine-specific absolute path.
yield_with_dem_path = "C:\\Users\\mjn\\Documents\\Foresite\\yield_monitor_data\\analyses\\accola\\accola_maize_2019_yield_w_elev_noturnrows.geojson"

In [None]:
# Load the yield + elevation file into a GeoDataFrame
yld_with_dem = gpd.read_file(yield_with_dem_path)

In [None]:
# Preview the first rows as loaded
yld_with_dem.head()

In [None]:
# Round yield to two decimals, then store the object id as a 32-bit integer.
yld_with_dem = (
    yld_with_dem
    .round({'Yld_Vol_Dr': 2})
    .astype({'Obj__Id': 'int32'})
)

In [None]:
# Preview the first rows after rounding and the dtype change
yld_with_dem.head()

In [None]:
# Yield (x) against the 'rvalue_1' column (y), which the axis label identifies
# as DEM elevation. alpha=0.2 keeps dense clusters of points readable.
fig, ax = plt.subplots()
ax.scatter(yld_with_dem['Yld_Vol_Dr'], yld_with_dem['rvalue_1'], alpha=0.2)
# Label now has its closing parenthesis
ax.set_xlabel('Yield Monitor (bu/ac)')
ax.set_ylabel('DEM (elevation)')
plt.show()

In [None]:
# Number of yield records in the table
len(yld_with_dem['Yld_Vol_Dr'])

In [None]:
# Column values as tuples for np.corrcoef in the next cell
dem_pred = tuple(yld_with_dem['rvalue_1'])
yld_obsv = tuple(yld_with_dem['Yld_Vol_Dr'])

In [None]:
# Pearson correlation between yield and the 'rvalue_1' elevation values.
# NOTE: rebinds corr_matrix / corr_xy, which earlier held the observed-vs-APSIM
# correlation.
corr_matrix = np.corrcoef(yld_obsv, dem_pred)
corr_xy = corr_matrix[0,1]
corr_xy

In [None]:
# Location of the yield points that carry the yield monitor's own elevation.
# The path has its own name so that one variable does not hold first a string
# and then a table. NOTE: machine-specific absolute path.
yield_with_ym_elev_file = "C:\\Users\\mjn\\Documents\\Foresite\\yield_monitor_data\\analyses\\accola\\accola_maize_2019_noturn_ym_elev.geojson"
# Despite the "_path" suffix, this name holds the loaded GeoDataFrame; the
# name is kept because the following cells refer to it.
yield_with_ym_elev_path = gpd.read_file(yield_with_ym_elev_file)

In [None]:
# Preview the first rows as loaded (the variable holds a GeoDataFrame, not a path)
yield_with_ym_elev_path.head()

In [None]:
# Round yield to two decimals, then store the object id as a 32-bit integer.
yield_with_ym_elev_path = (
    yield_with_ym_elev_path
    .round({'Yld_Vol_Dr': 2})
    .astype({'Obj__Id': 'int32'})
)

In [None]:
# Preview the first rows after rounding and the dtype change
yield_with_ym_elev_path.head()

In [None]:
# Yield (x) against the 'Elevation_' column (y), which the axis label
# identifies as the yield monitor's elevation. alpha=0.2 keeps dense clusters
# of points readable.
fig, ax = plt.subplots()
ax.scatter(yield_with_ym_elev_path['Yld_Vol_Dr'], yield_with_ym_elev_path['Elevation_'], alpha=0.2)
# Label now has its closing parenthesis
ax.set_xlabel('Yield Monitor (bu/ac)')
ax.set_ylabel('Yield Monitor Elevation')
plt.show()

In [None]:
# Column values as tuples for np.corrcoef in the next cell.
# NOTE: yld_obsv is rebound here; it previously held the yield values from
# yld_with_dem.
ym_elev = tuple(yield_with_ym_elev_path['Elevation_'])
yld_obsv = tuple(yield_with_ym_elev_path['Yld_Vol_Dr'])

In [None]:
# Pearson correlation between yield and the 'Elevation_' values.
# NOTE: rebinds corr_matrix / corr_xy from the previous correlation cells.
corr_matrix = np.corrcoef(yld_obsv, ym_elev)
corr_xy = corr_matrix[0,1]
corr_xy