### Notebook for analyzing BioCentury Research Farm Accola field site - author @ Matt Nowatzke

In [1]:
import json
import pandas as pd
import geopandas as gpd
import numpy as np
import database as db
import gdal
import rasterio
import run_apsim
from sklearn.metrics import mean_squared_error, r2_score
from math import sqrt
from rasterstats import zonal_stats
from munging import get_centroid
from apsim.daymet import create_excel_met
from apsim.apsim_input_writer import create_mukey_runs
from apsim.apsim_output_parser import parse_all_output_field, parse_summary_output_field
import matplotlib.pyplot as plt

### Load management data for 2018 soybeans following corn and 2019 corn following soybeans.

In [3]:
# Load the management data for each phase of the rotation.
# Context managers close the file handles as soon as parsing finishes, and
# json.load reads directly from the file object instead of an intermediate string.
with open('crop_jsons/accola_sfc_2018.json', 'r') as sfc_file:
    accola_soy_mgmt_2018 = json.load(sfc_file)
with open('crop_jsons/accola_cfs_2019.json', 'r') as cfs_file:
    accola_corn_mgmt_2019 = json.load(cfs_file)

Create database connection and get a list of all the unique soils (mukeys) for the field.

In [2]:
dbconn = db.connect_to_db('database.ini')

In [9]:
# Read the field's SSURGO records from PostGIS into a GeoDataFrame, using
# 'wkb_geometry' as the geometry column.
accola_soils = gpd.read_postgis('SELECT * FROM biocent_farms.accola_ssurgo;', dbconn, geom_col='wkb_geometry')
# Sorted, de-duplicated mukey values; this list is handed to create_mukey_runs later.
accola_mukeys = list(np.unique(accola_soils['mukey']))
# Non-spatial alternative kept for reference (plain DataFrame, no geometry parsing).
#accola_soils = pd.read_sql('SELECT * FROM biocent_farms.accola_ssurgo;', dbconn)

### Get the centroid of the field and create met file.
This met file--if created on a Windows machine--is an Excel file that should be saved afterwards as a Formatted Text file (*.prn)  
See: https://www.apsim.info/support/apsim-training-manuals/creating-an-apsim-met-file-using-excel/  
This is a known problem with Pandas on Windows. If creating the met file on Mac or Linux, feel free to use the daymet.create_met function instead.

In [None]:
# The result is indexed as a two-element pair below — presumably (lat, lon);
# TODO confirm the order against munging.get_centroid.
accola_centroid = get_centroid(accola_soils, 'areasymbol', 'wkb_geometry')
# 2012 and 2019 are presumably the first and last weather years, and 'accola'
# presumably the output file stem — TODO confirm against apsim.daymet.create_excel_met.
create_excel_met(accola_centroid[0], accola_centroid[1], 2012, 2019, 'accola')

Create runs using the default soil calcs or Saxton Rawls

In [32]:
# Keyword arguments common to both run sets.
shared_run_kwargs = dict(
    start_year=2017,
    end_year=2019,
    sfc_mgmt=accola_soy_mgmt_2018,
    cfs_mgmt=accola_corn_mgmt_2019,
)
# First the run set without the saxton flag, then the one with saxton=True.
create_mukey_runs(accola_mukeys, dbconn, 'cfs', 'accola.met', 'AccolaDefault', **shared_run_kwargs)
create_mukey_runs(accola_mukeys, dbconn, 'cfs', 'accola.met', 'AccolaSaxton', **shared_run_kwargs, saxton=True)

Finished! All files created!
Finished! All files created!


In [33]:
# Convert the AccolaSaxton .apsim files to .sim files and run them through APSIM.
# NOTE(review): these globs use backslash separators while the parsing cells use
# forward slashes — presumably Windows-only; confirm before running elsewhere.
run_apsim.run_all_simulations(apsim_files_path="apsim_files\\AccolaSaxton\\*.apsim", sim_files_path="apsim_files\\AccolaSaxton\\*.sim")

100%|██████████| 6/6 [00:00<?, ?it/s]Running on 62 cores
Converting 6 .apsim files to .sim files.

100%|██████████| 6/6 [00:00<?, ?it/s]Running Apsim for 6 .sim files...

Runs completed.


In [34]:
accola_maize_output_2019_saxton = parse_summary_output_field("apsim_files/AccolaSaxton/", year=2019)

In [35]:
accola_maize_output_2019_saxton

Unnamed: 0,title,field,mukey,rotation,year,soybean_yield,maize_yield,corn_buac,soy_buac,soybean_biomass,maize_biomass,fertiliser,surfaceom_c_init,surfaceom_c_end,leach_no3
0,name_AccolaSaxton_mukey_2765537_rot_cfs_sim,AccolaSaxton,2765537,cfs,2019,0.0,7970.371,149.09282,0.0,0.0,14946.46419,168.0,1693.12085,2609.60669,44.23188
1,name_AccolaSaxton_mukey_2800480_rot_cfs_sim,AccolaSaxton,2800480,cfs,2019,0.0,7483.47698,139.98505,0.0,0.0,14401.70423,168.0,1611.66638,2570.8523,49.77821
2,name_AccolaSaxton_mukey_2835021_rot_cfs_sim,AccolaSaxton,2835021,cfs,2019,0.0,9605.81717,179.68529,0.0,0.0,16246.80922,168.0,1990.76782,2476.79419,50.67606
3,name_AccolaSaxton_mukey_2835194_rot_cfs_sim,AccolaSaxton,2835194,cfs,2019,0.0,10369.68797,193.97417,0.0,0.0,17463.05681,168.0,2086.23584,2655.04541,48.45929
4,name_AccolaSaxton_mukey_2922031_rot_cfs_sim,AccolaSaxton,2922031,cfs,2019,0.0,8928.34008,167.01248,0.0,0.0,15495.66876,168.0,1853.86206,2440.91382,48.31842
5,name_AccolaSaxton_mukey_411333_rot_cfs_sim,AccolaSaxton,411333,cfs,2019,0.0,10178.22451,190.39267,0.0,0.0,17405.44352,168.0,2061.10571,2709.40625,96.28025


In [36]:
accola_apsim_yield_2019_saxton = accola_maize_output_2019_saxton[['mukey', 'corn_buac']]
accola_apsim_yield_2019_saxton

Unnamed: 0,mukey,corn_buac
0,2765537,149.09282
1,2800480,139.98505
2,2835021,179.68529
3,2835194,193.97417
4,2922031,167.01248
5,411333,190.39267


In [37]:
# Convert the AccolaDefault .apsim files to .sim files and run them through APSIM.
# NOTE(review): these globs use backslash separators while the parsing cells use
# forward slashes — presumably Windows-only; confirm before running elsewhere.
run_apsim.run_all_simulations(apsim_files_path="apsim_files\\AccolaDefault\\*.apsim", sim_files_path="apsim_files\\AccolaDefault\\*.sim")

100%|██████████| 6/6 [00:00<?, ?it/s]Running on 62 cores
Converting 6 .apsim files to .sim files.

100%|██████████| 6/6 [00:00<?, ?it/s]Running Apsim for 6 .sim files...

Runs completed.


In [38]:
accola_maize_output_2019_default = parse_summary_output_field("apsim_files/AccolaDefault/", year=2019)

In [39]:
accola_maize_output_2019_default

Unnamed: 0,title,field,mukey,rotation,year,soybean_yield,maize_yield,corn_buac,soy_buac,soybean_biomass,maize_biomass,fertiliser,surfaceom_c_init,surfaceom_c_end,leach_no3
0,name_AccolaDefault_mukey_2765537_rot_cfs_sim,AccolaDefault,2765537,cfs,2019,0.0,9442.21313,176.62492,0.0,0.0,16316.17532,168.0,2007.14356,2578.01001,23.30919
1,name_AccolaDefault_mukey_2800480_rot_cfs_sim,AccolaDefault,2800480,cfs,2019,0.0,7941.15972,148.5464,0.0,0.0,15120.6134,168.0,1681.04028,2693.2898,36.83828
2,name_AccolaDefault_mukey_2835021_rot_cfs_sim,AccolaDefault,2835021,cfs,2019,0.0,9246.75745,172.96875,0.0,0.0,15953.10282,168.0,1926.73731,2504.61475,41.74329
3,name_AccolaDefault_mukey_2835194_rot_cfs_sim,AccolaDefault,2835194,cfs,2019,0.0,10588.41608,198.06566,0.0,0.0,17934.27387,168.0,2137.77148,2764.91382,35.02096
4,name_AccolaDefault_mukey_2922031_rot_cfs_sim,AccolaDefault,2922031,cfs,2019,0.0,8962.22686,167.64636,0.0,0.0,15678.8284,168.0,1856.05933,2511.13892,38.44213
5,name_AccolaDefault_mukey_411333_rot_cfs_sim,AccolaDefault,411333,cfs,2019,0.0,8140.92756,152.28323,0.0,0.0,14772.84193,168.0,1708.92236,2462.09253,82.37149


In [41]:
accola_apsim_yield_2019_default = accola_maize_output_2019_default[['mukey', 'corn_buac']]
accola_apsim_yield_2019_default

Unnamed: 0,mukey,corn_buac
0,2765537,176.62492
1,2800480,148.5464
2,2835021,172.96875
3,2835194,198.06566
4,2922031,167.64636
5,411333,152.28323


In [None]:
#gdal Rasterize (vector to raster)
#raster analysis > zonal statistics
#user python rasterstats to calculate zonal statistics
# from rasterstats import zonal_stats
# stats = zonal_stats("tests/data/polygons.shp", "tests/data/elevation.tif")
# stats[1].keys()
# ['count', 'min', 'max', 'mean']
# [f['mean'] for f in stats]
# [756.6057470703125, 114.660084635416666]

In [None]:
#accola_yield = gpd.read_file("yield_monitor/Accola2019YieldwGeom.dbf")

In [None]:
# Load the yield-monitor layer in this cell so it runs on a fresh kernel:
# `accola_yield` was otherwise only defined in a commented-out cell, which made
# the column selection below fail with NameError after Restart & Run All.
accola_yield = gpd.read_file("yield_monitor/Accola2019YieldwGeom.dbf")
# Keep only the yield column and the geometry.
# NOTE(review): 'Yld_Vol_Dr' is presumably dry yield volume — confirm units with the monitor export.
accola_yield_clean = accola_yield[['Yld_Vol_Dr','geometry']]
#accola_yield_clean['Yld_Vol_Dr'].describe()

In [None]:
# Zone polygons and the 2019 yield raster they are summarised over.
# NOTE(review): both are absolute, machine-specific paths.
zones_geojson = 'C:/Users/mjn/Documents/Foresite/yield_monitor_data/accola_ssurgo.geojson'
yield_raster = "C:/Users/mjn/Documents/Foresite/yield_monitor_data/accola_2019_yield_raster.tif"
# geojson_out=True so the result can be fed to GeoDataFrame.from_features in the next cell.
stats = zonal_stats(zones_geojson, yield_raster, geojson_out=True)

In [None]:
accola_maize_2019 = gpd.GeoDataFrame.from_features(stats)
accola_maize_2019

In [None]:
accola_maize_2019 = accola_maize_2019[['objectid', 'mukey', 'mean']]
accola_maize_2019.sort_values('mukey')

In [None]:
# `accola_apsim_yield_2019` was never defined anywhere above (only the `_saxton`
# and `_default` variants exist), so the merge raised NameError on a fresh kernel.
# Select the simulated yields to evaluate explicitly; swap in
# `accola_apsim_yield_2019_saxton` to score the Saxton-Rawls runs instead.
accola_apsim_yield_2019 = accola_apsim_yield_2019_default
# Left join keeps every observed zone even when no simulation matches its mukey.
# NOTE(review): 'mukey' must have the same dtype in both frames for rows to match — confirm.
pred_vs_obs = accola_maize_2019.merge(accola_apsim_yield_2019, on='mukey', how='left')
# Positional rename: relies on the merged column order objectid, mukey, mean, corn_buac.
pred_vs_obs.columns = ['objectid', 'mukey', 'observed', 'predicted']
#pred_vs_obs.sort_values('mukey')

In [None]:
pred_vs_obs

In [None]:
# Observed and predicted yields as plain lists, each value rounded to two decimals.
yield_monitor = [round(value, 2) for value in pred_vs_obs['observed']]
apsim_predicted = [round(value, 2) for value in pred_vs_obs['predicted']]

In [None]:
yield_monitor

In [None]:
apsim_predicted

In [None]:
# Scatter of observed vs. predicted yield with a 1:1 reference line.
fig, ax = plt.subplots()
ax.scatter(yield_monitor, apsim_predicted)
# Same endpoints on both axes give the y = x line between 100 and 250.
one_to_one = [100, 250]
ax.plot(one_to_one, one_to_one)
ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()

In [None]:
# Root-mean-squared error between observed and predicted yields.
# Taking sqrt of the MSE avoids the `squared=False` keyword, which newer
# scikit-learn releases deprecate and then remove, so this works on old and new versions.
rmse = sqrt(mean_squared_error(yield_monitor, apsim_predicted))
rmse

In [None]:
r_sq = r2_score(yield_monitor, apsim_predicted)
r_sq

In [None]:
corr_matrix = np.corrcoef(yield_monitor, apsim_predicted)
corr_xy = corr_matrix[0,1]
r_squared = corr_xy**2
r_squared

In [None]:
corr_matrix

In [None]:
#gdal.Grid('yield_avg.tif', 'accola_yield_basic.csv')

In [None]:
len(accola_yield_clean.loc[accola_yield_clean['Yld_Vol_Dr'] <= 150])

In [None]:
# For re-projecting input vector layer to raster projection
def reproject(vector_gpd, raster):
    """Reproject a vector layer to a raster's CRS and plot the overlay.

    Parameters
    ----------
    vector_gpd : object with ``.crs``, ``.to_crs`` and ``.plot``
        The vector layer to reproject (a GeoDataFrame in this notebook).
    raster : object with ``.crs``
        Raster whose CRS is the target; it is also drawn with ``rplt.show``.

    Returns
    -------
    The result of ``vector_gpd.to_crs`` with the raster's PROJ4 string.
    """
    # Imported locally because `rplt` is not imported at the top of the
    # notebook; without this the plotting call below raises NameError.
    from rasterio import plot as rplt

    proj = raster.crs.to_proj4()
    print("Original vector layer projection: ", vector_gpd.crs)
    reproj = vector_gpd.to_crs(proj)
    print("New vector layer projection (PROJ4): ", reproj.crs)
    # Draw the raster with the reprojected outlines on top as a visual alignment check.
    fig, ax = plt.subplots(figsize=(15, 15))
    rplt.show(raster, ax=ax)
    reproj.plot(ax=ax, facecolor='none', edgecolor='red')
    fig.show()
    return reproj
# Available stats: ['min', 'max', 'mean', 'count', 'sum', 'std', 'median', 'majority', 'minority', 'unique', 'range']
def get_zonal_stats(vector, raster, stats):
    """Compute zonal statistics and return them as a GeoDataFrame.

    ``vector`` and ``raster`` are forwarded unchanged to ``zonal_stats``;
    ``stats`` selects which statistics are computed.
    """
    features = zonal_stats(vector, raster, stats=stats, geojson_out=True)
    return gpd.GeoDataFrame.from_features(features)
# For generating raster from zonal statistics result
def stats_to_raster(zdf, raster, stats, out_raster, no_data='y', field=None):
    """Burn one zonal-statistics column into a raster and write it to disk.

    Parameters
    ----------
    zdf : GeoDataFrame-like
        Zonal statistics result; must expose ``.geometry`` and the chosen column.
    raster : opened rasterio dataset
        Supplies the metadata, shape and transform for the output.
    stats :
        Passed to ``list_columns`` to build the list of selectable fields
        (only used when ``field`` is None).
    out_raster : str
        Path of the raster file to write.
    no_data : str, default 'y'
        When 'y', cells below the smallest zone value are reset to 0.
    field : str, optional
        Column of ``zdf`` to rasterize. When omitted, the user is prompted for
        an index into ``list_columns(stats)`` as before; passing it makes the
        function usable without interactive input.
    """
    # Imported locally: neither name is imported at the top of the notebook,
    # so the calls below would otherwise raise NameError.
    from rasterio.features import rasterize
    from rasterio.plot import show

    meta = raster.meta.copy()
    if field is None:
        # NOTE(review): list_columns is not defined in the visible notebook — confirm it is available.
        field_list = list_columns(stats)
        index = int(input("Rasterize by which field? "))
        field = field_list[index]
    zone = zdf[field]
    shapes = ((geom, value) for geom, value in zip(zdf.geometry, zone))
    # Burn directly as float32 so the array matches the dtype declared in `meta`.
    burned = rasterize(
        shapes=shapes,
        fill=0,
        out_shape=raster.shape,
        transform=raster.transform,
        dtype=rasterio.float32,
    )
    show(burned)
    meta.update(dtype=rasterio.float32, nodata=0)
    # Optional to set nodata values to min of stat
    if no_data == 'y':
        cutoff = min(zone.values)
        print("Setting nodata cutoff to: ", cutoff)
        # Compare in the array's own precision so the minimum zone itself is
        # not zeroed by float32 rounding of its value.
        burned[burned < np.float32(cutoff)] = 0
    with rasterio.open(out_raster, 'w', **meta) as out:
        out.write_band(1, burned)
    print("Zonal Statistics Raster generated")