# Merging all models into one DataFrame for ease of analysis

**Note:** The NEX agvar projections still need to be sorted out (the `nex_proj` list below is still empty).

In [1]:
import numpy as np
import pandas as pd

## AgVar

### GMFD

In [2]:
# GMFD: load the historical ag-variable table and store GEOID as a
# zero-padded, 5-character string
gmfd = pd.read_csv("../../data/ACI_output/raw/GMFD/agvar_historical_gmfd.csv").assign(
    GEOID = lambda frame: frame["GEOID"].astype(str).str.zfill(5)
)

In [3]:
# Preview the first rows of the raw GMFD table (one column per ag variable)
gmfd.head()

Unnamed: 0,GEOID,Year,gdd,egdd,prcp
0,31039,1956,1511.181607,57.201641,0.362867
1,31039,1957,1436.631185,46.470627,0.498077
2,31039,1958,1327.679602,21.887867,0.479633
3,31039,1959,1510.308176,38.535865,0.644508
4,31039,1960,1377.63826,28.172491,0.561391


In [23]:
# Split ag variables: reshape the wide table (one column per ag variable) into a
# long table indexed by (AgVar, GEOID, Year) with a single "GMFD" value column.
# NOTE: this cell rebinds `gmfd`, so it expects the freshly loaded wide table;
# re-run the load cell first if this cell has to be executed again.
agvars = ["gdd", "egdd", "prcp"]

gmfd_long = []
for agvar in agvars:
    # Keep only the current variable, tag every row with its name, and rename
    # the value column to the product name
    other_agvars = [other for other in agvars if other != agvar]
    gmfd_long.append(
        gmfd.drop(columns = other_agvars)
            .assign(AgVar = agvar)
            .rename(columns = {agvar : "GMFD"})
            .set_index(["AgVar", "GEOID", "Year"])
    )

# Join with updated indexing (pd.concat replaces DataFrame.append, which was
# removed in pandas 2.0); the row order is gdd, then egdd, then prcp
gmfd = pd.concat(gmfd_long)

In [100]:
# Preview the reshaped table: (AgVar, GEOID, Year) index with a single GMFD column
gmfd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GMFD
AgVar,GEOID,Year,Unnamed: 3_level_1
gdd,31039,1956,1511.181607
gdd,31039,1957,1436.631185
gdd,31039,1958,1327.679602
gdd,31039,1959,1510.308176
gdd,31039,1960,1377.63826


### NEX

In [22]:
# Historical hindcasts of all models: one file per NEX model, named
# agvar_historical_r1i1p1_<model>.csv
nex_hind = [
    "agvar_historical_r1i1p1_" + model_name + ".csv"
    for model_name in (
        "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
        "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
        "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
        "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
        "inmcm4",
    )
]

# Projection files are not listed yet
nex_proj = []

In [102]:
# Get all NEX models: read each model's ag-variable file, reshape it to a long
# table indexed by (AgVar, GEOID, Year) with one value column named after the
# model, and outer-merge the models side by side into `nex`.
agvars = ["gdd", "egdd", "prcp"]

nex = None
for name in nex_hind:
    # Read in product and store GEOID as a zero-padded, 5-character string
    data = pd.read_csv("../../data/ACI_output/raw/nex/" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Model name = file name without its fixed prefix and extension
    model = name.replace("historical_r1i1p1_","").replace(".csv","").replace("agvar_","")

    # Split & join: one slice per ag variable, stacked in gdd/egdd/prcp order.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    temp = pd.concat([
        data.drop(columns = [other for other in agvars if other != agvar])
            .assign(AgVar = agvar)
            .rename(columns = {agvar : model})
            .set_index(["AgVar", "GEOID", "Year"])
        for agvar in agvars
    ])

    if nex is None:
        # The first model seeds the merged frame; there is nothing to merge yet
        nex = temp
        continue

    # Do the merge (outer join keeps rows that only some models provide)
    print("Now merging... " + model)
    nex = pd.merge(nex, temp, on = ["AgVar", "GEOID", "Year"], how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [103]:
# Add ensemble mean: average across all columns of each (AgVar, GEOID, Year) row
row_mean = nex.mean(axis = "columns")
nex["ensemble_mean"] = row_mean

In [104]:
# Preview the merged NEX table: one column per model plus ensemble_mean
nex.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,1950,2109.770592,2174.381798,2183.688694,2143.70477,2236.25027,2161.552775,2206.542597,2304.698882,2144.184945,2203.868464,...,2163.953122,2354.833273,2241.253801,2160.393831,2233.827017,2233.665415,2266.95628,2202.766778,2195.853563,2208.046434
gdd,1001,1951,2079.099594,2349.450045,2200.962649,2261.496891,2169.984926,2330.687487,2241.795279,2250.477755,2220.229067,2272.217969,...,2243.69976,2266.518324,2238.01953,2214.386724,2023.149438,2133.301591,2305.631215,2222.027783,2158.598503,2218.417462
gdd,1001,1952,2208.289702,2150.458015,2106.791066,2122.460259,2308.626567,2191.618086,2268.738175,2279.947845,2173.38348,2205.599153,...,2248.739139,2291.93998,2194.144646,2270.338141,2076.902692,2338.905386,2072.083338,2262.025682,2149.442342,2205.837059
gdd,1001,1953,2173.759404,2175.372819,2161.596638,2115.286115,2189.850886,2268.893735,2189.080091,2342.238382,2207.901947,2214.32942,...,2264.593954,2265.379467,2202.93606,2137.68463,2166.971385,2149.617036,2139.303773,2230.215083,2291.530978,2205.359658
gdd,1001,1954,2236.753594,2242.716342,2171.605271,2091.608539,2330.817915,2359.93573,2261.907654,2223.339429,2114.466966,2254.202858,...,2195.122728,2219.665457,2183.627715,2209.710871,2164.821387,2279.759508,2121.81937,2230.023707,2053.605975,2205.199214


In [105]:
# Merge NEX with GMFD: turn the (AgVar, GEOID, Year) index levels back into
# columns on both sides, then outer-join on them so that rows present in only
# one of the two products are kept
merge_keys = ["AgVar", "GEOID", "Year"]
nex_flat = nex.reset_index()
gmfd_flat = gmfd.reset_index()
nex_all = nex_flat.merge(gmfd_flat, on = merge_keys, how = 'outer')

In [107]:
# Preview the combined NEX + GMFD table (GMFD is the last column)
nex_all.head()

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
0,gdd,1001,1950,2109.770592,2174.381798,2183.688694,2143.70477,2236.25027,2161.552775,2206.542597,...,2354.833273,2241.253801,2160.393831,2233.827017,2233.665415,2266.95628,2202.766778,2195.853563,2208.046434,
1,gdd,1001,1951,2079.099594,2349.450045,2200.962649,2261.496891,2169.984926,2330.687487,2241.795279,...,2266.518324,2238.01953,2214.386724,2023.149438,2133.301591,2305.631215,2222.027783,2158.598503,2218.417462,
2,gdd,1001,1952,2208.289702,2150.458015,2106.791066,2122.460259,2308.626567,2191.618086,2268.738175,...,2291.93998,2194.144646,2270.338141,2076.902692,2338.905386,2072.083338,2262.025682,2149.442342,2205.837059,
3,gdd,1001,1953,2173.759404,2175.372819,2161.596638,2115.286115,2189.850886,2268.893735,2189.080091,...,2265.379467,2202.93606,2137.68463,2166.971385,2149.617036,2139.303773,2230.215083,2291.530978,2205.359658,
4,gdd,1001,1954,2236.753594,2242.716342,2171.605271,2091.608539,2330.817915,2359.93573,2261.907654,...,2219.665457,2183.627715,2209.710871,2164.821387,2279.759508,2121.81937,2230.023707,2053.605975,2205.199214,


In [113]:
# Save the combined NEX + GMFD ag variables; index = False leaves the integer
# row index out of the CSV
nex_all.to_csv('./output/nex_agvar_hist.csv', index = False)

### CMIP

In [27]:
# Historical + RCP8.5 ag-variable files of all CMIP models. Two files (CCSM4 and
# bcc-csm1-1) separate the model name from the scenario with "_" instead of "."
underscore_models = ("CCSM4", "bcc-csm1-1")
cmip_all = [
    "agvar_" + model_name + ("_" if model_name in underscore_models else ".") + "historical+rcp85.csv"
    for model_name in (
        "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
        "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
        "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
        "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
        "inmcm4",
    )
]

In [28]:
# Get cmip models: read each model's ag-variable file, reshape it to a long
# table indexed by (AgVar, GEOID, Year) with one value column named after the
# model, and outer-merge the models side by side into `cmip`.
agvars = ["gdd", "egdd", "prcp"]

cmip = None
for name in cmip_all:
    # Read in product and store GEOID as a zero-padded, 5-character string
    data = pd.read_csv("../../data/ACI_output/raw/cmip/" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Model name: strip prefix, extension and scenario suffix. The suffix is
    # joined with "." in most file names and with "_" in a few, so both
    # spellings are removed (for every file, including the first one).
    model = name.replace(".historical+rcp85","").replace(".csv","").replace("agvar_","").replace("_historical+rcp85","")

    # Split & join: one slice per ag variable, stacked in gdd/egdd/prcp order.
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    temp = pd.concat([
        data.drop(columns = [other for other in agvars if other != agvar])
            .assign(AgVar = agvar)
            .rename(columns = {agvar : model})
            .set_index(["AgVar", "GEOID", "Year"])
        for agvar in agvars
    ])

    if cmip is None:
        # The first model seeds the merged frame; there is nothing to merge yet
        cmip = temp
        continue

    # Do the merge (outer join keeps rows that only some models provide)
    print("Now merging... " + model)
    cmip = pd.merge(cmip, temp, on = ["AgVar", "GEOID", "Year"], how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [93]:
# Add ensemble mean: average across all columns of each (AgVar, GEOID, Year) row
row_mean = cmip.mean(axis = "columns")
cmip["ensemble_mean"] = row_mean

In [110]:
# Preview the merged CMIP table: one column per model plus ensemble_mean
cmip.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,1950,2267.175832,2287.555461,2120.185987,2064.444662,2033.532109,1973.663267,2472.745998,1867.806602,1664.26396,1944.34855,...,2338.944396,2665.613566,2358.148552,2052.908103,2219.875546,1811.416517,2058.842476,2303.873728,1716.55905,2122.450209
gdd,1001,1951,2177.585261,2537.017849,2130.017178,2195.435172,1915.095858,2267.903252,2558.112487,1899.49826,1893.855123,1865.913943,...,2379.578363,2579.354271,2360.522876,2107.530068,1897.988467,1693.634247,2052.198305,2359.761689,1636.0425,2144.127468
gdd,1001,1952,2420.979676,2239.258861,2046.755249,2063.649876,2075.135154,1994.597164,2578.596717,1995.655442,1678.066385,1763.861486,...,2417.045808,2590.564064,2306.502782,2244.96515,2049.042541,1893.242987,1843.979126,2413.868131,1668.652587,2130.548734
gdd,1001,1953,2348.566813,2302.861538,2100.558088,2044.20439,1992.914361,2092.60896,2469.529788,2050.906396,1801.231739,1833.24215,...,2443.542263,2573.890973,2311.887288,1992.673818,2144.986851,1709.117958,1939.459152,2378.093016,1833.816947,2134.226665
gdd,1001,1954,2392.682737,2412.767475,2116.639863,2037.716727,2104.555846,2225.417175,2565.672882,1926.30831,1510.14475,2015.395665,...,2355.680809,2472.930624,2305.516223,2010.2166,2151.260526,1904.98298,1903.273252,2375.582297,1578.71718,2129.384851


In [111]:
# Merge CMIP with GMFD: turn the (AgVar, GEOID, Year) index levels back into
# columns on both sides, then outer-join on them so that rows present in only
# one of the two products are kept.
# NOTE: this rebinds `cmip_all`, which held the list of input file names until
# now; the file-list cell must be re-run before the CMIP loading cell can run again.
merge_keys = ["AgVar", "GEOID", "Year"]
cmip_flat = cmip.reset_index()
gmfd_flat = gmfd.reset_index()
cmip_all = cmip_flat.merge(gmfd_flat, on = merge_keys, how = 'outer')

In [112]:
# Preview the combined CMIP + GMFD table (GMFD is the last column)
cmip_all.head()

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
0,gdd,1001,1950,2267.175832,2287.555461,2120.185987,2064.444662,2033.532109,1973.663267,2472.745998,...,2665.613566,2358.148552,2052.908103,2219.875546,1811.416517,2058.842476,2303.873728,1716.55905,2122.450209,
1,gdd,1001,1951,2177.585261,2537.017849,2130.017178,2195.435172,1915.095858,2267.903252,2558.112487,...,2579.354271,2360.522876,2107.530068,1897.988467,1693.634247,2052.198305,2359.761689,1636.0425,2144.127468,
2,gdd,1001,1952,2420.979676,2239.258861,2046.755249,2063.649876,2075.135154,1994.597164,2578.596717,...,2590.564064,2306.502782,2244.96515,2049.042541,1893.242987,1843.979126,2413.868131,1668.652587,2130.548734,
3,gdd,1001,1953,2348.566813,2302.861538,2100.558088,2044.20439,1992.914361,2092.60896,2469.529788,...,2573.890973,2311.887288,1992.673818,2144.986851,1709.117958,1939.459152,2378.093016,1833.816947,2134.226665,
4,gdd,1001,1954,2392.682737,2412.767475,2116.639863,2037.716727,2104.555846,2225.417175,2565.672882,...,2472.930624,2305.516223,2010.2166,2151.260526,1904.98298,1903.273252,2375.582297,1578.71718,2129.384851,


In [114]:
# Save the combined CMIP + GMFD ag variables; index = False leaves the integer
# row index out of the CSV
cmip_all.to_csv('./output/cmip_agvar_all.csv', index = False)

## Yields

### USDA

In [50]:
# Read in USDA data and reduce it to a single "USDA" column indexed by
# (GEOID, Year). GEOID is the 2-character state code followed by the
# 3-character county code, both zero-padded.
usda = (
    pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
    .assign(
        state_fips_code = lambda frame: frame["state_fips_code"].astype(str).str.zfill(2),
        county_code = lambda frame: frame["county_code"].astype(str).str.zfill(3),
    )
    .assign(
        GEOID = lambda frame: (frame["state_fips_code"] + frame["county_code"]).astype(str).str.zfill(5)
    )
    .rename(columns = {'year' : 'Year', 'target_Value' : 'USDA'})
    .sort_values(by = ['GEOID', 'Year'])
    .set_index(["GEOID", "Year"])
    .filter(['USDA'])
)

In [51]:
# Preview the USDA series indexed by (GEOID, Year)
usda.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,USDA
GEOID,Year,Unnamed: 2_level_1
1001,1950,0.051832
1001,1951,-0.266191
1001,1952,-0.683117
1001,1953,0.066759
1001,1954,-0.579166


In [52]:
# Subtract means: remove each GEOID's mean (taken over all of its rows) from its
# USDA values. groupby(...).transform('mean') broadcasts the per-GEOID mean back
# onto every (GEOID, Year) row; it replaces Series.mean(level = ...), which was
# removed in pandas 2.0.
usda['USDA'] = usda['USDA'] - usda['USDA'].groupby(level = 'GEOID').transform('mean')

In [53]:
# Preview the USDA values after subtracting the per-GEOID mean
usda.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,USDA
GEOID,Year,Unnamed: 2_level_1
1001,1950,0.131703
1001,1951,-0.18632
1001,1952,-0.603246
1001,1953,0.14663
1001,1954,-0.499295


In [54]:
# Read in GMFD data (yield file), store GEOID as a zero-padded, 5-character
# string and index the table by (GEOID, Year). This rebinds `gmfd`.
gmfd = (
    pd.read_csv('../../data/ACI_output/final/GMFD/res_yield_historical_gmfd.csv')
    .assign(GEOID = lambda frame: frame["GEOID"].astype(str).str.zfill(5))
    .set_index(["GEOID", "Year"])
)

In [55]:
# Preview the GMFD yield table indexed by (GEOID, Year)
gmfd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,GMFD
GEOID,Year,Unnamed: 2_level_1
31039,1956,-0.118012
31039,1957,-0.049012
31039,1958,0.042209
31039,1959,0.016406
31039,1960,0.035894


### NEX historical

In [56]:
# Historical yield files of all NEX models, named yield_historical_r1i1p1_<model>.csv
nex_hist = [
    "yield_historical_r1i1p1_" + model_name + ".csv"
    for model_name in (
        "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
        "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
        "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
        "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
        "inmcm4",
    )
]

In [57]:
# HISTORICAL
# Get nex models: read every model's yield file, keep years up to 2005 and
# outer-merge them on (GEOID, Year) into one frame with a column per model.
nex = None
for name in nex_hist:
    # Model name = file name without its fixed prefix and extension
    model = name.replace("historical_r1i1p1_","").replace(".csv","").replace("yield_","")
    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/NEX/res_80-05_" + name)
    # Pad GEOID *before* filtering: assigning a column on a filtered slice
    # triggers pandas' SettingWithCopyWarning
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data[data.Year <= 2005].rename(columns = {"yield" : model})

    if nex is None:
        # The first model seeds the merged frame; there is nothing to merge yet
        nex = data
        continue

    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    nex = pd.merge(nex, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(nex.shape))

# Drop NaNs and zeros (they are all at the same location)
nex = nex.dropna()
# set_index returns a new frame, so the column assignment below does not act
# on a filtered slice
nex = nex[nex.inmcm4 != 0].set_index(["GEOID", "Year"])

# Add ensemble mean (average across the model columns of each row)
nex["ensemble_mean"] = nex.mean(axis = 1)

Read in: BNU-ESM. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 4)
Read in: CCSM4. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 5)
Read in: CESM1-BGC. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 6)
Read in: CNRM-CM5. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 7)
Read in: CSIRO-Mk3-6-0. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 8)
Read in: CanESM2. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 9)
Read in: GFDL-CM3. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 10)
Read in: GFDL-ESM2G. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 11)
Read in: GFDL-ESM2M. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 12)
Read in: IPSL-CM5A-LR. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 13)
Read in: IPSL-CM5A-MR. Shape: (80990, 3). Merging now...
Merge complete. New 

In [58]:
# Preview the merged NEX yield table: one column per model plus ensemble_mean
nex.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1001,1980,0.169515,0.29469,0.074264,0.544695,0.237401,-0.221776,0.242482,0.161173,0.171375,0.275581,...,-0.08972,-0.251858,0.147886,0.505906,-0.261461,0.170337,-0.165047,0.015839,0.333367,0.113723
1001,1981,-0.650255,-0.45957,-0.535573,0.108177,-0.795622,0.305928,0.052353,0.270386,0.597981,-1.059506,...,0.140883,0.073008,-0.070041,0.099308,0.586349,0.121516,0.253145,0.09295,-0.156763,-0.016502
1001,1982,0.044141,0.168013,0.578145,0.131941,0.277596,-0.292247,0.269612,0.407882,-0.329558,-0.319957,...,0.171202,-0.066279,0.303238,0.240003,-0.437391,0.000263,-0.081429,0.231309,0.534565,0.121897
1001,1983,-0.130967,0.141507,0.278859,0.313937,-0.032706,0.712358,-0.14916,0.262759,-0.317201,0.20024,...,0.445797,0.231787,0.094451,0.307045,-0.099489,0.070118,0.223871,0.147447,0.469267,0.205147
1001,1984,0.283384,0.306558,0.348087,-0.103729,-0.438092,-0.434618,0.285097,0.015296,-0.030713,0.577867,...,0.193259,0.460201,0.225564,0.604424,-0.559712,0.227628,0.315318,0.249078,0.322612,0.16063


In [59]:
# Merge NEX with GMFD and USDA, restricted to 1980-2005. Outer joins keep rows
# that exist in only one product; dropna(thresh = 5) then keeps only rows with
# at least 5 non-missing values. Both right-hand tables are flattened with
# reset_index() so every merge joins plain GEOID/Year columns, matching the
# CMIP merge further down.
nex_all = pd.merge(nex.reset_index(), gmfd.query('Year <= 2005 and Year >= 1980').reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna(thresh = 5)
nex_all = pd.merge(nex_all, usda.query('Year <= 2005 and Year >= 1980').reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna(thresh = 5)

In [60]:
# Preview the combined NEX + GMFD + USDA yield table
nex_all.head()

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,1001,1980,0.169515,0.29469,0.074264,0.544695,0.237401,-0.221776,0.242482,0.161173,...,0.147886,0.505906,-0.261461,0.170337,-0.165047,0.015839,0.333367,0.113723,-0.419677,-0.685777
1,1001,1981,-0.650255,-0.45957,-0.535573,0.108177,-0.795622,0.305928,0.052353,0.270386,...,-0.070041,0.099308,0.586349,0.121516,0.253145,0.09295,-0.156763,-0.016502,-0.335762,-0.88824
2,1001,1982,0.044141,0.168013,0.578145,0.131941,0.277596,-0.292247,0.269612,0.407882,...,0.303238,0.240003,-0.437391,0.000263,-0.081429,0.231309,0.534565,0.121897,0.053755,0.322779
3,1001,1983,-0.130967,0.141507,0.278859,0.313937,-0.032706,0.712358,-0.14916,0.262759,...,0.094451,0.307045,-0.099489,0.070118,0.223871,0.147447,0.469267,0.205147,-0.342909,-0.135128
4,1001,1984,0.283384,0.306558,0.348087,-0.103729,-0.438092,-0.434618,0.285097,0.015296,...,0.225564,0.604424,-0.559712,0.227628,0.315318,0.249078,0.322612,0.16063,-0.076105,-0.069189


In [61]:
# Save the combined NEX + GMFD + USDA yields; index = False leaves the integer
# row index out of the CSV
nex_all.to_csv('./output/nex_yield_80-05.csv', index = False)

### NEX projection

In [14]:
# RCP8.5 projection yield files of all NEX models, named yield_rcp85_r1i1p1_<model>.csv
nex_proj = [
    "yield_rcp85_r1i1p1_" + model_name + ".csv"
    for model_name in (
        "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
        "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
        "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
        "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
        "inmcm4",
    )
]

In [15]:
# Get nex models
nex = pd.read_csv("../../data/ACI_output/final/NEX/res_" + nex_proj[0])
nex["GEOID"] = nex["GEOID"].astype(str).str.zfill(5)
nex.rename(columns = {"yield" : nex_proj[0].replace("rcp85_r1i1p1_","").replace(".csv","").replace("yield_","")}, inplace = True)

for name in nex_proj[1:]:
    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/NEX/res_" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Model name
    model = name.replace("rcp85_r1i1p1_","").replace(".csv","").replace("yield_","")
    data.rename(columns = {"yield" : model}, inplace = True)
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    nex = pd.merge(nex, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(nex.shape))
    
# Drop NaNs and zeros (they are all at the same location)
nex.dropna(inplace = True)
nex = nex[nex.inmcm4 != 0]

# Add ensemble mean
nex.set_index(["GEOID", "Year"], inplace = True)
nex["ensemble_mean"] = nex.mean(axis = 1)

FileNotFoundError: [Errno 2] File ../../data/ACI_output/final/NEX/res_yield_rcp85_r1i1p1_ACCESS1-0.csv does not exist: '../../data/ACI_output/final/NEX/res_yield_rcp85_r1i1p1_ACCESS1-0.csv'

In [42]:
# Save the NEX projection yields. No index = False here, so the index is
# written to the CSV as well.
# NOTE(review): the recorded run of the cell that builds the projection frame
# ended in a FileNotFoundError, so `nex` may still hold an earlier result when
# this line runs — confirm before trusting the output file.
nex.to_csv('./output/nex_yield_proj.csv')

### CMIP

In [62]:
# Historical + RCP8.5 yield files of all CMIP models. Two files (CCSM4 and
# bcc-csm1-1) separate the model name from the scenario with "_" instead of "."
underscore_models = ("CCSM4", "bcc-csm1-1")
cmip_names = [
    "yield_" + model_name + ("_" if model_name in underscore_models else ".") + "historical+rcp85.csv"
    for model_name in (
        "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
        "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
        "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
        "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
        "inmcm4",
    )
]

In [63]:
# HISTORICAL
# Get cmip models: read every model's yield file, keep 1980-2005 and
# outer-merge them on (GEOID, Year) into one frame with a column per model.
cmip = None
for name in cmip_names:
    # Model name: strip prefix, extension and scenario suffix. The suffix is
    # joined with "." in most file names and with "_" in a few, so both
    # spellings are removed (for every file, including the first one).
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    # Read in product and store GEOID as a zero-padded, 5-character string
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_80-05_" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data.query('Year >= 1980 and Year <= 2005').rename(columns = {"yield" : model})

    if cmip is None:
        # The first model seeds the merged frame; there is nothing to merge yet
        cmip = data
        continue

    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (they are all at the same location)
cmip = cmip.dropna()
# set_index returns a new frame, so the column assignment below does not act
# on a filtered slice (which would trigger pandas' SettingWithCopyWarning)
cmip = cmip[cmip.inmcm4 != 0].set_index(["GEOID", "Year"])

# Add ensemble mean (average across the model columns of each row)
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 4)
Read in: CCSM4. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 5)
Read in: CESM1-BGC. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 6)
Read in: CNRM-CM5. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 7)
Read in: CSIRO-Mk3-6-0. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 8)
Read in: CanESM2. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 9)
Read in: GFDL-CM3. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 10)
Read in: GFDL-ESM2G. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 11)
Read in: GFDL-ESM2M. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 12)
Read in: IPSL-CM5A-LR. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 13)
Read in: IPSL-CM5A-MR. Shape: (80990, 3). Merging now...
Merge complete. New 

In [64]:
# Display the merged CMIP yield table (the rendered output elides the middle rows)
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,1980,0.217945,0.324584,0.036494,0.415429,0.133706,-0.272076,0.638334,-0.068478,0.154726,0.595946,...,0.016617,-0.265607,0.180868,0.384607,-0.058644,0.166164,-0.274501,0.074272,0.222134,0.121953
01001,1981,-0.767354,-0.543273,-0.469901,0.003948,-0.291829,0.447149,0.140263,0.429684,0.286003,-1.325654,...,0.384566,0.199951,-0.168930,-0.089842,0.416293,0.361969,0.176834,0.301758,-0.291715,-0.013740
01001,1982,0.086228,0.242365,0.441585,-0.000753,0.316431,-0.327199,0.743273,0.179636,-0.272347,-0.558281,...,0.236981,-0.099065,0.310883,-0.083526,-0.350728,-0.040281,-0.034617,0.470086,0.228752,0.079411
01001,1983,-0.149157,0.132369,0.097314,0.175996,-0.060151,0.661689,-0.256687,0.112449,-0.437265,0.122067,...,0.383564,0.378099,0.112667,-0.035934,-0.182292,0.074087,0.122803,0.379216,0.156546,0.118308
01001,1984,0.395735,0.371018,0.170113,-0.019915,-0.635229,-0.558020,0.659460,-0.308308,-0.067164,0.380060,...,0.269042,0.760097,0.253481,0.481925,-0.386453,0.041066,0.121272,0.480057,0.277460,0.136390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56043,2001,0.763966,0.993298,0.101507,0.701224,0.735057,1.010449,0.025573,1.958123,0.782047,-0.620176,...,-0.756403,-6.594317,1.054594,0.664738,0.099830,-3.346852,-1.457950,-0.396248,-1.501729,-0.251092
56043,2002,1.648770,2.110328,-2.933634,1.408807,0.514469,0.080320,0.002048,-1.702206,1.442609,1.150252,...,2.539150,1.984466,1.286004,1.054566,0.270938,1.395649,1.779655,-2.877421,-1.040659,0.467491
56043,2003,0.712387,-0.273865,0.759676,-0.638331,-0.470466,1.446708,0.254933,-0.247904,1.531078,0.924398,...,-5.869018,0.585746,-0.967000,-1.284929,-0.474088,2.148209,-0.481415,-0.316611,2.023430,-0.013640
56043,2004,1.192312,-0.510287,-0.640062,-0.360104,1.295999,0.694885,-0.396246,1.618299,1.711336,1.632815,...,1.794624,2.248427,-4.644727,-0.007257,1.590469,1.861803,-3.458497,-1.339775,1.029934,0.278442


In [65]:
# Merge CMIP with GMFD and USDA (note that we only take the overlapping data
# here, which is NOT the full possible range). Outer joins keep rows found in
# only one product; dropna(thresh = 5) then keeps only rows with at least
# 5 non-missing values. This rebinds `cmip_all`.
year_window = 'Year >= 1980 and Year <= 2005'
join_keys = ["GEOID", "Year"]
gmfd_window = gmfd.query(year_window).reset_index()
usda_window = usda.query(year_window).reset_index()

cmip_all = cmip.reset_index().merge(gmfd_window, on = join_keys, how = 'outer').dropna(thresh = 5)
cmip_all = cmip_all.merge(usda_window, on = join_keys, how = 'outer').dropna(thresh = 5)

In [66]:
# Display the combined CMIP + GMFD + USDA yield table (the rendered output elides the middle rows)
cmip_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,1980,0.217945,0.324584,0.036494,0.415429,0.133706,-0.272076,0.638334,-0.068478,...,0.180868,0.384607,-0.058644,0.166164,-0.274501,0.074272,0.222134,0.121953,-0.419677,-0.685777
1,01001,1981,-0.767354,-0.543273,-0.469901,0.003948,-0.291829,0.447149,0.140263,0.429684,...,-0.168930,-0.089842,0.416293,0.361969,0.176834,0.301758,-0.291715,-0.013740,-0.335762,-0.888240
2,01001,1982,0.086228,0.242365,0.441585,-0.000753,0.316431,-0.327199,0.743273,0.179636,...,0.310883,-0.083526,-0.350728,-0.040281,-0.034617,0.470086,0.228752,0.079411,0.053755,0.322779
3,01001,1983,-0.149157,0.132369,0.097314,0.175996,-0.060151,0.661689,-0.256687,0.112449,...,0.112667,-0.035934,-0.182292,0.074087,0.122803,0.379216,0.156546,0.118308,-0.342909,-0.135128
4,01001,1984,0.395735,0.371018,0.170113,-0.019915,-0.635229,-0.558020,0.659460,-0.308308,...,0.253481,0.481925,-0.386453,0.041066,0.121272,0.480057,0.277460,0.136390,-0.076105,-0.069189
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67855,56043,2001,0.763966,0.993298,0.101507,0.701224,0.735057,1.010449,0.025573,1.958123,...,1.054594,0.664738,0.099830,-3.346852,-1.457950,-0.396248,-1.501729,-0.251092,0.105862,-0.153083
67856,56043,2002,1.648770,2.110328,-2.933634,1.408807,0.514469,0.080320,0.002048,-1.702206,...,1.286004,1.054566,0.270938,1.395649,1.779655,-2.877421,-1.040659,0.467491,0.093150,0.088714
67857,56043,2003,0.712387,-0.273865,0.759676,-0.638331,-0.470466,1.446708,0.254933,-0.247904,...,-0.967000,-1.284929,-0.474088,2.148209,-0.481415,-0.316611,2.023430,-0.013640,-0.003285,-0.054395
67858,56043,2004,1.192312,-0.510287,-0.640062,-0.360104,1.295999,0.694885,-0.396246,1.618299,...,-4.644727,-0.007257,1.590469,1.861803,-3.458497,-1.339775,1.029934,0.278442,0.096246,0.727188


In [67]:
# Save the combined CMIP + GMFD + USDA yields; index = False leaves the integer
# row index out of the CSV
cmip_all.to_csv('./output/cmip_yield_80-05.csv', index = False)