# Merging all models into one DataFrame for ease of analysis

**Note:** The NEX agvar projections still need to be sorted!

In [1]:
import numpy as np
import pandas as pd

## AgVar

### GMFD

In [11]:
# GMFD: read the historical ag-variable table and store GEOID as a string
# left-padded with zeros to five characters.
gmfd = pd.read_csv(
    "../../data/ACI_output/raw/GMFD/agvar_historical_gmfd.csv"
).assign(GEOID=lambda frame: frame["GEOID"].astype(str).str.zfill(5))

In [12]:
# Preview the first five rows of the raw GMFD table
gmfd.head(5)

Unnamed: 0,GEOID,Year,gdd,egdd,prcp
0,31039,1956,1511.181607,57.201641,0.362867
1,31039,1957,1436.631185,46.470627,0.498077
2,31039,1958,1327.679602,21.887867,0.479633
3,31039,1959,1510.308176,38.535865,0.644508
4,31039,1960,1377.63826,28.172491,0.561391


In [13]:
# Split ag variables: reshape GMFD from one column per ag variable to a single
# "GMFD" value column indexed by (AgVar, GEOID, Year).
agvars = ["gdd", "egdd", "prcp"]

gmfd_parts = []
for agvar in agvars:
    # Drop the other two variables so only this one remains as a value column
    part = gmfd.drop(columns=[other for other in agvars if other != agvar])
    part = part.rename(columns={agvar: "GMFD"})
    part["AgVar"] = agvar
    gmfd_parts.append(part.set_index(["AgVar", "GEOID", "Year"]))

# Join with updated indexing.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
gmfd = pd.concat(gmfd_parts)

In [15]:
# Preview the last five rows of the reshaped GMFD frame
gmfd.tail(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GMFD
AgVar,GEOID,Year,Unnamed: 3_level_1
prcp,30019,2012,0.277723
prcp,30019,2013,0.378371
prcp,30019,2014,0.247598
prcp,30019,2015,0.217586
prcp,30019,2016,0.312571


### NEX

In [2]:
# File names of the NEX ag-variable CSVs, one per model:
# nex_hind holds the historical hindcasts, nex_proj the rcp85 files.
_nex_agvar_models = [
    "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
    "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
    "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
    "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
    "inmcm4",
]

nex_hind = ["agvar_historical_r1i1p1_" + m + ".csv" for m in _nex_agvar_models]

nex_proj = ["agvar_rcp85_r1i1p1_" + m + ".csv" for m in _nex_agvar_models]

In [102]:
# HISTORICAL
# Get all NEX models: build one wide frame indexed by (AgVar, GEOID, Year)
# with one value column per model.
agvars = ["gdd", "egdd", "prcp"]

nex = None
for name in nex_hind:
    # Model name is the file name stripped of its prefix and extension
    model = name.replace("historical_r1i1p1_","").replace(".csv","").replace("agvar_","")

    # Read in product
    data = pd.read_csv("../../data/ACI_output/raw/nex/" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)

    # Split & join: one long frame per ag variable, stacked on top of each other
    parts = []
    for agvar in agvars:
        part = data.drop(columns=[other for other in agvars if other != agvar])
        part = part.rename(columns={agvar: model})
        part["AgVar"] = agvar
        parts.append(part.set_index(["AgVar", "GEOID", "Year"]))
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    temp = pd.concat(parts)

    # The first model seeds the result; every later model is merged onto it
    if nex is None:
        nex = temp
        continue

    # Do the merge
    print("Now merging... " + model)
    nex = pd.merge(nex, temp, on = ["AgVar", "GEOID", "Year"], how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [103]:
# Add ensemble mean: row-wise average over all columns currently in the frame
nex["ensemble_mean"] = nex.mean(axis="columns")

In [104]:
# Preview the first five rows of the merged NEX frame
nex.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,1950,2109.770592,2174.381798,2183.688694,2143.70477,2236.25027,2161.552775,2206.542597,2304.698882,2144.184945,2203.868464,...,2163.953122,2354.833273,2241.253801,2160.393831,2233.827017,2233.665415,2266.95628,2202.766778,2195.853563,2208.046434
gdd,1001,1951,2079.099594,2349.450045,2200.962649,2261.496891,2169.984926,2330.687487,2241.795279,2250.477755,2220.229067,2272.217969,...,2243.69976,2266.518324,2238.01953,2214.386724,2023.149438,2133.301591,2305.631215,2222.027783,2158.598503,2218.417462
gdd,1001,1952,2208.289702,2150.458015,2106.791066,2122.460259,2308.626567,2191.618086,2268.738175,2279.947845,2173.38348,2205.599153,...,2248.739139,2291.93998,2194.144646,2270.338141,2076.902692,2338.905386,2072.083338,2262.025682,2149.442342,2205.837059
gdd,1001,1953,2173.759404,2175.372819,2161.596638,2115.286115,2189.850886,2268.893735,2189.080091,2342.238382,2207.901947,2214.32942,...,2264.593954,2265.379467,2202.93606,2137.68463,2166.971385,2149.617036,2139.303773,2230.215083,2291.530978,2205.359658
gdd,1001,1954,2236.753594,2242.716342,2171.605271,2091.608539,2330.817915,2359.93573,2261.907654,2223.339429,2114.466966,2254.202858,...,2195.122728,2219.665457,2183.627715,2209.710871,2164.821387,2279.759508,2121.81937,2230.023707,2053.605975,2205.199214


In [105]:
# Merge NEX with GMFD: move both indexes back to columns and outer-join on the
# shared keys, so rows found in only one source are kept
nex_all = nex.reset_index().merge(
    gmfd.reset_index(),
    how="outer",
    on=["AgVar", "GEOID", "Year"],
)

In [107]:
# Preview the first five rows of the NEX + GMFD merge
nex_all.head(5)

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
0,gdd,1001,1950,2109.770592,2174.381798,2183.688694,2143.70477,2236.25027,2161.552775,2206.542597,...,2354.833273,2241.253801,2160.393831,2233.827017,2233.665415,2266.95628,2202.766778,2195.853563,2208.046434,
1,gdd,1001,1951,2079.099594,2349.450045,2200.962649,2261.496891,2169.984926,2330.687487,2241.795279,...,2266.518324,2238.01953,2214.386724,2023.149438,2133.301591,2305.631215,2222.027783,2158.598503,2218.417462,
2,gdd,1001,1952,2208.289702,2150.458015,2106.791066,2122.460259,2308.626567,2191.618086,2268.738175,...,2291.93998,2194.144646,2270.338141,2076.902692,2338.905386,2072.083338,2262.025682,2149.442342,2205.837059,
3,gdd,1001,1953,2173.759404,2175.372819,2161.596638,2115.286115,2189.850886,2268.893735,2189.080091,...,2265.379467,2202.93606,2137.68463,2166.971385,2149.617036,2139.303773,2230.215083,2291.530978,2205.359658,
4,gdd,1001,1954,2236.753594,2242.716342,2171.605271,2091.608539,2330.817915,2359.93573,2261.907654,...,2219.665457,2183.627715,2209.710871,2164.821387,2279.759508,2121.81937,2230.023707,2053.605975,2205.199214,


In [113]:
# Save the merged historical ag variables, without the positional row index
nex_all.to_csv(path_or_buf="./output/nex_agvar_hist.csv", index=False)

In [18]:
# PROJECTIONS
# Get all NEX models: build one wide frame indexed by (AgVar, GEOID, Year)
# with one value column per model.
agvars = ["gdd", "egdd", "prcp"]

nex = None
for name in nex_proj:
    # Model name is the file name stripped of its prefix and extension
    model = name.replace("rcp85_r1i1p1_","").replace(".csv","").replace("agvar_","")

    # Read in product
    data = pd.read_csv("../../data/ACI_output/raw/nex/" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)

    # Split & join: one long frame per ag variable, stacked on top of each other
    parts = []
    for agvar in agvars:
        part = data.drop(columns=[other for other in agvars if other != agvar])
        part = part.rename(columns={agvar: model})
        part["AgVar"] = agvar
        parts.append(part.set_index(["AgVar", "GEOID", "Year"]))
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    temp = pd.concat(parts)

    # The first model seeds the result; every later model is merged onto it
    if nex is None:
        nex = temp
        continue

    # Do the merge
    print("Now merging... " + model)
    nex = pd.merge(nex, temp, on = ["AgVar", "GEOID", "Year"], how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [19]:
# Add ensemble mean: row-wise average over all columns currently in the frame
nex["ensemble_mean"] = nex.mean(axis="columns")

In [20]:
# Preview the first five rows of the merged NEX projection frame
nex.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,2006,2325.130059,2286.232581,2307.074297,2320.688738,2232.79922,2401.872415,2202.5667,2389.745686,2391.696876,2255.543814,...,2370.165762,2305.173924,2285.980455,2342.086556,2283.531162,2296.103275,2266.373036,2341.669431,2160.662399,2298.629473
gdd,1001,2007,2271.837926,2356.919469,2317.260985,2463.460594,2223.033869,2335.987557,2258.015144,2254.8485,2531.581088,2113.559232,...,2345.031656,2298.06528,2351.098118,2501.815485,2407.822669,2296.341264,2306.522941,2396.513029,2390.112164,2337.802758
gdd,1001,2008,2205.801395,2338.032948,2350.467949,2365.429182,2345.934722,2310.478919,2479.270641,2301.53128,2368.415332,2330.463835,...,2318.314692,2239.043805,2328.599987,2420.261222,2503.052912,2285.294563,2270.581815,2402.81166,2387.046351,2350.637714
gdd,1001,2009,2211.224021,2272.288928,2379.835081,2422.008058,2203.121539,2382.916302,2277.040993,2181.753889,2351.479814,2298.946033,...,2355.662591,2348.152914,2352.936067,2279.470987,2234.687039,2327.734785,2267.761774,2398.466628,2187.344823,2296.156181
gdd,1001,2010,2209.728179,2279.807493,2357.513542,2404.346112,2177.148911,2520.497502,2288.569497,2338.223941,2351.215833,2169.306921,...,2401.735051,2303.815623,2285.770664,2283.069522,2398.363872,2404.276062,2216.382722,2360.163005,2308.715651,2316.828923


In [21]:
# Merge NEX with GMFD: move both indexes back to columns and outer-join on the
# shared keys, so rows found in only one source are kept
nex_all = nex.reset_index().merge(
    gmfd.reset_index(),
    how="outer",
    on=["AgVar", "GEOID", "Year"],
)

In [26]:
# Preview the last five rows of the NEX + GMFD merge
nex_all.tail(5)

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
1353970,prcp,30019,2001,,,,,,,,...,,,,,,,,,,0.289242
1353971,prcp,30019,2002,,,,,,,,...,,,,,,,,,,0.308968
1353972,prcp,30019,2003,,,,,,,,...,,,,,,,,,,0.240231
1353973,prcp,30019,2004,,,,,,,,...,,,,,,,,,,0.282631
1353974,prcp,30019,2005,,,,,,,,...,,,,,,,,,,0.260183


In [27]:
# Save the merged projection ag variables, without the positional row index
nex_all.to_csv(path_or_buf="./output/nex_agvar_proj.csv", index=False)

### CMIP

In [28]:
# Hindcasts & projections of all models: one combined CSV per CMIP model.
# Two files join the model name to the suffix with "_" instead of ".".
_cmip_models = [
    "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
    "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
    "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
    "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
    "inmcm4",
]
_cmip_underscore_models = {"CCSM4", "bcc-csm1-1"}

cmip_all = [
    "agvar_" + m + ("_" if m in _cmip_underscore_models else ".") + "historical+rcp85.csv"
    for m in _cmip_models
]

In [28]:
# Get cmip models: build one wide frame indexed by (AgVar, GEOID, Year)
# with one value column per model.
agvars = ["gdd", "egdd", "prcp"]

cmip = None
for name in cmip_all:
    # Model name is the file name stripped of its prefix, extension and the
    # "historical+rcp85" suffix (which is joined by either "." or "_")
    model = name.replace(".historical+rcp85","").replace(".csv","").replace("agvar_","").replace("_historical+rcp85","")

    # Read in product
    data = pd.read_csv("../../data/ACI_output/raw/cmip/" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)

    # Split & join: one long frame per ag variable, stacked on top of each other
    parts = []
    for agvar in agvars:
        part = data.drop(columns=[other for other in agvars if other != agvar])
        part = part.rename(columns={agvar: model})
        part["AgVar"] = agvar
        parts.append(part.set_index(["AgVar", "GEOID", "Year"]))
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    temp = pd.concat(parts)

    # The first model seeds the result; every later model is merged onto it
    if cmip is None:
        cmip = temp
        continue

    # Do the merge
    print("Now merging... " + model)
    cmip = pd.merge(cmip, temp, on = ["AgVar", "GEOID", "Year"], how = "outer")
    print("Merge complete.")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [93]:
# Add ensemble mean: row-wise average over all columns currently in the frame
cmip["ensemble_mean"] = cmip.mean(axis="columns")

In [110]:
# Preview the first five rows of the merged CMIP frame
cmip.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,1950,2267.175832,2287.555461,2120.185987,2064.444662,2033.532109,1973.663267,2472.745998,1867.806602,1664.26396,1944.34855,...,2338.944396,2665.613566,2358.148552,2052.908103,2219.875546,1811.416517,2058.842476,2303.873728,1716.55905,2122.450209
gdd,1001,1951,2177.585261,2537.017849,2130.017178,2195.435172,1915.095858,2267.903252,2558.112487,1899.49826,1893.855123,1865.913943,...,2379.578363,2579.354271,2360.522876,2107.530068,1897.988467,1693.634247,2052.198305,2359.761689,1636.0425,2144.127468
gdd,1001,1952,2420.979676,2239.258861,2046.755249,2063.649876,2075.135154,1994.597164,2578.596717,1995.655442,1678.066385,1763.861486,...,2417.045808,2590.564064,2306.502782,2244.96515,2049.042541,1893.242987,1843.979126,2413.868131,1668.652587,2130.548734
gdd,1001,1953,2348.566813,2302.861538,2100.558088,2044.20439,1992.914361,2092.60896,2469.529788,2050.906396,1801.231739,1833.24215,...,2443.542263,2573.890973,2311.887288,1992.673818,2144.986851,1709.117958,1939.459152,2378.093016,1833.816947,2134.226665
gdd,1001,1954,2392.682737,2412.767475,2116.639863,2037.716727,2104.555846,2225.417175,2565.672882,1926.30831,1510.14475,2015.395665,...,2355.680809,2472.930624,2305.516223,2010.2166,2151.260526,1904.98298,1903.273252,2375.582297,1578.71718,2129.384851


In [111]:
# Merge CMIP with GMFD: move both indexes back to columns and outer-join on the
# shared keys, so rows found in only one source are kept.
# NOTE(review): this rebinds cmip_all, which held the list of CMIP file names
# until now; re-run the file-list cell before re-running the CMIP loader.
cmip_all = cmip.reset_index().merge(
    gmfd.reset_index(),
    how="outer",
    on=["AgVar", "GEOID", "Year"],
)

In [112]:
# Preview the first five rows of the CMIP + GMFD merge
cmip_all.head(5)

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
0,gdd,1001,1950,2267.175832,2287.555461,2120.185987,2064.444662,2033.532109,1973.663267,2472.745998,...,2665.613566,2358.148552,2052.908103,2219.875546,1811.416517,2058.842476,2303.873728,1716.55905,2122.450209,
1,gdd,1001,1951,2177.585261,2537.017849,2130.017178,2195.435172,1915.095858,2267.903252,2558.112487,...,2579.354271,2360.522876,2107.530068,1897.988467,1693.634247,2052.198305,2359.761689,1636.0425,2144.127468,
2,gdd,1001,1952,2420.979676,2239.258861,2046.755249,2063.649876,2075.135154,1994.597164,2578.596717,...,2590.564064,2306.502782,2244.96515,2049.042541,1893.242987,1843.979126,2413.868131,1668.652587,2130.548734,
3,gdd,1001,1953,2348.566813,2302.861538,2100.558088,2044.20439,1992.914361,2092.60896,2469.529788,...,2573.890973,2311.887288,1992.673818,2144.986851,1709.117958,1939.459152,2378.093016,1833.816947,2134.226665,
4,gdd,1001,1954,2392.682737,2412.767475,2116.639863,2037.716727,2104.555846,2225.417175,2565.672882,...,2472.930624,2305.516223,2010.2166,2151.260526,1904.98298,1903.273252,2375.582297,1578.71718,2129.384851,


In [114]:
# Save the merged CMIP ag variables, without the positional row index
cmip_all.to_csv(path_or_buf="./output/cmip_agvar_all.csv", index=False)

## Yields

### USDA

In [64]:
# Read in USDA data: build a five-character GEOID from the zero-padded state
# (2 chars) and county (3 chars) codes, keep 1980-2005, and keep only the
# "USDA" column (renamed from target_Value), indexed by (GEOID, Year).
usda = (
    pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
    .assign(
        state_fips_code=lambda d: d["state_fips_code"].astype(str).str.zfill(2),
        county_code=lambda d: d["county_code"].astype(str).str.zfill(3),
    )
    .assign(
        GEOID=lambda d: (d["state_fips_code"] + d["county_code"]).astype(str).str.zfill(5)
    )
    .rename(columns={'year': 'Year', 'target_Value': 'USDA'})
    .sort_values(by=['GEOID', 'Year'])
    .set_index(["GEOID", "Year"])
    .query('Year >= 1980 and Year <= 2005')
    .filter(['USDA'])
)

In [65]:
# Preview the first five rows of the USDA frame
usda.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,USDA
GEOID,Year,Unnamed: 2_level_1
1001,1980,-0.765648
1001,1981,-0.968111
1001,1982,0.242908
1001,1983,-0.214999
1001,1984,-0.14906


In [66]:
# Subtract means: remove each county's own mean from its USDA series.
# groupby(...).transform('mean') returns the county mean on the full
# (GEOID, Year) index; it replaces Series.mean(level=...), whose `level`
# argument was removed in pandas 2.0.
usda['USDA'] = usda['USDA'] - usda.groupby(level='GEOID')['USDA'].transform('mean')

In [67]:
# Preview the first five rows after subtracting the county means
usda.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,USDA
GEOID,Year,Unnamed: 2_level_1
1001,1980,-0.62212
1001,1981,-0.824582
1001,1982,0.386437
1001,1983,-0.071471
1001,1984,-0.005532


In [68]:
# Per-county means of the USDA column (a check on the demeaning step).
# groupby replaces DataFrame.mean(level=...), whose `level` argument was
# removed in pandas 2.0; sort=False keeps counties in order of first appearance.
usda.groupby(level='GEOID', sort=False).mean()

Unnamed: 0_level_0,USDA
GEOID,Unnamed: 1_level_1
01001,5.764620e-17
01003,-3.736327e-18
01005,0.000000e+00
01007,-2.643388e-18
01009,-2.001604e-17
...,...
56037,0.000000e+00
56039,0.000000e+00
56041,0.000000e+00
56043,2.455301e-17


In [69]:
# Read in GMFD data: the 1980-2005 yield file, with GEOID zero-padded to five
# characters and (GEOID, Year) as the index.
# NOTE(review): this rebinds gmfd, which the AgVar section above used for the
# GMFD ag-variable frame.
gmfd = (
    pd.read_csv('../../data/ACI_output/final/GMFD/res_yield_80-05_gmfd.csv')
    .assign(GEOID=lambda d: d["GEOID"].astype(str).str.zfill(5))
    .set_index(["GEOID", "Year"])
)

In [70]:
# Preview the first five rows of the GMFD yield frame
gmfd.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,GMFD
GEOID,Year,Unnamed: 2_level_1
1001,1980,-0.447267
1001,1981,-0.325558
1001,1982,0.138165
1001,1983,-0.264391
1001,1984,0.085115


In [71]:
# Per-county means of the GMFD column.
# groupby replaces DataFrame.mean(level=...), whose `level` argument was
# removed in pandas 2.0; sort=False keeps counties in order of first appearance.
gmfd.groupby(level='GEOID', sort=False).mean()

Unnamed: 0_level_0,GMFD
GEOID,Unnamed: 1_level_1
01001,-2.209771e-16
01003,-5.684555e-17
01005,-9.351494e-16
01007,-1.627971e-16
01009,-1.152924e-16
...,...
56035,
56037,
56039,
56041,


### NEX historical

In [38]:
# File names of the NEX historical yield CSVs, one per model
_nex_hist_models = [
    "ACCESS1-0", "BNU-ESM", "CCSM4", "CESM1-BGC", "CNRM-CM5",
    "CSIRO-Mk3-6-0", "CanESM2", "GFDL-CM3", "GFDL-ESM2G", "GFDL-ESM2M",
    "IPSL-CM5A-LR", "IPSL-CM5A-MR", "MIROC-ESM-CHEM", "MIROC-ESM", "MIROC5",
    "MPI-ESM-LR", "MPI-ESM-MR", "MRI-CGCM3", "NorESM1-M", "bcc-csm1-1",
    "inmcm4",
]
nex_hist = ["yield_historical_r1i1p1_" + m + ".csv" for m in _nex_hist_models]

In [40]:
# HISTORICAL or OUT OF SAMPLE
# Get nex models: one "yield" column per model, outer-merged on (GEOID, Year).
nex = None
for name in nex_hist:
    # Model name
    model = name.replace("historical_r1i1p1_","").replace(".csv","").replace("yield_","")

    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/NEX/res_80-05_" + name)
    # Pad GEOID on the full frame before filtering, so no column is ever
    # assigned on a filtered slice (which raises SettingWithCopyWarning)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data[data.Year <= 2005].rename(columns = {"yield" : model})

    # The first model seeds the result; every later model is merged onto it
    if nex is None:
        nex = data
        continue

    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    nex = pd.merge(nex, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(nex.shape))

# Drop NaNs and zeros (they are all at the same location)
nex = nex.dropna()
nex = nex[nex["inmcm4"] != 0]

# Add ensemble mean over all model columns, after moving the keys into the index
nex = nex.set_index(["GEOID", "Year"])
nex["ensemble_mean"] = nex.mean(axis = 1)

Read in: BNU-ESM. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 4)
Read in: CCSM4. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 5)
Read in: CESM1-BGC. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 6)
Read in: CNRM-CM5. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 7)
Read in: CSIRO-Mk3-6-0. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 8)
Read in: CanESM2. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 9)
Read in: GFDL-CM3. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 10)
Read in: GFDL-ESM2G. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 11)
Read in: GFDL-ESM2M. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 12)
Read in: IPSL-CM5A-LR. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 13)
Read in: IPSL-CM5A-MR. Shape: (80990, 3). Merging now...
Merge complete. New 

In [41]:
# Preview the first five rows of the merged NEX yield frame
nex.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1001,1980,0.169515,0.29469,0.074264,0.544695,0.237401,-0.221776,0.242482,0.161173,0.171375,0.275581,...,-0.08972,-0.251858,0.147886,0.505906,-0.261461,0.170337,-0.165047,0.015839,0.333367,0.113723
1001,1981,-0.650255,-0.45957,-0.535573,0.108177,-0.795622,0.305928,0.052353,0.270386,0.597981,-1.059506,...,0.140883,0.073008,-0.070041,0.099308,0.586349,0.121516,0.253145,0.09295,-0.156763,-0.016502
1001,1982,0.044141,0.168013,0.578145,0.131941,0.277596,-0.292247,0.269612,0.407882,-0.329558,-0.319957,...,0.171202,-0.066279,0.303238,0.240003,-0.437391,0.000263,-0.081429,0.231309,0.534565,0.121897
1001,1983,-0.130967,0.141507,0.278859,0.313937,-0.032706,0.712358,-0.14916,0.262759,-0.317201,0.20024,...,0.445797,0.231787,0.094451,0.307045,-0.099489,0.070118,0.223871,0.147447,0.469267,0.205147
1001,1984,0.283384,0.306558,0.348087,-0.103729,-0.438092,-0.434618,0.285097,0.015296,-0.030713,0.577867,...,0.193259,0.460201,0.225564,0.604424,-0.559712,0.227628,0.315318,0.249078,0.322612,0.16063


In [55]:
# Merge NEX with GMFD and USDA on (GEOID, Year); after each outer join, rows
# with a missing value in any column are dropped
nex_all = (
    nex.reset_index()
    .merge(gmfd.reset_index(), how="outer", on=["GEOID", "Year"])
    .dropna()
)
nex_all = nex_all.merge(usda.reset_index(), how="outer", on=["GEOID", "Year"]).dropna()

In [56]:
# Display the merged NEX / GMFD / USDA frame
nex_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,2006,-0.053113,0.156062,0.438843,0.059714,-0.247919,-0.459426,0.265016,0.537092,...,-0.012350,-0.245809,-0.135917,-0.184711,0.031798,0.252011,0.192799,0.085580,-0.314571,-0.144302
1,01001,2007,0.144018,-0.127080,-0.344528,-0.072780,0.125157,0.815145,0.341701,-0.172031,...,0.234608,-0.598292,-0.427020,0.153373,0.139512,-0.054751,-0.154964,0.070835,-0.431354,-0.688478
2,01001,2008,0.417339,-0.098824,-0.035324,-0.620277,-0.060107,0.013049,0.025700,0.085009,...,0.252103,-0.850549,0.195398,-0.215250,-0.084488,0.187836,0.287141,-0.008099,0.086355,0.223311
3,01001,2009,0.377718,0.375751,0.296882,0.613558,0.180394,0.268751,-0.466486,0.655539,...,-0.079749,0.452302,-0.423970,0.014960,0.168191,0.112184,0.453329,0.146798,0.335996,0.000827
4,01001,2010,0.121191,0.277524,0.125041,0.229378,0.252797,-0.295458,0.435369,-0.967191,...,0.028764,-0.154279,-0.366521,0.044791,0.185479,0.385990,-1.196517,0.000523,-0.355994,-0.013974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28703,56043,2010,0.138229,-0.580535,-0.161504,0.043189,0.085195,0.064869,0.063850,0.003248,...,0.039520,-0.119125,-0.043411,0.036326,0.116947,-0.134134,0.082058,-0.020668,0.072592,-0.288145
28704,56043,2011,-0.002342,0.104911,-0.054549,0.058599,-0.093748,-0.025367,-0.227823,0.002817,...,-0.009087,0.031814,0.020008,-0.003082,-0.230813,-0.116304,0.087289,0.001133,-0.056420,-0.179748
28706,56043,2013,-0.088479,0.220787,-0.089386,0.048720,0.088698,0.069626,0.094899,-0.052231,...,-0.036537,-0.002183,0.035160,-0.031930,0.008836,0.075832,0.088958,0.023697,0.051299,-0.060797
28707,56043,2014,0.061982,0.161090,0.131026,0.105301,-0.134572,0.057959,0.066059,-0.049659,...,0.026226,0.029115,-0.031277,-0.147707,0.046899,0.010876,0.083590,0.002652,0.105522,-0.077102


In [57]:
# Write the merged NEX/GMFD/USDA table to the 80-05 output file.
# index = False keeps the leftover merge row numbers out of the CSV.
nex_all.to_csv(path_or_buf = './output/nex_yield_80-05.csv', index = False)

## NEX projection

In [46]:
# File names of the rcp85 / r1i1p1 yield output, one per NEX model
nex_proj = ["yield_rcp85_r1i1p1_" + model_id + ".csv" for model_id in (
    "ACCESS1-0",
    "BNU-ESM",
    "CCSM4",
    "CESM1-BGC",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-CM3",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "MPI-ESM-LR",
    "MPI-ESM-MR",
    "MRI-CGCM3",
    "NorESM1-M",
    "bcc-csm1-1",
    "inmcm4",
)]

In [51]:
# Build one wide frame of NEX yields: one column per model, keyed on GEOID/Year
nex = None
for name in nex_proj:
    # Read in product and left-pad GEOID to 5 characters
    data = pd.read_csv("../../data/ACI_output/final/NEX/res_06-16_" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Model name = file name without the run tag, extension and prefix
    model = name.replace("rcp85_r1i1p1_","").replace(".csv","").replace("yield_","")
    data = data.rename(columns = {"yield" : model})
    if nex is None:
        # The first model seeds the frame; there is nothing to merge yet
        nex = data
        continue
    # Outer-join this model onto the models collected so far
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    nex = nex.merge(data, how = "outer", on = ["GEOID", "Year"])
    print("Merge complete. New shape: " + str(nex.shape))

# Discard rows with a missing value, then rows where inmcm4 is exactly zero
# (original note: the NaNs and zeros are all at the same location)
nex = nex.dropna()
nex = nex[nex.inmcm4 != 0].set_index(["GEOID", "Year"])

# Row-wise mean over all model columns
nex["ensemble_mean"] = nex.mean(axis = 1)

Read in: BNU-ESM. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 4)
Read in: CCSM4. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 5)
Read in: CESM1-BGC. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 6)
Read in: CNRM-CM5. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 7)
Read in: CSIRO-Mk3-6-0. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 8)
Read in: CanESM2. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 9)
Read in: GFDL-CM3. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 10)
Read in: GFDL-ESM2G. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 11)
Read in: GFDL-ESM2M. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 12)
Read in: IPSL-CM5A-LR. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 13)
Read in: IPSL-CM5A-MR. Shape: (34265, 3). Merging now...
Merge complete. New 

In [59]:
# Last rows of the NEX model frame, including the ensemble_mean column
nex.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
56043,2012,-0.030094,-0.011861,0.005088,0.038046,0.112345,0.065459,0.145253,-0.008512,0.067861,0.135689,...,-0.089574,0.000507,-0.138731,0.105174,0.043936,0.046092,-0.013143,0.064958,-0.093638,0.025672
56043,2013,-0.088479,0.220787,-0.089386,0.04872,0.088698,0.069626,0.094899,-0.052231,-0.096395,0.091183,...,0.005417,0.035831,-0.036537,-0.002183,0.03516,-0.03193,0.008836,0.075832,0.088958,0.023697
56043,2014,0.061982,0.16109,0.131026,0.105301,-0.134572,0.057959,0.066059,-0.049659,0.052519,-0.184726,...,-0.161211,0.024552,0.026226,0.029115,-0.031277,-0.147707,0.046899,0.010876,0.08359,0.002652
56043,2015,-0.028352,0.00529,0.037863,0.063627,0.017988,0.049355,0.043831,-0.007831,0.091429,-0.938642,...,0.002868,0.0025,0.089227,-0.111367,0.000981,0.103408,-0.111197,0.004755,0.035811,-0.034597
56043,2016,-0.070023,0.119976,-0.025826,0.033114,0.035794,-0.147551,0.054605,0.038563,-0.15507,0.00165,...,0.083237,-0.099549,0.014602,-0.059926,-0.030653,0.036145,-0.005492,-0.016063,-0.025544,-0.006345


In [None]:
# Merge NEX with GMFD and USDA ###################### OUT OF SAMPLE ONLY ################
# USDA
usda = pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
# GEOID = 2-character state code + 3-character county code (always >= 5 characters,
# so no further zero-padding is needed)
usda["GEOID"] = usda["state_fips_code"].astype(str).str.zfill(2) + usda["county_code"].astype(str).str.zfill(3)
usda = usda.rename(columns = {'year' : 'Year', 'target_Value' : 'USDA'})
usda = usda.sort_values(by = ['GEOID', 'Year']).set_index(["GEOID", "Year"])
# Keep 2006-2016 and only the USDA column
usda = usda.query('Year >= 2006 and Year <= 2016').filter(['USDA'])
# Subtract each GEOID's mean over the retained years.
# groupby(level = ...).transform('mean') replaces Series.mean(level = ...),
# which was deprecated and is not accepted by pandas >= 2.0.
usda = usda.assign(USDA = usda['USDA'] - usda.groupby(level = 'GEOID')['USDA'].transform('mean'))
# GMFD
# Read in GMFD data
gmfd = pd.read_csv('../../data/ACI_output/final/GMFD/res_yield_06-16_gmfd.csv')
gmfd["GEOID"] = gmfd["GEOID"].astype(str).str.zfill(5)
gmfd = gmfd.set_index(["GEOID", "Year"])

# Outer join followed by dropna(): only rows with a value in every column are kept
nex_all = pd.merge(nex.reset_index(), gmfd.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()
nex_all = pd.merge(nex_all, usda.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()

In [60]:
# First rows of nex_all
nex_all.head()

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,1001,2006,-0.053113,0.156062,0.438843,0.059714,-0.247919,-0.459426,0.265016,0.537092,...,-0.01235,-0.245809,-0.135917,-0.184711,0.031798,0.252011,0.192799,0.08558,-0.314571,-0.144302
1,1001,2007,0.144018,-0.12708,-0.344528,-0.07278,0.125157,0.815145,0.341701,-0.172031,...,0.234608,-0.598292,-0.42702,0.153373,0.139512,-0.054751,-0.154964,0.070835,-0.431354,-0.688478
2,1001,2008,0.417339,-0.098824,-0.035324,-0.620277,-0.060107,0.013049,0.0257,0.085009,...,0.252103,-0.850549,0.195398,-0.21525,-0.084488,0.187836,0.287141,-0.008099,0.086355,0.223311
3,1001,2009,0.377718,0.375751,0.296882,0.613558,0.180394,0.268751,-0.466486,0.655539,...,-0.079749,0.452302,-0.42397,0.01496,0.168191,0.112184,0.453329,0.146798,0.335996,0.000827
4,1001,2010,0.121191,0.277524,0.125041,0.229378,0.252797,-0.295458,0.435369,-0.967191,...,0.028764,-0.154279,-0.366521,0.044791,0.185479,0.38599,-1.196517,0.000523,-0.355994,-0.013974


In [50]:
# Write the out-of-sample table. index = False matches the 80-05 export and keeps
# the leftover merge row numbers from becoming an unnamed first column.
# NOTE(review): any reader that relied on that unnamed index column must be updated.
nex_all.to_csv('./output/nex_yield_06-16.csv', index = False)

### CMIP

In [61]:
# File names of the historical+rcp85 yield output, one per CMIP model.
# CCSM4 and bcc-csm1-1 use "_" instead of "." before the scenario tag.
cmip_names = [
    "yield_" + model_id + ("_" if model_id in ("CCSM4", "bcc-csm1-1") else ".") + "historical+rcp85.csv"
    for model_id in (
        "ACCESS1-0",
        "BNU-ESM",
        "CCSM4",
        "CESM1-BGC",
        "CNRM-CM5",
        "CSIRO-Mk3-6-0",
        "CanESM2",
        "GFDL-CM3",
        "GFDL-ESM2G",
        "GFDL-ESM2M",
        "IPSL-CM5A-LR",
        "IPSL-CM5A-MR",
        "MIROC-ESM-CHEM",
        "MIROC-ESM",
        "MIROC5",
        "MPI-ESM-LR",
        "MPI-ESM-MR",
        "MRI-CGCM3",
        "NorESM1-M",
        "bcc-csm1-1",
        "inmcm4",
    )
]

In [62]:
# HISTORICAL
# Build one wide frame of CMIP yields for 1980-2005: one column per model,
# keyed on GEOID/Year
cmip = None
for name in cmip_names:
    # Read in product, left-pad GEOID to 5 characters, keep 1980-2005
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_80-05_" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data.query('Year >= 1980 and Year <= 2005')
    # Model name: strip the scenario tag (both "." and "_" separators), the
    # extension and the prefix. Every file, including the first, goes through
    # this one parser so the column names do not depend on list order.
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    data = data.rename(columns = {"yield" : model})
    if cmip is None:
        # The first model seeds the frame; there is nothing to merge yet
        cmip = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (original note: they are all at the same location)
cmip = cmip.dropna()
cmip = cmip[cmip.inmcm4 != 0]

# Add ensemble mean (row-wise mean over all model columns)
cmip = cmip.set_index(["GEOID", "Year"])
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 4)
Read in: CCSM4. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 5)
Read in: CESM1-BGC. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 6)
Read in: CNRM-CM5. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 7)
Read in: CSIRO-Mk3-6-0. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 8)
Read in: CanESM2. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 9)
Read in: GFDL-CM3. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 10)
Read in: GFDL-ESM2G. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 11)
Read in: GFDL-ESM2M. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 12)
Read in: IPSL-CM5A-LR. Shape: (80990, 3). Merging now...
Merge complete. New shape: (80990, 13)
Read in: IPSL-CM5A-MR. Shape: (80990, 3). Merging now...
Merge complete. New 

In [63]:
# Preview the historical CMIP frame (pandas elides the middle rows and columns)
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,1980,0.217945,0.324584,0.036494,0.415429,0.133706,-0.272076,0.638334,-0.068478,0.154726,0.595946,...,0.016617,-0.265607,0.180868,0.384607,-0.058644,0.166164,-0.274501,0.074272,0.222134,0.121953
01001,1981,-0.767354,-0.543273,-0.469901,0.003948,-0.291829,0.447149,0.140263,0.429684,0.286003,-1.325654,...,0.384566,0.199951,-0.168930,-0.089842,0.416293,0.361969,0.176834,0.301758,-0.291715,-0.013740
01001,1982,0.086228,0.242365,0.441585,-0.000753,0.316431,-0.327199,0.743273,0.179636,-0.272347,-0.558281,...,0.236981,-0.099065,0.310883,-0.083526,-0.350728,-0.040281,-0.034617,0.470086,0.228752,0.079411
01001,1983,-0.149157,0.132369,0.097314,0.175996,-0.060151,0.661689,-0.256687,0.112449,-0.437265,0.122067,...,0.383564,0.378099,0.112667,-0.035934,-0.182292,0.074087,0.122803,0.379216,0.156546,0.118308
01001,1984,0.395735,0.371018,0.170113,-0.019915,-0.635229,-0.558020,0.659460,-0.308308,-0.067164,0.380060,...,0.269042,0.760097,0.253481,0.481925,-0.386453,0.041066,0.121272,0.480057,0.277460,0.136390
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56043,2001,0.763966,0.993298,0.101507,0.701224,0.735057,1.010449,0.025573,1.958123,0.782047,-0.620176,...,-0.756403,-6.594317,1.054594,0.664738,0.099830,-3.346852,-1.457950,-0.396248,-1.501729,-0.251092
56043,2002,1.648770,2.110328,-2.933634,1.408807,0.514469,0.080320,0.002048,-1.702206,1.442609,1.150252,...,2.539150,1.984466,1.286004,1.054566,0.270938,1.395649,1.779655,-2.877421,-1.040659,0.467491
56043,2003,0.712387,-0.273865,0.759676,-0.638331,-0.470466,1.446708,0.254933,-0.247904,1.531078,0.924398,...,-5.869018,0.585746,-0.967000,-1.284929,-0.474088,2.148209,-0.481415,-0.316611,2.023430,-0.013640
56043,2004,1.192312,-0.510287,-0.640062,-0.360104,1.295999,0.694885,-0.396246,1.618299,1.711336,1.632815,...,1.794624,2.248427,-4.644727,-0.007257,1.590469,1.861803,-3.458497,-1.339775,1.029934,0.278442


In [72]:
# Merge the CMIP model frame with GMFD and USDA on (GEOID, Year).
# Each outer join is followed by dropna(), so only rows that have a value
# in every column are kept.
cmip_all = (
    cmip.reset_index()
    .merge(gmfd.reset_index(), how = 'outer', on = ["GEOID", "Year"])
    .dropna()
    .merge(usda.reset_index(), how = 'outer', on = ["GEOID", "Year"])
    .dropna()
)

In [73]:
# Preview the merged CMIP/GMFD/USDA table
cmip_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,1980,0.217945,0.324584,0.036494,0.415429,0.133706,-0.272076,0.638334,-0.068478,...,0.180868,0.384607,-0.058644,0.166164,-0.274501,0.074272,0.222134,0.121953,-0.447267,-0.622120
1,01001,1981,-0.767354,-0.543273,-0.469901,0.003948,-0.291829,0.447149,0.140263,0.429684,...,-0.168930,-0.089842,0.416293,0.361969,0.176834,0.301758,-0.291715,-0.013740,-0.325558,-0.824582
2,01001,1982,0.086228,0.242365,0.441585,-0.000753,0.316431,-0.327199,0.743273,0.179636,...,0.310883,-0.083526,-0.350728,-0.040281,-0.034617,0.470086,0.228752,0.079411,0.138165,0.386437
3,01001,1983,-0.149157,0.132369,0.097314,0.175996,-0.060151,0.661689,-0.256687,0.112449,...,0.112667,-0.035934,-0.182292,0.074087,0.122803,0.379216,0.156546,0.118308,-0.264391,-0.071471
4,01001,1984,0.395735,0.371018,0.170113,-0.019915,-0.635229,-0.558020,0.659460,-0.308308,...,0.253481,0.481925,-0.386453,0.041066,0.121272,0.480057,0.277460,0.136390,0.085115,-0.005532
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67855,56043,2001,0.763966,0.993298,0.101507,0.701224,0.735057,1.010449,0.025573,1.958123,...,1.054594,0.664738,0.099830,-3.346852,-1.457950,-0.396248,-1.501729,-0.251092,-0.050744,-0.213286
67856,56043,2002,1.648770,2.110328,-2.933634,1.408807,0.514469,0.080320,0.002048,-1.702206,...,1.286004,1.054566,0.270938,1.395649,1.779655,-2.877421,-1.040659,0.467491,-0.015502,0.028511
67857,56043,2003,0.712387,-0.273865,0.759676,-0.638331,-0.470466,1.446708,0.254933,-0.247904,...,-0.967000,-1.284929,-0.474088,2.148209,-0.481415,-0.316611,2.023430,-0.013640,-0.096664,-0.114598
67858,56043,2004,1.192312,-0.510287,-0.640062,-0.360104,1.295999,0.694885,-0.396246,1.618299,...,-4.644727,-0.007257,1.590469,1.861803,-3.458497,-1.339775,1.029934,0.278442,0.109517,0.666985


In [74]:
# Write the merged CMIP/GMFD/USDA table to the 80-05 output file.
# index = False keeps the leftover merge row numbers out of the CSV.
cmip_all.to_csv(path_or_buf = './output/cmip_yield_80-05.csv', index = False)

In [76]:
# OUT OF SAMPLE
# Build one wide frame of CMIP yields from the res_06-16 files: one column per
# model, keyed on GEOID/Year
cmip = None
for name in cmip_names:
    # Read in product and left-pad GEOID to 5 characters
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_06-16_" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Model name: strip the scenario tag (both "." and "_" separators), the
    # extension and the prefix. Every file, including the first, goes through
    # this one parser so the column names do not depend on list order.
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    data = data.rename(columns = {"yield" : model})
    if cmip is None:
        # The first model seeds the frame; there is nothing to merge yet
        cmip = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (original note: they are all at the same location)
cmip = cmip.dropna()
cmip = cmip[cmip.inmcm4 != 0]

# Add ensemble mean (row-wise mean over all model columns)
cmip = cmip.set_index(["GEOID", "Year"])
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 4)
Read in: CCSM4. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 5)
Read in: CESM1-BGC. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 6)
Read in: CNRM-CM5. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 7)
Read in: CSIRO-Mk3-6-0. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 8)
Read in: CanESM2. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 9)
Read in: GFDL-CM3. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 10)
Read in: GFDL-ESM2G. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 11)
Read in: GFDL-ESM2M. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 12)
Read in: IPSL-CM5A-LR. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 13)
Read in: IPSL-CM5A-MR. Shape: (34265, 3). Merging now...
Merge complete. New 

In [77]:
# Preview the out-of-sample CMIP frame (pandas elides the middle rows and columns)
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,2006,-0.166261,0.054965,0.318238,-0.010385,-0.281281,-0.673266,0.689906,0.433933,0.851789,-0.159573,...,0.451918,0.514090,-0.021164,-0.074718,-0.234269,-0.056300,-0.045703,0.424391,0.158848,0.088665
01001,2007,0.149681,-0.084939,-0.262010,-0.048662,0.145525,0.905726,0.690508,-0.261747,-0.195390,0.491978,...,0.218238,0.558931,0.296499,0.022456,-0.226489,0.096722,0.072402,-0.221209,0.193690,0.139788
01001,2008,0.555981,-0.126418,-0.047406,-0.423698,-0.274697,0.010019,-0.139468,-0.167089,-0.162811,-0.623516,...,-0.022721,0.378831,0.221428,-0.473283,0.427181,-0.090674,-0.111956,0.265051,0.282632,0.006866
01001,2009,0.450164,0.384105,0.185412,0.434624,0.112908,0.176427,-0.795566,0.400402,-0.310905,0.890685,...,-0.012197,-0.052920,-0.043253,0.242985,-0.494374,0.003950,-0.051007,0.152959,0.061562,0.074545
01001,2010,0.092152,0.318262,0.049654,0.199761,0.226894,-0.345209,0.965881,-0.667287,0.367764,0.630046,...,-0.067123,0.219761,0.062808,-0.242050,-0.328773,0.147282,-0.056340,0.997905,-0.657596,0.091956
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56043,2012,-1.531797,4.129492,0.118259,-1.046773,0.064211,-1.391595,0.676637,-1.716786,0.564065,2.321798,...,-3.047044,2.962456,-1.202780,1.023695,0.215478,0.146187,2.631749,-0.670269,3.016461,0.354579
56043,2013,0.489204,1.558996,2.063664,-0.147170,1.124332,-1.910181,0.848237,-0.031065,-2.215808,2.420032,...,2.377435,-0.338949,-0.953284,-0.317955,0.625675,1.536323,-1.867634,0.721479,-1.226560,0.181859
56043,2014,-0.582153,1.687540,0.261073,-0.071948,-3.087456,0.098347,0.489475,1.764373,0.878358,-5.114542,...,-3.520191,-2.466074,0.832920,0.614980,-0.730840,-2.234380,0.996908,1.376743,-1.648757,-0.488855
56043,2015,-0.215180,1.793284,-2.043582,-1.708233,0.972637,-0.200621,0.081433,0.291430,-0.696742,-10.186386,...,2.098400,1.537279,0.838387,-2.361951,0.167702,0.369260,-1.143659,-1.524591,0.430073,-0.550652


In [78]:
# Merge CMIP with GMFD and USDA ###################### OUT OF SAMPLE ONLY ################
# USDA
usda = pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
# GEOID = 2-character state code + 3-character county code (always >= 5 characters,
# so no further zero-padding is needed)
usda["GEOID"] = usda["state_fips_code"].astype(str).str.zfill(2) + usda["county_code"].astype(str).str.zfill(3)
usda = usda.rename(columns = {'year' : 'Year', 'target_Value' : 'USDA'})
usda = usda.sort_values(by = ['GEOID', 'Year']).set_index(["GEOID", "Year"])
# Keep 2006-2016 and only the USDA column
usda = usda.query('Year >= 2006 and Year <= 2016').filter(['USDA'])
# Subtract each GEOID's mean over the retained years.
# groupby(level = ...).transform('mean') replaces Series.mean(level = ...),
# which was deprecated and is not accepted by pandas >= 2.0.
usda = usda.assign(USDA = usda['USDA'] - usda.groupby(level = 'GEOID')['USDA'].transform('mean'))
# GMFD
# Read in GMFD data
gmfd = pd.read_csv('../../data/ACI_output/final/GMFD/res_yield_06-16_gmfd.csv')
gmfd["GEOID"] = gmfd["GEOID"].astype(str).str.zfill(5)
gmfd = gmfd.set_index(["GEOID", "Year"])

# Outer join followed by dropna(): only rows with a value in every column are kept
cmip_all = pd.merge(cmip.reset_index(), gmfd.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()
cmip_all = pd.merge(cmip_all, usda.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()

In [79]:
# Preview the merged out-of-sample CMIP/GMFD/USDA table
cmip_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,2006,-0.166261,0.054965,0.318238,-0.010385,-0.281281,-0.673266,0.689906,0.433933,...,-0.021164,-0.074718,-0.234269,-0.056300,-0.045703,0.424391,0.158848,0.088665,-0.314571,-0.144302
1,01001,2007,0.149681,-0.084939,-0.262010,-0.048662,0.145525,0.905726,0.690508,-0.261747,...,0.296499,0.022456,-0.226489,0.096722,0.072402,-0.221209,0.193690,0.139788,-0.431354,-0.688478
2,01001,2008,0.555981,-0.126418,-0.047406,-0.423698,-0.274697,0.010019,-0.139468,-0.167089,...,0.221428,-0.473283,0.427181,-0.090674,-0.111956,0.265051,0.282632,0.006866,0.086355,0.223311
3,01001,2009,0.450164,0.384105,0.185412,0.434624,0.112908,0.176427,-0.795566,0.400402,...,-0.043253,0.242985,-0.494374,0.003950,-0.051007,0.152959,0.061562,0.074545,0.335996,0.000827
4,01001,2010,0.092152,0.318262,0.049654,0.199761,0.226894,-0.345209,0.965881,-0.667287,...,0.062808,-0.242050,-0.328773,0.147282,-0.056340,0.997905,-0.657596,0.091956,-0.355994,-0.013974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28703,56043,2010,-0.048925,-5.603253,-1.845540,0.047323,0.358271,0.158260,0.518516,-0.014789,...,1.019218,1.856239,0.623909,0.927180,1.204175,-0.604987,1.665107,-0.115252,0.072592,-0.288145
28704,56043,2011,1.085388,2.017115,1.989705,0.568153,1.692425,0.736401,-0.471488,1.464071,...,1.200955,-0.885166,-0.651318,-0.026115,-6.788108,-0.934814,-0.490495,0.263146,-0.056420,-0.179748
28706,56043,2013,0.489204,1.558996,2.063664,-0.147170,1.124332,-1.910181,0.848237,-0.031065,...,-0.953284,-0.317955,0.625675,1.536323,-1.867634,0.721479,-1.226560,0.181859,0.051299,-0.060797
28707,56043,2014,-0.582153,1.687540,0.261073,-0.071948,-3.087456,0.098347,0.489475,1.764373,...,0.832920,0.614980,-0.730840,-2.234380,0.996908,1.376743,-1.648757,-0.488855,0.105522,-0.077102


In [80]:
# Write the out-of-sample table. index = False matches the 80-05 export and keeps
# the leftover merge row numbers from becoming an unnamed first column.
# NOTE(review): any reader that relied on that unnamed index column must be updated.
cmip_all.to_csv('./output/cmip_yield_06-16.csv', index = False)

In [81]:
# PROJECTION
# Build one wide frame of CMIP yields from the res_50-80 files: one column per
# model, keyed on GEOID/Year
cmip = None
for name in cmip_names:
    # Read in product and left-pad GEOID to 5 characters
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_50-80_" + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Model name: strip the scenario tag (both "." and "_" separators), the
    # extension and the prefix. Every file, including the first, goes through
    # this one parser so the column names do not depend on list order.
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    data = data.rename(columns = {"yield" : model})
    if cmip is None:
        # The first model seeds the frame; there is nothing to merge yet
        cmip = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (original note: they are all at the same location)
cmip = cmip.dropna()
cmip = cmip[cmip.inmcm4 != 0]

# Add ensemble mean (row-wise mean over all model columns)
cmip = cmip.set_index(["GEOID", "Year"])
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 4)
Read in: CCSM4. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 5)
Read in: CESM1-BGC. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 6)
Read in: CNRM-CM5. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 7)
Read in: CSIRO-Mk3-6-0. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 8)
Read in: CanESM2. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 9)
Read in: GFDL-CM3. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 10)
Read in: GFDL-ESM2G. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 11)
Read in: GFDL-ESM2M. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 12)
Read in: IPSL-CM5A-LR. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 13)
Read in: IPSL-CM5A-MR. Shape: (96565, 3). Merging now...
Merge complete. New 

In [82]:
# Preview the projection CMIP frame (pandas elides the middle rows and columns)
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,2050,0.197771,-0.071104,0.679381,0.531380,0.874244,0.890010,1.550078,0.070077,-0.037944,1.225413,...,1.897753,0.630610,0.377539,0.657888,0.289063,0.008974,0.372363,1.176112,0.181849,0.648219
01001,2051,1.066147,-0.016735,0.332174,0.521778,-0.612847,1.028171,-0.245521,0.108610,-0.070380,1.314968,...,-0.718128,1.885922,0.119057,0.908674,1.017131,-0.188439,0.305964,0.698121,0.530105,0.422444
01001,2052,-0.081820,0.876375,0.541227,0.662593,-0.190944,0.306384,0.909089,0.468199,-1.860304,-1.098680,...,0.844058,2.013724,-0.253559,0.401463,0.959288,0.154758,0.015359,0.558758,0.439356,0.331810
01001,2053,0.848430,0.166882,-0.271894,0.267681,-0.530708,0.600059,0.255291,-0.628680,0.313991,1.066539,...,0.997543,0.997236,-0.168782,0.055033,-0.331228,-0.059186,0.466292,0.616387,0.571254,0.271909
01001,2054,1.181770,0.100046,0.668311,0.128500,0.725371,-0.997709,1.317887,0.546829,1.364038,-0.765810,...,1.116079,1.094799,-0.150031,0.403240,0.182018,0.033641,0.179364,-1.500560,0.113326,0.335609
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56043,2076,-0.351168,-5.636102,1.393084,0.337570,0.463262,0.202298,-0.229032,2.604159,0.198094,-1.937078,...,-0.884173,1.676794,-1.439069,-0.389913,0.075437,-1.154576,-0.123223,-0.301269,-5.627051,-0.495925
56043,2077,0.299504,-1.745967,1.496727,-2.434344,0.305226,-1.894949,-0.135712,0.603772,1.452860,-0.174276,...,-1.520973,-1.457977,1.618366,-0.456602,-0.047247,-0.396555,2.737530,0.505871,0.169956,-0.077601
56043,2078,-0.278084,0.247961,-4.691947,0.799005,1.719546,1.584409,0.323857,-1.700574,0.321073,-3.140346,...,-2.700966,2.984994,-0.329919,0.692134,-10.582425,-1.120838,0.373581,-1.142274,1.423511,-0.723499
56043,2079,-0.589701,1.574525,-5.719917,0.643797,1.912096,-2.308338,-0.112258,-1.439129,-1.210382,2.752866,...,-1.223998,3.147941,0.622125,1.190926,1.674004,2.960874,3.859712,0.318322,-3.446630,0.199317


In [83]:
# Write the projection frame to the 50-80 output file. The index is written
# on purpose: GEOID and Year live in the index of cmip, not in its columns.
cmip.to_csv(path_or_buf = './output/cmip_yield_50-80.csv', index = True)