# Merging all models into one DataFrame for ease of analysis

In [3]:
import numpy as np
import pandas as pd

## AgVar

### GMFD

In [11]:
# GMFD: load the historical ag-variable table and store GEOID as a string
# left-padded with zeros to 5 characters, so it matches the GEOID keys built
# the same way for the other tables in this notebook.
gmfd_agvar_path = "../../data/ACI_output/raw/GMFD/agvar_historical_gmfd.csv"
gmfd = pd.read_csv(gmfd_agvar_path).assign(
    GEOID=lambda frame: frame["GEOID"].astype(str).str.zfill(5)
)

In [12]:
# Preview the wide GMFD table: one row per (GEOID, Year) with gdd, egdd and
# prcp as separate columns.
gmfd.head()

Unnamed: 0,GEOID,Year,gdd,egdd,prcp
0,31039,1956,1511.181607,57.201641,0.362867
1,31039,1957,1436.631185,46.470627,0.498077
2,31039,1958,1327.679602,21.887867,0.479633
3,31039,1959,1510.308176,38.535865,0.644508
4,31039,1960,1377.63826,28.172491,0.561391


In [13]:
# Split ag variables
# Reshape GMFD from wide (one column per ag variable) to long: each ag variable
# becomes its own slice whose value column is renamed "GMFD" and which is
# indexed by (AgVar, GEOID, Year).
gmfd_agvars = ["gdd", "egdd", "prcp"]
gmfd_parts = []
for agvar in gmfd_agvars:
    # Drop the other ag-variable columns; every remaining column is kept.
    other_agvars = [v for v in gmfd_agvars if v != agvar]
    part = gmfd.drop(columns=other_agvars).rename(columns={agvar: "GMFD"})
    part["AgVar"] = agvar
    gmfd_parts.append(part.set_index(["AgVar", "GEOID", "Year"]))

# Join with updated indexing
# pd.concat stacks the slices in gdd, egdd, prcp order. It replaces the chained
# DataFrame.append calls, which no longer exist in pandas >= 2.0.
gmfd = pd.concat(gmfd_parts)

In [15]:
# Check the reshaped table: rows are indexed by (AgVar, GEOID, Year) and the
# single value column is GMFD.
gmfd.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,GMFD
AgVar,GEOID,Year,Unnamed: 3_level_1
prcp,30019,2012,0.277723
prcp,30019,2013,0.378371
prcp,30019,2014,0.247598
prcp,30019,2015,0.217586
prcp,30019,2016,0.312571


### NEX

In [2]:
# NEX ag-variable file names, one per model, in a fixed model order.
nex_agvar_models = [
    "ACCESS1-0",
    "BNU-ESM",
    "CCSM4",
    "CESM1-BGC",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-CM3",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "MPI-ESM-LR",
    "MPI-ESM-MR",
    "MRI-CGCM3",
    "NorESM1-M",
    "bcc-csm1-1",
    "inmcm4",
]

# Historical hindcasts of all models
nex_hind = [
    "agvar_historical_r1i1p1_" + model_name + ".csv"
    for model_name in nex_agvar_models
]

# RCP8.5 projections of the same models
nex_proj = [
    "agvar_rcp85_r1i1p1_" + model_name + ".csv"
    for model_name in nex_agvar_models
]

In [102]:
# HISTORICAL
# Get all NEX models
# Build one long-format table of the NEX historical ag variables, indexed by
# (AgVar, GEOID, Year), with one value column per model.
nex_raw_dir = "../../data/ACI_output/raw/nex/"
nex_agvars = ["gdd", "egdd", "prcp"]
nex_keys = ["AgVar", "GEOID", "Year"]

nex = None
for name in nex_hind:
    # Model name = file name without the "agvar_historical_r1i1p1_" prefix
    # and the ".csv" suffix.
    model = name.replace("historical_r1i1p1_","").replace(".csv","").replace("agvar_","")

    # Read in product
    data = pd.read_csv(nex_raw_dir + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)

    # Split & join: one slice per ag variable, value column renamed to the
    # model name, stacked in gdd, egdd, prcp order. pd.concat replaces
    # DataFrame.append, which no longer exists in pandas >= 2.0.
    parts = []
    for agvar in nex_agvars:
        other_agvars = [v for v in nex_agvars if v != agvar]
        part = data.drop(columns=other_agvars).rename(columns={agvar: model})
        part["AgVar"] = agvar
        parts.append(part.set_index(nex_keys))
    temp = pd.concat(parts)

    # The first model seeds the table; there is nothing to merge yet.
    if nex is None:
        nex = temp
        continue

    # Do the merge (outer: keep keys that are missing from some models)
    print("Now merging... " + model)
    nex = pd.merge(nex, temp, on = nex_keys, how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [103]:
# Add ensemble mean: row-wise mean over all columns currently in `nex`
# (DataFrame.mean skips NaN by default), stored as a new column.
nex_row_mean = nex.mean(axis = "columns")
nex["ensemble_mean"] = nex_row_mean

In [104]:
# Inspect the merged historical NEX table: one column per model plus
# ensemble_mean, indexed by (AgVar, GEOID, Year).
nex.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,1950,2109.770592,2174.381798,2183.688694,2143.70477,2236.25027,2161.552775,2206.542597,2304.698882,2144.184945,2203.868464,...,2163.953122,2354.833273,2241.253801,2160.393831,2233.827017,2233.665415,2266.95628,2202.766778,2195.853563,2208.046434
gdd,1001,1951,2079.099594,2349.450045,2200.962649,2261.496891,2169.984926,2330.687487,2241.795279,2250.477755,2220.229067,2272.217969,...,2243.69976,2266.518324,2238.01953,2214.386724,2023.149438,2133.301591,2305.631215,2222.027783,2158.598503,2218.417462
gdd,1001,1952,2208.289702,2150.458015,2106.791066,2122.460259,2308.626567,2191.618086,2268.738175,2279.947845,2173.38348,2205.599153,...,2248.739139,2291.93998,2194.144646,2270.338141,2076.902692,2338.905386,2072.083338,2262.025682,2149.442342,2205.837059
gdd,1001,1953,2173.759404,2175.372819,2161.596638,2115.286115,2189.850886,2268.893735,2189.080091,2342.238382,2207.901947,2214.32942,...,2264.593954,2265.379467,2202.93606,2137.68463,2166.971385,2149.617036,2139.303773,2230.215083,2291.530978,2205.359658
gdd,1001,1954,2236.753594,2242.716342,2171.605271,2091.608539,2330.817915,2359.93573,2261.907654,2223.339429,2114.466966,2254.202858,...,2195.122728,2219.665457,2183.627715,2209.710871,2164.821387,2279.759508,2121.81937,2230.023707,2053.605975,2205.199214


In [105]:
# Merge NEX with GMFD
# Both tables are flattened to columns first; the outer join keeps every
# (AgVar, GEOID, Year) key found in either table.
nex_gmfd_keys = ["AgVar", "GEOID", "Year"]
nex_all = nex.reset_index().merge(gmfd.reset_index(), how = 'outer', on = nex_gmfd_keys)

In [107]:
# Inspect the NEX + GMFD merge. Because the join is outer, GMFD is NaN on rows
# whose (AgVar, GEOID, Year) key has no match in the GMFD table.
nex_all.head()

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
0,gdd,1001,1950,2109.770592,2174.381798,2183.688694,2143.70477,2236.25027,2161.552775,2206.542597,...,2354.833273,2241.253801,2160.393831,2233.827017,2233.665415,2266.95628,2202.766778,2195.853563,2208.046434,
1,gdd,1001,1951,2079.099594,2349.450045,2200.962649,2261.496891,2169.984926,2330.687487,2241.795279,...,2266.518324,2238.01953,2214.386724,2023.149438,2133.301591,2305.631215,2222.027783,2158.598503,2218.417462,
2,gdd,1001,1952,2208.289702,2150.458015,2106.791066,2122.460259,2308.626567,2191.618086,2268.738175,...,2291.93998,2194.144646,2270.338141,2076.902692,2338.905386,2072.083338,2262.025682,2149.442342,2205.837059,
3,gdd,1001,1953,2173.759404,2175.372819,2161.596638,2115.286115,2189.850886,2268.893735,2189.080091,...,2265.379467,2202.93606,2137.68463,2166.971385,2149.617036,2139.303773,2230.215083,2291.530978,2205.359658,
4,gdd,1001,1954,2236.753594,2242.716342,2171.605271,2091.608539,2330.817915,2359.93573,2261.907654,...,2219.665457,2183.627715,2209.710871,2164.821387,2279.759508,2121.81937,2230.023707,2053.605975,2205.199214,


In [113]:
# Save the merged historical NEX + GMFD ag-variable table (row index omitted).
nex_agvar_hist_path = './output/nex_agvar_hist.csv'
nex_all.to_csv(nex_agvar_hist_path, index = False)

In [18]:
# PROJECTIONS
# Get all NEX models
# Build one long-format table of the NEX rcp85 ag variables, indexed by
# (AgVar, GEOID, Year), with one value column per model.
nex_raw_dir = "../../data/ACI_output/raw/nex/"
nex_agvars = ["gdd", "egdd", "prcp"]
nex_keys = ["AgVar", "GEOID", "Year"]

nex = None
for name in nex_proj:
    # Model name = file name without the "agvar_rcp85_r1i1p1_" prefix and the
    # ".csv" suffix.
    model = name.replace("rcp85_r1i1p1_","").replace(".csv","").replace("agvar_","")

    # Read in product
    data = pd.read_csv(nex_raw_dir + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)

    # Split & join: one slice per ag variable, value column renamed to the
    # model name, stacked in gdd, egdd, prcp order. pd.concat replaces
    # DataFrame.append, which no longer exists in pandas >= 2.0.
    parts = []
    for agvar in nex_agvars:
        other_agvars = [v for v in nex_agvars if v != agvar]
        part = data.drop(columns=other_agvars).rename(columns={agvar: model})
        part["AgVar"] = agvar
        parts.append(part.set_index(nex_keys))
    temp = pd.concat(parts)

    # The first model seeds the table; there is nothing to merge yet.
    if nex is None:
        nex = temp
        continue

    # Do the merge (outer: keep keys that are missing from some models)
    print("Now merging... " + model)
    nex = pd.merge(nex, temp, on = nex_keys, how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [19]:
# Add ensemble mean: row-wise mean over all columns currently in `nex`
# (DataFrame.mean skips NaN by default), stored as a new column.
nex_row_mean = nex.mean(axis = "columns")
nex["ensemble_mean"] = nex_row_mean

In [20]:
# Inspect the merged NEX projection table: one column per model plus
# ensemble_mean, indexed by (AgVar, GEOID, Year).
nex.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,2006,2325.130059,2286.232581,2307.074297,2320.688738,2232.79922,2401.872415,2202.5667,2389.745686,2391.696876,2255.543814,...,2370.165762,2305.173924,2285.980455,2342.086556,2283.531162,2296.103275,2266.373036,2341.669431,2160.662399,2298.629473
gdd,1001,2007,2271.837926,2356.919469,2317.260985,2463.460594,2223.033869,2335.987557,2258.015144,2254.8485,2531.581088,2113.559232,...,2345.031656,2298.06528,2351.098118,2501.815485,2407.822669,2296.341264,2306.522941,2396.513029,2390.112164,2337.802758
gdd,1001,2008,2205.801395,2338.032948,2350.467949,2365.429182,2345.934722,2310.478919,2479.270641,2301.53128,2368.415332,2330.463835,...,2318.314692,2239.043805,2328.599987,2420.261222,2503.052912,2285.294563,2270.581815,2402.81166,2387.046351,2350.637714
gdd,1001,2009,2211.224021,2272.288928,2379.835081,2422.008058,2203.121539,2382.916302,2277.040993,2181.753889,2351.479814,2298.946033,...,2355.662591,2348.152914,2352.936067,2279.470987,2234.687039,2327.734785,2267.761774,2398.466628,2187.344823,2296.156181
gdd,1001,2010,2209.728179,2279.807493,2357.513542,2404.346112,2177.148911,2520.497502,2288.569497,2338.223941,2351.215833,2169.306921,...,2401.735051,2303.815623,2285.770664,2283.069522,2398.363872,2404.276062,2216.382722,2360.163005,2308.715651,2316.828923


In [21]:
# Merge NEX with GMFD
# Both tables are flattened to columns first; the outer join keeps every
# (AgVar, GEOID, Year) key found in either table.
nex_gmfd_keys = ["AgVar", "GEOID", "Year"]
nex_all = nex.reset_index().merge(gmfd.reset_index(), how = 'outer', on = nex_gmfd_keys)

In [26]:
# Tail of the outer merge: rows whose key exists only in GMFD carry NaN in
# every NEX column.
nex_all.tail()

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
1353970,prcp,30019,2001,,,,,,,,...,,,,,,,,,,0.289242
1353971,prcp,30019,2002,,,,,,,,...,,,,,,,,,,0.308968
1353972,prcp,30019,2003,,,,,,,,...,,,,,,,,,,0.240231
1353973,prcp,30019,2004,,,,,,,,...,,,,,,,,,,0.282631
1353974,prcp,30019,2005,,,,,,,,...,,,,,,,,,,0.260183


In [27]:
# Save the merged NEX projection + GMFD ag-variable table (row index omitted).
nex_agvar_proj_path = './output/nex_agvar_proj.csv'
nex_all.to_csv(nex_agvar_proj_path, index = False)

### CMIP

In [28]:
# Hindcasts & projections of all models
cmip_models = [
    "ACCESS1-0",
    "BNU-ESM",
    "CCSM4",
    "CESM1-BGC",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-CM3",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "MPI-ESM-LR",
    "MPI-ESM-MR",
    "MRI-CGCM3",
    "NorESM1-M",
    "bcc-csm1-1",
    "inmcm4",
]

# These two file names separate the model name from "historical+rcp85" with
# "_" instead of "."; every other file uses ".".
cmip_underscore_models = ("CCSM4", "bcc-csm1-1")

cmip_all = [
    "agvar_"
    + model_name
    + ("_" if model_name in cmip_underscore_models else ".")
    + "historical+rcp85.csv"
    for model_name in cmip_models
]

In [28]:
# Get cmip models
# Build one long-format table of the CMIP ag variables, indexed by
# (AgVar, GEOID, Year), with one value column per model.
# NOTE: `cmip_all` must still be the list of file names here. A later cell
# rebinds `cmip_all` to the merged DataFrame, so re-run the list cell before
# re-running this one.
cmip_raw_dir = "../../data/ACI_output/raw/cmip/"
cmip_agvars = ["gdd", "egdd", "prcp"]
cmip_keys = ["AgVar", "GEOID", "Year"]

cmip = None
for name in cmip_all:
    # Model name = file name without "agvar_", ".csv" and the
    # "historical+rcp85" tag, which is joined with either "." or "_".
    model = name.replace(".historical+rcp85","").replace(".csv","").replace("agvar_","").replace("_historical+rcp85","")

    # Read in product
    data = pd.read_csv(cmip_raw_dir + name)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)

    # Split & join: one slice per ag variable, value column renamed to the
    # model name, stacked in gdd, egdd, prcp order. pd.concat replaces
    # DataFrame.append, which no longer exists in pandas >= 2.0.
    parts = []
    for agvar in cmip_agvars:
        other_agvars = [v for v in cmip_agvars if v != agvar]
        part = data.drop(columns=other_agvars).rename(columns={agvar: model})
        part["AgVar"] = agvar
        parts.append(part.set_index(cmip_keys))
    temp = pd.concat(parts)

    # The first model seeds the table; there is nothing to merge yet.
    if cmip is None:
        cmip = temp
        continue

    # Do the merge (outer: keep keys that are missing from some models)
    print("Now merging... " + model)
    cmip = pd.merge(cmip, temp, on = cmip_keys, how = "outer")
    print("Merge complete.")

Now merging... BNU-ESM
Merge complete.
Now merging... CCSM4
Merge complete.
Now merging... CESM1-BGC
Merge complete.
Now merging... CNRM-CM5
Merge complete.
Now merging... CSIRO-Mk3-6-0
Merge complete.
Now merging... CanESM2
Merge complete.
Now merging... GFDL-CM3
Merge complete.
Now merging... GFDL-ESM2G
Merge complete.
Now merging... GFDL-ESM2M
Merge complete.
Now merging... IPSL-CM5A-LR
Merge complete.
Now merging... IPSL-CM5A-MR
Merge complete.
Now merging... MIROC-ESM-CHEM
Merge complete.
Now merging... MIROC-ESM
Merge complete.
Now merging... MIROC5
Merge complete.
Now merging... MPI-ESM-LR
Merge complete.
Now merging... MPI-ESM-MR
Merge complete.
Now merging... MRI-CGCM3
Merge complete.
Now merging... NorESM1-M
Merge complete.
Now merging... bcc-csm1-1
Merge complete.
Now merging... inmcm4
Merge complete.


In [93]:
# Add ensemble mean: row-wise mean over all columns currently in `cmip`
# (DataFrame.mean skips NaN by default), stored as a new column.
cmip_row_mean = cmip.mean(axis = "columns")
cmip["ensemble_mean"] = cmip_row_mean

In [110]:
# Inspect the merged CMIP table: one column per model plus ensemble_mean,
# indexed by (AgVar, GEOID, Year).
cmip.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
AgVar,GEOID,Year,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
gdd,1001,1950,2267.175832,2287.555461,2120.185987,2064.444662,2033.532109,1973.663267,2472.745998,1867.806602,1664.26396,1944.34855,...,2338.944396,2665.613566,2358.148552,2052.908103,2219.875546,1811.416517,2058.842476,2303.873728,1716.55905,2122.450209
gdd,1001,1951,2177.585261,2537.017849,2130.017178,2195.435172,1915.095858,2267.903252,2558.112487,1899.49826,1893.855123,1865.913943,...,2379.578363,2579.354271,2360.522876,2107.530068,1897.988467,1693.634247,2052.198305,2359.761689,1636.0425,2144.127468
gdd,1001,1952,2420.979676,2239.258861,2046.755249,2063.649876,2075.135154,1994.597164,2578.596717,1995.655442,1678.066385,1763.861486,...,2417.045808,2590.564064,2306.502782,2244.96515,2049.042541,1893.242987,1843.979126,2413.868131,1668.652587,2130.548734
gdd,1001,1953,2348.566813,2302.861538,2100.558088,2044.20439,1992.914361,2092.60896,2469.529788,2050.906396,1801.231739,1833.24215,...,2443.542263,2573.890973,2311.887288,1992.673818,2144.986851,1709.117958,1939.459152,2378.093016,1833.816947,2134.226665
gdd,1001,1954,2392.682737,2412.767475,2116.639863,2037.716727,2104.555846,2225.417175,2565.672882,1926.30831,1510.14475,2015.395665,...,2355.680809,2472.930624,2305.516223,2010.2166,2151.260526,1904.98298,1903.273252,2375.582297,1578.71718,2129.384851


In [111]:
# Merge CMIP with GMFD
# Both tables are flattened to columns first; the outer join keeps every
# (AgVar, GEOID, Year) key found in either table.
# NOTE: this rebinds `cmip_all`, which until now held the list of CMIP file
# names, so the CMIP loading cell cannot be re-run afterwards without first
# re-running the file-list cell.
cmip_gmfd_keys = ["AgVar", "GEOID", "Year"]
cmip_all = cmip.reset_index().merge(gmfd.reset_index(), how = 'outer', on = cmip_gmfd_keys)

In [112]:
# Inspect the CMIP + GMFD merge. Because the join is outer, GMFD is NaN on rows
# whose (AgVar, GEOID, Year) key has no match in the GMFD table.
cmip_all.head()

Unnamed: 0,AgVar,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,...,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD
0,gdd,1001,1950,2267.175832,2287.555461,2120.185987,2064.444662,2033.532109,1973.663267,2472.745998,...,2665.613566,2358.148552,2052.908103,2219.875546,1811.416517,2058.842476,2303.873728,1716.55905,2122.450209,
1,gdd,1001,1951,2177.585261,2537.017849,2130.017178,2195.435172,1915.095858,2267.903252,2558.112487,...,2579.354271,2360.522876,2107.530068,1897.988467,1693.634247,2052.198305,2359.761689,1636.0425,2144.127468,
2,gdd,1001,1952,2420.979676,2239.258861,2046.755249,2063.649876,2075.135154,1994.597164,2578.596717,...,2590.564064,2306.502782,2244.96515,2049.042541,1893.242987,1843.979126,2413.868131,1668.652587,2130.548734,
3,gdd,1001,1953,2348.566813,2302.861538,2100.558088,2044.20439,1992.914361,2092.60896,2469.529788,...,2573.890973,2311.887288,1992.673818,2144.986851,1709.117958,1939.459152,2378.093016,1833.816947,2134.226665,
4,gdd,1001,1954,2392.682737,2412.767475,2116.639863,2037.716727,2104.555846,2225.417175,2565.672882,...,2472.930624,2305.516223,2010.2166,2151.260526,1904.98298,1903.273252,2375.582297,1578.71718,2129.384851,


In [114]:
# Save the merged CMIP + GMFD ag-variable table (row index omitted).
cmip_agvar_path = './output/cmip_agvar_all.csv'
cmip_all.to_csv(cmip_agvar_path, index = False)

## Yields

### USDA

In [4]:
# Read in USDA data
# Pipeline: pad the state (2 chars) and county (3 chars) codes, concatenate
# them into GEOID, rename the year/value columns, sort and index by
# (GEOID, Year), keep 1960-2005, and keep only the USDA value column.
usda = (
    pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
    .assign(state_fips_code = lambda d: d["state_fips_code"].astype(str).str.zfill(2))
    .assign(county_code = lambda d: d["county_code"].astype(str).str.zfill(3))
    .assign(GEOID = lambda d: (d["state_fips_code"] + d["county_code"]).astype(str).str.zfill(5))
    .rename(columns = {'year' : 'Year', 'target_Value' : 'USDA'})
    .sort_values(by = ['GEOID', 'Year'])
    .set_index(["GEOID", "Year"])
    .query('Year >= 1960 and Year <= 2005')
    .filter(['USDA'])
)

In [5]:
# USDA values before the per-GEOID mean is subtracted.
usda.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,USDA
GEOID,Year,Unnamed: 2_level_1
1001,1960,0.074776
1001,1961,0.162394
1001,1962,-0.130232
1001,1963,0.188834
1001,1964,0.056579


In [6]:
# Subtract means
# Remove each GEOID's own mean from its USDA series. groupby(...).transform
# returns the group mean aligned to every (GEOID, Year) row; it replaces
# Series.mean(level=...), which no longer exists in pandas >= 2.0.
usda_geoid_mean = usda.groupby(level = 'GEOID')['USDA'].transform('mean')
usda['USDA'] = usda['USDA'] - usda_geoid_mean

In [7]:
# USDA values after the per-GEOID mean has been subtracted.
usda.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,USDA
GEOID,Year,Unnamed: 2_level_1
1001,1960,0.155557
1001,1961,0.243176
1001,1962,-0.049451
1001,1963,0.269616
1001,1964,0.137361


In [8]:
# Sanity check: after de-meaning, every GEOID's mean should be ~0 (up to
# floating-point error). groupby(level=...) replaces DataFrame.mean(level=...),
# which no longer exists in pandas >= 2.0; sort=False keeps the groups in
# order of first appearance, as the level-based mean did.
usda.groupby(level = 'GEOID', sort = False).mean()

Unnamed: 0_level_0,USDA
GEOID,Unnamed: 1_level_1
01001,1.448117e-17
01003,9.352422e-18
01005,3.016910e-17
01007,1.110223e-17
01009,-8.145658e-18
...,...
56037,0.000000e+00
56039,0.000000e+00
56041,0.000000e+00
56043,5.430439e-18


In [14]:
# Read in GMFD data
# NOTE: this rebinds `gmfd`, which earlier in the notebook held the GMFD
# ag-variable table; from here on `gmfd` is the yield table indexed by
# (GEOID, Year).
gmfd_yield_path = '../../data/ACI_output/final/GMFD/res_yield_06-16_gmfd.csv'
gmfd_yield_raw = pd.read_csv(gmfd_yield_path)
gmfd = gmfd_yield_raw.assign(
    GEOID = gmfd_yield_raw["GEOID"].astype(str).str.zfill(5)
).set_index(["GEOID", "Year"])

In [15]:
# Preview the GMFD yield table: a single GMFD column indexed by (GEOID, Year).
gmfd.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,GMFD
GEOID,Year,Unnamed: 2_level_1
1001,2006,-0.289972
1001,2007,-0.4067
1001,2008,0.159519
1001,2009,0.330444
1001,2010,-0.330964


In [16]:
# Per-GEOID mean of the GMFD yield values. groupby(level=...) replaces
# DataFrame.mean(level=...), which no longer exists in pandas >= 2.0;
# sort=False keeps the groups in order of first appearance, as the
# level-based mean did.
gmfd.groupby(level = 'GEOID', sort = False).mean()

Unnamed: 0_level_0,GMFD
GEOID,Unnamed: 1_level_1
01001,1.034526e-16
01003,-1.103915e-16
01005,-4.062407e-16
01007,3.229740e-16
01009,-4.466124e-16
...,...
56033,
56035,
56037,
56039,


### NEX historical

In [17]:
# NEX historical yield file names, one per model, in a fixed model order.
nex_yield_hist_models = [
    "ACCESS1-0",
    "BNU-ESM",
    "CCSM4",
    "CESM1-BGC",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-CM3",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "MPI-ESM-LR",
    "MPI-ESM-MR",
    "MRI-CGCM3",
    "NorESM1-M",
    "bcc-csm1-1",
    "inmcm4",
]

nex_hist = [
    "yield_historical_r1i1p1_" + model_name + ".csv"
    for model_name in nex_yield_hist_models
]

In [19]:
# HISTORICAL
# Get nex models
# Build one table of NEX historical yields with one column per model, keyed
# by (GEOID, Year).
nex_yield_prefix = "../../data/ACI_output/final/NEX/res_60-05_"

nex = None
for name in nex_hist:
    # Model name = file name without the "yield_historical_r1i1p1_" prefix
    # and the ".csv" suffix.
    model = name.replace("historical_r1i1p1_","").replace(".csv","").replace("yield_","")

    # Read in product, keep years up to 2005, pad GEOID to 5 characters and
    # name the value column after the model. The chain builds a new frame, so
    # no column is assigned on a filtered slice.
    data = pd.read_csv(nex_yield_prefix + name)
    data = (
        data[data.Year <= 2005]
        .assign(GEOID = lambda frame: frame["GEOID"].astype(str).str.zfill(5))
        .rename(columns = {"yield" : model})
    )

    # The first model seeds the table; there is nothing to merge yet.
    if nex is None:
        nex = data
        continue

    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    nex = pd.merge(nex, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(nex.shape))

# Drop NaNs and zeros (they are all at the same location)
# NOTE(review): only the inmcm4 column is tested for zeros; this relies on the
# statement above that zeros sit in the same rows for every model — confirm.
nex = nex.dropna()
nex = nex[nex["inmcm4"] != 0]

# Add ensemble mean (row-wise mean over the model columns)
nex = nex.set_index(["GEOID", "Year"])
nex["ensemble_mean"] = nex.mean(axis = 1)

Read in: BNU-ESM. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 4)
Read in: CCSM4. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 5)
Read in: CESM1-BGC. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 6)
Read in: CNRM-CM5. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 7)
Read in: CSIRO-Mk3-6-0. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 8)
Read in: CanESM2. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 9)
Read in: GFDL-CM3. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 10)
Read in: GFDL-ESM2G. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 11)
Read in: GFDL-ESM2M. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 12)
Read in: IPSL-CM5A-LR. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 13)
Read in: IPSL-CM5A-MR. Shape: (143290, 3). Merging now...

In [20]:
# Inspect the merged NEX historical yield table: one column per model plus
# ensemble_mean, indexed by (GEOID, Year).
nex.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
56045,2001,0.077687,0.001563,0.07549,-0.133914,0.034538,0.011546,0.087074,0.130679,0.018522,-0.112396,...,0.067212,-0.21556,-0.053178,-0.047478,0.000876,-0.130958,-0.163688,-0.095565,-0.221294,-0.028458
56045,2002,0.124202,0.084463,-0.331387,0.037171,-0.142468,-0.158613,-0.002364,-0.012855,0.124621,-0.048872,...,0.0869,0.060119,0.111346,0.108313,0.063748,0.146645,0.108633,-0.375386,-0.024069,0.000885
56045,2003,-0.028533,-0.180581,0.129418,0.00868,0.056355,0.143647,0.127229,-0.011161,0.065953,0.018306,...,-0.227386,-0.011049,-0.049419,-0.109679,-0.079253,0.062367,-0.021333,0.024789,0.143029,0.013301
56045,2004,0.022366,-0.10394,0.14957,0.024173,0.126783,0.161166,-0.037644,0.07441,0.106684,0.087852,...,0.040026,-0.027851,-0.146493,-0.028835,0.124466,-0.011507,-0.119131,-0.038735,0.114306,0.032052
56045,2005,-0.045094,0.020878,0.019304,-0.345306,0.149339,-0.077876,0.084795,-0.026444,0.096967,0.103277,...,-0.102186,0.109887,-0.027287,0.158981,-0.154625,-0.380159,-0.061328,-0.107839,-0.0327,-0.018493


In [14]:
# Merge NEX with GMFD and USDA
# Each outer join is followed by .dropna(), which removes every row containing
# a NaN, so only (GEOID, Year) rows with a value in every column survive.
# NOTE(review): `gmfd` is loaded above from res_yield_06-16_gmfd.csv, while
# `nex` is restricted to Year <= 2005. If that GMFD file only covers 2006-2016,
# this merge is empty on a fresh Restart & Run All. The stored output of this
# notebook shows GMFD values for 1960, so it was presumably produced with a
# different GMFD file loaded — confirm which file belongs here.
nex_all = pd.merge(nex.reset_index(), gmfd.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()
nex_all = pd.merge(nex_all, usda.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()

In [15]:
# Display the merged NEX + GMFD + USDA yield table.
nex_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,1960,0.048327,0.183922,0.370325,0.236384,0.592613,0.384332,0.281084,0.281008,...,-0.406983,0.219937,0.429245,0.310112,0.326478,-0.373064,0.311170,0.191710,-0.149831,0.155557
1,01001,1961,0.212679,-0.070401,0.387145,0.196735,0.159242,0.392873,0.166964,-0.110233,...,-0.083515,-0.327861,-0.196735,0.283416,0.271092,-0.314018,-0.756463,0.014843,0.231540,0.243176
2,01001,1962,0.110997,0.205748,0.365178,-0.182600,-0.019531,-0.233259,0.057558,0.488140,...,0.021881,0.442699,-0.113261,0.227891,0.074869,0.067798,0.164327,0.105540,-0.489502,-0.049451
3,01001,1963,0.019497,0.254117,-0.164712,-0.187798,0.656209,-0.159989,0.392742,-0.002031,...,-0.156791,0.475829,0.079900,0.111615,0.246271,0.298552,0.184275,0.102313,-0.091325,0.269616
4,01001,1964,-0.932580,0.160052,0.032009,0.070877,-0.665175,0.065082,0.300072,-0.441497,...,-0.399045,0.466572,-0.460163,-0.356704,-0.099736,0.296367,0.331374,-0.023581,0.108570,0.137361
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127308,56045,1986,0.150280,-0.084658,-0.392213,0.144139,-0.108251,0.141531,0.037702,-0.280754,...,-0.044985,0.047177,-0.012537,0.034856,-0.030018,0.092410,-0.016909,-0.005762,-0.053967,0.197197
127317,56045,1995,0.005377,-0.080288,0.128554,-0.243632,0.065685,0.092161,-0.036130,-0.046765,...,0.023971,0.000907,0.169949,-0.014050,0.044635,-0.005166,0.103064,0.007025,-0.108794,0.264533
127318,56045,1996,0.143051,-0.126556,0.135943,0.113642,0.041388,0.057584,0.153595,0.095094,...,0.066611,0.047061,-0.022398,0.171213,0.008678,-0.088593,-0.007120,0.014983,-0.133737,-0.464434
127319,56045,1997,-0.190685,-0.045044,0.082208,-0.184289,-0.266766,-0.320167,-0.129586,0.015968,...,-0.219296,-0.025076,-0.096130,-0.050175,0.014664,0.003181,0.071570,-0.083009,-0.090742,0.275241


In [16]:
# Write the merged 1960-2005 NEX table to disk; the row index is not written
nex_all.to_csv(path_or_buf = './output/nex_yield_60-05.csv', index = False)

## NEX projection

In [21]:
# File names of the NEX rcp85 / r1i1p1 yield outputs, one per model.
# Every name has the form "yield_rcp85_r1i1p1_<model>.csv".
nex_proj = ["yield_rcp85_r1i1p1_" + model_id + ".csv" for model_id in [
    "ACCESS1-0",
    "BNU-ESM",
    "CCSM4",
    "CESM1-BGC",
    "CNRM-CM5",
    "CSIRO-Mk3-6-0",
    "CanESM2",
    "GFDL-CM3",
    "GFDL-ESM2G",
    "GFDL-ESM2M",
    "IPSL-CM5A-LR",
    "IPSL-CM5A-MR",
    "MIROC-ESM-CHEM",
    "MIROC-ESM",
    "MIROC5",
    "MPI-ESM-LR",
    "MPI-ESM-MR",
    "MRI-CGCM3",
    "NorESM1-M",
    "bcc-csm1-1",
    "inmcm4",
]]

In [38]:
# Get nex models PROJECTION  or OUT OF SAMPLE
# Builds one wide frame with a column per NEX model, keyed by GEOID and Year.
# All files go through the same loop so the first file is not handled by a
# separate, duplicated code path.
nex = None
for name in nex_proj:
    # Model name: file name without the "yield_" / "rcp85_r1i1p1_" parts and extension
    model = name.replace("rcp85_r1i1p1_","").replace(".csv","").replace("yield_","")
    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/NEX/res_50-80_" + name)
    # Zero-pad GEOID to 5 characters (leading zeros are lost when read as a number)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data.rename(columns = {"yield" : model})
    if nex is None:
        # The first model seeds the merged frame; nothing to merge yet
        nex = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    nex = pd.merge(nex, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(nex.shape))

# Drop NaNs and zeros (they are all at the same location)
nex = nex.dropna()
nex = nex[nex.inmcm4 != 0]

# Add ensemble mean
# set_index returns a new frame here (no inplace), so adding a column below
# does not operate on a filtered slice and does not raise SettingWithCopyWarning.
nex = nex.set_index(["GEOID", "Year"])
nex["ensemble_mean"] = nex.mean(axis = 1)

Read in: BNU-ESM. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 4)
Read in: CCSM4. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 5)
Read in: CESM1-BGC. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 6)
Read in: CNRM-CM5. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 7)
Read in: CSIRO-Mk3-6-0. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 8)
Read in: CanESM2. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 9)
Read in: GFDL-CM3. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 10)
Read in: GFDL-ESM2G. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 11)
Read in: GFDL-ESM2M. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 12)
Read in: IPSL-CM5A-LR. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 13)
Read in: IPSL-CM5A-MR. Shape: (96565, 3). Merging now...
Merge complete. New 

In [39]:
# Inspect the last rows of the merged NEX frame (indexed by GEOID, Year)
nex.tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
56045,2076,0.012722,-0.184722,0.111147,0.117322,0.109522,0.063714,-0.111913,0.176205,0.122086,-0.223102,...,-0.165135,0.029456,-0.132345,-0.170179,-0.020075,0.005449,0.020227,-0.131642,-0.088906,-0.013145
56045,2077,-0.005953,-0.117627,0.191353,-0.03702,-0.036728,-0.122015,-0.15187,-0.080831,0.114526,0.065434,...,-0.060901,0.083072,0.036012,-0.177134,-0.0635,0.04588,0.169514,0.002849,0.081789,0.000981
56045,2078,-0.140879,-0.055702,-0.283104,-0.170867,0.176075,0.164027,0.049802,0.062649,-0.183178,-0.42545,...,-0.07262,0.106104,0.013955,0.204995,-1.171039,-0.065763,0.099022,-0.120372,-0.052731,-0.082278
56045,2079,-0.007163,0.142942,-0.651166,0.047597,0.213991,-0.12641,-0.139912,-0.069974,-0.057296,0.156535,...,-0.029579,0.151579,0.002659,0.198013,0.089751,0.224184,0.268129,0.056529,-0.077744,0.006996
56045,2080,0.030507,-0.180321,-0.036991,0.055758,0.041431,0.029982,-0.05584,0.036269,-0.088157,0.00982,...,0.057043,0.143995,0.111922,0.189195,0.08503,0.087456,0.117165,0.021226,0.02408,0.0314


In [24]:
# Merge NEX with GMFD and USDA ###################### OUT OF SAMPLE ONLY ################
# NOTE(review): GMFD and USDA are restricted to 2006-2016 below, so this merge only
# produces rows when `nex` holds 2006-2016 values -- confirm which NEX files were
# loaded before running this cell.
# USDA
usda = pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
# Build the 5-character GEOID from the zero-padded state (2) and county (3) codes
usda["state_fips_code"] = usda["state_fips_code"].astype(str).str.zfill(2)
usda["county_code"] = usda["county_code"].astype(str).str.zfill(3)
usda["GEOID"] = usda["state_fips_code"] + usda["county_code"]
usda = (usda.rename(columns = {'year' : 'Year', 'target_Value' : 'USDA'})
            .sort_values(by = ['GEOID', 'Year'])
            .set_index(["GEOID", "Year"])
            .query('Year >= 2006 and Year <= 2016')
            .filter(['USDA']))
# Subtract each county's 2006-2016 mean. groupby(level=...).transform replaces
# Series.mean(level=...), which is deprecated and removed in pandas 2.0.
usda['USDA'] = usda['USDA'] - usda.groupby(level = 'GEOID')['USDA'].transform('mean')
# GMFD
# Read in GMFD data
gmfd = pd.read_csv('../../data/ACI_output/final/GMFD/res_yield_06-16_gmfd.csv')
gmfd["GEOID"] = gmfd["GEOID"].astype(str).str.zfill(5)
gmfd = gmfd.set_index(["GEOID", "Year"])

# Outer joins followed by dropna() keep only rows with no missing values
nex_all = pd.merge(nex.reset_index(), gmfd.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()
nex_all = pd.merge(nex_all, usda.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()

In [25]:
# Inspect the first rows of the NEX / GMFD / USDA merge
nex_all.head()

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,1001,2006,-0.042378,0.205446,0.436286,0.125913,-0.190837,-0.429734,0.353472,0.442885,...,0.050294,-0.216548,-0.078212,-0.160682,0.084927,0.262343,0.245234,0.112301,-0.289972,-0.144302
1,1001,2007,0.193014,-0.117178,-0.270864,-0.119949,0.162198,0.784355,0.389557,-0.081497,...,0.216971,-0.685225,-0.457464,0.139935,0.147569,-0.062713,-0.268691,0.062908,-0.4067,-0.688478
2,1001,2008,0.377047,-0.072319,-0.01618,-0.543021,-0.135405,0.089469,-0.088069,0.116119,...,0.248019,-0.847303,0.029712,-0.18547,-0.021398,0.155409,0.143068,-0.02524,0.159519,0.223311
3,1001,2009,0.35547,0.405216,0.258911,0.527351,0.166474,0.255105,-0.367187,0.657656,...,-0.075808,0.46754,-0.323911,-0.010951,0.201354,0.088138,0.469699,0.155954,0.330444,0.000827
4,1001,2010,0.171541,0.304568,0.122573,0.200644,0.303106,-0.381976,0.41995,-0.889544,...,0.070516,-0.078061,-0.393313,-0.042999,0.258524,0.366379,-1.167724,0.011848,-0.330964,-0.013974


In [40]:
# Save the NEX projection frame under the 50-80 name.
# `nex` is written instead of `nex_all`: `nex_all` comes from the merge with
# GMFD/USDA, which are restricted to 2006-2016, so it cannot hold 2050-2080 rows.
# The index (GEOID, Year) is written, matching how the CMIP 50-80 file is saved.
nex.to_csv('./output/nex_yield_50-80.csv')

### CMIP

In [27]:
# File names of the CMIP historical+rcp85 yield outputs, one per model.
# Names have the form "yield_<model><sep>historical+rcp85.csv"; the separator is
# "." for most models but "_" for CCSM4 and bcc-csm1-1.
cmip_names = [
    "yield_" + model_id + sep + "historical+rcp85.csv"
    for model_id, sep in [
        ("ACCESS1-0", "."),
        ("BNU-ESM", "."),
        ("CCSM4", "_"),
        ("CESM1-BGC", "."),
        ("CNRM-CM5", "."),
        ("CSIRO-Mk3-6-0", "."),
        ("CanESM2", "."),
        ("GFDL-CM3", "."),
        ("GFDL-ESM2G", "."),
        ("GFDL-ESM2M", "."),
        ("IPSL-CM5A-LR", "."),
        ("IPSL-CM5A-MR", "."),
        ("MIROC-ESM-CHEM", "."),
        ("MIROC-ESM", "."),
        ("MIROC5", "."),
        ("MPI-ESM-LR", "."),
        ("MPI-ESM-MR", "."),
        ("MRI-CGCM3", "."),
        ("NorESM1-M", "."),
        ("bcc-csm1-1", "_"),
        ("inmcm4", "."),
    ]
]

In [28]:
# HISTORICAL
# Get cmip models
# Builds one wide frame with a column per CMIP model for 1960-2005, keyed by
# GEOID and Year. Every file goes through the same loop so the model name is
# derived identically for all of them (some file names separate the scenario
# with "_" instead of ".", so both spellings are stripped).
cmip = None
for name in cmip_names:
    # Model name
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_60-05_" + name)
    # Zero-pad GEOID to 5 characters (leading zeros are lost when read as a number)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    # Chain query/rename (no inplace) so nothing is mutated on a filtered slice
    data = data.query('Year >= 1960 and Year <= 2005').rename(columns = {"yield" : model})
    if cmip is None:
        # The first model seeds the merged frame; nothing to merge yet
        cmip = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (they are all at the same location)
cmip = cmip.dropna()
cmip = cmip[cmip.inmcm4 != 0]

# Add ensemble mean
# set_index returns a new frame here (no inplace), so adding a column below
# does not operate on a filtered slice and does not raise SettingWithCopyWarning.
cmip = cmip.set_index(["GEOID", "Year"])
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 4)
Read in: CCSM4. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 5)
Read in: CESM1-BGC. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 6)
Read in: CNRM-CM5. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 7)
Read in: CSIRO-Mk3-6-0. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 8)
Read in: CanESM2. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 9)
Read in: GFDL-CM3. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 10)
Read in: GFDL-ESM2G. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 11)
Read in: GFDL-ESM2M. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 12)
Read in: IPSL-CM5A-LR. Shape: (143290, 3). Merging now...
Merge complete. New shape: (143290, 13)
Read in: IPSL-CM5A-MR. Shape: (143290, 3). Merging now...

In [29]:
# Show the merged CMIP frame (indexed by GEOID, Year); pandas truncates the display
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,1960,0.031431,0.137308,0.303418,0.127891,0.587930,0.517215,0.452640,0.179814,-0.116597,0.228263,...,0.308475,0.103410,-0.404563,0.103805,0.507671,0.095694,0.100254,-0.633677,0.208773,0.145706
01001,1961,0.290491,-0.044347,0.287446,0.135537,0.119941,0.456225,0.361735,0.012181,-0.071905,0.597162,...,0.298559,0.001844,-0.108023,-0.116611,-0.159876,0.100398,0.058861,-0.653024,-0.973155,-0.002257
01001,1962,0.104367,0.282827,0.278351,-0.089817,0.048418,-0.197852,0.133902,0.402963,0.405823,-0.073341,...,0.481256,-0.115897,0.029384,0.280434,-0.201643,0.113143,0.151304,0.208978,0.161480,0.101278
01001,1963,0.085830,0.337405,-0.061774,-0.106296,0.549104,-0.118054,0.869874,0.108349,0.234902,-1.135854,...,0.477776,-0.191648,-0.178951,0.288157,0.074325,-0.029744,0.043890,0.638725,0.101639,0.096593
01001,1964,-1.093267,0.171420,-0.034627,0.019757,-1.034017,0.137688,0.663294,-0.395230,0.051556,-0.077829,...,0.137765,0.348038,-0.343187,0.192552,-0.296134,0.027077,0.010837,0.628000,0.227590,-0.010806
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56045,2001,0.234378,-0.132148,0.072099,-0.093896,0.010739,0.062711,0.057440,0.488846,0.212468,-0.285066,...,0.109197,-0.713834,-0.051882,-0.006561,-0.007472,-0.245081,-0.728438,-0.322246,-0.528991,-0.085041
56045,2002,0.373725,0.367072,-0.411699,0.040501,-0.486613,-0.437072,0.013313,0.000762,0.354108,0.122983,...,0.419910,0.247555,0.271819,0.379349,0.089632,0.337902,0.235078,-0.802379,0.193160,0.067896
56045,2003,-0.200340,-0.944108,0.208245,-0.018343,0.190291,0.400155,0.141266,-0.018604,0.286719,0.107324,...,-0.791168,-0.310756,-0.157490,-0.066029,-0.485121,0.059409,0.005524,0.009907,0.294853,-0.041806
56045,2004,0.134449,-0.750756,0.208608,0.043009,0.367972,0.303516,0.035525,0.362262,0.329895,0.385258,...,0.391222,-0.224986,-0.317342,-0.103563,0.506754,-0.065253,-0.401179,0.003121,0.250261,0.084596


In [20]:
# Combine the CMIP model columns with the GMFD and USDA columns on county-year.
# Each outer join is followed by dropna(), so only rows without any missing
# value survive.
merge_keys = ["GEOID", "Year"]
cmip_all = cmip.reset_index().merge(gmfd.reset_index(), how = 'outer', on = merge_keys).dropna()
cmip_all = cmip_all.merge(usda.reset_index(), how = 'outer', on = merge_keys).dropna()

In [21]:
# Show the merged frame (model columns, ensemble_mean, GMFD, USDA); pandas truncates the display
cmip_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,1960,0.031431,0.137308,0.303418,0.127891,0.587930,0.517215,0.452640,0.179814,...,-0.404563,0.103805,0.507671,0.095694,0.100254,-0.633677,0.208773,0.145706,-0.149831,0.155557
1,01001,1961,0.290491,-0.044347,0.287446,0.135537,0.119941,0.456225,0.361735,0.012181,...,-0.108023,-0.116611,-0.159876,0.100398,0.058861,-0.653024,-0.973155,-0.002257,0.231540,0.243176
2,01001,1962,0.104367,0.282827,0.278351,-0.089817,0.048418,-0.197852,0.133902,0.402963,...,0.029384,0.280434,-0.201643,0.113143,0.151304,0.208978,0.161480,0.101278,-0.489502,-0.049451
3,01001,1963,0.085830,0.337405,-0.061774,-0.106296,0.549104,-0.118054,0.869874,0.108349,...,-0.178951,0.288157,0.074325,-0.029744,0.043890,0.638725,0.101639,0.096593,-0.091325,0.269616
4,01001,1964,-1.093267,0.171420,-0.034627,0.019757,-1.034017,0.137688,0.663294,-0.395230,...,-0.343187,0.192552,-0.296134,0.027077,0.010837,0.628000,0.227590,-0.010806,0.108570,0.137361
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
127308,56045,1986,0.531851,-1.010490,-0.599647,0.284118,-0.253619,0.376313,0.036765,-0.982551,...,-0.251518,0.001504,0.004778,0.028228,-0.194586,0.299956,0.189467,-0.030065,-0.053967,0.197197
127317,56045,1995,0.233666,-0.450719,0.327382,-0.464902,0.093476,0.102781,0.084355,-0.135190,...,0.124742,0.065909,0.497504,0.002513,-0.052036,-0.023143,0.309330,0.032145,-0.108794,0.264533
127318,56045,1996,0.485600,-0.397078,0.205458,0.238812,-0.040301,0.230970,0.191649,0.279585,...,0.202919,0.110796,-0.032399,0.483845,0.112119,-0.324266,0.028336,0.029948,-0.133737,-0.464434
127319,56045,1997,-0.468468,-0.259839,0.222731,-0.346112,-0.665106,-0.438127,-0.158184,0.067850,...,-0.696502,0.166618,-0.356890,-0.036136,0.184272,0.029618,0.315184,-0.210845,-0.090742,0.275241


In [22]:
# Write the merged 1960-2005 CMIP table to disk; the row index is not written
cmip_all.to_csv(path_or_buf = './output/cmip_yield_60-05.csv', index = False)

In [30]:
# OUT OF SAMPLE
# Get cmip models
# Builds one wide frame with a column per CMIP model from the res_06-16 files,
# keyed by GEOID and Year. Every file goes through the same loop so the model
# name is derived identically for all of them (some file names separate the
# scenario with "_" instead of ".", so both spellings are stripped).
cmip = None
for name in cmip_names:
    # Model name
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_06-16_" + name)
    # Zero-pad GEOID to 5 characters (leading zeros are lost when read as a number)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data.rename(columns = {"yield" : model})
    if cmip is None:
        # The first model seeds the merged frame; nothing to merge yet
        cmip = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (they are all at the same location)
cmip = cmip.dropna()
cmip = cmip[cmip.inmcm4 != 0]

# Add ensemble mean
# set_index returns a new frame here (no inplace), so adding a column below
# does not operate on a filtered slice and does not raise SettingWithCopyWarning.
cmip = cmip.set_index(["GEOID", "Year"])
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 4)
Read in: CCSM4. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 5)
Read in: CESM1-BGC. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 6)
Read in: CNRM-CM5. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 7)
Read in: CSIRO-Mk3-6-0. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 8)
Read in: CanESM2. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 9)
Read in: GFDL-CM3. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 10)
Read in: GFDL-ESM2G. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 11)
Read in: GFDL-ESM2M. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 12)
Read in: IPSL-CM5A-LR. Shape: (34265, 3). Merging now...
Merge complete. New shape: (34265, 13)
Read in: IPSL-CM5A-MR. Shape: (34265, 3). Merging now...
Merge complete. New 

In [31]:
# Show the merged CMIP frame (indexed by GEOID, Year); pandas truncates the display
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,2006,-0.220143,0.124182,0.328091,0.058289,-0.250438,-0.646093,0.766967,0.364886,0.855463,-0.174979,...,0.466312,0.494943,0.044286,-0.064556,-0.169220,-0.073524,0.019061,0.398731,0.177611,0.111644
01001,2007,0.143829,-0.090878,-0.187423,-0.105788,0.199842,0.955614,0.718365,-0.194099,-0.420654,0.697682,...,0.198317,0.538061,0.262722,-0.189748,-0.287250,0.038751,0.079615,-0.263620,0.052158,0.120333
01001,2008,0.609755,-0.129021,-0.034365,-0.367265,-0.370017,0.084100,-0.282960,-0.123102,-0.108539,-0.708182,...,-0.025620,0.433002,0.231891,-0.518526,0.194864,-0.056657,-0.047707,0.235838,0.217303,-0.010389
01001,2009,0.506301,0.453328,0.144900,0.375322,0.163591,0.209542,-0.678484,0.563534,-0.254411,0.815493,...,-0.013193,-0.103665,-0.046833,0.317458,-0.385430,-0.029242,0.017138,0.105317,0.135864,0.110270
01001,2010,0.155410,0.338102,0.036502,0.175193,0.309054,-0.456775,0.939846,-0.726768,0.341716,0.736276,...,-0.101656,0.235876,0.116839,-0.167133,-0.383601,0.037125,0.037135,0.984472,-0.687668,0.095563
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56045,2012,-0.029103,0.847468,0.056793,-0.107929,0.064731,-0.237963,0.126756,-0.069656,0.150881,1.233558,...,-0.198152,0.531603,0.041216,0.306977,0.054177,-0.068193,0.208532,-0.143126,0.393875,0.155174
56045,2013,-0.030761,1.001947,0.295379,-0.064988,0.247877,-0.554931,0.207604,0.249131,-0.504420,0.157911,...,0.446414,-0.145439,-0.381803,-0.203823,0.311110,0.155653,-0.229305,0.091355,-0.633848,0.017511
56045,2014,-0.410384,0.273973,0.130462,-0.618726,-0.173726,-0.024686,0.129058,0.275035,-0.065384,-1.386236,...,-0.769400,-0.316248,0.171144,0.256325,-0.071330,-0.348338,0.111646,0.350040,0.348383,-0.096097
56045,2015,-0.384938,0.309396,-0.343232,-0.047996,0.130683,-0.018149,0.162485,-0.075635,-0.493195,-2.522699,...,0.407717,-0.026703,0.262211,-0.324417,0.122669,0.053265,-0.228030,-0.358648,0.117968,-0.157756


In [32]:
# Merge CMIP with GMFD and USDA ###################### OUT OF SAMPLE ONLY ################
# USDA
usda = pd.read_csv("../../data/USDA/final/USDA_county_yields_w_county_quad_trends.csv")
# Build the 5-character GEOID from the zero-padded state (2) and county (3) codes
usda["state_fips_code"] = usda["state_fips_code"].astype(str).str.zfill(2)
usda["county_code"] = usda["county_code"].astype(str).str.zfill(3)
usda["GEOID"] = usda["state_fips_code"] + usda["county_code"]
usda = (usda.rename(columns = {'year' : 'Year', 'target_Value' : 'USDA'})
            .sort_values(by = ['GEOID', 'Year'])
            .set_index(["GEOID", "Year"])
            .query('Year >= 2006 and Year <= 2016')
            .filter(['USDA']))
# Subtract each county's 2006-2016 mean. groupby(level=...).transform replaces
# Series.mean(level=...), which is deprecated and removed in pandas 2.0.
usda['USDA'] = usda['USDA'] - usda.groupby(level = 'GEOID')['USDA'].transform('mean')
# GMFD
# Read in GMFD data
gmfd = pd.read_csv('../../data/ACI_output/final/GMFD/res_yield_06-16_gmfd.csv')
gmfd["GEOID"] = gmfd["GEOID"].astype(str).str.zfill(5)
gmfd = gmfd.set_index(["GEOID", "Year"])

# Outer joins followed by dropna() keep only rows with no missing values
cmip_all = pd.merge(cmip.reset_index(), gmfd.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()
cmip_all = pd.merge(cmip_all, usda.reset_index(), on = ["GEOID", "Year"], how = 'outer').dropna()

In [33]:
# Show the merged frame (model columns, ensemble_mean, GMFD, USDA); pandas truncates the display
cmip_all

Unnamed: 0,GEOID,Year,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,...,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean,GMFD,USDA
0,01001,2006,-0.220143,0.124182,0.328091,0.058289,-0.250438,-0.646093,0.766967,0.364886,...,0.044286,-0.064556,-0.169220,-0.073524,0.019061,0.398731,0.177611,0.111644,-0.289972,-0.144302
1,01001,2007,0.143829,-0.090878,-0.187423,-0.105788,0.199842,0.955614,0.718365,-0.194099,...,0.262722,-0.189748,-0.287250,0.038751,0.079615,-0.263620,0.052158,0.120333,-0.406700,-0.688478
2,01001,2008,0.609755,-0.129021,-0.034365,-0.367265,-0.370017,0.084100,-0.282960,-0.123102,...,0.231891,-0.518526,0.194864,-0.056657,-0.047707,0.235838,0.217303,-0.010389,0.159519,0.223311
3,01001,2009,0.506301,0.453328,0.144900,0.375322,0.163591,0.209542,-0.678484,0.563534,...,-0.046833,0.317458,-0.385430,-0.029242,0.017138,0.105317,0.135864,0.110270,0.330444,0.000827
4,01001,2010,0.155410,0.338102,0.036502,0.175193,0.309054,-0.456775,0.939846,-0.726768,...,0.116839,-0.167133,-0.383601,0.037125,0.037135,0.984472,-0.687668,0.095563,-0.330964,-0.013974
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30434,56043,2014,-0.370064,0.961779,0.128213,-0.066468,-1.838784,0.018521,0.366118,1.029256,...,0.492434,0.358090,-0.426379,-1.301267,0.580298,0.850231,-0.962142,-0.286984,0.026353,-0.077102
30435,56043,2015,-0.115578,1.121947,-1.232922,-1.036280,0.583945,-0.130015,0.093222,0.169224,...,0.478924,-1.377003,0.098253,0.215011,-0.666152,-0.926433,0.250949,-0.320702,-0.037886,-0.026397
30437,56045,2006,-0.064623,-1.502687,0.008494,0.331907,0.329026,0.056378,-0.354639,-0.344311,...,0.170375,0.058777,-0.313382,0.084448,0.231339,-0.038124,-0.044027,-0.015494,0.068632,0.123523
30438,56045,2007,0.246664,-0.034859,0.193733,0.311396,-0.037388,-0.020963,0.127932,0.219276,...,-0.044946,0.515870,-0.068399,-0.196962,-0.291720,-0.099366,-0.363743,0.068468,0.016903,0.302333


In [34]:
# Write the merged 2006-2016 CMIP table to disk; the row index is not written
cmip_all.to_csv(path_or_buf = './output/cmip_yield_06-16.csv', index = False)

In [35]:
# PROJECTION
# Get cmip models
# Builds one wide frame with a column per CMIP model from the res_50-80 files,
# keyed by GEOID and Year. Every file goes through the same loop so the model
# name is derived identically for all of them (some file names separate the
# scenario with "_" instead of ".", so both spellings are stripped).
cmip = None
for name in cmip_names:
    # Model name
    model = name.replace(".historical+rcp85","").replace("_historical+rcp85","").replace(".csv","").replace("yield_","")
    # Read in product
    data = pd.read_csv("../../data/ACI_output/final/CMIP/res_50-80_" + name)
    # Zero-pad GEOID to 5 characters (leading zeros are lost when read as a number)
    data["GEOID"] = data["GEOID"].astype(str).str.zfill(5)
    data = data.rename(columns = {"yield" : model})
    if cmip is None:
        # The first model seeds the merged frame; nothing to merge yet
        cmip = data
        continue
    # Do the merge
    print("Read in: " + model + ". Shape: " + str(data.shape) + ". Merging now...")
    cmip = pd.merge(cmip, data, on = ["GEOID", "Year"], how = "outer")
    print("Merge complete. New shape: " + str(cmip.shape))

# Drop NaNs and zeros (they are all at the same location)
cmip = cmip.dropna()
cmip = cmip[cmip.inmcm4 != 0]

# Add ensemble mean
# set_index returns a new frame here (no inplace), so adding a column below
# does not operate on a filtered slice and does not raise SettingWithCopyWarning.
cmip = cmip.set_index(["GEOID", "Year"])
cmip["ensemble_mean"] = cmip.mean(axis = 1)

Read in: BNU-ESM. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 4)
Read in: CCSM4. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 5)
Read in: CESM1-BGC. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 6)
Read in: CNRM-CM5. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 7)
Read in: CSIRO-Mk3-6-0. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 8)
Read in: CanESM2. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 9)
Read in: GFDL-CM3. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 10)
Read in: GFDL-ESM2G. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 11)
Read in: GFDL-ESM2M. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 12)
Read in: IPSL-CM5A-LR. Shape: (96565, 3). Merging now...
Merge complete. New shape: (96565, 13)
Read in: IPSL-CM5A-MR. Shape: (96565, 3). Merging now...
Merge complete. New 

In [36]:
# Show the merged CMIP frame (indexed by GEOID, Year); pandas truncates the display
cmip

Unnamed: 0_level_0,Unnamed: 1_level_0,ACCESS1-0,BNU-ESM,CCSM4,CESM1-BGC,CNRM-CM5,CSIRO-Mk3-6-0,CanESM2,GFDL-CM3,GFDL-ESM2G,GFDL-ESM2M,...,MIROC-ESM-CHEM,MIROC-ESM,MIROC5,MPI-ESM-LR,MPI-ESM-MR,MRI-CGCM3,NorESM1-M,bcc-csm1-1,inmcm4,ensemble_mean
GEOID,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
01001,2050,0.279605,0.036413,0.688401,0.551298,0.969854,0.830950,1.599010,0.343057,0.032337,1.225600,...,1.780440,0.656500,0.345138,0.815563,0.401106,-0.025718,0.404430,1.071627,0.216552,0.685968
01001,2051,1.126119,0.139396,0.388746,0.513427,-0.643256,0.962464,-0.231359,0.208508,-0.098745,1.493623,...,-0.657779,1.853141,0.075231,0.968644,0.962912,-0.110020,0.344750,0.798491,0.586248,0.463355
01001,2052,-0.003248,0.886794,0.606738,0.760754,-0.008856,0.428426,0.897634,0.526529,-1.793935,-1.150766,...,0.836044,2.012735,-0.083677,0.408108,0.916646,0.090694,0.156043,0.623070,0.579407,0.388080
01001,2053,0.935517,0.196194,-0.315870,0.287216,-0.514021,0.749947,0.296678,-0.384693,0.336974,1.065993,...,1.067890,0.996269,-0.135531,-0.001389,-0.074111,-0.090267,0.467697,0.608897,0.590418,0.315574
01001,2054,1.048099,0.105952,0.605420,0.230686,0.742886,-0.977000,1.297241,0.394890,1.426202,-0.596461,...,1.121405,1.077081,-0.161128,0.649054,0.297873,0.015362,0.164602,-1.410425,0.166269,0.352461
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56045,2076,-0.159170,-0.408267,0.286294,0.214213,0.161945,0.116298,-0.047377,0.441142,0.569005,-0.293941,...,-0.186546,0.222644,-0.380717,-0.254728,0.158267,-0.059326,0.014170,-0.184825,-0.477107,-0.000160
56045,2077,-0.255052,-0.548205,0.393346,-0.152581,-0.062304,-0.297319,-0.039098,-0.225026,0.621180,0.085074,...,-0.031175,0.351572,0.026490,-0.621952,-0.264973,0.143926,0.313631,0.074959,0.497913,0.006805
56045,2078,-0.133498,-0.301078,-0.576426,-0.189972,0.461917,0.396882,0.052484,0.174209,-0.458643,-1.730151,...,-0.186721,0.542501,-0.115153,0.599543,-2.296781,-0.117356,0.300685,-0.206100,-0.437902,-0.187768
56045,2079,-0.115161,0.746012,-0.835547,0.082160,0.528014,-0.358498,-0.114551,-0.376879,-0.135903,0.716101,...,-0.092988,0.522525,-0.047229,0.542055,0.403570,0.552810,0.720751,0.136452,-0.644747,0.087737


In [37]:
# Write the 2050-2080 CMIP frame to disk; the (GEOID, Year) index is written too
cmip.to_csv(path_or_buf = './output/cmip_yield_50-80.csv', index = True)