In [3]:
import rasterio as rio
import matplotlib.pyplot as plt 
from matplotlib.colors import Normalize
import numpy as np
import numpy.matlib
from scipy import interpolate
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats as st
import scipy
import os, sys, pickle, gzip
import datetime
import geopy.distance
import xarray as xr
import pandas as pd
import geopandas as gpd
import shapely.geometry
import shapely.ops
import cartopy
import cartopy.crs as ccrs
from cartopy.io.shapereader import Reader
from cartopy.feature import ShapelyFeature
import itertools
import random
import metpy
from metpy.plots import USCOUNTIES

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas/numpy warnings raised by later cells.
warnings.filterwarnings('ignore')

# Root folder of the agricultural input data. The original absolute path is kept
# as the default; set the AG_LAND_CLIMATE_DIR environment variable to point the
# notebook at a copy of the data on another machine.
dirAgData = os.environ.get(
    'AG_LAND_CLIMATE_DIR',
    '/home/edcoffel/drive/MAX-Filer/Research/Climate-01/Personal-F20/edcoffel-F20/data/projects/ag-land-climate',
)

In [4]:
# Run the shared setup script through IPython's `run` automagic.
# NOTE(review): whatever names this script defines are not visible in this notebook - confirm what later cells rely on.
run ../util/setupConsole_su

In [5]:
# Lower / upper temperature thresholds used for the GDD and KDD calculations
# (values from Butler et al., 2015, ERL).
t_low, t_high = 9, 29

# Crop and weather dataset this run works on.
crop, wxData = 'Maize', 'era5'

useTrendMethod = True

# First and last year of the analysis period.
yearRange = [1981, 2019]

In [6]:
# Coordinate axes used to index the crop-calendar grids: 360 latitudes running
# from +90 down to -90 and 720 longitudes running from 0 up to 360.
# NOTE(review): linspace includes both end points, so the spacing is 180/359 and
# 360/719 rather than exactly 0.5 - confirm this matches the grid definition.
sacksLat = np.linspace(start=90, stop=-90, num=360)
sacksLon = np.linspace(start=0, stop=360, num=720)

In [7]:
# Load the FAO irrigation grid (gmia_v5_aei_pct.asc); the first six lines of the
# file are header lines and are skipped.
faoIrrigationFile = '%s/fao-irrigation/gmia_v5_aei_pct.asc'%dirAgData
irrigationData = np.genfromtxt(faoIrrigationFile, skip_header=6)

# The county averages taken from this grid use np.nanmean, so missing cells have
# to be NaN to be ignored. Mask negative cells the same way the crop-calendar
# grids are masked.
# NOTE(review): assumes the grid's no-data sentinel is negative - confirm against
# the NODATA entry in the file header.
irrigationData[irrigationData < 0] = np.nan

# Coordinates of the grid rows (north to south) and columns, with longitudes
# converted from the -180..180 convention to 0..360.
irrigationLat = np.linspace(90,-90,irrigationData.shape[0])
irrigationLon = np.linspace(-180,180,irrigationData.shape[1])
irrigationLon[irrigationLon<0] += 360

In [8]:
def _sacksDayGrid(calendarNc, field):
    """Extract one crop-calendar field as a shifted, masked array.

    Adds 1 to the stored values, rolls the second axis left by half its length
    and replaces negative entries with NaN.
    NOTE(review): the +1 presumably converts to a 1-based day of year and the
    roll presumably moves the grid onto a 0..360 longitude axis - confirm.
    """
    grid = calendarNc[field].values + 1
    halfWidth = int(grid.shape[1]/2)
    grid = np.roll(grid, -halfWidth, axis=1)
    grid[grid < 0] = np.nan
    return grid


# maize planting / harvest grids
sacksMaizeNc = xr.open_dataset('%s/sacks/Maize.crop.calendar.fill.nc'%dirAgData)
sacksMaizeStart = _sacksDayGrid(sacksMaizeNc, 'plant')
sacksMaizeEnd = _sacksDayGrid(sacksMaizeNc, 'harvest')

# coordinate axes used to index the crop-calendar grids
sacksLat = np.linspace(start=90, stop=-90, num=360)
sacksLon = np.linspace(start=0, stop=360, num=720)

# soybean planting / harvest grids
sacksSoybeanNc = xr.open_dataset('%s/sacks/Soybeans.crop.calendar.fill.nc'%dirAgData)
sacksSoybeanStart = _sacksDayGrid(sacksSoybeanNc, 'plant')
sacksSoybeanEnd = _sacksDayGrid(sacksSoybeanNc, 'harvest')


In [9]:
# Load the NASS county tables (county area, planted/harvested area, production
# and yield). Each multi-year table is delivered as several CSV files.

def _readNass(pathTemplate):
    """Read one NASS CSV; the %s in pathTemplate is filled with dirAgData."""
    return pd.read_csv(pathTemplate % dirAgData)


def _stackReversed(*frames):
    """Reverse the row order of every frame and concatenate them in the given order."""
    return pd.concat([frame[::-1] for frame in frames])


nassCountyArea = _readNass('%s/nass/maize-county-area.csv')

# maize planted area
nassMaizePlantedArea1 = _readNass('%s/nass/maize-area-planted-1961-1974.csv')
nassMaizePlantedArea2 = _readNass('%s/nass/maize-area-planted-1975-1994.csv')
nassMaizePlantedArea3 = _readNass('%s/nass/maize-area-planted-1995-2019.csv')
nassMaizePlantedArea = _stackReversed(nassMaizePlantedArea1, nassMaizePlantedArea2, nassMaizePlantedArea3)

# maize harvested area
nassMaizeHarvestedArea1 = _readNass('%s/nass/maize-area-harvested-1961-1969.csv')
nassMaizeHarvestedArea2 = _readNass('%s/nass/maize-area-harvested-1970-1981.csv')
nassMaizeHarvestedArea3 = _readNass('%s/nass/maize-area-harvested-1982-2001.csv')
nassMaizeHarvestedArea4 = _readNass('%s/nass/maize-area-harvested-2002-2019.csv')
nassMaizeHarvestedArea = _stackReversed(nassMaizeHarvestedArea1, nassMaizeHarvestedArea2,
                                        nassMaizeHarvestedArea3, nassMaizeHarvestedArea4)

# soybean harvested area
nassSoybeanHarvestedArea1 = _readNass('%s/nass/soybean-area-harvested-1961-1969.csv')
nassSoybeanHarvestedArea2 = _readNass('%s/nass/soybean-area-harvested-1970-1995.csv')
nassSoybeanHarvestedArea3 = _readNass('%s/nass/soybean-area-harvested-1996-2019.csv')
nassSoybeanHarvestedArea = _stackReversed(nassSoybeanHarvestedArea1, nassSoybeanHarvestedArea2,
                                          nassSoybeanHarvestedArea3)

# maize production
nassMaizeProd1 = _readNass('%s/nass/nass-maize-production-1961-1974.csv')
nassMaizeProd2 = _readNass('%s/nass/nass-maize-production-1975-1994.csv')
nassMaizeProd3 = _readNass('%s/nass/nass-maize-production-1995-2019.csv')
nassMaizeProd = _stackReversed(nassMaizeProd1, nassMaizeProd2, nassMaizeProd3)

# maize yield
nassMaizeYield1 = _readNass('%s/nass/maize-yield-1961-1978.csv')
nassMaizeYield2 = _readNass('%s/nass/maize-yield-1979-1998.csv')
nassMaizeYield3 = _readNass('%s/nass/maize-yield-1999-2019.csv')
nassMaizeYield = _stackReversed(nassMaizeYield1, nassMaizeYield2, nassMaizeYield3)

# soybean yield
nassSoybeanYield1 = _readNass('%s/nass/soybean-yield-1961-1989.csv')
nassSoybeanYield2 = _readNass('%s/nass/soybean-yield-1990-2019.csv')
nassSoybeanYield = _stackReversed(nassSoybeanYield1, nassSoybeanYield2)



In [10]:
# Full state / territory name -> two-letter postal abbreviation.
usStateAbbrev = dict([
    ('Alabama', 'AL'), ('Alaska', 'AK'), ('Arizona', 'AZ'), ('Arkansas', 'AR'),
    ('California', 'CA'), ('Colorado', 'CO'), ('Connecticut', 'CT'), ('Delaware', 'DE'),
    ('District of Columbia', 'DC'), ('Florida', 'FL'), ('Georgia', 'GA'), ('Hawaii', 'HI'),
    ('Idaho', 'ID'), ('Illinois', 'IL'), ('Indiana', 'IN'), ('Iowa', 'IA'),
    ('Kansas', 'KS'), ('Kentucky', 'KY'), ('Louisiana', 'LA'), ('Maine', 'ME'),
    ('Maryland', 'MD'), ('Massachusetts', 'MA'), ('Michigan', 'MI'), ('Minnesota', 'MN'),
    ('Mississippi', 'MS'), ('Missouri', 'MO'), ('Montana', 'MT'), ('Nebraska', 'NE'),
    ('Nevada', 'NV'), ('New Hampshire', 'NH'), ('New Jersey', 'NJ'), ('New Mexico', 'NM'),
    ('New York', 'NY'), ('North Carolina', 'NC'), ('North Dakota', 'ND'),
    ('Northern Mariana Islands', 'MP'), ('Ohio', 'OH'), ('Oklahoma', 'OK'), ('Oregon', 'OR'),
    ('Palau', 'PW'), ('Pennsylvania', 'PA'), ('Puerto Rico', 'PR'), ('Rhode Island', 'RI'),
    ('South Carolina', 'SC'), ('South Dakota', 'SD'), ('Tennessee', 'TN'), ('Texas', 'TX'),
    ('Utah', 'UT'), ('Vermont', 'VT'), ('Virgin Islands', 'VI'), ('Virginia', 'VA'),
    ('Washington', 'WA'), ('West Virginia', 'WV'), ('Wisconsin', 'WI'), ('Wyoming', 'WY'),
])

In [11]:
# Case-insensitive lookup from full state name to postal abbreviation. Matching
# on the lower-cased name avoids str.title(), which turns 'District of Columbia'
# into 'District Of Columbia' and therefore misses the key in usStateAbbrev.
_stateAbbrevByLowerName = {stateName.lower(): abbrev for stateName, abbrev in usStateAbbrev.items()}


def _toStateAbbrevs(stateNames):
    """Map full state names (any capitalisation) to postal abbreviations.

    Parameters
    ----------
    stateNames : iterable of str
        State names as they appear in a NASS table's 'State' column.

    Returns
    -------
    list of str
        One abbreviation per input name, in the same order.

    Raises
    ------
    KeyError
        If a name is not present in usStateAbbrev.
    """
    return [_stateAbbrevByLowerName[stateName.lower()] for stateName in stateNames]


# Add a 'StateAbvs' column to every NASS table.
nassMaizeStates = list(nassMaizeYield['State'])
nassMaizeStateAbvs = _toStateAbbrevs(nassMaizeStates)
nassMaizeYield['StateAbvs'] = nassMaizeStateAbvs

nassSoybeanStates = list(nassSoybeanYield['State'])
nassSoybeanStateAbvs = _toStateAbbrevs(nassSoybeanStates)
nassSoybeanYield['StateAbvs'] = nassSoybeanStateAbvs

nassMaizeProdStates = list(nassMaizeProd['State'])
nassMaizeProdStateAbvs = _toStateAbbrevs(nassMaizeProdStates)
nassMaizeProd['StateAbvs'] = nassMaizeProdStateAbvs

nassMaizePlantedAreaStates = list(nassMaizePlantedArea['State'])
nassMaizePlantedAreaStateAbvs = _toStateAbbrevs(nassMaizePlantedAreaStates)
nassMaizePlantedArea['StateAbvs'] = nassMaizePlantedAreaStateAbvs

nassMaizeHarvestedAreaStates = list(nassMaizeHarvestedArea['State'])
nassMaizeHarvestedAreaStateAbvs = _toStateAbbrevs(nassMaizeHarvestedAreaStates)
nassMaizeHarvestedArea['StateAbvs'] = nassMaizeHarvestedAreaStateAbvs

nassSoybeanHarvestedAreaStates = list(nassSoybeanHarvestedArea['State'])
nassSoybeanHarvestedAreaStateAbvs = _toStateAbbrevs(nassSoybeanHarvestedAreaStates)
nassSoybeanHarvestedArea['StateAbvs'] = nassSoybeanHarvestedAreaStateAbvs

nassCountyAreaStates = list(nassCountyArea['State'])
nassCountyAreaStateAbvs = _toStateAbbrevs(nassCountyAreaStates)
nassCountyArea['StateAbvs'] = nassCountyAreaStateAbvs

In [12]:
# maizeYieldDeepak = []
# for year in range(1981, 2013+1):
#     curMaizeYield = xr.open_dataset('%s/deepak/Maize_yield_1970_2013/Maize_areaweightedyield_%d_ver12b.nc'%(dirAgData, year), decode_cf=False)
    
#     if len(maizeYieldDeepak) == 0:
#         maizeYieldDeepak = curMaizeYield
#     else:
#         maizeYieldDeepak = xr.concat([maizeYieldDeepak, curMaizeYield], dim='time')
    
# maizeYieldDeepak.load()

# # flip latitude axis so top is +90
# latDeepak = np.flipud(maizeYieldDeepak.latitude)
# lonDeepak = np.roll(maizeYieldDeepak.longitude, int(len(maizeYieldDeepak.longitude)/2), axis=0)
# lonDeepak[lonDeepak<0] += 360
# maizeYieldDeepak['Data'] = maizeYieldDeepak.Data.transpose('latitude', 'longitude', 'time', 'level')
# maizeYieldDeepakData = np.roll(np.flip(maizeYieldDeepak.Data, axis=0), int(len(maizeYieldDeepak.longitude)/2), axis=1)

# maizeYieldDeepak['latitude'] = latDeepak
# maizeYieldDeepak['longitude'] = lonDeepak
# maizeYieldDeepak['Data'] = (('latitude', 'longitude', 'time'), np.squeeze(maizeYieldDeepakData))

In [28]:
def findConsec(data):
    """Locate the longest run of consecutive non-NaN values in a 1-D sequence.

    Parameters
    ----------
    data : sequence of float
        Values to scan; NaN marks a missing entry.

    Returns
    -------
    tuple of (int, int)
        Half-open interval (start, end): data[start:end] is the longest unbroken
        run of non-NaN values. When several runs share the maximum length the
        earliest one is returned. (-1, -1) is returned when data is empty or
        contains no valid value.

    Notes
    -----
    The end index is exclusive for every run, including one that extends to the
    last element (end == len(data)), so run lengths are always end - start and
    a trailing run is compared fairly against earlier ones.
    """
    ptMax = (-1, -1)
    runStart = -1
    nValues = len(data)

    for i, val in enumerate(data):
        if np.isnan(val):
            # a NaN closes the run that is currently open, if any
            if runStart >= 0:
                if ptMax == (-1, -1) or i - runStart > ptMax[1] - ptMax[0]:
                    ptMax = (runStart, i)
                runStart = -1
        elif runStart == -1:
            # first valid value after a gap opens a new run
            runStart = i

    # a run still open after the last element ends at len(data)
    if runStart >= 0:
        if ptMax == (-1, -1) or nValues - runStart > ptMax[1] - ptMax[0]:
            ptMax = (runStart, nValues)

    return ptMax

In [30]:
# A county series is only trimmed to its longest unbroken run of years when that
# run is at least this many years long.
minCropYears = 10

# True forces the county table to be rebuilt even if a cached copy exists on disk.
rebuild=True

# Columns added to the county table by the rebuild, in the order they are created.
_countyColumns = [
    'prLatInds', 'prLonInds',
    'seasonalSeconds',
    'maizePlantingDate', 'maizeHarvestDate', 'soybeanPlantingDate', 'soybeanHarvestDate',
    'maizeCountyArea', 'maizeCountyIrrigationFraction',
    'maizeHarvestedArea', 'maizeHarvestedAreaFraction', 'maizeHarvestedAreaYears', 'maizeHarvestedAreaTrend',
    'soybeanHarvestedArea', 'soybeanHarvestedAreaFraction', 'soybeanHarvestedAreaYears', 'soybeanHarvestedAreaTrend',
    'maizeYield', 'maizeYieldYears',
    'soybeanYield', 'soybeanYieldYears',
]


def _nearestIndex(axisValues, target):
    """Return the index of the first element of axisValues that is closest to target."""
    return np.nanargmin(np.abs(axisValues - target))


def _boxMean(grid, rowInds, colInds):
    """Average a grid over an index box: NaN-aware mean over the rows, then over the columns.

    A single row or column is taken as is (squeezed) instead of averaged. The
    result is therefore the mean of the per-column means, not the mean of all
    cells in the box.
    """
    if len(rowInds) > 1:
        columnMeans = np.nanmean(grid[rowInds, :], axis=0)
    else:
        columnMeans = np.squeeze(grid[rowInds, :])

    if len(colInds) > 1:
        return np.nanmean(columnMeans[colInds], axis=0)
    return np.squeeze(columnMeans[colInds])


def _nassLookup(table, withYears=True):
    """Extract the arrays needed to pull one county's records from a NASS table.

    County names and state abbreviations are lower-cased once here so the
    per-county search only has to compare arrays.
    """
    lookup = {
        'counties': np.array(list(map(str.lower, table['County']))),
        'states': np.array(list(map(str.lower, table['StateAbvs']))),
        'values': np.array(list(table['Value'])),
    }
    if withYears:
        lookup['years'] = np.array(list(table['Year']))
    return lookup


def _countyRows(lookup, countyName, stateAbbrev):
    """Row positions in a NASS lookup that match the (lower-case) county and state."""
    return np.where((countyName == lookup['counties']) & (stateAbbrev == lookup['states']))[0]


def _parseNassArea(text):
    """Convert a NASS area string to float; thousands separators and '(D)' markers are stripped and an empty result becomes NaN."""
    cleaned = text.replace(',', '').replace('(D)', '').strip()
    return float(cleaned) if cleaned != '' else np.nan


def _uniformSeries(recordYears, recordValues, uniformYears, parse=None):
    """Place per-year records on the uniform year axis; years without a record stay NaN.

    Records outside the uniform axis are ignored. When parse is given it is
    applied to every value before it is stored.
    """
    series = np.full(uniformYears.shape, np.nan)
    for year, value in zip(recordYears, recordValues):
        indUniform = year - uniformYears[0]
        if indUniform >= 0 and indUniform < len(uniformYears):
            series[indUniform] = value if parse is None else parse(value)
    return series


def _trimToLongestRun(series, minYears):
    """Blank everything outside the longest unbroken run of valid years (in place).

    Nothing is changed when the longest run is shorter than minYears.
    """
    start, end = findConsec(series)
    # The end index returned by findConsec may point either one past the run or
    # at its last valid element; normalise it to "one past the run" so the run
    # length is end - start and the final valid year is never blanked.
    if 0 <= end < len(series) and not np.isnan(series[end]):
        end += 1
    if start >= 0 and end - start >= minYears:
        series[:start] = np.nan
        series[end:] = np.nan
    return series


countyCachePath = '%s/us-county-yield-gdd-kdd-%s-%s'%(dirAgData, crop, wxData)

if os.path.isfile(countyCachePath) and not rebuild:
    # NOTE: unpickling can execute arbitrary code - only load cache files you created yourself
    usCounties = pd.read_pickle(countyCachePath)
else:
    usCounties = gpd.read_file('%s/us-county-lat-long/c_02jn20.shp'%dirAgData)

    for columnName in _countyColumns:
        usCounties[columnName] = np.full(len(usCounties), np.nan)

    # object dtype is needed so that single cells can hold lists and arrays
    usCounties = usCounties.astype(object)

    # The NASS tables do not change inside the loop, so lower-case their keys and
    # extract their columns once instead of once per county.
    countyAreaLookup = _nassLookup(nassCountyArea, withYears=False)
    cropLookups = [
        ('maize', _nassLookup(nassMaizeYield), _nassLookup(nassMaizeHarvestedArea)),
        ('soybean', _nassLookup(nassSoybeanYield), _nassLookup(nassSoybeanHarvestedArea)),
    ]

    # uniform year axis shared by all yield / harvested-area series
    yearsUniform = np.arange(yearRange[0], yearRange[1]+1)

    nCounties = usCounties.shape[0]

    for c in range(nCounties):

        if c % 100 == 0:
            print('%.0f %% done'%(c/nCounties*100))

        # bounding box of the county polygon: (minx, miny, maxx, maxy)
        lon1, lat1, lon2, lat2 = usCounties.at[c, 'geometry'].bounds

        # the grids are indexed with longitudes in 0..360
        if lon1 < 0: lon1 += 360
        if lon2 < 0: lon2 += 360

        # grid indices nearest to the corners of the bounding box
        prLatInds = [_nearestIndex(sacksLat, lat2), _nearestIndex(sacksLat, lat1)]
        prLonInds = [_nearestIndex(sacksLon, lon1), _nearestIndex(sacksLon, lon2)]

        irrLatInds = [_nearestIndex(irrigationLat, lat2), _nearestIndex(irrigationLat, lat1)]
        irrLonInds = [_nearestIndex(irrigationLon, lon1), _nearestIndex(irrigationLon, lon2)]

        # .at writes straight into the table; chained indexing
        # (usCounties[col][c] = ...) may write to a temporary copy instead
        usCounties.at[c, 'prLatInds'] = prLatInds
        usCounties.at[c, 'prLonInds'] = prLonInds

        prLatIndsRange = np.arange(min(prLatInds), max(prLatInds)+1)
        prLonIndsRange = np.arange(min(prLonInds), max(prLonInds)+1)

        irrLatIndsRange = np.arange(min(irrLatInds), max(irrLatInds)+1)
        irrLonIndsRange = np.arange(min(irrLonInds), max(irrLonInds)+1)

        # crop calendar and irrigation, averaged over the county's bounding box
        usCounties.at[c, 'maizePlantingDate'] = _boxMean(sacksMaizeStart, prLatIndsRange, prLonIndsRange)
        usCounties.at[c, 'soybeanPlantingDate'] = _boxMean(sacksSoybeanStart, prLatIndsRange, prLonIndsRange)
        usCounties.at[c, 'maizeHarvestDate'] = _boxMean(sacksMaizeEnd, prLatIndsRange, prLonIndsRange)
        usCounties.at[c, 'soybeanHarvestDate'] = _boxMean(sacksSoybeanEnd, prLatIndsRange, prLonIndsRange)

        usCounties.at[c, 'maizeCountyIrrigationFraction'] = _boxMean(irrigationData, irrLatIndsRange, irrLonIndsRange)

        # match the county to the NASS tables by lower-cased county name and state abbreviation
        curCountyName = usCounties.at[c, 'COUNTYNAME'].lower()
        curStateName = usCounties.at[c, 'STATE'].lower()

        # county area: mean of all matching records, NaN when the county is not listed
        countyIndCountyArea = _countyRows(countyAreaLookup, curCountyName, curStateName)
        countyAreaValues = [float(a.replace(',','')) for a in countyAreaLookup['values'][countyIndCountyArea]]
        curCountyArea = np.nanmean(countyAreaValues) if len(countyAreaValues) > 0 else np.nan
        usCounties.at[c, 'maizeCountyArea'] = curCountyArea

        # MAIZE and SOYBEAN ------------------------------------------
        for cropName, yieldLookup, areaLookup in cropLookups:
            countyIndYield = _countyRows(yieldLookup, curCountyName, curStateName)
            countyIndHarvestedArea = _countyRows(areaLookup, curCountyName, curStateName)

            yieldUniform = _uniformSeries(yieldLookup['years'][countyIndYield],
                                          yieldLookup['values'][countyIndYield],
                                          yearsUniform)
            harvestedAreaUniform = _uniformSeries(areaLookup['years'][countyIndHarvestedArea],
                                                  areaLookup['values'][countyIndHarvestedArea],
                                                  yearsUniform, parse=_parseNassArea)

            # keep only the longest consecutive sequence of years with data
            _trimToLongestRun(yieldUniform, minCropYears)
            _trimToLongestRun(harvestedAreaUniform, minCropYears)

            # every cell gets its own copy of the year axis so the stored arrays do not alias
            usCounties.at[c, cropName + 'YieldYears'] = yearsUniform.copy()
            usCounties.at[c, cropName + 'Yield'] = yieldUniform
            usCounties.at[c, cropName + 'HarvestedAreaYears'] = yearsUniform.copy()
            usCounties.at[c, cropName + 'HarvestedArea'] = harvestedAreaUniform
            usCounties.at[c, cropName + 'HarvestedAreaFraction'] = harvestedAreaUniform/curCountyArea

#         # WX VARIABLES -------------------------------------------------------------------
#         countyGdd[countyGdd < 0] = np.nan
#         indsGdd = np.where(~np.isnan(countyGdd))[0]
#         usCounties['gdd'][c] = np.full(countyGdd.shape, np.nan)
#         usCounties['gdd'][c] = countyGdd[indsGdd]
#         usCounties['gddDetrend'][c] = np.full(countyGdd.shape, np.nan)
#         usCounties['gddDetrend'][c][indsGdd] = scipy.signal.detrend(countyGdd[indsGdd]) 
#         usCounties['gddDetrendNorm'][c] = np.full(countyGdd.shape, np.nan)
#         usCounties['gddDetrendNorm'][c][indsGdd] = scipy.signal.detrend(countyGdd[indsGdd]) / np.linalg.norm(scipy.signal.detrend(countyGdd[indsGdd]))
#         X = sm.add_constant(range(len(countyGdd[indsGdd])))
#         mdl = sm.OLS(countyGdd[indsGdd], X).fit()
#         usCounties['gddTrend'][c] = mdl.params[1]

        
    # NOTE(review): the cache write below is disabled, so a rebuilt table is never saved to disk.
#     usCounties.to_pickle('%s/us-county-yield-gdd-kdd-%s-%s'%(dataDirDiscovery, crop, wxData))

0 % done
3 % done


KeyboardInterrupt: 

In [19]:
# Remove shapefile attributes that are not used in the analysis. errors='ignore'
# keeps the cell re-runnable: once the columns are gone a second run is a no-op
# instead of raising KeyError.
usCounties = usCounties.drop(columns=['CWA', 'TIME_ZONE', 'FE_AREA'], errors='ignore')

In [20]:
# Select the counties to drop: those whose maize yield trend is NaN. The NaN
# masks of the other series are only used by the disabled, stricter filter below.
yieldNans = np.array(list(map(np.isnan, usCounties['maizeYield'])))
yieldTrendNans = np.array(list(map(np.isnan, usCounties['maizeYieldTrend'])))
gddNans = np.array(list(map(np.isnan, usCounties['gdd'])))
kddNans = np.array(list(map(np.isnan, usCounties['kdd'])))
# inds1 = np.where( (np.array([len(np.where((yieldNans[i]==False) & ((gddNans[i]==True) | (kddNans[i]==True)))[0]) for i in range(len(yieldNans))]) > 0))[0]
# np.where returns row positions, but the rows are dropped by index label, so
# translate the positions into labels (identical for a default 0..n-1 index).
inds = np.asarray(usCounties.index)[np.where((yieldTrendNans == True))[0]]
# inds = np.union1d(inds1, inds2)

In [21]:
# Remove the selected counties; drop() interprets inds as index labels.
usCounties = usCounties.drop(labels=inds, axis=0)

In [22]:
# Show the filtered county table (displayed by the notebook as the cell's last expression).
usCounties

Unnamed: 0,STATE,COUNTYNAME,FIPS,LON,LAT,geometry,temp,tempLatInds,tempLonInds,prLatInds,...,soybeanYieldTrendDeepak,soybeanYieldDetrendDeepak,soybeanYieldDetrendPlusMeanDeepak,soybeanYieldDetrendNormDeepak,soybeanYieldYears,maizeProd,maizeProdTrend,maizeProdDetrend,maizeProdDetrendNorm,maizeProdYears
2,GA,Liberty,13179,-81.2103,31.7093,"POLYGON ((-81.30807 31.79454, -81.30546 31.791...",,"[233, 233]","[1115, 1115]","[116, 116]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[39400.0, 38400.0, 51100.0, 40800.0, 28500.0, ...",,"[-8208.888888888898, -4520.555555555562, 12867...","[-0.42580910064089694, -0.23448894504026663, 0...","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
95,MD,Garrett,24023,-79.2739,39.5286,"POLYGON ((-78.99060 39.72251, -78.93130 39.722...",,"[201, 203]","[1122, 1124]","[100, 101]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[513300.0, 538200.0, 310000.0, 530700.0, 61100...",,"[21200.000000000175, 47705.71428571449, -17888...","[0.05719940111765059, 0.12871407014304692, -0....","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
101,NY,Yates,36123,-77.1055,42.6335,"POLYGON ((-76.94759 42.76431, -76.94759 42.758...",,"[189, 190]","[1131, 1132]","[94, 95]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[1710800.0, 1537200.0, 963600.0, 1411200.0, 11...",,"[474089.0476190483, 295259.43977591116, -28357...","[0.25860784412461263, 0.16105920936449278, -0....","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
102,NY,Allegany,36003,-78.0276,42.2574,"POLYGON ((-78.20480 42.52161, -78.19659 42.521...",,"[190, 192]","[1127, 1129]","[95, 96]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[639200.0, 500600.0, 522000.0, 367500.0, 43700...",,"[210719.21182266023, 75690.8045977013, 100662....","[0.28451516862834714, 0.10219847467849354, 0.1...","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
103,NY,Broome,36007,-75.8196,42.1602,"POLYGON ((-75.86359 42.41161, -75.86240 42.398...",,"[190, 192]","[1135, 1139]","[95, 96]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[299200.0, 220500.0, 267000.0, 273000.0, 33000...",,"[21494.827586207015, -51796.05911330035, 113.0...","[0.060043687876430094, -0.14468719947415615, 0...","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3277,TX,Jackson,48239,-96.5787,28.9557,"MULTIPOLYGON (((-96.44796 28.75027, -96.44695 ...",,"[243, 245]","[1052, 1055]","[121, 122]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[1289500.0, 1538700.0, 2623600.0, 3862600.0, 3...",,"[-717526.315789473, -616955.3342816494, 319315...","[-0.06717636886935306, -0.05776069560043745, 0...","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
3286,VA,Accomack,51001,-75.6472,37.766,"MULTIPOLYGON (((-75.37632 38.00901, -75.37540 ...",,"[208, 210]","[1136, 1139]","[104, 105]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[585000.0, 770000.0, 261700.0, 540000.0, 63840...",,"[321302.0242914984, 415872.46963562793, -18285...","[0.08442595256566181, 0.10927546899912628, -0....","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
3316,NY,Erie,36029,-78.7308,42.7621,"MULTIPOLYGON (((-78.96736 42.71326, -78.96956 ...",,"[188, 190]","[1123, 1126]","[94, 95]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[1452900.0, 1598900.0, 1349000.0, 1100800.0, 1...",,"[161738.6243386243, 322813.715913716, 87988.80...","[0.11000559240473479, 0.2195598868277362, 0.05...","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
3320,TX,Willacy,48489,-97.6732,26.4706,"MULTIPOLYGON (((-97.44247 26.37530, -97.44263 ...",,"[254, 255]","[1048, 1051]","[126, 127]",...,,,,,"[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198...","[274500.0, 132800.0, 226100.0, 249300.0, 29980...",,"[-201367.52136752137, -329590.735042735, -2228...","[-0.11586325093540326, -0.1896405824579002, -0...","[1981, 1982, 1983, 1984, 1985, 1986, 1987, 198..."
