In [11]:
import pickle as pkl
import pandas as pd
import matplotlib
import os
import re

import collections
import datetime
import time

 
import geopandas as gpd

import numpy as np

import rasterio

from difflib import get_close_matches

from fuzzywuzzy import process
from fuzzywuzzy import fuzz

import dask.dataframe as dd

import gc
import sys

from multiprocessing import Pool
import multiprocessing


# Sample

In [12]:
callLocation = '../../data/companyData/relevantZips.pkl'
file   = open(callLocation,"rb")
relevantZips = list(pkl.load(file))

len(relevantZips)

4538

In [13]:
def getData(weatherType,yearRange1,yearRange2):

    if weatherType == "Tmax":
        weatherVar = "temperature"
    else: 
        weatherVar = "precipitation"
    
    year = yearRange1
    filename = "../../../../../../../Volumes/backup2/dissData/prism/zipcode" + weatherType + str(year) + ".csv"
    data = dd.read_csv(filename, assume_missing=True)[['ZIP','date',weatherVar]]
    data['ZIP'] = data.ZIP.astype('int64')

    
    callLocation = '../../data/companyData/relevantZips.pkl'
    file   = open(callLocation,"rb")
    relevantZips = list(pkl.load(file))
    
    
    data = data[data.ZIP.isin(relevantZips)]
    
    # data = data[~(data[weatherVar].isna().compute())]

    years = range(yearRange1 + 1,yearRange2 + 1)
    for year in years:
        filename = "../../../../../../../Volumes/backup2/dissData/prism/zipcode" + weatherType + str(year) + ".csv"
        tempData = dd.read_csv(filename, assume_missing=True)[['ZIP','date',weatherVar]]
        tempData['ZIP'] = tempData.ZIP.astype('int64')

        tempData = tempData[tempData.ZIP.isin(relevantZips)]
        # tempData = tempData[~(tempData[weatherVar].isna().compute())]
        data = data.append(tempData)

    data = data[~(data[weatherVar].isna())].compute()
    
    return(data)


I think only a few of these quantiles will be relevant. Let's look at 0, 0.9, 0.95, 1.0

In [14]:
quantiles = [0.0, 0.05, 0.90, 0.95, 0.9888, 0.9973, 0.9978, 0.9989, 0.9995,  0.9997, 1.0]

# need to account for fact that once a year-quarter is 90 days, not 365
quant_labels = ['quant_0.05','quant_tossThisOne','quant_0.95','quant_1xQtr','quant_1xYr',
                'quant_1x5Qtrs','quant_1x10Qtrs','quant_1x5Yrs','quant_1x10Yrs','quant_1.0']

# Precipitation

In [15]:
weatherType = "Precip" # Tmax
precipData = getData(weatherType, 1981, 2008)

In [16]:
print(sys.getsizeof(precipData)/1e6)

1372.217984


In [17]:
len(precipData.ZIP.unique())

4193

## Temperature

In [18]:
weatherType = "Tmax"
tempData = getData(weatherType, 1981, 2008)

In [19]:
print(sys.getsizeof(tempData)/1e6)

1372.217984


## Get stats

### Describe Climate

In [20]:
weatherData = tempData.merge(precipData)
weatherData['month'] = weatherData.date.astype('str').str.slice(4,6)
weatherData['year'] = weatherData.date.astype('str').str.slice(0,4)

weatherData['quarter'] = 'q1'

weatherData.loc[weatherData['month'].isin(['04','05','06']), 'quarter'] = 'q2'
weatherData.loc[weatherData['month'].isin(['07','08','09']), 'quarter'] = 'q3'
weatherData.loc[weatherData['month'].isin(['10','11','12']), 'quarter'] = 'q4'
 
weatherData['temp5Days']   = weatherData.groupby('ZIP').rolling(5)['temperature'].mean().reset_index(drop=True)
weatherData['precip5Days'] = weatherData.groupby('ZIP').rolling(5)['precipitation'].sum().reset_index(drop=True)
    
print(weatherData.shape,tempData.shape,precipData.shape)


weatherData.head()

(42881811, 9) (42881811, 3) (42881811, 3)


Unnamed: 0,ZIP,date,temperature,precipitation,month,year,quarter,temp5Days,precip5Days
0,1001,19810101.0,-5.148,0.0,1,1981,q1,,
1,1013,19810101.0,-5.249,0.0,1,1981,q1,,
2,1085,19810101.0,-5.579,0.0,1,1981,q1,,
3,1089,19810101.0,-5.378,0.0,1,1981,q1,,
4,1095,19810101.0,-5.814,0.0,1,1981,q1,-8.1044,1.613


In [21]:
del tempData
del precipData
gc.collect()

20

In [22]:
len(weatherData.ZIP.unique())

4193

Get the quarterly stats.

In [None]:
quarterlyStatsOverall = weatherData.drop(columns = {'date'}).\
    groupby(['quarter']).\
    agg(quarterly_avg_precip = ('precipitation',"mean"),
       quarterly_median_precip = ('precipitation',"median"),
       quarterly_variance_precip = ('precipitation',"var"),
       quarterly_avg_temp = ('temperature',"mean"),
       quarterly_median_temp = ('temperature',"median"),
       quarterly_variance_temp = ('temperature',"var")
       ).reset_index()

quarterlyStatsByZip = weatherData.drop(columns = {'date'}).\
    groupby(['ZIP','quarter']).\
    agg(quarterly_avg_precip = ('precipitation',"mean"),
       quarterly_median_precip = ('precipitation',"median"),
       quarterly_variance_precip = ('precipitation',"var"),
       quarterly_avg_temp = ('temperature',"mean"),
       quarterly_median_temp = ('temperature',"median"),
       quarterly_variance_temp = ('temperature',"var")
       ).reset_index()

'''
precipQuantsOverall = precipData.groupby(['month']).precipitation.quantile(q = quantiles).\
    reset_index().rename(columns = {'level_1': 'quantile'})
'''


In [None]:
quarterlyStatsByZip.to_csv("../../data/companyData/quarterlyStatsByZip.csv")

And the annual ones.

In [None]:
annualStatsOverall = weatherData.drop(columns = {'date'}).\
    groupby(['year']).\
    agg(annual_avg_precip = ('precipitation',"mean"),
       annual_median_precip = ('precipitation',"median"),
       annual_variance_precip = ('precipitation',"var"),
       annual_avg_temp = ('temperature',"mean"),
       annual_median_temp = ('temperature',"median"),
       annual_variance_temp = ('temperature',"var")
       ).reset_index()

annualStatsByZip = weatherData.drop(columns = {'date'}).\
    groupby(['ZIP','year']).\
    agg(annual_avg_precip = ('precipitation',"mean"),
       annual_median_precip = ('precipitation',"median"),
       annual_variance_precip = ('precipitation',"var"),
       annual_avg_temp = ('temperature',"mean"),
       annual_median_temp = ('temperature',"median"),
       annual_variance_temp = ('temperature',"var")
       ).reset_index()
annualStatsByZip.head()

In [None]:
quarterlyStatsByZip.head()

In [None]:
del quarterlyStatsByZip
del annualStatsByZip
gc.collect()

### Find Quartiles

In [23]:
def getPivotQuantsOverall(weatherType,weatherData):

    quants_overall  =  weatherData[weatherType].quantile(q = quantiles).reset_index().\
        rename(columns = {'index': 'quartile'})    
    
    quants_overall['quartile'] = 'q_' + quants_overall['quartile'].astype(str).str.slice(0,6)
    quants_overall = quants_overall.iloc[:,1:].values.flatten().tolist()

    return(quants_overall)

In [24]:
def getPivotQuants(weatherType, identifiers,weatherData):

    quants_overallByZip   = weatherData.groupby(identifiers)[weatherType].quantile(q = quantiles).reset_index().\
        rename(columns=lambda x: re.sub('level_[0-9]$','quartile',x))
    # rename(columns = {'level_1': 'quartile'})
    # print(quants_overallByZip.head())
    
    quants_overallByZip['quartile'] = 'q_' + quants_overallByZip['quartile'].astype(str).str.slice(0,6)

    ## 

    pivot_quants_overallByZip = quants_overallByZip.pivot(index=identifiers, 
            columns='quartile', values=str(weatherType)).reset_index().\
            rename_axis(None, axis=1)

    pivot_quants_overallByZip['quartileList']      = pivot_quants_overallByZip.iloc[:,len(identifiers):].values.tolist()
    pivot_quants_overallByZip['quartileLabelList'] = [list(pivot_quants_overallByZip.columns[2:-1])] * \
                                                                    pivot_quants_overallByZip.shape[0]

    pivot_quants_overallByZip = pivot_quants_overallByZip[pivot_quants_overallByZip.\
                                                            columns.drop(list(pivot_quants_overallByZip.filter(regex='q_')))]

    # pivot_quants_overallByZip.head()

    return(pivot_quants_overallByZip)

In [25]:
def getQuartCounts(df, weatherType):
    weatherCut = weatherType + "Cut"
    weatherCutLabels = weatherCut + 'labels'
    
    df['month'] = df.date.astype(str).str.slice(4,6)
    
    df['quarter'] = 'q1'

    df.loc[df['month'].isin(['04','05','06']), 'quarter'] = 'q2'
    df.loc[df['month'].isin(['07','08','09']), 'quarter'] = 'q3'
    df.loc[df['month'].isin(['10','11','12']), 'quarter'] = 'q4'

    df['yearQuarter'] = df.date.astype(str).str.slice(0,4) + df.quarter

    occurrences = weatherType + "Occurrences" 

    summaryDF = df.groupby(['ZIP','yearQuarter',weatherCutLabels]).size().reset_index()
    summaryDF.columns = ['zip','yearQuarter',weatherCutLabels,occurrences]
    
    return(summaryDF)

Now get all the quartile data:
    - Overall
    - By zip
    - Quarterly by zip

In [26]:
###################
# OVERALL
tempQuants   = getPivotQuantsOverall('temperature',weatherData)
precipQuants = getPivotQuantsOverall('precipitation',weatherData)

temp5DaysQuants   = getPivotQuantsOverall('temp5Days',weatherData)
precip5DaysQuants = getPivotQuantsOverall('precip5Days',weatherData)



####################
# BY ZIP
pivot_temperatureQuants_zip = getPivotQuants('temperature',['ZIP'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_temperatureQuants_zip.rename(columns = {'quartileList':     'quartileListTemperature_zip'},
                                      inplace = True)

pivot_temp5DaysQuants_zip = getPivotQuants('temp5Days',['ZIP'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_temp5DaysQuants_zip.rename(columns = {'quartileList':     'quartileListTemp5Days_zip'},
                                      inplace = True)



pivot_precipitationQuants_zip = getPivotQuants('precipitation',['ZIP'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_precipitationQuants_zip.rename(columns = {'quartileList':     'quartileListPrecipitation_zip'},
                                      inplace = True)

pivot_precip5DaysQuants_zip = getPivotQuants('precip5Days',['ZIP'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_precip5DaysQuants_zip.rename(columns = {'quartileList':     'quartileListPrecip5Days_zip'},
                                      inplace = True)


quants_zip = pivot_temperatureQuants_zip.merge(pivot_precipitationQuants_zip).merge(pivot_temp5DaysQuants_zip).merge(pivot_precip5DaysQuants_zip)



##########################################
# BY ZIP-QUARTER
pivot_temperatureQuants_quarterlyByZip = getPivotQuants('temperature',['ZIP','quarter'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_temperatureQuants_quarterlyByZip.rename(columns = {'quartileList':     'quartileListTemperature_quarterlyByZip'},
                                      inplace = True)
pivot_temp5DaysQuants_quarterlyByZip = getPivotQuants('temp5Days',['ZIP','quarter'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_temp5DaysQuants_quarterlyByZip.rename(columns = {'quartileList':     'quartileListTemp5Days_quarterlyByZip'},
                                      inplace = True)



pivot_precipitationQuants_quarterlyByZip = getPivotQuants('precipitation',['ZIP','quarter'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_precipitationQuants_quarterlyByZip.rename(columns = {'quartileList':     'quartileListPrecipitation_quarterlyByZip'},
                                      inplace = True)
pivot_precip5DaysQuants_quarterlyByZip = getPivotQuants('precip5Days',['ZIP','quarter'],weatherData).\
    drop(columns = {'quartileLabelList'})
pivot_precip5DaysQuants_quarterlyByZip.rename(columns = {'quartileList':     'quartileListPrecip5Days_quarterlyByZip'},
                                      inplace = True)


quants_quarterlyByZip = pivot_temperatureQuants_quarterlyByZip.merge(pivot_precipitationQuants_quarterlyByZip).merge(pivot_temp5DaysQuants_quarterlyByZip).merge(pivot_precip5DaysQuants_quarterlyByZip)



Construct a record of all the relevant quantiles by combining all of the above. Rough idea is:
    - Start with the zip-quarter data
    - Merge in the less-specific quarter information
    - For each row, put in the overall quartile information for each row

In [27]:
quantsAll                 = quants_quarterlyByZip.merge(quants_zip)



quantsAll['precipitationQuants'] = [precipQuants for i in quantsAll.index]
quantsAll['temperatureQuants']   = [tempQuants   for i in quantsAll.index]



quantsAll['precip5DaysQuants']   = [precip5DaysQuants for i in quantsAll.index]
quantsAll['temp5DaysQuants']     = [temp5DaysQuants   for i in quantsAll.index]



quantsAll.head()

Unnamed: 0,ZIP,quarter,quartileListTemperature_quarterlyByZip,quartileListPrecipitation_quarterlyByZip,quartileListTemp5Days_quarterlyByZip,quartileListPrecip5Days_quarterlyByZip,quartileListTemperature_zip,quartileListPrecipitation_zip,quartileListTemp5Days_zip,quartileListPrecip5Days_zip,precipitationQuants,temperatureQuants,precip5DaysQuants,temp5DaysQuants
0,1001,q1,"[-16.4400005340576, -6.171400165557857, 12.670...","[0.0, 0.0, 10.221999931335482, 17.111900169372...","[-15.702000045776487, 0.7366500204800014, 31.2...","[-4.231281991451397e-12, -2.6148860854391386e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
1,1001,q2,"[-4.23000001907349, 8.991749811172483, 29.6447...","[0.0, 0.0, 11.980000305175771, 19.715000247955...","[-13.233800125122215, 0.5024799859522551, 30.8...","[-4.256150987203e-12, -2.6290081223123707e-12,...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
2,1001,q3,"[10.0860004425049, 18.485250473022475, 32.1564...","[0.0, 0.0, 11.338500022888201, 20.034499645233...","[-19.181399917602626, 1.6346999764440695, 31.1...","[-3.936406756110955e-12, -2.7431390492438368e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
3,1001,q4,"[-13.2650003433228, -1.171749949455265, 20.800...","[0.0, 0.0, 12.02500009536745, 21.4705004692077...","[-15.576799774170066, 0.9375499844548808, 31.3...","[-4.071409875905374e-12, -2.69473332537018e-12...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
4,1013,q1,"[-16.4659996032715, -6.442399930953977, 12.349...","[0.0, 0.0, 10.115000152587905, 17.318700170898...","[-14.660599899292105, 0.8630599975584131, 31.0...","[-4.234834705130197e-12, -2.6202151559573394e-...","[-16.4659996032715, -2.163100028038028, 29.223...","[0.0, 0.0, 10.935199928283698, 19.464899063110...","[-18.097999763488996, 0.970240066051357, 31.14...","[-4.256150987203e-12, -2.6858515411731787e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."


In [28]:
# relevantZips = allCustomerData.zipcode.append(allSupplierData.zipcode).unique()
outfile =  '../../data/companyData/quantsAll.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(quantsAll, pickle_file)

In [29]:
del weatherData
gc.collect()

60

### Convert 2009-2018 data into quartiles

In [30]:
recentDecadeTmax   = getData("Tmax",2009,2019)
recentDecadePrecip = getData("Precip",2009,2019)

In [31]:
recentDecadeWeather = recentDecadeTmax.merge(recentDecadePrecip)
recentDecadeWeather.head()

Unnamed: 0,ZIP,date,temperature,precipitation
0,1001,20090101.0,-2.941,4.491
1,1013,20090101.0,-3.683,4.403
2,1085,20090101.0,-4.804,4.008
3,1089,20090101.0,-3.892,3.613
4,1095,20090101.0,-4.128,5.737


In [32]:
recentDecadeWeather['month'] = recentDecadeWeather.date.astype(str).str.slice(4,6)

recentDecadeWeather['quarter'] = 'q1'

recentDecadeWeather.loc[recentDecadeWeather['month'].isin(['04','05','06']), 'quarter'] = 'q2'
recentDecadeWeather.loc[recentDecadeWeather['month'].isin(['07','08','09']), 'quarter'] = 'q3'
recentDecadeWeather.loc[recentDecadeWeather['month'].isin(['10','11','12']), 'quarter'] = 'q4'

recentDecadeWeather['yearQuarter'] = recentDecadeWeather.date.astype(str).str.slice(0,4) + recentDecadeWeather.quarter
recentDecadeWeather.head()

Unnamed: 0,ZIP,date,temperature,precipitation,month,quarter,yearQuarter
0,1001,20090101.0,-2.941,4.491,1,q1,2009q1
1,1013,20090101.0,-3.683,4.403,1,q1,2009q1
2,1085,20090101.0,-4.804,4.008,1,q1,2009q1
3,1089,20090101.0,-3.892,3.613,1,q1,2009q1
4,1095,20090101.0,-4.128,5.737,1,q1,2009q1


In [33]:
recentDecadeWeather['temp5Days']   = recentDecadeWeather.groupby('ZIP').rolling(5)['temperature'].mean().reset_index(drop=True)
recentDecadeWeather['precip5Days'] = recentDecadeWeather.groupby('ZIP').rolling(5)['precipitation'].sum().reset_index(drop=True)

In [34]:
del recentDecadePrecip
del recentDecadeTmax
gc.collect()

recentDecadeWeather[recentDecadeWeather.yearQuarter == '2010q1']

Unnamed: 0,ZIP,date,temperature,precipitation,month,quarter,yearQuarter,temp5Days,precip5Days
1530445,1001,20100101.0,-0.760,3.408,01,q1,2010q1,10.5242,-1.136868e-12
1530446,1013,20100101.0,-0.814,3.216,01,q1,2010q1,9.3236,-1.136868e-12
1530447,1085,20100101.0,-1.217,3.864,01,q1,2010q1,6.3406,2.624500e+01
1530448,1089,20100101.0,-1.172,3.066,01,q1,2010q1,3.7286,4.018400e+01
1530449,1095,20100101.0,-1.482,3.075,01,q1,2010q1,2.0320,4.018400e+01
...,...,...,...,...,...,...,...,...,...
1907810,99336,20100331.0,13.573,0.000,03,q1,2010q1,21.3488,3.246000e+00
1907811,99352,20100331.0,13.298,0.004,03,q1,2010q1,20.0190,3.246000e+00
1907812,99354,20100331.0,13.499,0.687,03,q1,2010q1,20.3110,8.943000e+00
1907813,99362,20100331.0,9.588,1.418,03,q1,2010q1,19.7226,9.060000e+00


Merge the intervals into the recent weather data.

In [35]:
quantsAll.columns

Index(['ZIP', 'quarter', 'quartileListTemperature_quarterlyByZip',
       'quartileListPrecipitation_quarterlyByZip',
       'quartileListTemp5Days_quarterlyByZip',
       'quartileListPrecip5Days_quarterlyByZip', 'quartileListTemperature_zip',
       'quartileListPrecipitation_zip', 'quartileListTemp5Days_zip',
       'quartileListPrecip5Days_zip', 'precipitationQuants',
       'temperatureQuants', 'precip5DaysQuants', 'temp5DaysQuants'],
      dtype='object')

In [36]:
recentDecadeWeather.columns

Index(['ZIP', 'date', 'temperature', 'precipitation', 'month', 'quarter',
       'yearQuarter', 'temp5Days', 'precip5Days'],
      dtype='object')

In [37]:
recentDecadeWeather = recentDecadeWeather.merge(quantsAll)

recentDecadeWeather.head()

Unnamed: 0,ZIP,date,temperature,precipitation,month,quarter,yearQuarter,temp5Days,precip5Days,quartileListTemperature_quarterlyByZip,...,quartileListTemp5Days_quarterlyByZip,quartileListPrecip5Days_quarterlyByZip,quartileListTemperature_zip,quartileListPrecipitation_zip,quartileListTemp5Days_zip,quartileListPrecip5Days_zip,precipitationQuants,temperatureQuants,precip5DaysQuants,temp5DaysQuants
0,1001,20090101.0,-2.941,4.491,1,q1,2009q1,,,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-15.702000045776487, 0.7366500204800014, 31.2...","[-4.231281991451397e-12, -2.6148860854391386e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
1,1001,20090102.0,-7.09,0.0,1,q1,2009q1,23.3866,5.616,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-15.702000045776487, 0.7366500204800014, 31.2...","[-4.231281991451397e-12, -2.6148860854391386e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
2,1001,20090103.0,-0.701,0.263,1,q1,2009q1,-0.0072,0.0,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-15.702000045776487, 0.7366500204800014, 31.2...","[-4.231281991451397e-12, -2.6148860854391386e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
3,1001,20090104.0,0.804,0.0,1,q1,2009q1,19.89,45.617001,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-15.702000045776487, 0.7366500204800014, 31.2...","[-4.231281991451397e-12, -2.6148860854391386e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."
4,1001,20090105.0,1.763,0.731,1,q1,2009q1,5.158,26.271,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-15.702000045776487, 0.7366500204800014, 31.2...","[-4.231281991451397e-12, -2.6148860854391386e-...","[-16.4400005340576, -1.892299997806547, 29.442...","[0.0, 0.0, 11.448999595642086, 19.800300216674...","[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0, 8.48999977111816, 16.0580005645752,...","[-34.1430015563965, 0.0410000011324883, 31.767...","[-4.284572696633404e-12, -2.6716406864579767e-...","[-29.64580039978035, 0.9051999568937892, 31.24..."


Get the annual quants.

In [40]:
recentDecadeWeather.temperatureQuants[0]

[-34.1430015563965,
 0.0410000011324883,
 31.7670001983643,
 33.5940017700195,
 37.1129989624023,
 40.4129981994629,
 40.8310012817383,
 42.0229988098145,
 43.1110000610352,
 43.7060012817383,
 49.6259994506836]

In [42]:
for i in range(0,len(recentDecadeWeather.temp_annualQuants[0])):
    recentDecadeWeather.temperatureQuants[0][i]   = recentDecadeWeather.temperatureQuants[0][i] + i/10000
    recentDecadeWeather.temp5DaysQuants[0][i]     = recentDecadeWeather.temp5DaysQuants[0][i] + i/10000
    
    recentDecadeWeather.precipitationQuants[0][i] = recentDecadeWeather.precipitationQuants[0][i] + i/10000
    recentDecadeWeather.precip5DaysQuants[0][i]   = recentDecadeWeather.precip5DaysQuants[0][i] + i/10000


In [43]:
recentDecadeWeather['temp_annualQuants'] = pd.cut(recentDecadeWeather.temperature, 
           bins = recentDecadeWeather.temperatureQuants[0],
           labels = quant_labels,include_lowest=True)
recentDecadeWeather['temp5Days_annualQuants'] = pd.cut(recentDecadeWeather.temp5Days, 
           bins = recentDecadeWeather.temp5DaysQuants[0],
           labels = quant_labels,include_lowest=True)

recentDecadeWeather['precip_annualQuants'] = pd.cut(recentDecadeWeather.precipitation, 
           bins = recentDecadeWeather.precipitationQuants[0],
           labels = quant_labels,include_lowest=True)

recentDecadeWeather['precip5Days_annualQuants'] = pd.cut(recentDecadeWeather.precip5Days, 
           bins = recentDecadeWeather.precip5DaysQuants[0],
           labels = quant_labels,include_lowest=True)

recentDecadeWeather.head()


Unnamed: 0,ZIP,date,temperature,precipitation,month,quarter,yearQuarter,temp5Days,precip5Days,quartileListTemperature_quarterlyByZip,...,quartileListTemp5Days_zip,quartileListPrecip5Days_zip,precipitationQuants,temperatureQuants,precip5DaysQuants,temp5DaysQuants,temp_annualQuants,temp5Days_annualQuants,precip_annualQuants,precip5Days_annualQuants
0,1001,20090101.0,-2.941,4.491,1,q1,2009q1,,,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0001, 8.49019977111816, 16.05830056457...","[-34.1430015563965, 0.0411000011324883, 31.767...","[-4.284572696633404e-12, 9.999999732835932e-05...","[-29.64580039978035, 0.9052999568937892, 31.24...",quant_0.05,,quant_tossThisOne,
1,1001,20090102.0,-7.09,0.0,1,q1,2009q1,23.3866,5.616,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0001, 8.49019977111816, 16.05830056457...","[-34.1430015563965, 0.0411000011324883, 31.767...","[-4.284572696633404e-12, 9.999999732835932e-05...","[-29.64580039978035, 0.9052999568937892, 31.24...",quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne
2,1001,20090103.0,-0.701,0.263,1,q1,2009q1,-0.0072,0.0,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0001, 8.49019977111816, 16.05830056457...","[-34.1430015563965, 0.0411000011324883, 31.767...","[-4.284572696633404e-12, 9.999999732835932e-05...","[-29.64580039978035, 0.9052999568937892, 31.24...",quant_0.05,quant_0.05,quant_tossThisOne,quant_0.05
3,1001,20090104.0,0.804,0.0,1,q1,2009q1,19.89,45.617001,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0001, 8.49019977111816, 16.05830056457...","[-34.1430015563965, 0.0411000011324883, 31.767...","[-4.284572696633404e-12, 9.999999732835932e-05...","[-29.64580039978035, 0.9052999568937892, 31.24...",quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95
4,1001,20090105.0,1.763,0.731,1,q1,2009q1,5.158,26.271,"[-16.4400005340576, -6.171400165557857, 12.670...",...,"[-19.181399917602626, 0.9387299569880044, 31.1...","[-4.256150987203e-12, -2.6787461138155777e-12,...","[0.0, 0.0001, 8.49019977111816, 16.05830056457...","[-34.1430015563965, 0.0411000011324883, 31.767...","[-4.284572696633404e-12, 9.999999732835932e-05...","[-29.64580039978035, 0.9052999568937892, 31.24...",quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne


Now do the same but for zips. For some reason the filtering operations are like 10x faster with making the ZIP and quarter out to be categories.

In [44]:
recentDecadeWeather = recentDecadeWeather.astype({'ZIP': 'category', 'quarter': 'category'})

In [50]:
def getZipQuarts(zipcode):
    
    tempData = recentDecadeWeather[recentDecadeWeather.ZIP == zipcode].reset_index()
    
    if tempData.shape[0] > 0:


        for i in range(0,len(tempData.quartileListPrecip5Days_zip[0])):
                tempData.quartileListPrecipitation_zip[0][i] = tempData.quartileListPrecipitation_zip[0][i] + i/10000
                tempData.quartileListPrecip5Days_zip[0][i]   = tempData.quartileListPrecip5Days_zip[0][i] + i/10000


        tempData['temp_zipQuants'] = pd.cut(tempData.temperature, 
               bins = tempData.quartileListTemperature_zip[0],
               labels = quant_labels,include_lowest=True)
        tempData['temp5Days_zipQuants'] = pd.cut(tempData.temp5Days, 
               bins = tempData.quartileListTemp5Days_zip[0],
               labels = quant_labels,include_lowest=True)


        tempData['precip_zipQuants'] = pd.cut(tempData.precipitation, 
               bins = tempData.quartileListPrecipitation_zip[0],
               labels = quant_labels,include_lowest=True)

        tempData['precip5Days_zipQuants'] = pd.cut(tempData.precip5Days, 
               bins = tempData.quartileListPrecip5Days_zip[0], 
               labels = quant_labels,include_lowest=True)
        
    
    
    return(tempData)

In [51]:
def getZipQuarterQuarts(zipcode):

    weatherByZipByQuarter = pd.DataFrame()

    quarters = recentDecadeWeather.quarter.unique()
    

    for quarter in quarters:
        tempData = recentDecadeWeather[(recentDecadeWeather.ZIP  == zipcode) & 
                        (recentDecadeWeather.quarter == quarter)].reset_index()
        if tempData.shape[0] > 0:

            for i in range(0,len(tempData.quartileListPrecip5Days_quarterlyByZip[0])):
                tempData.quartileListPrecipitation_quarterlyByZip[0][i] = tempData.quartileListPrecipitation_quarterlyByZip[0][i] + i/10000
                tempData.quartileListPrecip5Days_quarterlyByZip[0][i]   = tempData.quartileListPrecip5Days_quarterlyByZip[0][i] + i/10000
                tempData.quartileListTemperature_quarterlyByZip[0][i] = tempData.quartileListTemperature_quarterlyByZip[0][i] + i/10000
                tempData.quartileListTemp5Days_quarterlyByZip[0][i]   = tempData.quartileListTemp5Days_quarterlyByZip[0][i] + i/10000


            tempData['temp_zipQuarterQuants'] = pd.cut(tempData.temperature, 
                   bins = tempData.quartileListTemperature_quarterlyByZip[0],
                   labels = quant_labels,include_lowest=True)
            tempData['temp5Days_zipQuarterQuants'] = pd.cut(tempData.temp5Days, 
                   bins = tempData.quartileListTemp5Days_quarterlyByZip[0],
                   labels = quant_labels,include_lowest=True)


            tempData['precip_zipQuarterQuants'] = pd.cut(tempData.precipitation, 
                   bins = tempData.quartileListPrecipitation_quarterlyByZip[0],
                   labels = quant_labels,include_lowest=True)
            tempData['precip5Days_zipQuarterQuants'] = pd.cut(tempData.precip5Days, 
                   bins = tempData.quartileListPrecip5Days_quarterlyByZip[0], 
                   labels = quant_labels,include_lowest=True) # 


            weatherByZipByQuarter = weatherByZipByQuarter.append(tempData)

    return(weatherByZipByQuarter)

In [47]:
ZIPs = recentDecadeWeather.ZIP.unique()
len(ZIPs)

4193

In [52]:
start = time.time()




with multiprocessing.Pool() as pool:
    zipQuarts = pool.map(getZipQuarts, ZIPs)

    
    
weatherByZip = pd.concat(zipQuarts)


print(time.time() - start)

87.93848586082458


In [53]:
start = time.time()



with multiprocessing.Pool() as pool:
    zipQuarterQuarts = pool.map(getZipQuarterQuarts, ZIPs)

    
    
weatherByZipQuarter = pd.concat(zipQuarterQuarts)


print(time.time() - start)

184.8950002193451


In [54]:
weatherByZipQuarter['yearQuarter'] = weatherByZipQuarter.date.astype('str').str.slice(0,4) + weatherByZipQuarter.quarter.astype('str')
weatherByZip['yearQuarter']        = weatherByZip.date.astype('str').str.slice(0,4) + weatherByZip.quarter.astype('str')

In [55]:
pt1 = weatherByZip[['ZIP','date','quarter','yearQuarter',
              'temp_annualQuants', 'temp5Days_annualQuants',
              'precip_annualQuants', 'precip5Days_annualQuants', 
              'temp_zipQuants', 'temp5Days_zipQuants',
              'precip_zipQuants', 'precip5Days_zipQuants']]

pt2 = weatherByZipQuarter[['ZIP','date','quarter','yearQuarter',
                           'temp_zipQuarterQuants','temp5Days_zipQuarterQuants', 
                     'precip_zipQuarterQuants','precip5Days_zipQuarterQuants']]


outfile =  '../../data/companyData/pt1.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(pt1, pickle_file)
    
outfile =  '../../data/companyData/pt2.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(pt2, pickle_file)




In [56]:
allQuarts = pt1.merge(pt2)

In [57]:
allQuarts.head()

Unnamed: 0,ZIP,date,quarter,yearQuarter,temp_annualQuants,temp5Days_annualQuants,precip_annualQuants,precip5Days_annualQuants,temp_zipQuants,temp5Days_zipQuants,precip_zipQuants,precip5Days_zipQuants,temp_zipQuarterQuants,temp5Days_zipQuarterQuants,precip_zipQuarterQuants,precip5Days_zipQuarterQuants
0,1001,20090101.0,q1,2009q1,quant_0.05,,quant_tossThisOne,,quant_0.05,,quant_tossThisOne,,quant_tossThisOne,,quant_tossThisOne,
1,1001,20090102.0,q1,2009q1,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne
2,1001,20090103.0,q1,2009q1,quant_0.05,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05
3,1001,20090104.0,q1,2009q1,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95
4,1001,20090105.0,q1,2009q1,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne


------------------

### Now go through the data from 2010 - 2019 and find time spent in each bin

In [58]:
def getCounts(quartType,df):

    test = df.groupby(['ZIP','yearQuarter',quartType]).size().reset_index()
    test = test.drop_duplicates()
    test.columns = ['zip', 'yearQuarter', quartType, 'occurrences']

    # print(test.head())


    testPivot = test.pivot(index=['zip','yearQuarter'], 
            columns = quartType, values= 'occurrences').reset_index().\
            rename_axis(None, axis=1)

    testPivot.columns.values[2:] = quartType[:-6] + testPivot.columns.values[2:]

    # print(testPivot.head())
    return(testPivot)

In [59]:
allQuarts

Unnamed: 0,ZIP,date,quarter,yearQuarter,temp_annualQuants,temp5Days_annualQuants,precip_annualQuants,precip5Days_annualQuants,temp_zipQuants,temp5Days_zipQuants,precip_zipQuants,precip5Days_zipQuants,temp_zipQuarterQuants,temp5Days_zipQuarterQuants,precip_zipQuarterQuants,precip5Days_zipQuarterQuants
0,1001,20090101.0,q1,2009q1,quant_0.05,,quant_tossThisOne,,quant_0.05,,quant_tossThisOne,,quant_tossThisOne,,quant_tossThisOne,
1,1001,20090102.0,q1,2009q1,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne
2,1001,20090103.0,q1,2009q1,quant_0.05,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05
3,1001,20090104.0,q1,2009q1,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.95
4,1001,20090105.0,q1,2009q1,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16843276,99403,20191227.0,q4,2019q4,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_tossThisOne
16843277,99403,20191228.0,q4,2019q4,quant_tossThisOne,quant_1xQtr,quant_0.05,quant_0.05,quant_0.05,quant_1xQtr,quant_0.05,quant_0.05,quant_tossThisOne,quant_1xQtr,quant_0.05,quant_0.05
16843278,99403,20191229.0,q4,2019q4,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_tossThisOne,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_tossThisOne
16843279,99403,20191230.0,q4,2019q4,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.05,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.05,quant_tossThisOne,quant_tossThisOne,quant_0.05,quant_0.05


In [60]:
countData = getCounts(allQuarts.columns[4],allQuarts)

for quart in allQuarts.columns[5:]:
    print(quart)
    countData = countData.merge(getCounts(quart,allQuarts))

temp5Days_annualQuants
precip_annualQuants
precip5Days_annualQuants
temp_zipQuants
temp5Days_zipQuants
precip_zipQuants
precip5Days_zipQuants
temp_zipQuarterQuants
temp5Days_zipQuarterQuants
precip_zipQuarterQuants
precip5Days_zipQuarterQuants


In [61]:
print(countData.columns)

Index(['zip', 'yearQuarter', 'temp_annualquant_0.05',
       'temp_annualquant_tossThisOne', 'temp_annualquant_0.95',
       'temp_annualquant_1xQtr', 'temp_annualquant_1xYr',
       'temp_annualquant_1x5Qtrs', 'temp_annualquant_1x10Qtrs',
       'temp_annualquant_1x5Yrs',
       ...
       'precip5Days_zipQuarterquant_0.05',
       'precip5Days_zipQuarterquant_tossThisOne',
       'precip5Days_zipQuarterquant_0.95', 'precip5Days_zipQuarterquant_1xQtr',
       'precip5Days_zipQuarterquant_1xYr',
       'precip5Days_zipQuarterquant_1x5Qtrs',
       'precip5Days_zipQuarterquant_1x10Qtrs',
       'precip5Days_zipQuarterquant_1x5Yrs',
       'precip5Days_zipQuarterquant_1x10Yrs',
       'precip5Days_zipQuarterquant_1.0'],
      dtype='object', length=122)


Reformat the column names so they're consistent.

In [62]:
countData['year'] = countData.yearQuarter.str.slice(0,4)
countData['qtr']  = countData.yearQuarter.str.slice(5,6).astype('float')
countData['zipcode']  = countData.zip.astype('int64')

In [63]:
countData.zipcode.min()

1001

In [64]:
countData.head()

Unnamed: 0,zip,yearQuarter,temp_annualquant_0.05,temp_annualquant_tossThisOne,temp_annualquant_0.95,temp_annualquant_1xQtr,temp_annualquant_1xYr,temp_annualquant_1x5Qtrs,temp_annualquant_1x10Qtrs,temp_annualquant_1x5Yrs,...,precip5Days_zipQuarterquant_1xQtr,precip5Days_zipQuarterquant_1xYr,precip5Days_zipQuarterquant_1x5Qtrs,precip5Days_zipQuarterquant_1x10Qtrs,precip5Days_zipQuarterquant_1x5Yrs,precip5Days_zipQuarterquant_1x10Yrs,precip5Days_zipQuarterquant_1.0,year,qtr,zipcode
0,1001,2009q1,31,59,0,0,0,0,0,0,...,6,0,0,0,0,0,0,2009,1.0,1001
1,1001,2009q2,0,88,2,1,0,0,0,0,...,5,1,0,0,0,0,0,2009,2.0,1001
2,1001,2009q3,0,84,7,1,0,0,0,0,...,7,2,0,0,0,0,0,2009,3.0,1001
3,1001,2009q4,10,82,0,0,0,0,0,0,...,4,0,0,0,0,0,0,2009,4.0,1001
4,1001,2010q1,18,72,0,0,0,0,0,0,...,2,0,0,0,0,0,0,2010,1.0,1001


In [65]:
countData.to_csv("../../data/companyData/allWeatherBins_2009to2019.csv")

In [66]:
len(countData.zipcode.unique())

4193

In [67]:
countData.columns

Index(['zip', 'yearQuarter', 'temp_annualquant_0.05',
       'temp_annualquant_tossThisOne', 'temp_annualquant_0.95',
       'temp_annualquant_1xQtr', 'temp_annualquant_1xYr',
       'temp_annualquant_1x5Qtrs', 'temp_annualquant_1x10Qtrs',
       'temp_annualquant_1x5Yrs',
       ...
       'precip5Days_zipQuarterquant_1xQtr', 'precip5Days_zipQuarterquant_1xYr',
       'precip5Days_zipQuarterquant_1x5Qtrs',
       'precip5Days_zipQuarterquant_1x10Qtrs',
       'precip5Days_zipQuarterquant_1x5Yrs',
       'precip5Days_zipQuarterquant_1x10Yrs',
       'precip5Days_zipQuarterquant_1.0', 'year', 'qtr', 'zipcode'],
      dtype='object', length=125)

In [68]:
'''countData = pd.read_csv("../../data/companyData/allWeatherBins_2009to2019.csv").drop(columns = 'Unnamed: 0')

countData.head()'''

countDataRevised = countData[['zipcode','year','qtr']]


In [69]:

cdf = {}


cdf['0.95']     = ['1xQtr','1xYr','1x5Qtrs','1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1xQtr']    = ['1xYr','1x5Qtrs','1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1xYr']     = ['1x5Qtrs','1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1x5Qtrs']  = ['1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1x10Qtrs'] = ['1x5Yrs','1x10Yrs','1.0']
cdf['1x5Yrs']   = ['1x10Yrs','1.0']
cdf['1x10Yrs']  = ['1.0']


weatherVars = ['precip_','temp_','precip5Days_','temp5Days_']
statVars    = ['annualquant_','zipquant_','zipQuarterquant_']


for weatherVar in weatherVars:
    for statVar in statVars:
        print(weatherVar + statVar, "*************************")
        
        # 0.95
        for cutoff in list(cdf.keys()):
            
            varHere = weatherVar + statVar + cutoff
            countDataRevised[varHere] = countData[varHere]
            print(cutoff, "******")
            for greaterThanCutoffs in cdf[cutoff]:
                varCDF = weatherVar + statVar + greaterThanCutoffs 
                countDataRevised[varHere] = countDataRevised[varHere] + countData[varCDF] 
                print(weatherVar + statVar + greaterThanCutoffs)
 

precip_annualquant_ *************************
0.95 ******
precip_annualquant_1xQtr
precip_annualquant_1xYr
precip_annualquant_1x5Qtrs
precip_annualquant_1x10Qtrs
precip_annualquant_1x5Yrs
precip_annualquant_1x10Yrs
precip_annualquant_1.0
1xQtr ******
precip_annualquant_1xYr
precip_annualquant_1x5Qtrs
precip_annualquant_1x10Qtrs
precip_annualquant_1x5Yrs
precip_annualquant_1x10Yrs
precip_annualquant_1.0
1xYr ******
precip_annualquant_1x5Qtrs
precip_annualquant_1x10Qtrs
precip_annualquant_1x5Yrs
precip_annualquant_1x10Yrs
precip_annualquant_1.0
1x5Qtrs ******
precip_annualquant_1x10Qtrs
precip_annualquant_1x5Yrs
precip_annualquant_1x10Yrs
precip_annualquant_1.0
1x10Qtrs ******
precip_annualquant_1x5Yrs
precip_annualquant_1x10Yrs
precip_annualquant_1.0
1x5Yrs ******
precip_annualquant_1x10Yrs
precip_annualquant_1.0
1x10Yrs ******
precip_annualquant_1.0
precip_zipquant_ *************************
0.95 ******
precip_zipquant_1xQtr
precip_zipquant_1xYr
precip_zipquant_1x5Qtrs
precip_zipquant_

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [70]:
countDataRevised['temp_annualquant_0.95']

0          0
1          3
2          8
3          0
4          0
          ..
184487     0
184488     0
184489     1
184490    19
184491     0
Name: temp_annualquant_0.95, Length: 184492, dtype: int64

In [71]:
countData['temp_annualquant_0.95']

0         0
1         2
2         7
3         0
4         0
         ..
184487    0
184488    0
184489    1
184490    9
184491    0
Name: temp_annualquant_0.95, Length: 184492, dtype: int64

In [72]:
countDataRevised.to_csv("../../data/companyData/revised_allWeatherBins_2009to2019.csv")