In [1]:
import pickle as pkl
import pandas as pd
import matplotlib
import os
import re

import collections
import datetime
import time

 
import geopandas as gpd

import numpy as np

import rasterio

from difflib import get_close_matches

from fuzzywuzzy import process
from fuzzywuzzy import fuzz

import dask.dataframe as dd

import gc
import sys

from multiprocessing import Pool
import multiprocessing


  import pandas.util.testing as tm


# Sample

In [2]:
callLocation = '../../data/companyData/relevantZips.pkl'
file   = open(callLocation,"rb")
relevantZips = list(pkl.load(file))

len(relevantZips)

4538

In [3]:
def getData(weatherType,yearRange1,yearRange2):

    if weatherType == "Tmax":
        weatherVar = "temperature"
    else: 
        weatherVar = "precipitation"
    
    year = yearRange1
    filename = "../../../../../../../Volumes/backup2/dissData/prism/zipcode" + weatherType + str(year) + ".csv"
    data = dd.read_csv(filename, assume_missing=True)[['ZIP','date',weatherVar]]
    data['ZIP'] = data.ZIP.astype('int64')

    
    callLocation = '../../data/companyData/relevantZips.pkl'
    file   = open(callLocation,"rb")
    relevantZips = list(pkl.load(file))
    
    
    data = data[data.ZIP.isin(relevantZips)]
    
    # data = data[~(data[weatherVar].isna().compute())]

    years = range(yearRange1 + 1,yearRange2 + 1)
    for year in years:
        filename = "../../../../../../../Volumes/backup2/dissData/prism/zipcode" + weatherType + str(year) + ".csv"
        tempData = dd.read_csv(filename, assume_missing=True)[['ZIP','date',weatherVar]]
        tempData['ZIP'] = tempData.ZIP.astype('int64')

        tempData = tempData[tempData.ZIP.isin(relevantZips)]
        # tempData = tempData[~(tempData[weatherVar].isna().compute())]
        data = data.append(tempData)

    data = data[~(data[weatherVar].isna())].compute()
    
    return(data)


I think only a few of these quantiles will be relevant. Let's look at 0, 0.9, 0.95, 1.0

In [26]:
quantiles = [0.0, 0.05, 0.90, 0.95, 0.9888, 0.9973, 0.9978, 0.9989, 0.9995,  0.9997, 1.0]

# need to account for fact that once a year-quarter is 90 days, not 365
quant_labels = ['quant_0.05','quant_tossThisOne','quant_0.95','quant_1xQtr','quant_1xYr',
                'quant_1x5Qtrs','quant_1x10Qtrs','quant_1x5Yrs','quant_1x10Yrs','quant_1.0']

# Precipitation

In [9]:
weatherType = "Precip" # Tmax
precipData = getData(weatherType, 1981, 2008)

In [10]:
print(sys.getsizeof(precipData)/1e6)

1372.217984


In [11]:
len(precipData.ZIP.unique())

4193

## Temperature

In [12]:
weatherType = "Tmax"
tempData = getData(weatherType, 1981, 2008)

In [13]:
print(sys.getsizeof(tempData)/1e6)

1372.217984


# Industry Data

In [14]:
indData = pd.read_csv("../../data/companyData/igData.csv")[['zipcode','famafrench']].drop_duplicates().reset_index(drop = True)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [15]:
indData.head()

Unnamed: 0,zipcode,famafrench
0,55344,37.0
1,76155,41.0
2,45209,21.0
3,45227,21.0
4,75254,21.0


In [16]:
indData.shape

(6794, 2)

## Get stats

### Describe Climate

In [17]:
weatherData = tempData.merge(precipData)
weatherData['month'] = weatherData.date.astype('str').str.slice(4,6)
weatherData['year'] = weatherData.date.astype('str').str.slice(0,4)

weatherData['quarter'] = 'q1'

weatherData.loc[weatherData['month'].isin(['04','05','06']), 'quarter'] = 'q2'
weatherData.loc[weatherData['month'].isin(['07','08','09']), 'quarter'] = 'q3'
weatherData.loc[weatherData['month'].isin(['10','11','12']), 'quarter'] = 'q4'
 
weatherData['temp5Days']   = weatherData.groupby('ZIP').rolling(5)['temperature'].mean().reset_index(drop=True)
weatherData['precip5Days'] = weatherData.groupby('ZIP').rolling(5)['precipitation'].sum().reset_index(drop=True)
    
print(weatherData.shape,tempData.shape,precipData.shape)


weatherData.head()

(42881811, 9) (42881811, 3) (42881811, 3)


Unnamed: 0,ZIP,date,temperature,precipitation,month,year,quarter,temp5Days,precip5Days
0,1001,19810101.0,-5.148,0.0,1,1981,q1,,
1,1013,19810101.0,-5.249,0.0,1,1981,q1,,
2,1085,19810101.0,-5.579,0.0,1,1981,q1,,
3,1089,19810101.0,-5.378,0.0,1,1981,q1,,
4,1095,19810101.0,-5.814,0.0,1,1981,q1,-8.1044,1.613


In [18]:
del tempData
del precipData
gc.collect()

20

In [19]:
print(weatherData.shape)
weatherData = weatherData[weatherData.ZIP.isin(indData.zipcode.unique())]
print(weatherData.shape)

(42881811, 9)
(31673019, 9)


In [20]:
weatherData.rename(columns = {'ZIP': 'zipcode'}, inplace = True)
weatherByInd = indData.merge(weatherData)
print(weatherByInd.shape)

(67805010, 10)


In [21]:
weatherByInd.head()

Unnamed: 0,zipcode,famafrench,date,temperature,precipitation,month,year,quarter,temp5Days,precip5Days
0,55344,37.0,19810101.0,0.579,0.98,1,1981,q1,15.7274,22.431
1,55344,37.0,19810102.0,-1.523,0.0,1,1981,q1,14.266,9.29
2,55344,37.0,19810103.0,-7.25,0.0,1,1981,q1,14.4948,29.891
3,55344,37.0,19810104.0,-16.907,0.0,1,1981,q1,14.7034,6.355
4,55344,37.0,19810105.0,-13.35,0.0,1,1981,q1,22.090001,8.254


Get the quarterly stats by industry.

In [22]:
'''quarterlyStatsBySIC = weatherByInd.drop(columns = {'date'}).\
    groupby(['sic','quarter']).\
    agg(quarterly_avg_precip = ('precipitation',"mean"),
       quarterly_median_precip = ('precipitation',"median"),
       quarterly_variance_precip = ('precipitation',"var"),
       quarterly_avg_temp = ('temperature',"mean"),
       quarterly_median_temp = ('temperature',"median"),
       quarterly_variance_temp = ('temperature',"var")
       ).reset_index()'''

quarterlyStatsByFF = weatherByInd.drop(columns = {'date'}).\
    groupby(['famafrench','quarter']).\
    agg(quarterly_avg_precip = ('precipitation',"mean"),
       quarterly_median_precip = ('precipitation',"median"),
       quarterly_variance_precip = ('precipitation',"var"),
       quarterly_avg_temp = ('temperature',"mean"),
       quarterly_median_temp = ('temperature',"median"),
       quarterly_variance_temp = ('temperature',"var")
       ).reset_index()

'''
precipQuantsOverall = precipData.groupby(['month']).precipitation.quantile(q = quantiles).\
    reset_index().rename(columns = {'level_1': 'quantile'})
'''


"\nprecipQuantsOverall = precipData.groupby(['month']).precipitation.quantile(q = quantiles).    reset_index().rename(columns = {'level_1': 'quantile'})\n"

In [23]:
# quarterlyStatsBySIC.to_csv("../../data/companyData/quarterlyStatsBySIC.csv")
quarterlyStatsByFF.to_csv("../../data/companyData/quarterlyStatsByFF.csv")

And the annual ones.

In [24]:
'''annualStatsOverallbySIC = weatherByInd.drop(columns = {'date'}).\
    groupby(['sic','quarter']).\
    agg(annual_avg_precip = ('precipitation',"mean"),
       annual_median_precip = ('precipitation',"median"),
       annual_variance_precip = ('precipitation',"var"),
       annual_avg_temp = ('temperature',"mean"),
       annual_median_temp = ('temperature',"median"),
       annual_variance_temp = ('temperature',"var")
       ).reset_index()'''

annualStatsOverallbyFF = weatherByInd.drop(columns = {'date'}).\
    groupby(['famafrench','quarter']).\
    agg(annual_avg_precip = ('precipitation',"mean"),
       annual_median_precip = ('precipitation',"median"),
       annual_variance_precip = ('precipitation',"var"),
       annual_avg_temp = ('temperature',"mean"),
       annual_median_temp = ('temperature',"median"),
       annual_variance_temp = ('temperature',"var")
       ).reset_index()

In [25]:
# annualStatsOverallbySIC.to_csv("../../data/companyData/annualStatsOverallbySIC.csv")
annualStatsOverallbyFF.to_csv("../../data/companyData/annualStatsOverallbyFF.csv")

In [26]:
#del quarterlyStatsBySIC
del quarterlyStatsByFF
# del annualStatsOverallbySIC
del annualStatsOverallbyFF
gc.collect()

80

### Find Quartiles

In [27]:
def getPivotQuants(weatherType, identifiers,weatherData):

    quants_overallByInd   = weatherData.groupby(identifiers)[weatherType].quantile(q = quantiles).reset_index().\
        rename(columns=lambda x: re.sub('level_[0-9]$','quartile',x))
    # rename(columns = {'level_1': 'quartile'})
    # print(quants_overallByZip.head())
    
    quants_overallByInd['quartile'] = 'q_' + quants_overallByInd['quartile'].astype(str).str.slice(0,6)

    ## 

    pivot_quants_overallByInd = quants_overallByInd.pivot(index=identifiers, 
            columns='quartile', values=str(weatherType)).reset_index().\
            rename_axis(None, axis=1)

    pivot_quants_overallByInd['quartileList']      = pivot_quants_overallByInd.iloc[:,len(identifiers):].values.tolist()
    pivot_quants_overallByInd['quartileLabelList'] = [list(pivot_quants_overallByInd.columns[2:-1])] * \
                                                                    pivot_quants_overallByInd.shape[0]

    pivot_quants_overallByInd = pivot_quants_overallByInd[pivot_quants_overallByInd.\
                                                            columns.drop(list(pivot_quants_overallByInd.filter(regex='q_')))]

    # pivot_quants_overallByZip.head()

    return(pivot_quants_overallByInd)

In [28]:
def getQuartCounts(df, weatherType):
    weatherCut = weatherType + "Cut"
    weatherCutLabels = weatherCut + 'labels'
    
    df['month'] = df.date.astype(str).str.slice(4,6)
    
    df['quarter'] = 'q1'

    df.loc[df['month'].isin(['04','05','06']), 'quarter'] = 'q2'
    df.loc[df['month'].isin(['07','08','09']), 'quarter'] = 'q3'
    df.loc[df['month'].isin(['10','11','12']), 'quarter'] = 'q4'

    df['yearQuarter'] = df.date.astype(str).str.slice(0,4) + df.quarter

    occurrences = weatherType + "Occurrences" 

    summaryDF = df.groupby(['famafrench','yearQuarter',weatherCutLabels]).size().reset_index()
    summaryDF.columns = ['famafrench','yearQuarter',weatherCutLabels,occurrences]
    
    return(summaryDF)

Now get all the quartile data:
    - by sic overall
    - by famafrench overall
    - by sic by quarter
    - by famafrench by quarter

In [29]:
weatherByInd.head()

Unnamed: 0,zipcode,famafrench,date,temperature,precipitation,month,year,quarter,temp5Days,precip5Days
0,55344,37.0,19810101.0,0.579,0.98,1,1981,q1,15.7274,22.431
1,55344,37.0,19810102.0,-1.523,0.0,1,1981,q1,14.266,9.29
2,55344,37.0,19810103.0,-7.25,0.0,1,1981,q1,14.4948,29.891
3,55344,37.0,19810104.0,-16.907,0.0,1,1981,q1,14.7034,6.355
4,55344,37.0,19810105.0,-13.35,0.0,1,1981,q1,22.090001,8.254


In [30]:
####################
# by famafrench
pivot_temperatureQuants_ff = getPivotQuants('temperature',['famafrench'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_temperatureQuants_ff.rename(columns = {'quartileList':     'quartileListTemperature_ff'},
                                      inplace = True)

pivot_temp5DaysQuants_ff = getPivotQuants('temp5Days',['famafrench'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_temp5DaysQuants_ff.rename(columns = {'quartileList':     'quartileListTemp5Days_ff'},
                                      inplace = True)

print("here 1")

pivot_precipitationQuants_ff = getPivotQuants('precipitation',['famafrench'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_precipitationQuants_ff.rename(columns = {'quartileList':     'quartileListPrecipitation_ff'},
                                      inplace = True)

pivot_precip5DaysQuants_ff = getPivotQuants('precip5Days',['famafrench'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_precip5DaysQuants_ff.rename(columns = {'quartileList':     'quartileListPrecip5Days_ff'},
                                      inplace = True)

print("here 2")

quants_ff = pivot_temperatureQuants_ff.merge(pivot_precipitationQuants_ff).merge(pivot_temp5DaysQuants_ff).merge(pivot_precip5DaysQuants_ff)


##########################################
# famafrench - quarterly
pivot_temperatureQuants_quarterlyByFF = getPivotQuants('temperature',['famafrench','quarter'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_temperatureQuants_quarterlyByFF.rename(columns = {'quartileList':     'quartileListTemperature_quarterlyByFF'},
                                      inplace = True)
pivot_temp5DaysQuants_quarterlyByFF = getPivotQuants('temp5Days',['famafrench','quarter'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_temp5DaysQuants_quarterlyByFF.rename(columns = {'quartileList':     'quartileListTemp5Days_quarterlyByFF'},
                                      inplace = True)

print("here 3")

pivot_precipitationQuants_quarterlyByFF = getPivotQuants('precipitation',['famafrench','quarter'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_precipitationQuants_quarterlyByFF.rename(columns = {'quartileList':     'quartileListPrecipitation_quarterlyByFF'},
                                      inplace = True)
pivot_precip5DaysQuants_quarterlyByFF = getPivotQuants('precip5Days',['famafrench','quarter'],weatherByInd).\
    drop(columns = {'quartileLabelList'})
pivot_precip5DaysQuants_quarterlyByFF.rename(columns = {'quartileList':     'quartileListPrecip5Days_quarterlyByFF'},
                                      inplace = True)

print("here 4")

quants_quarterlyByFF = pivot_temperatureQuants_quarterlyByFF.merge(pivot_precipitationQuants_quarterlyByFF).merge(pivot_temp5DaysQuants_quarterlyByFF).merge(pivot_precip5DaysQuants_quarterlyByFF)


here 1
here 2
here 3
here 4


In [31]:
quants_quarterlyByFF.head()

Unnamed: 0,famafrench,quarter,quartileListTemperature_quarterlyByFF,quartileListPrecipitation_quarterlyByFF,quartileListTemp5Days_quarterlyByFF,quartileListPrecip5Days_quarterlyByFF
0,1.0,q1,"[-22.6770000457764, 1.3059999704360976, 25.545...","[0.0, 0.0, 9.144600296020512, 17.4582000732422...","[-26.544200134277453, 0.9627199608085943, 31.1...","[-4.249045559845399e-12, -2.615685446016869e-1..."
1,1.0,q2,"[-3.81800007820129, 13.4069995880127, 32.42699...","[0.0, 0.0, 6.43080020942688, 13.56745014190671...","[-19.42639999389663, 0.9615200424193059, 31.17...","[-4.277467269275803e-12, -2.6290081223123707e-..."
2,1.0,q3,"[9.61400032043457, 18.17615051269534, 35.23540...","[0.0, 0.0, 5.640699815750129, 12.2697003364563...","[-22.782600021362395, 1.0759200000761504, 31.2...","[-3.948841253986757e-12, -2.7462476737127872e-..."
3,1.0,q4,"[-20.3290004730225, 4.950449848175052, 28.5639...","[0.0, 0.0, 6.54310007095338, 13.82840037345888...","[-22.439000129699835, 0.8106200315057652, 31.3...","[-4.078515303262975e-12, -2.6858515411731787e-..."
4,2.0,q1,"[-28.3659992218018, -4.64599990844727, 22.3519...","[0.0, 0.0, 7.37599992752075, 14.4499998092651,...","[-27.081999588012785, 0.8848000371455915, 31.1...","[-4.2521541843143495e-12, -2.6219026949547697e..."


Construct a record of all the relevant quantiles by combining all of the above. Rough idea is:
    - Start with the zip-quarter data
    - Merge in the less-specific quarter information
    - For each row, put in the overall quartile information for each row

In [32]:
quantsAll                 = quants_quarterlyByFF.merge(quants_ff)


'''
quantsAll['precipitationQuants'] = [precipQuants for i in quantsAll.index]
quantsAll['temperatureQuants']   = [tempQuants   for i in quantsAll.index]



quantsAll['precip5DaysQuants']   = [precip5DaysQuants for i in quantsAll.index]
quantsAll['temp5DaysQuants']     = [temp5DaysQuants   for i in quantsAll.index]
'''


quantsAll.head()

Unnamed: 0,famafrench,quarter,quartileListTemperature_quarterlyByFF,quartileListPrecipitation_quarterlyByFF,quartileListTemp5Days_quarterlyByFF,quartileListPrecip5Days_quarterlyByFF,quartileListTemperature_ff,quartileListPrecipitation_ff,quartileListTemp5Days_ff,quartileListPrecip5Days_ff
0,1.0,q1,"[-22.6770000457764, 1.3059999704360976, 25.545...","[0.0, 0.0, 9.144600296020512, 17.4582000732422...","[-26.544200134277453, 0.9627199608085943, 31.1...","[-4.249045559845399e-12, -2.615685446016869e-1...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
1,1.0,q2,"[-3.81800007820129, 13.4069995880127, 32.42699...","[0.0, 0.0, 6.43080020942688, 13.56745014190671...","[-19.42639999389663, 0.9615200424193059, 31.17...","[-4.277467269275803e-12, -2.6290081223123707e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
2,1.0,q3,"[9.61400032043457, 18.17615051269534, 35.23540...","[0.0, 0.0, 5.640699815750129, 12.2697003364563...","[-22.782600021362395, 1.0759200000761504, 31.2...","[-3.948841253986757e-12, -2.7462476737127872e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
3,1.0,q4,"[-20.3290004730225, 4.950449848175052, 28.5639...","[0.0, 0.0, 6.54310007095338, 13.82840037345888...","[-22.439000129699835, 0.8106200315057652, 31.3...","[-4.078515303262975e-12, -2.6858515411731787e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
4,2.0,q1,"[-28.3659992218018, -4.64599990844727, 22.3519...","[0.0, 0.0, 7.37599992752075, 14.4499998092651,...","[-27.081999588012785, 0.8848000371455915, 31.1...","[-4.2521541843143495e-12, -2.6219026949547697e...","[-28.3659992218018, 0.189999997615814, 32.1080...","[0.0, 0.0, 7.72100019454956, 15.28299999809264...","[-27.081999588012785, 0.9122300355134437, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."


In [33]:
# relevantZips = allCustomerData.zipcode.append(allSupplierData.zipcode).unique()
outfile =  '../../data/companyData/quantsAll_byFF.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(quantsAll, pickle_file)

In [34]:
del weatherByInd
del weatherData
gc.collect()

114

### Convert 2009-2018 data into quartiles

In [4]:
quantsAll = pkl.load(open('../../data/companyData/quantsAll_byFF.pkl','rb'))
quantsAll.head()

Unnamed: 0,famafrench,quarter,quartileListTemperature_quarterlyByFF,quartileListPrecipitation_quarterlyByFF,quartileListTemp5Days_quarterlyByFF,quartileListPrecip5Days_quarterlyByFF,quartileListTemperature_ff,quartileListPrecipitation_ff,quartileListTemp5Days_ff,quartileListPrecip5Days_ff
0,1.0,q1,"[-22.6770000457764, 1.3059999704360976, 25.545...","[0.0, 0.0, 9.144600296020512, 17.4582000732422...","[-26.544200134277453, 0.9627199608085943, 31.1...","[-4.249045559845399e-12, -2.615685446016869e-1...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
1,1.0,q2,"[-3.81800007820129, 13.4069995880127, 32.42699...","[0.0, 0.0, 6.43080020942688, 13.56745014190671...","[-19.42639999389663, 0.9615200424193059, 31.17...","[-4.277467269275803e-12, -2.6290081223123707e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
2,1.0,q3,"[9.61400032043457, 18.17615051269534, 35.23540...","[0.0, 0.0, 5.640699815750129, 12.2697003364563...","[-22.782600021362395, 1.0759200000761504, 31.2...","[-3.948841253986757e-12, -2.7462476737127872e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
3,1.0,q4,"[-20.3290004730225, 4.950449848175052, 28.5639...","[0.0, 0.0, 6.54310007095338, 13.82840037345888...","[-22.439000129699835, 0.8106200315057652, 31.3...","[-4.078515303262975e-12, -2.6858515411731787e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
4,2.0,q1,"[-28.3659992218018, -4.64599990844727, 22.3519...","[0.0, 0.0, 7.37599992752075, 14.4499998092651,...","[-27.081999588012785, 0.8848000371455915, 31.1...","[-4.2521541843143495e-12, -2.6219026949547697e...","[-28.3659992218018, 0.189999997615814, 32.1080...","[0.0, 0.0, 7.72100019454956, 15.28299999809264...","[-27.081999588012785, 0.9122300355134437, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."


In [5]:
recentDecadeTmax   = getData("Tmax",2009,2019)
recentDecadePrecip = getData("Precip",2009,2019)

In [6]:
recentDecadeWeather = recentDecadeTmax.merge(recentDecadePrecip)
recentDecadeWeather.head()

Unnamed: 0,ZIP,date,temperature,precipitation
0,1001,20090101.0,-2.941,4.491
1,1013,20090101.0,-3.683,4.403
2,1085,20090101.0,-4.804,4.008
3,1089,20090101.0,-3.892,3.613
4,1095,20090101.0,-4.128,5.737


In [8]:
recentDecadeWeather['month'] = recentDecadeWeather.date.astype(str).str.slice(4,6)

recentDecadeWeather['quarter'] = 'q1'

recentDecadeWeather.loc[recentDecadeWeather['month'].isin(['04','05','06']), 'quarter'] = 'q2'
recentDecadeWeather.loc[recentDecadeWeather['month'].isin(['07','08','09']), 'quarter'] = 'q3'
recentDecadeWeather.loc[recentDecadeWeather['month'].isin(['10','11','12']), 'quarter'] = 'q4'

recentDecadeWeather['yearQuarter'] = recentDecadeWeather.date.astype(str).str.slice(0,4) + recentDecadeWeather.quarter
recentDecadeWeather.head()

Unnamed: 0,ZIP,date,temperature,precipitation,month,quarter,yearQuarter
0,1001,20090101.0,-2.941,4.491,1,q1,2009q1
1,1013,20090101.0,-3.683,4.403,1,q1,2009q1
2,1085,20090101.0,-4.804,4.008,1,q1,2009q1
3,1089,20090101.0,-3.892,3.613,1,q1,2009q1
4,1095,20090101.0,-4.128,5.737,1,q1,2009q1


In [9]:
'''recentDecadeWeather['temp5Days']   = recentDecadeWeather.groupby('ZIP').rolling(5)['temperature'].mean().reset_index(drop=True)
recentDecadeWeather['precip5Days'] = recentDecadeWeather.groupby('ZIP').rolling(5)['precipitation'].sum().reset_index(drop=True)'''

"recentDecadeWeather['temp5Days']   = recentDecadeWeather.groupby('ZIP').rolling(5)['temperature'].mean().reset_index(drop=True)\nrecentDecadeWeather['precip5Days'] = recentDecadeWeather.groupby('ZIP').rolling(5)['precipitation'].sum().reset_index(drop=True)"

In [10]:
recentDecadeWeather[recentDecadeWeather.yearQuarter == '2010q1'].head()

Unnamed: 0,ZIP,date,temperature,precipitation,month,quarter,yearQuarter
1530445,1001,20100101.0,-0.76,3.408,1,q1,2010q1
1530446,1013,20100101.0,-0.814,3.216,1,q1,2010q1
1530447,1085,20100101.0,-1.217,3.864,1,q1,2010q1
1530448,1089,20100101.0,-1.172,3.066,1,q1,2010q1
1530449,1095,20100101.0,-1.482,3.075,1,q1,2010q1


In [11]:
del recentDecadePrecip
del recentDecadeTmax
gc.collect()

60

Merge the intervals into the recent weather data.

In [12]:
quantsAll.columns

Index(['famafrench', 'quarter', 'quartileListTemperature_quarterlyByFF',
       'quartileListPrecipitation_quarterlyByFF',
       'quartileListTemp5Days_quarterlyByFF',
       'quartileListPrecip5Days_quarterlyByFF', 'quartileListTemperature_ff',
       'quartileListPrecipitation_ff', 'quartileListTemp5Days_ff',
       'quartileListPrecip5Days_ff'],
      dtype='object')

In [13]:
quantsAll.head()

Unnamed: 0,famafrench,quarter,quartileListTemperature_quarterlyByFF,quartileListPrecipitation_quarterlyByFF,quartileListTemp5Days_quarterlyByFF,quartileListPrecip5Days_quarterlyByFF,quartileListTemperature_ff,quartileListPrecipitation_ff,quartileListTemp5Days_ff,quartileListPrecip5Days_ff
0,1.0,q1,"[-22.6770000457764, 1.3059999704360976, 25.545...","[0.0, 0.0, 9.144600296020512, 17.4582000732422...","[-26.544200134277453, 0.9627199608085943, 31.1...","[-4.249045559845399e-12, -2.615685446016869e-1...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
1,1.0,q2,"[-3.81800007820129, 13.4069995880127, 32.42699...","[0.0, 0.0, 6.43080020942688, 13.56745014190671...","[-19.42639999389663, 0.9615200424193059, 31.17...","[-4.277467269275803e-12, -2.6290081223123707e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
2,1.0,q3,"[9.61400032043457, 18.17615051269534, 35.23540...","[0.0, 0.0, 5.640699815750129, 12.2697003364563...","[-22.782600021362395, 1.0759200000761504, 31.2...","[-3.948841253986757e-12, -2.7462476737127872e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
3,1.0,q4,"[-20.3290004730225, 4.950449848175052, 28.5639...","[0.0, 0.0, 6.54310007095338, 13.82840037345888...","[-22.439000129699835, 0.8106200315057652, 31.3...","[-4.078515303262975e-12, -2.6858515411731787e-...","[-22.6770000457764, 6.26999998092651, 32.60200...","[0.0, 0.0, 6.92799997329712, 14.2959995269775,...","[-26.544200134277453, 0.9506699970362625, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."
4,2.0,q1,"[-28.3659992218018, -4.64599990844727, 22.3519...","[0.0, 0.0, 7.37599992752075, 14.4499998092651,...","[-27.081999588012785, 0.8848000371455915, 31.1...","[-4.2521541843143495e-12, -2.6219026949547697e...","[-28.3659992218018, 0.189999997615814, 32.1080...","[0.0, 0.0, 7.72100019454956, 15.28299999809264...","[-27.081999588012785, 0.9122300355134437, 31.2...","[-4.277467269275803e-12, -2.6716406864579767e-..."


In [14]:
recentDecadeWeather.columns

Index(['ZIP', 'date', 'temperature', 'precipitation', 'month', 'quarter',
       'yearQuarter'],
      dtype='object')

In [16]:
indData = pd.read_csv("../../data/companyData/igData.csv")[['zipcode','famafrench']].drop_duplicates().reset_index(drop = True)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [17]:
recentDecadeWeather.rename(columns = {'ZIP': 'zipcode'}, inplace = True)

recentDecadeWeatherInd = indData.merge(recentDecadeWeather).merge(quantsAll)

print(recentDecadeWeatherInd.shape,indData.shape,recentDecadeWeather.shape)

(25463763, 16) (6505, 2) (16843281, 7)


In [18]:
recentDecadeWeatherInd.tail()

Unnamed: 0,zipcode,famafrench,date,temperature,precipitation,month,quarter,yearQuarter,quartileListTemperature_quarterlyByFF,quartileListPrecipitation_quarterlyByFF,quartileListTemp5Days_quarterlyByFF,quartileListPrecip5Days_quarterlyByFF,quartileListTemperature_ff,quartileListPrecipitation_ff,quartileListTemp5Days_ff,quartileListPrecip5Days_ff
25463758,40229,5.0,20191227.0,17.162001,0.0,12,q4,2019q4,"[-17.5639991760254, 2.027299964427952, 23.9920...","[0.0, 0.0, 9.669799613952641, 18.6808498382568...","[-18.19220037460341, 0.7210599902270798, 31.51...","[-4.078515303262975e-12, -2.7000623958883807e-...","[-20.5, 2.0876999258995084, 31.2759990692139, ...","[0.0, 0.0, 10.653599834442156, 18.594600105285...","[-20.523999595642152, 0.8825799900292406, 31.2...","[-4.263256414560601e-12, -2.6645352591003757e-..."
25463759,40229,5.0,20191228.0,16.198,0.0,12,q4,2019q4,"[-17.5639991760254, 2.027299964427952, 23.9920...","[0.0, 0.0, 9.669799613952641, 18.6808498382568...","[-18.19220037460341, 0.7210599902270798, 31.51...","[-4.078515303262975e-12, -2.7000623958883807e-...","[-20.5, 2.0876999258995084, 31.2759990692139, ...","[0.0, 0.0, 10.653599834442156, 18.594600105285...","[-20.523999595642152, 0.8825799900292406, 31.2...","[-4.263256414560601e-12, -2.6645352591003757e-..."
25463760,40229,5.0,20191229.0,17.274,4.341,12,q4,2019q4,"[-17.5639991760254, 2.027299964427952, 23.9920...","[0.0, 0.0, 9.669799613952641, 18.6808498382568...","[-18.19220037460341, 0.7210599902270798, 31.51...","[-4.078515303262975e-12, -2.7000623958883807e-...","[-20.5, 2.0876999258995084, 31.2759990692139, ...","[0.0, 0.0, 10.653599834442156, 18.594600105285...","[-20.523999595642152, 0.8825799900292406, 31.2...","[-4.263256414560601e-12, -2.6645352591003757e-..."
25463761,40229,5.0,20191230.0,17.153999,42.325001,12,q4,2019q4,"[-17.5639991760254, 2.027299964427952, 23.9920...","[0.0, 0.0, 9.669799613952641, 18.6808498382568...","[-18.19220037460341, 0.7210599902270798, 31.51...","[-4.078515303262975e-12, -2.7000623958883807e-...","[-20.5, 2.0876999258995084, 31.2759990692139, ...","[0.0, 0.0, 10.653599834442156, 18.594600105285...","[-20.523999595642152, 0.8825799900292406, 31.2...","[-4.263256414560601e-12, -2.6645352591003757e-..."
25463762,40229,5.0,20191231.0,7.277,0.214,12,q4,2019q4,"[-17.5639991760254, 2.027299964427952, 23.9920...","[0.0, 0.0, 9.669799613952641, 18.6808498382568...","[-18.19220037460341, 0.7210599902270798, 31.51...","[-4.078515303262975e-12, -2.7000623958883807e-...","[-20.5, 2.0876999258995084, 31.2759990692139, ...","[0.0, 0.0, 10.653599834442156, 18.594600105285...","[-20.523999595642152, 0.8825799900292406, 31.2...","[-4.263256414560601e-12, -2.6645352591003757e-..."


Now do the counts by industry & zipcode. For some reason the filtering operations are like 10x faster with making the industry and quarter out to be categories.

In [19]:
recentDecadeWeatherInd = recentDecadeWeatherInd.astype({'famafrench': 'category', 'quarter': 'category'})

In [20]:
recentDecadeWeatherInd.quartileListPrecip5Days_ff[0]

[-4.284572696633404e-12,
 -2.673417043297377e-12,
 37.7140007019029,
 52.55999925136538,
 88.72959926386928,
 133.09732992401098,
 141.2783866303071,
 168.68262524566393,
 201.5880013632751,
 222.83625883790347,
 500.3759973049145]

In [21]:
recentDecadeWeatherInd.quartileListPrecipitation_ff[0][1:]

[0.0,
 6.98099994659424,
 14.1389999389648,
 32.33700819702155,
 52.63107807617134,
 55.73004151916514,
 67.18943485870463,
 80.59719833374095,
 93.00997969514263,
 245.828002929688]

In [22]:
def getFFQuarts(ff):
    
    tempData = recentDecadeWeatherInd[recentDecadeWeatherInd.famafrench == ff].reset_index()
    
    if tempData.shape[0] > 0:


        for i in range(0,len(tempData.quartileListPrecip5Days_ff[0])):
                tempData.quartileListPrecipitation_ff[0][i] = tempData.quartileListPrecipitation_ff[0][i] + i/10000
                tempData.quartileListPrecip5Days_ff[0][i]   = tempData.quartileListPrecip5Days_ff[0][i] + i/10000


        tempData['temp_ffQuants'] = pd.cut(tempData.temperature, 
               bins = tempData.quartileListTemperature_ff[0],
               labels = quant_labels,include_lowest=True)
        '''tempData['temp5Days_ffQuants'] = pd.cut(tempData.temp5Days, 
               bins = tempData.quartileListTemp5Days_ff[0],
               labels = quant_labels,include_lowest=True)'''


        tempData['precip_ffQuants'] = pd.cut(tempData.precipitation, 
               bins = tempData.quartileListPrecipitation_ff[0],
               labels = quant_labels,include_lowest=True)
        '''tempData['precip5Days_ffQuants'] = pd.cut(tempData.precip5Days, 
               bins = tempData.quartileListPrecip5Days_ff[0], 
               labels = quant_labels,include_lowest=True)'''
        
    
    
    return(tempData)

In [23]:
def getIndQuarterQuarts(ff):

    weatherByFFByQuarter = pd.DataFrame()

    quarters = recentDecadeWeatherInd.quarter.unique()
    

    for quarter in quarters:
        tempData = recentDecadeWeatherInd[(recentDecadeWeatherInd.famafrench == ff) & 
                        (recentDecadeWeatherInd.quarter == quarter)].reset_index()
        if tempData.shape[0] > 0:

            for i in range(0,len(tempData.quartileListPrecip5Days_quarterlyByFF[0])):
                tempData.quartileListPrecipitation_quarterlyByFF[0][i] = tempData.quartileListPrecipitation_quarterlyByFF[0][i] + i/10000
                tempData.quartileListPrecip5Days_quarterlyByFF[0][i]   = tempData.quartileListPrecip5Days_quarterlyByFF[0][i] + i/10000
                tempData.quartileListTemperature_quarterlyByFF[0][i] = tempData.quartileListTemperature_quarterlyByFF[0][i] + i/10000
                tempData.quartileListTemp5Days_quarterlyByFF[0][i]   = tempData.quartileListTemp5Days_quarterlyByFF[0][i] + i/10000


            tempData['temp_indQuarterQuants'] = pd.cut(tempData.temperature, 
                   bins = tempData.quartileListTemperature_quarterlyByFF[0],
                   labels = quant_labels,include_lowest=True)
            '''tempData['temp5Days_indQuarterQuants'] = pd.cut(tempData.temp5Days, 
                   bins = tempData.quartileListTemp5Days_quarterlyByFF[0],
                   labels = quant_labels,include_lowest=True)'''


            tempData['precip_indQuarterQuants'] = pd.cut(tempData.precipitation, 
                   bins = tempData.quartileListPrecipitation_quarterlyByFF[0],
                   labels = quant_labels,include_lowest=True)
            '''tempData['precip5Days_indQuarterQuants'] = pd.cut(tempData.precip5Days, 
                   bins = tempData.quartileListPrecip5Days_quarterlyByFF[0], 
                   labels = quant_labels,include_lowest=True) # '''


            weatherByFFByQuarter = weatherByFFByQuarter.append(tempData)

    return(weatherByFFByQuarter)

In [24]:
inds = recentDecadeWeatherInd.famafrench.unique()
len(inds)

43

In [27]:
start = time.time()




with multiprocessing.Pool() as pool:
    indQuarts = pool.map(getFFQuarts, inds)

    
    
weatherByInd = pd.concat(indQuarts)


print(time.time() - start)

224.7965407371521


In [32]:
weatherByInd['yearQuarter']        = weatherByInd.date.astype('str').str.slice(0,4) + weatherByInd.quarter.astype('str')


In [33]:
outfile =  '../../data/companyData/weatherByInd.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(weatherByInd, pickle_file)

In [None]:
sum(weatherByInd.temp_ffQuants.isna())

In [29]:
start = time.time()



with multiprocessing.Pool() as pool:
    indQuarterQuarts = pool.map(getIndQuarterQuarts, inds)

    
    
weatherByIndQuarter = pd.concat(indQuarterQuarts)


print(time.time() - start)

218.5228579044342


In [30]:
weatherByIndQuarter['yearQuarter'] = weatherByIndQuarter.date.astype('str').str.slice(0,4) + weatherByIndQuarter.quarter.astype('str')

In [31]:
outfile =  '../../data/companyData/weatherByIndQuarter.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(weatherByIndQuarter, pickle_file)


In [None]:
file   = open('../../data/companyData/weatherByIndQuarter.pkl',"rb")
weatherByIndQuarter = pkl.load(file)

file   = open('../../data/companyData/weatherByInd.pkl',"rb")
weatherByInd = pkl.load(file)

In [None]:
weatherByInd.head()

In [None]:
'''tempData['temp_ffQuants'] = pd.cut(tempData.temperature, 
       bins = tempData.quartileListTemperature_ff[0],
       labels = quant_labels,include_lowest=True)
tempData['temp5Days_ffQuants'] = pd.cut(tempData.temp5Days, 
       bins = tempData.quartileListTemp5Days_ff[0],
       labels = quant_labels,include_lowest=True)


tempData['precip_ffQuants'] = pd.cut(tempData.precipitation, 
       bins = tempData.quartileListPrecipitation_ff[0],
       labels = quant_labels,include_lowest=True)
tempData['precip5Days_ffQuants'] = pd.cut(tempData.precip5Days, 
       bins = tempData.quartileListPrecip5Days_ff[0], 
       labels = quant_labels,include_lowest=True)'''

In [34]:
pt1 = weatherByInd[['famafrench','zipcode','date','quarter','yearQuarter',
              'temp_ffQuants', 'precip_ffQuants']]

pt2 = weatherByIndQuarter[['famafrench','zipcode','date','quarter','yearQuarter',
                           'temp_indQuarterQuants','precip_indQuarterQuants']]


In [35]:
allQuarts = pt1.merge(pt2)

In [36]:
outfile =  '../../data/companyData/allQuarts_byInd.pkl'
with open(outfile, 'wb') as pickle_file:
    pkl.dump(allQuarts, pickle_file)

In [37]:
del allQuarts
gc.collect()

231

------------------

### Now go through the data from 2010 - 2019 and find time spent in each bin

In [38]:
def getCounts(quartType,df):

    test = df.groupby(['famafrench', 'zipcode', 'yearQuarter', quartType]).size().reset_index()
    test = test.drop_duplicates()
    test.columns = ['famafrench', 'zipcode', 'yearQuarter', quartType, 'occurrences']

    # print(test.head())


    testPivot = test.pivot(index=['famafrench','zipcode','yearQuarter'], 
            columns = quartType, values= 'occurrences').reset_index().\
            rename_axis(None, axis=1)

    testPivot.columns.values[3:] = quartType[:-6] + testPivot.columns.values[3:]

    # print(testPivot.head())
    return(testPivot)

In [39]:
pt2.head()

Unnamed: 0,famafrench,zipcode,date,quarter,yearQuarter,temp_indQuarterQuants,precip_indQuarterQuants
0,36.0,55344,20090101.0,q1,2009q1,quant_0.05,quant_0.05
1,36.0,55344,20090102.0,q1,2009q1,quant_tossThisOne,quant_0.05
2,36.0,55344,20090103.0,q1,2009q1,quant_0.05,quant_0.05
3,36.0,55344,20090104.0,q1,2009q1,quant_0.05,quant_tossThisOne
4,36.0,55344,20090105.0,q1,2009q1,quant_0.05,quant_0.05


In [40]:
countData_pt1 = getCounts(pt1.columns[5],pt1)
for quart in pt1.columns[6:]:
    print(quart)
    countData_pt1 = countData_pt1.merge(getCounts(quart,pt1))
    
countData_pt2 = getCounts(pt2.columns[5],pt2)
for quart in pt2.columns[6:]:
    print(quart)
    countData_pt2 = countData_pt2.merge(getCounts(quart,pt2))

precip_ffQuants
precip_indQuarterQuants


In [41]:
countData = countData_pt1.merge(countData_pt2)

Reformat the column names so they're consistent.

In [42]:
countData.head()

Unnamed: 0,famafrench,zipcode,yearQuarter,temp_ffquant_0.05,temp_ffquant_tossThisOne,temp_ffquant_0.95,temp_ffquant_1xQtr,temp_ffquant_1xYr,temp_ffquant_1x5Qtrs,temp_ffquant_1x10Qtrs,...,precip_indQuarterquant_0.05,precip_indQuarterquant_tossThisOne,precip_indQuarterquant_0.95,precip_indQuarterquant_1xQtr,precip_indQuarterquant_1xYr,precip_indQuarterquant_1x5Qtrs,precip_indQuarterquant_1x10Qtrs,precip_indQuarterquant_1x5Yrs,precip_indQuarterquant_1x10Yrs,precip_indQuarterquant_1.0
0,1.0,1013,2009q1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1.0,1013,2009q2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1.0,1013,2009q3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1.0,1013,2009q4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1.0,1013,2010q1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [43]:
countData.to_csv("../../data/companyData/allWeatherBins_2009to2019_byInd.csv")

In [44]:
countData = pd.read_csv("../../data/companyData/allWeatherBins_2009to2019_byInd.csv").drop(columns = 'Unnamed: 0')

print(countData.head())

countDataRevised = countData[['famafrench','zipcode','yearQuarter']]

In [46]:

cdf = {}


cdf['0.95']     = ['1xQtr','1xYr','1x5Qtrs','1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1xQtr']    = ['1xYr','1x5Qtrs','1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1xYr']     = ['1x5Qtrs','1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1x5Qtrs']  = ['1x10Qtrs','1x5Yrs','1x10Yrs','1.0']
cdf['1x10Qtrs'] = ['1x5Yrs','1x10Yrs','1.0']
cdf['1x5Yrs']   = ['1x10Yrs','1.0']
cdf['1x10Yrs']  = ['1.0']


weatherVars = ['temp_','precip_']
statVars    = ['ffquant_','indQuarterquant_']


for weatherVar in weatherVars:
    for statVar in statVars:
        print(weatherVar + statVar, "*************************")
        
        # 0.95
        for cutoff in list(cdf.keys()):
            
            varHere = weatherVar + statVar + cutoff
            countDataRevised[varHere] = countData[varHere]
            print(cutoff, "******")
            for greaterThanCutoffs in cdf[cutoff]:
                varCDF = weatherVar + statVar + greaterThanCutoffs 
                countDataRevised[varHere] = countDataRevised[varHere] + countData[varCDF] 
                print(weatherVar + statVar + greaterThanCutoffs)
 

temp_ffquant_ *************************
0.95 ******
temp_ffquant_1xQtr
temp_ffquant_1xYr
temp_ffquant_1x5Qtrs
temp_ffquant_1x10Qtrs
temp_ffquant_1x5Yrs
temp_ffquant_1x10Yrs
temp_ffquant_1.0
1xQtr ******
temp_ffquant_1xYr
temp_ffquant_1x5Qtrs
temp_ffquant_1x10Qtrs
temp_ffquant_1x5Yrs
temp_ffquant_1x10Yrs
temp_ffquant_1.0
1xYr ******
temp_ffquant_1x5Qtrs
temp_ffquant_1x10Qtrs
temp_ffquant_1x5Yrs
temp_ffquant_1x10Yrs
temp_ffquant_1.0
1x5Qtrs ******
temp_ffquant_1x10Qtrs
temp_ffquant_1x5Yrs
temp_ffquant_1x10Yrs
temp_ffquant_1.0
1x10Qtrs ******
temp_ffquant_1x5Yrs
temp_ffquant_1x10Yrs


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


temp_ffquant_1.0
1x5Yrs ******
temp_ffquant_1x10Yrs
temp_ffquant_1.0
1x10Yrs ******
temp_ffquant_1.0
temp_indQuarterquant_ *************************
0.95 ******
temp_indQuarterquant_1xQtr
temp_indQuarterquant_1xYr
temp_indQuarterquant_1x5Qtrs
temp_indQuarterquant_1x10Qtrs
temp_indQuarterquant_1x5Yrs
temp_indQuarterquant_1x10Yrs
temp_indQuarterquant_1.0
1xQtr ******
temp_indQuarterquant_1xYr
temp_indQuarterquant_1x5Qtrs
temp_indQuarterquant_1x10Qtrs
temp_indQuarterquant_1x5Yrs
temp_indQuarterquant_1x10Yrs
temp_indQuarterquant_1.0
1xYr ******
temp_indQuarterquant_1x5Qtrs
temp_indQuarterquant_1x10Qtrs
temp_indQuarterquant_1x5Yrs
temp_indQuarterquant_1x10Yrs
temp_indQuarterquant_1.0
1x5Qtrs ******
temp_indQuarterquant_1x10Qtrs
temp_indQuarterquant_1x5Yrs
temp_indQuarterquant_1x10Yrs
temp_indQuarterquant_1.0
1x10Qtrs ******
temp_indQuarterquant_1x5Yrs
temp_indQuarterquant_1x10Yrs
temp_indQuarterquant_1.0
1x5Yrs ******
temp_indQuarterquant_1x10Yrs
temp_indQuarterquant_1.0
1x10Yrs ******
temp

In [16]:
countData[['precip_ffquant_0.95','precip_ffquant_1xQtr','precip_ffquant_1xYr','precip_ffquant_1x5Qtrs',
                 'precip_ffquant_1x10Qtrs','precip_ffquant_1x5Yrs','precip_ffquant_1x10Yrs']]

Unnamed: 0,precip_ffquant_0.95,precip_ffquant_1xQtr,precip_ffquant_1xYr,precip_ffquant_1x5Qtrs,precip_ffquant_1x10Qtrs,precip_ffquant_1x5Yrs,precip_ffquant_1x10Yrs
0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...
6540859,0,0,0,0,0,0,0
6540860,0,0,0,0,0,0,0
6540861,0,0,0,0,0,0,0
6540862,0,0,0,0,0,0,0


In [17]:
countDataRevised['precip_ffquant_1x5Qtrs'].describe()

count    6.540864e+06
mean     1.502233e-02
std      1.655263e-01
min      0.000000e+00
25%      0.000000e+00
50%      0.000000e+00
75%      0.000000e+00
max      1.300000e+01
Name: precip_ffquant_1x5Qtrs, dtype: float64

In [18]:
countDataRevised[['precip_ffquant_0.95','precip_ffquant_1xQtr','precip_ffquant_1xYr','precip_ffquant_1x5Qtrs',
                 'precip_ffquant_1x10Qtrs','precip_ffquant_1x5Yrs','precip_ffquant_1x10Yrs']]

Unnamed: 0,precip_ffquant_0.95,precip_ffquant_1xQtr,precip_ffquant_1xYr,precip_ffquant_1x5Qtrs,precip_ffquant_1x10Qtrs,precip_ffquant_1x5Yrs,precip_ffquant_1x10Yrs
0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...
6540859,0,0,0,0,0,0,0
6540860,0,0,0,0,0,0,0
6540861,0,0,0,0,0,0,0
6540862,0,0,0,0,0,0,0


In [None]:
countData.tail()

In [47]:
countDataRevised.to_csv("../../data/companyData/revised_allWeatherBins_2009to2019_byInd.csv")

In [4]:
countDataRevised = pd.read_csv("../../data/companyData/revised_allWeatherBins_2009to2019_byInd.csv")

In [5]:
countDataRevised.columns

Index(['Unnamed: 0', 'famafrench', 'yearQuarter', 'temp_ffquant_0.95',
       'temp_ffquant_1xQtr', 'temp_ffquant_1xYr', 'temp_ffquant_1x5Qtrs',
       'temp_ffquant_1x10Qtrs', 'temp_ffquant_1x5Yrs', 'temp_ffquant_1x10Yrs',
       'temp_indQuarterquant_0.95', 'temp_indQuarterquant_1xQtr',
       'temp_indQuarterquant_1xYr', 'temp_indQuarterquant_1x5Qtrs',
       'temp_indQuarterquant_1x10Qtrs', 'temp_indQuarterquant_1x5Yrs',
       'temp_indQuarterquant_1x10Yrs', 'temp5Days_ffquant_0.95',
       'temp5Days_ffquant_1xQtr', 'temp5Days_ffquant_1xYr',
       'temp5Days_ffquant_1x5Qtrs', 'temp5Days_ffquant_1x10Qtrs',
       'temp5Days_ffquant_1x5Yrs', 'temp5Days_ffquant_1x10Yrs',
       'temp5Days_indQuarterquant_0.95', 'temp5Days_indQuarterquant_1xQtr',
       'temp5Days_indQuarterquant_1xYr', 'temp5Days_indQuarterquant_1x5Qtrs',
       'temp5Days_indQuarterquant_1x10Qtrs',
       'temp5Days_indQuarterquant_1x5Yrs', 'temp5Days_indQuarterquant_1x10Yrs',
       'precip_ffquant_0.95', 'precip

In [None]:
countDataRevised.shape