In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymannkendall as mk
plt.rcParams['figure.figsize']=(12,8)

In [2]:
def replace_string(data, column):
    k_columns = data[data[column].str.contains("K")]
    k_columns = k_columns.loc[k_columns[column]!='K']
    k_columns[column] = k_columns[column].str.strip('K').astype('float') * 1000
    m_columns = data[data[column].str.contains("M")]
    m_columns = m_columns.loc[m_columns[column]!='M']
    m_columns[column] = m_columns[column].str.strip('M').astype('float') * 1e6
    b_columns = data[data[column].str.contains("B")]
    b_columns = b_columns.loc[b_columns[column]!='B']
    b_columns[column] = b_columns[column].str.strip('B').astype('float') * 1e9
    columns = pd.concat([k_columns, m_columns, b_columns], axis=0)
    
    columns = columns.sort_index()
    
    return columns

In [3]:
%%time
storm_events = []
init_path = '/Users/allanbellahsene/Desktop/THESIS/DATA/NCFEI/StormEvents_'
for years in list(np.arange(1962,2020)):
    path = init_path + str(years) + '.csv'
    storm_data = pd.read_csv(path, usecols=['BEGIN_DAY', 'MONTH_NAME', 'YEAR', 'STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'EVENT_TYPE', 'EPISODE_ID', 'EVENT_ID', 'DEATHS_DIRECT', 'DEATHS_INDIRECT'])
    storm_data.index = pd.to_datetime(storm_data['BEGIN_DAY'].astype('str')+ '-' + storm_data['MONTH_NAME'] + '-' + storm_data.YEAR.astype('str'))
    storm_data = storm_data[['STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'DEATHS_DIRECT', 'DEATHS_INDIRECT', 'EVENT_TYPE', 'EVENT_ID', 'EPISODE_ID']] #keep only relevant columns
    storm_data.dropna(inplace=True) #drop missing values
    storm_data.sort_index(inplace=True) #sort data by date
    storm_events.append(storm_data)
    #print(years)
STORMS = pd.concat([storm_events[0], storm_events[1]], axis=0)
for i in range(2, len(storm_events)):
    STORMS = pd.concat([STORMS, storm_events[i]], axis=0)
print(STORMS.shape)
df = STORMS
df['DAMAGE_PROPERTY'] = df['DAMAGE_PROPERTY'].astype('str')
df['DAMAGE_CROPS'] = df['DAMAGE_CROPS'].astype('str')
STORMS = df
STORMS = STORMS.loc[((STORMS.DAMAGE_PROPERTY!='0') & (STORMS.DAMAGE_CROPS =='0')) | ((STORMS.DAMAGE_PROPERTY=='0') & (STORMS.DAMAGE_CROPS !='0')) | ((STORMS.DAMAGE_PROPERTY!='0') & (STORMS.DAMAGE_CROPS !='0'))]
STORMS = STORMS.loc[STORMS.DAMAGE_PROPERTY!='K'] #delete rows that only contain "K"
STORMS = STORMS.loc[STORMS.DAMAGE_CROPS!='K']
STORMS = replace_string(STORMS, 'DAMAGE_PROPERTY')
STORMS = replace_string(STORMS, 'DAMAGE_CROPS')
STORMS['TOTAL_DAMAGES'] = STORMS['DAMAGE_PROPERTY'] + STORMS['DAMAGE_CROPS']
STORMS['Date'] = STORMS.index
STORMS = STORMS[['Date','STATE', 'DAMAGE_PROPERTY', 'DAMAGE_CROPS', 'TOTAL_DAMAGES', 'DEATHS_DIRECT', 'DEATHS_INDIRECT', 'EVENT_TYPE', 'EVENT_ID', 'EPISODE_ID']]
df = STORMS.groupby(['STATE', 'Date']).agg({'TOTAL_DAMAGES':sum})
STORMS = STORMS.loc[STORMS['TOTAL_DAMAGES']!=0]
States = list(STORMS.STATE.unique())

(708372, 8)
CPU times: user 18.8 s, sys: 2.4 s, total: 21.2 s
Wall time: 22.1 s


# I. Ranking federal states per climate-risk exposure

In [331]:
clim_data = STORMS.loc[(STORMS.index>='1996-01-01') & (STORMS.STATE=='FLORIDA')]

In [333]:
clim_data = clim_data[['STATE', 'TOTAL_DAMAGES']]

In [334]:
clim_data

Unnamed: 0,STATE,TOTAL_DAMAGES
1996-01-02,FLORIDA,1500.0
1996-01-02,FLORIDA,5000.0
1996-01-02,FLORIDA,2000.0
1996-01-02,FLORIDA,1500.0
1996-01-03,FLORIDA,100000.0
...,...,...
2019-10-19,FLORIDA,1000.0
2019-10-22,FLORIDA,1000.0
2019-10-22,FLORIDA,1500.0
2019-10-29,FLORIDA,3000.0


In [4]:
def seasonal_trend_ranking(data, type_damage, type_disaster, min_date, max_date):
    import pymannkendall as mk
    import pandas as pd
    import numpy as np

    States = list(data.STATE.unique())
    data_ = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    
    if type_disaster == 'All':
        data_ =  data_
    else:
        data_ = data_.loc[data_.EVENT_TYPE==type_disaster]
    
    ranking = []
    for state in States:
        df = data_.loc[data_.STATE==state]
        df = df.resample('M', on='Date').sum()
        if len(df) < 10:
            pass
        else:
            df = df[type_damage]
            trend,h,p,z,Tau,s,var_s,slope,intercept = mk.seasonal_test(df, period=12)
        #sen_slope = scipy.stats.mstats.theilslopes(df)[0]
            sen_slope = mk.seasonal_sens_slope(df.values, period=12)[0]
            ranking.extend((state, trend, sen_slope, Tau, p))
    n = int(len(ranking)/5)
    ranking = np.array(ranking).reshape((n, 5))
    ranking = pd.DataFrame(data=ranking, columns=['State', 'Trend', 'Sen Slope', 'Kendall Tau', 'P-value'])
    for columns in ['Sen Slope', 'Kendall Tau', 'P-value']:
        ranking[columns] = ranking[columns].astype('float')
    ranking.sort_values(by='Sen Slope', inplace=True, ascending=False)
    ranking.reset_index(inplace=True)
    ranking.drop(columns=['index'], inplace=True)
    
    ranking = ranking.loc[ranking['P-value']<0.05]
    
    return ranking
    

In [5]:
def trend_ranking(data, type_damage, type_disaster, min_date, max_date, frequency):
    import pymannkendall as mk
    import pandas as pd
    import numpy as np
    import scipy
    States = list(data.STATE.unique())
    data_ = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    
    if type_disaster == 'All':
        data_ =  data_
    else:
        data_ = data_.loc[data_.EVENT_TYPE==type_disaster]
    
    ranking = []
    
    for state in States:
        df = data_.loc[data_.STATE==state]
        df = df.resample(frequency, on='Date').sum()
        if len(df) < 2:
            pass
        else:
            df = df[type_damage]
            trend,h,p,z,Tau,s,var_s,slope,intercept = mk.original_test(df)
            sen_slope = scipy.stats.mstats.theilslopes(df)[0]
            ranking.extend((state, trend, sen_slope, Tau, p))
    n = int(len(ranking)/5)
    ranking = np.array(ranking).reshape((n, 5))
    ranking = pd.DataFrame(data=ranking, columns=['State', 'Trend', 'Sen Slope', 'Kendall Tau', 'P-value'])
    for columns in ['Sen Slope', 'Kendall Tau', 'P-value']:
        ranking[columns] = ranking[columns].astype('float')
    ranking.sort_values(by='Sen Slope', inplace=True, ascending=False)
    ranking.reset_index(inplace=True)
    ranking.drop(columns=['index'], inplace=True)
    
    return ranking


In [6]:
def VaR_ranking(data, type_damage, type_disaster, min_date, max_date, alpha):
    
    import pandas as pd
    import numpy as np
    
    States = list(data.STATE.unique())
    data_ = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    
    if type_disaster == 'All':
        data_ =  data_
    else:
        data_ = data_.loc[data_.EVENT_TYPE==type_disaster]
    
    ranking = []
    
    for state in States:
        df = data_.loc[data_.STATE==state][type_damage]
        #if len(df) < 2:
         #   pass
        #else:
        VaR = df.quantile(alpha)
        ranking.extend((state, VaR, len(df)))
    n = int(len(ranking)/3)
    ranking = np.array(ranking).reshape((n, 3))
    ranking = pd.DataFrame(data=ranking, columns=['State', 'Value-at-Risk (VaR)', 'Number of events'])
    ranking['Value-at-Risk (VaR)'] = ranking['Value-at-Risk (VaR)'].astype('float')
    ranking['Number of events'] = ranking['Number of events'].astype('float')
    ranking.sort_values(by='Value-at-Risk (VaR)', inplace=True, ascending=False)
    ranking.reset_index(inplace=True)
    ranking.drop(columns=['index'], inplace=True)
    
    return ranking

In [7]:
def AverageLoss_ranking(data, type_damage, type_disaster, min_date, max_date):
    import pandas as pd
    import numpy as np
    
    States = list(data.STATE.unique())
    data_ = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    
    if type_disaster == 'All':
        data_ =  data_
    else:
        data_ = data_.loc[data_.EVENT_TYPE==type_disaster]
    
    ranking = []
    
    for state in States:
        df = data_.loc[data_.STATE==state][type_damage]
        #if len(df) < 2:
         #   pass
        #else:
        avg_loss = df.mean()
        ranking.extend((state, avg_loss, len(df)))
    n = int(len(ranking)/3)
    ranking = np.array(ranking).reshape((n, 3))
    ranking = pd.DataFrame(data=ranking, columns=['State', 'Average Loss', 'Number of events'])
    ranking['Average Loss'] = ranking['Average Loss'].astype('float')
    ranking['Number of events'] = ranking['Number of events'].astype('float')
    ranking.sort_values(by='Average Loss', inplace=True, ascending=False)
    ranking.reset_index(inplace=True)
    ranking.drop(columns=['index'], inplace=True)
    
    return ranking
    

In [8]:
def TotalLoss_ranking(data, type_damage, type_disaster, min_date, max_date):
    import pandas as pd
    import numpy as np
    
    States = list(data.STATE.unique())
    data_ = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    
    if type_disaster == 'All':
        data_ =  data_
    else:
        data_ = data_.loc[data_.EVENT_TYPE==type_disaster]
        
    ranking = []
    
    for state in States:
        df = data_.loc[data_.STATE==state][type_damage]
        #if len(df) < 2:
         #   pass
        #else:
        loss = df.sum()
        ranking.extend((state, loss))
            
    n = int(len(ranking)/2)
    ranking = np.array(ranking).reshape((n, 2))
    ranking = pd.DataFrame(data=ranking, columns=['State', 'Total Loss'])
    ranking['Total Loss'] = ranking['Total Loss'].astype('float')
    ranking.sort_values(by='Total Loss', inplace=True, ascending=False)
    ranking.reset_index(inplace=True)
    ranking.drop(columns=['index'], inplace=True)
    
    return ranking
    
    

In [9]:
def rank_states(data, method, type_damage, type_disaster, min_date, max_date, alpha):
    if method == 'VaR':
        ranking = VaR_ranking(data, type_damage, type_disaster, min_date, max_date, alpha)
    elif method == 'Average Loss':
        ranking = AverageLoss_ranking(data, type_damage, type_disaster, min_date, max_date)
    elif method == 'Total Loss':
        ranking = TotalLoss_ranking(data, type_damage, type_disaster, min_date, max_date)
    else:
        print('This ranking method is not available.')
    
    return ranking

In [9]:
rank_states(STORMS, 'Total Loss', 'TOTAL_DAMAGES', 'All', '1996-01-01', '2000-01-01', alpha=None)

Unnamed: 0,State,Total Loss
0,NORTH CAROLINA,5.608862e+09
1,PUERTO RICO,2.107000e+09
2,TEXAS,2.045338e+09
3,FLORIDA,1.177366e+09
4,NEBRASKA,4.685286e+08
...,...,...
59,CONNECTICUT,0.000000e+00
60,MAINE,0.000000e+00
61,NEW HAMPSHIRE,0.000000e+00
62,AMERICAN SAMOA,0.000000e+00


In [339]:
ranking = rank_states(STORMS, 'Total Loss', 'TOTAL_DAMAGES', 'All', '1996-01-01', '2015-01-01', alpha=None)
ranking.iloc[:50]

Unnamed: 0,State,Total Loss
0,NEW JERSEY,26430200000.0
1,FLORIDA,22637060000.0
2,TEXAS,17856120000.0
3,MISSISSIPPI,11802900000.0
4,ALABAMA,9067434000.0
5,IOWA,8293053000.0
6,CALIFORNIA,7907358000.0
7,NORTH CAROLINA,7560717000.0
8,MISSOURI,6158765000.0
9,LOUISIANA,5040333000.0


In [10]:
AverageLoss_ranking(STORMS, 'TOTAL_DAMAGES', 'All', '1997-01-01', '1998-01-01')

Unnamed: 0,State,Average Loss,Number of events
0,OREGON,4.140000e+07,2.0
1,OKLAHOMA,8.500000e+06,1.0
2,NORTH DAKOTA,4.212000e+06,5.0
3,HAWAII,4.100000e+06,1.0
4,WISCONSIN,3.771611e+06,27.0
...,...,...,...
59,LAKE ONTARIO,,0.0
60,ATLANTIC NORTH,,0.0
61,LAKE ST CLAIR,,0.0
62,LAKE ERIE,,0.0


In [11]:
VaR_ranking(STORMS, 'TOTAL_DAMAGES', 'All', '1996-01-01', '1997-01-01', 0.95)

Unnamed: 0,State,Value-at-Risk (VaR),Number of events
0,NORTH CAROLINA,117700000.0,38.0
1,OREGON,67550000.0,3.0
2,TEXAS,19103000.0,227.0
3,WISCONSIN,18500000.0,41.0
4,SOUTH CAROLINA,17225000.0,6.0
...,...,...,...
59,LAKE ONTARIO,,0.0
60,ATLANTIC NORTH,,0.0
61,LAKE ST CLAIR,,0.0
62,LAKE ERIE,,0.0


In [40]:
trends = trend_ranking(STORMS, 'TOTAL_DAMAGES', 'All', '1996-01-01', '2010-01-01', 'M')
trends

Unnamed: 0,State,Trend,Sen Slope,Kendall Tau,P-value
0,TEXAS,increasing,4455.445545,0.128418,0.01268556
1,DELAWARE,increasing,1666.666667,0.290148,0.009520338
2,KENTUCKY,increasing,296.296296,0.249372,1.390909e-06
3,MICHIGAN,increasing,153.846154,0.286271,3.049455e-09
4,DISTRICT OF COLUMBIA,no trend,100.0,0.242424,0.1016059
5,SOUTH CAROLINA,increasing,0.0,0.244536,8.056583e-09
6,MARYLAND,increasing,0.0,0.250134,2.115418e-09
7,MONTANA,increasing,0.0,0.201808,1.452288e-06
8,OREGON,increasing,0.0,0.215826,2.514008e-08
9,OKLAHOMA,increasing,0.0,0.383655,1.332268e-15


# II. Test if there is a correlation between companies performance and geographical exposure to climate risks

In [4]:
#Financial fundamental data

fundamentals = pd.read_csv('/Users/allanbellahsene/Desktop/THESIS/DATA/WHARTON_DATABASE/Funda_data.csv')
fundamentals[['Date']] = fundamentals[['datadate']].applymap(str).applymap(lambda s: "{}/{}/{}".format(s[4:6],s[6:], s[0:4]))
fundamentals.Date = pd.to_datetime(fundamentals.Date)

In [5]:
fin_data = fundamentals
data = fin_data.loc[fin_data.naics.astype('str').str.startswith(sector)]
data = data[['gvkey', 'fqtr', 'tic', 'conm', 'atq', 'niq', 'loc', 'state', 'naics', 'Date']]
data.index = data.Date
data = data.loc[data['loc'] == 'USA'] #keep only companies with headquarters in the US
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}
abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))
data['State'] = data['state'].map(abbrev_us_state)
data.State = data.State.str.upper()
data['ROA'] = data['niq'] / data['atq']
data = data.loc[abs(data.ROA)<2]
data = data[['gvkey', 'tic', 'conm', 'ROA','State']]

NameError: name 'sector' is not defined

In [None]:
data

In [None]:
data['ROA'] = data['niq'] / data['atq']
data = data.loc[abs(data.ROA)<2]
data = data[['gvkey', 'tic', 'conm', 'ROA','State']]
df = data.loc[(data.index>=start_date) & (data.index<=end_date)]
companies = list(df.conm.unique())
ROA=[]
for comp in companies:
    subdf = df.loc[df.conm==comp]
    state = subdf.State.iloc[0]
    ROA.extend((state, subdf.ROA.mean()))
n = len(ROA)
ROA = np.array(ROA).reshape((int(n/2), 2))
ROA = pd.DataFrame(ROA, columns=['State', 'Mean ROA'])
ROA['Mean ROA'] = ROA['Mean ROA'].astype('float')
ROA = ROA.groupby(['State']).mean()


    
    


In [51]:
def ProfitabilityPerState(fin_data, sector, start_date, end_date):
    data = fin_data.loc[fin_data.naics.astype('str').str.startswith(sector)]
    data = data[['gvkey', 'fqtr', 'tic', 'conm', 'atq', 'niq', 'loc', 'state', 'naics', 'Date']]
    data.index = data.Date
    data = data.loc[data['loc'] == 'USA'] #keep only companies with headquarters in the US
    
    us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
    }
    abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))
    data['State'] = data['state'].map(abbrev_us_state)
    data.State = data.State.str.upper()
    data['ROA'] = data['niq'] / data['atq']
    data = data.loc[abs(data.ROA)<2]
    data = data[['gvkey', 'tic', 'conm', 'ROA','State']]
    df = data.loc[(data.index>=start_date) & (data.index<=end_date)]
    companies = list(df.conm.unique())
    ROA=[]
    for comp in companies:
        subdf = df.loc[df.conm==comp]
        state = subdf.State.iloc[0]
        ROA.extend((state, subdf.ROA.mean()))
    n = len(ROA)
    ROA = np.array(ROA).reshape((int(n/2), 2))
    ROA = pd.DataFrame(ROA, columns=['State', 'Mean ROA'])
    ROA['Mean ROA'] = ROA['Mean ROA'].astype('float')
    ROA = ROA.groupby(['State']).mean()
    ROA = ROA.sort_values(by='Mean ROA')
    
    return ROA

In [57]:
ROA1 = ProfitabilityPerState(fin_data=fundamentals, sector='11', start_date='1996-01-01', end_date='1997-01-01')

In [56]:
ROA2 = ProfitabilityPerState(fin_data=fundamentals, sector='11', start_date='1997-01-01', end_date='1998-01-01')

In [106]:
ranking = rank_states(data=STORMS, method='Total Loss', type_damage='TOTAL_DAMAGES', type_disaster='All', min_date='1996-01-01', max_date='2020-01-01', alpha=0.99)

In [107]:
prof = ProfitabilityPerState(fin_data=fundamentals, sector='11', start_date='1996-01-01', end_date='2020-01-01')

In [113]:
states = list(prof.index)
ranking = ranking.loc[ranking.State.isin(states)]
best_states = ranking.State.iloc[-5:]
worst_states= ranking.State.iloc[:5]
best_ROA = prof.loc[prof.index.isin(best_states)]['Mean ROA'].mean()
worst_ROA = prof.loc[prof.index.isin(worst_states)]['Mean ROA'].mean()

In [114]:
best_ROA

-0.013297271103049235

In [115]:
worst_ROA

-0.19254415612883155

In [105]:
rank_states(data=STORMS, method='Total Loss', type_damage='TOTAL_DAMAGES', type_disaster='All', min_date='1996-01-01', max_date='2020-01-01', alpha=0.99)

Unnamed: 0,State,Total Loss
0,TEXAS,7.373794e+10
1,FLORIDA,3.457306e+10
2,CALIFORNIA,2.688310e+10
3,NEW JERSEY,2.649531e+10
4,PUERTO RICO,2.145991e+10
...,...,...
59,E PACIFIC,1.890100e+05
60,LAKE ST CLAIR,1.000000e+05
61,LAKE ERIE,3.000000e+04
62,LAKE ONTARIO,2.500000e+04


In [14]:
def ClimateRisk_Perf_Corr(fin_data, clim_data, sector, type_disaster, min_date, max_date):
    from sklearn.preprocessing import MinMaxScaler

    
    data = fin_data.loc[fin_data.naics.astype('str').str.startswith(sector)]
    data = data[['gvkey', 'fqtr', 'tic', 'conm', 'atq', 'niq', 'loc', 'state', 'naics', 'Date']]
    data.index = data.Date
    data = data.loc[data['loc'] == 'USA'] #keep only utility companies with headquarters in the US
    comp_names = list(data.conm.unique()) 
    comp_gvkey = list(data.gvkey.unique())
    data_states = list(data.state.unique())
    
    us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
    }
    abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))
    data['State'] = data['state'].map(abbrev_us_state)
    data.State = data.State.str.upper()
    data['ROA'] = data['niq'] / data['atq']
    data = data.loc[abs(data.ROA)<2]
    data = data[['gvkey', 'tic', 'conm', 'ROA','State']]
    ranking = TotalLoss_ranking(clim_data, 'TOTAL_DAMAGES', type_disaster, min_date, max_date)
    df = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    ranking = ranking.loc[ranking.State.isin(list(df.State.unique()))]
    ROA = []
    companies = list(df.conm.unique())
    l = len(companies)
    for comp in companies:
        subdf = df.loc[df.conm==comp]
        mean_roa = subdf.ROA.mean()
        state = subdf.State.iloc[0]
        ROA.extend((comp, mean_roa, state))
    n = len(ROA)
    ROA = np.array(ROA).reshape((int(n/3), 3))
    ROA = pd.DataFrame(ROA, columns=['Company', 'Mean ROA', 'Location'])
    ROA['Mean ROA'] = ROA['Mean ROA'].astype('float')
    ROA = ROA.groupby(['Location']).mean()
    ranking.index = ranking.State
    scaler = MinMaxScaler()
    r = np.array(scaler.fit_transform(ranking['Total Loss'].values.reshape(-1,1)))
    ranking['Climate Risk'] = r
    y = pd.merge(ranking['Climate Risk'], ROA, right_index=True, left_index=True)
    corr = y['Climate Risk'].corr(y['Mean ROA'])
    #y.plot.scatter(x='Climate Risk', y='Mean ROA')
    q1 = y.iloc[:5]['Mean ROA'].mean()
    q2 = y.iloc[-5:]['Mean ROA'].mean()
    
    n = len(list(df.State.unique()))
    
    return corr, n, l, q1, q2

In [15]:
corr, n, l, q1, q2 = ClimateRisk_Perf_Corr(fin_data=fundamentals, clim_data=STORMS, sector='11', type_disaster='All', min_date='1996-01-01', max_date='2020-01-01')
print(' Correlation: ' + str(corr), 'Average ROA of 5 best states: ' + str(q2), ' Average ROA of 5 worst states: ' + str(q1))

 Correlation: -0.6456156844786822 Average ROA of 5 best states: -0.013297271103049235  Average ROA of 5 worst states: -0.19254415612883155


In [16]:
import datetime
ROA = []
for dates in list(np.arange(1996, 2019)):
    start_date = str(dates) + '-01-01'
    end_date = datetime.datetime.strptime(start_date, '%Y-%m-%d') + datetime.timedelta(days=365)
    end_date = str(end_date)[0:10]
    corr, n, l, q1, q2 = ClimateRisk_Perf_Corr(fin_data=fundamentals, clim_data=STORMS, sector='11', type_disaster='Drought', min_date=start_date, max_date=end_date)
    diff = q2-q1
    ROA.extend((start_date, end_date, q2, q1, diff))
ROA = np.array(ROA).reshape((int(len(ROA)/5), 5))
ROA = pd.DataFrame(ROA, columns=['Start date', 'End date', 'Mean ROA of best states', 'Mean ROA of worst states', 'Difference'])
for columns in ['Mean ROA of best states', 'Mean ROA of worst states', 'Difference']:
    ROA[columns] = ROA[columns].astype('float')
ROA



Unnamed: 0,Start date,End date,Mean ROA of best states,Mean ROA of worst states,Difference
0,1996-01-01,1996-12-31,0.01507,-0.067032,0.082102
1,1997-01-01,1998-01-01,0.01001,-0.020855,0.030866
2,1998-01-01,1999-01-01,0.004232,-0.019037,0.023269
3,1999-01-01,2000-01-01,0.000746,-0.012918,0.013664
4,2000-01-01,2000-12-31,-0.034254,-0.01382,-0.020434
5,2001-01-01,2002-01-01,-0.008879,-2.3e-05,-0.008856
6,2002-01-01,2003-01-01,-0.017845,-0.006091,-0.011755
7,2003-01-01,2004-01-01,0.003498,-0.00715,0.010648
8,2004-01-01,2004-12-31,0.013817,-0.015591,0.029408
9,2005-01-01,2006-01-01,0.020136,-0.022817,0.042953


In [17]:
ROA.Difference.mean()

0.05598196646069766

In [18]:
len(ROA.loc[ROA.Difference>0])/len(ROA)

0.7391304347826086

In [87]:
import datetime
ROA = []
for dates in list(np.arange(1996, 2019)):
    start_date = str(dates) + '-01-01'
    end_date = datetime.datetime.strptime(start_date, '%Y-%m-%d') + datetime.timedelta(days=365)
    end_date = str(end_date)[0:10]
    corr, n, l, q1, q2 = ClimateRisk_Perf_Corr(fin_data=fundamentals, clim_data=STORMS, sector='23', min_date=start_date, max_date=end_date)
    diff = q2-q1
    ROA.extend((start_date, end_date, q2, q1, diff))
ROA = np.array(ROA).reshape((int(len(ROA)/5), 5))
ROA = pd.DataFrame(ROA, columns=['Start date', 'End date', 'Av. ROA of best states', 'Av. ROA of worst states', 'Difference'])
for columns in ['Av. ROA of best states', 'Av. ROA of worst states', 'Difference']:
    ROA[columns] = ROA[columns].astype('float')
ROA

Unnamed: 0,Start date,End date,Av. ROA of best states,Av. ROA of worst states,Difference
0,1996-01-01,1996-12-31,0.000945,0.03451429,-0.03357
1,1997-01-01,1998-01-01,0.005572,-0.2785623,0.284135
2,1998-01-01,1999-01-01,-0.052488,0.005155139,-0.057644
3,1999-01-01,2000-01-01,-0.029262,0.01191531,-0.041177
4,2000-01-01,2000-12-31,-0.003619,3.046398e-07,-0.00362
5,2001-01-01,2002-01-01,0.009346,-0.02130128,0.030647
6,2002-01-01,2003-01-01,0.009294,-0.02809638,0.037391
7,2003-01-01,2004-01-01,0.00234,0.00983893,-0.007499
8,2004-01-01,2004-12-31,0.001913,0.02406064,-0.022148
9,2005-01-01,2006-01-01,-0.013327,0.001054318,-0.014381


In [88]:
ROA.Difference.mean()

0.018675601318353264

In [89]:
len(ROA.loc[ROA.Difference>0])/len(ROA)

0.4782608695652174

In [90]:
import datetime
ROA = []
for dates in list(np.arange(1996, 2019)):
    start_date = str(dates) + '-01-01'
    end_date = datetime.datetime.strptime(start_date, '%Y-%m-%d') + datetime.timedelta(days=365)
    end_date = str(end_date)[0:10]
    corr, n, l, q1, q2 = ClimateRisk_Perf_Corr(fin_data=fundamentals, clim_data=STORMS, sector='22', min_date=start_date, max_date=end_date)
    diff = q2-q1
    ROA.extend((start_date, end_date, q2, q1, diff))
ROA = np.array(ROA).reshape((int(len(ROA)/5), 5))
ROA = pd.DataFrame(ROA, columns=['Start date', 'End date', 'Av. ROA of best states', 'Av. ROA of worst states', 'Difference'])
for columns in ['Av. ROA of best states', 'Av. ROA of worst states', 'Difference']:
    ROA[columns] = ROA[columns].astype('float')
ROA

Unnamed: 0,Start date,End date,Av. ROA of best states,Av. ROA of worst states,Difference
0,1996-01-01,1996-12-31,-0.01034,0.011136,-0.021475
1,1997-01-01,1998-01-01,0.006571,0.00822,-0.001649
2,1998-01-01,1999-01-01,0.006476,0.010072,-0.003596
3,1999-01-01,2000-01-01,0.005613,0.002764,0.002849
4,2000-01-01,2000-12-31,-0.00493,0.006542,-0.011472
5,2001-01-01,2002-01-01,0.008305,0.005048,0.003257
6,2002-01-01,2003-01-01,0.004039,0.008816,-0.004777
7,2003-01-01,2004-01-01,0.009584,0.007537,0.002047
8,2004-01-01,2004-12-31,0.0049,-0.03702,0.04192
9,2005-01-01,2006-01-01,0.006724,-0.022445,0.029169


In [91]:
ROA.Difference.mean()

0.008512581121921088

In [92]:
len(ROA.loc[ROA.Difference>0])/len(ROA)

0.6086956521739131

In [93]:
import datetime
ROA = []
for dates in list(np.arange(1996, 2019)):
    start_date = str(dates) + '-01-01'
    end_date = datetime.datetime.strptime(start_date, '%Y-%m-%d') + datetime.timedelta(days=365)
    end_date = str(end_date)[0:10]
    corr, n, l, q1, q2 = ClimateRisk_Perf_Corr(fin_data=fundamentals, clim_data=STORMS, sector='524126', min_date=start_date, max_date=end_date)
    diff = q2-q1
    ROA.extend((start_date, end_date, q2, q1, diff))
ROA = np.array(ROA).reshape((int(len(ROA)/5), 5))
ROA = pd.DataFrame(ROA, columns=['Start date', 'End date', 'Av. ROA of best states', 'Av. ROA of worst states', 'Difference'])
for columns in ['Av. ROA of best states', 'Av. ROA of worst states', 'Difference']:
    ROA[columns] = ROA[columns].astype('float')
ROA

Unnamed: 0,Start date,End date,Av. ROA of best states,Av. ROA of worst states,Difference
0,1996-01-01,1996-12-31,0.01022,0.012499,-0.002278
1,1997-01-01,1998-01-01,0.010485,0.015124,-0.004639
2,1998-01-01,1999-01-01,-0.005082,0.006784,-0.011866
3,1999-01-01,2000-01-01,0.000526,0.003928,-0.003403
4,2000-01-01,2000-12-31,0.007687,0.009622,-0.001935
5,2001-01-01,2002-01-01,0.010969,-0.012124,0.023093
6,2002-01-01,2003-01-01,0.004886,-0.011362,0.016248
7,2003-01-01,2004-01-01,0.001671,0.006815,-0.005144
8,2004-01-01,2004-12-31,0.007596,0.008994,-0.001397
9,2005-01-01,2006-01-01,0.009802,0.013548,-0.003746


In [94]:
ROA.Difference.mean()

0.0016392108229687562

In [95]:
len(ROA.loc[ROA.Difference>0])/len(ROA)

0.391304347826087

In [27]:
for sectors in ['7211', '11', '21', '22', '23', '42', '44', '48', '52', '53', '72']:
    corr, n, l, q1, q2 = ClimateRisk_Perf_Corr(fin_data=fundamentals, clim_data=STORMS, sector=sectors, min_date='1996-01-01', max_date='2020-01-01')
    print('Sector: ' + sectors, ' Correlation: ' + str(corr), 'Average ROA of 5 best states: ' + str(q2), ' Average ROA of 5 worst states: ' + str(q1))

Sector: 7211  Correlation: -0.048759701776071404 Average ROA of 5 best states: -0.0013519383616641587  Average ROA of 5 worst states: 0.0065263580015035546
Sector: 11  Correlation: -0.6456156844786822 Average ROA of 5 best states: -0.013297271103049235  Average ROA of 5 worst states: -0.19254415612883155
Sector: 21  Correlation: -0.17922114628895502 Average ROA of 5 best states: -0.044721978210928784  Average ROA of 5 worst states: -0.21048612051475252
Sector: 22  Correlation: -0.1999176089953097 Average ROA of 5 best states: 0.006012840377371177  Average ROA of 5 worst states: -0.02601293034558213
Sector: 23  Correlation: -0.23091428216567145 Average ROA of 5 best states: -0.02424596901195452  Average ROA of 5 worst states: -0.04553922812682323
Sector: 42  Correlation: 0.035946394247447715 Average ROA of 5 best states: -0.11277885280036518  Average ROA of 5 worst states: -0.06444253056722678
Sector: 44  Correlation: 0.01775866856783145 Average ROA of 5 best states: -0.0125979528150772

In [28]:
ev=[]
for events in list(STORMS.EVENT_TYPE.unique()):
    df = STORMS.loc[(STORMS.EVENT_TYPE==events) & (STORMS.index>='1996-01-01')]
    j = len(STORMS.loc[STORMS.index>='1996-01-01'])
    n = len(df)/j * 100
    ev.extend((events, n))
l = len(list(STORMS.loc[STORMS.index>='1996-01-01'].EVENT_TYPE.unique()))
ev = np.array(ev).reshape((l, 2))
ev = pd.DataFrame(data=ev, columns=['Event', 'Event dist. (%)'])
ev['Event dist. (%)'] = ev['Event dist. (%)'].astype('float')
ev = ev.sort_values(by='Event dist. (%)', ascending=False)
ev.reset_index(inplace=True)
ev.drop(columns=['index'], inplace=True)
ev

Unnamed: 0,Event,Event dist. (%)
0,Thunderstorm Wind,46.631025
1,Flash Flood,10.640306
2,Hail,8.102831
3,Strong Wind,6.500947
4,Flood,6.413499
5,Tornado,4.938811
6,High Wind,3.866583
7,Lightning,2.897701
8,Winter Storm,1.92882
9,Drought,1.595425


# III. Find if there is a correlation between performance and climate risk

In [213]:
#fundamentals = pd.read_csv('/Users/allanbellahsene/Desktop/THESIS/DATA/WHARTON_DATABASE/Funda_data.csv')
#fundamentals[['Date']] = fundamentals[['datadate']].applymap(str).applymap(lambda s: "{}/{}/{}".format(s[4:6],s[6:], s[0:4]))
#fundamentals.Date = pd.to_datetime(fundamentals.Date)

In [6]:
data = pd.read_csv('/Users/allanbellahsene/Desktop/THESIS/DATA/WHARTON_DATABASE/fun_data2.csv')
data[['Date']] = data[['datadate']].applymap(str).applymap(lambda s: "{}/{}/{}".format(s[4:6],s[6:], s[0:4]))
data.Date = pd.to_datetime(data.Date)
data.index = data.Date
data['Market Cap'] = data['prccq'] * data['cshtrq']
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}
abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))
data['State'] = data['state'].map(abbrev_us_state)
data.State = data.State.str.upper()
data['ROA'] = data['niq'] / data['atq']
#data = data.loc[abs(data.ROA)<2]
df = data[['gvkey', 'conm', 'ROA', 'prccq', 'Market Cap', 'naics', 'State']]
df = df.loc[abs(df.ROA)<5]
sectors = ['11', '22', '23']
fundamentals = df.loc[df['Market Cap']<1e9]
data = fundamentals.loc[(fundamentals.naics.astype('str').str.startswith(sectors[0]))]
for sector in sectors[1:]:
    df = fundamentals.loc[(fundamentals.naics.astype('str').str.startswith(sector))]
    data = pd.concat([data, df], axis= 0)

In [8]:
data

Unnamed: 0_level_0,gvkey,conm,ROA,prccq,Market Cap,naics,State
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1981-11-30,1266,ALICO INC,0.055831,44.5000,2.905850e+06,111310.0,FLORIDA
1982-08-31,1266,ALICO INC,0.017322,32.0000,1.660800e+06,111310.0,FLORIDA
1982-11-30,1266,ALICO INC,0.009589,36.0000,1.839600e+06,111310.0,FLORIDA
1983-02-28,1266,ALICO INC,0.062897,43.5000,2.705700e+06,111310.0,FLORIDA
1983-05-31,1266,ALICO INC,0.025911,41.0000,1.205400e+06,111310.0,FLORIDA
...,...,...,...,...,...,...,...
2018-12-31,279431,RENESOLA LTD,-0.011861,1.1900,3.691134e+06,237130.0,CONNECTICUT
2019-03-31,279431,RENESOLA LTD,-0.012869,1.6908,3.508074e+06,237130.0,CONNECTICUT
2019-06-30,279431,RENESOLA LTD,0.012612,1.2800,1.634921e+06,237130.0,CONNECTICUT
2019-09-30,279431,RENESOLA LTD,0.007093,1.4900,8.430845e+06,237130.0,CONNECTICUT


In [9]:
len(data.State.unique())

50

In [10]:
len(data.conm.unique())

952

In [11]:
df = data
l_comp = len(df.conm.unique())
States = list(df.State.unique())
dist=[]
for state in States:
    subdf = df.loc[df.State==state]
    l = len(subdf.conm.unique())
    dist.extend((state, l))
dist = np.array(dist).reshape(int(len(dist)/2), 2)
dist = pd.DataFrame(dist, columns=['State', 'Number of companies'])
dist['Number of companies'] = dist['Number of companies'].astype("float")
dist = dist.sort_values(by='Number of companies', ascending=False)


In [12]:
dist.reset_index(inplace=True)
dist.drop(columns=['index'], inplace=True)
dist 

Unnamed: 0,State,Number of companies
0,TEXAS,83.0
1,CALIFORNIA,80.0
2,FLORIDA,53.0
3,NEW YORK,48.0
4,NEW JERSEY,32.0
5,PENNSYLVANIA,25.0
6,ARIZONA,25.0
7,ILLINOIS,25.0
8,OHIO,22.0
9,MASSACHUSETTS,22.0


In [55]:
print(dist.to_latex(index=False))

\begin{tabular}{lr}
\toprule
                State &  Number of companies \\
\midrule
                TEXAS &                 83.0 \\
           CALIFORNIA &                 80.0 \\
              FLORIDA &                 53.0 \\
             NEW YORK &                 48.0 \\
           NEW JERSEY &                 32.0 \\
         PENNSYLVANIA &                 25.0 \\
              ARIZONA &                 25.0 \\
             ILLINOIS &                 25.0 \\
                 OHIO &                 22.0 \\
        MASSACHUSETTS &                 22.0 \\
             COLORADO &                 19.0 \\
          CONNECTICUT &                 16.0 \\
              INDIANA &                 16.0 \\
             MISSOURI &                 15.0 \\
            MINNESOTA &                 15.0 \\
             MICHIGAN &                 14.0 \\
            WISCONSIN &                 14.0 \\
       NORTH CAROLINA &                 14.0 \\
                 IOWA &                 13.0 \\
  

In [316]:
def rankStatesPerROA(fin_data, clim_data, min_date, max_date):
    df = data.loc[(data.index>=min_date) & (data.index<=max_date)]
    states = list(df.State.unique())
    ROA_per_state = []
    for state in States:
        subdf = df.loc[df.State==state]
        companies = list(subdf.conm.unique())
        l = len(companies)
        ROA_per_comp = []
        for comp in companies:
            subsubdf = subdf.loc[subdf.conm==comp]
            if len(subsubdf)>2:
                mean_ROA = subsubdf.ROA.mean()
                ROA_per_comp.append(mean_ROA)
            else: 
                pass
        avg_ROA = np.mean(ROA_per_comp)
        ROA_per_state.extend((state, avg_ROA, l))

    n = len(ROA_per_state)
    ROA = np.array(ROA_per_state).reshape((int(n/3), 3))
    ROA = pd.DataFrame(ROA, columns=['State', 'Mean ROA', 'Number of companies'])
    ROA['Mean ROA'] = ROA['Mean ROA'].astype('float')
    ROA['Number of companies'] = ROA['Number of companies'].astype('float')
    ROA.dropna(inplace=True)
    ROA = ROA.sort_values(by='Mean ROA',ascending=False)
    ROA.reset_index(inplace=True)
    ROA.drop(columns=['index'], inplace=True)
    
    return ROA

In [325]:
ranking = rankStatesPerROA(fin_data=data, clim_data=STORMS, min_date='1960-01-01', max_date='2010-01-01')
ranking.loc[ranking['Number of companies']>10]

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,State,Mean ROA,Number of companies
2,TEXAS,0.011329,187.0
4,IOWA,0.01126,14.0
12,LOUISIANA,0.00664,11.0
13,TENNESSEE,0.005574,33.0
14,MICHIGAN,0.004919,28.0
16,SOUTH CAROLINA,0.00387,11.0
17,WISCONSIN,0.003696,16.0
18,INDIANA,0.000899,24.0
19,DELAWARE,1.3e-05,11.0
20,MISSOURI,-0.000525,27.0


In [324]:
r = seasonal_trend_ranking(data=STORMS, type_damage='TOTAL_DAMAGES', type_disaster='All', min_date='1960-01-01', max_date='2010-01-01')
r.sort_values(by='Kendall Tau', ascending=False)

Unnamed: 0,State,Trend,Sen Slope,Kendall Tau,P-value
2,OKLAHOMA,increasing,8212.5,0.416667,8.65974e-15
1,DELAWARE,increasing,12500.0,0.4,0.02560113
41,ARKANSAS,increasing,0.0,0.381868,1.554312e-15
54,MISSISSIPPI,increasing,0.0,0.368471,4.041212e-14
7,MASSACHUSETTS,increasing,0.0,0.346244,2.997602e-14
43,PENNSYLVANIA,increasing,0.0,0.34334,2.657874e-13
46,ILLINOIS,increasing,0.0,0.329535,4.217071e-12
4,MICHIGAN,increasing,2636.363636,0.325301,3.303509e-10
37,NEW YORK,increasing,0.0,0.323689,7.410117e-11
36,LOUISIANA,increasing,0.0,0.314647,3.304468e-11


In [269]:
ranking = rankStatesPerROA(fundamentals, STORMS, '11', '1996-01-01', '1997-01-01')

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [270]:
ranking

Unnamed: 0,State,Mean ROA,Number of companies
0,CONNECTICUT,0.070734,1.0
1,WASHINGTON,0.040376,2.0
2,IOWA,0.033413,1.0
3,NEW YORK,0.033015,5.0
4,PENNSYLVANIA,0.022981,1.0
5,MISSISSIPPI,0.020195,3.0
6,NEW JERSEY,0.014586,1.0
7,MINNESOTA,0.012486,1.0
8,HAWAII,0.011517,2.0
9,FLORIDA,0.009196,1.0


In [273]:
r = rank_states(data=STORMS, method='Total Loss', type_damage='TOTAL_DAMAGES', type_disaster='All', min_date='1960-01-01', max_date='1997-01-01', alpha=None)
r.loc[r.State.isin(list(ranking.State.unique()))]

Unnamed: 0,State,Total Loss
0,NORTH CAROLINA,920251000.0
2,WISCONSIN,189462700.0
6,IOWA,101380000.0
10,ILLINOIS,24338500.0
12,CALIFORNIA,21234000.0
17,HAWAII,16540000.0
18,NEW JERSEY,13210000.0
22,INDIANA,9805000.0
24,WASHINGTON,7000000.0
26,MINNESOTA,3880000.0


# IV. Portfolio Strategy with climate risk

In [42]:
data_stocks = pd.read_csv('/Users/allanbellahsene/Desktop/THESIS/DATA/WHARTON_DATABASE/Monthly_stocks.csv')

In [43]:
stocks = data_stocks.loc[data_stocks['loc'] == 'USA']
stocks

Unnamed: 0,gvkey,iid,datadate,tic,conm,prccm,trt1m,gsubind,loc,naics,state
0,1000,01,19700930,AE.2,A & E PLASTIK PAK INC,11.75,,,USA,,
1,1000,01,19701031,AE.2,A & E PLASTIK PAK INC,12.50,6.3830,,USA,,
2,1000,01,19701130,AE.2,A & E PLASTIK PAK INC,9.00,-28.0000,,USA,,
3,1000,01,19701231,AE.2,A & E PLASTIK PAK INC,10.00,11.1111,,USA,,
4,1000,01,19710131,AE.2,A & E PLASTIK PAK INC,9.00,-10.0000,,USA,,
...,...,...,...,...,...,...,...,...,...,...,...
6961864,332115,01,20190930,ARMP,ARMATA PHARMACEUTICALS INC,3.70,-16.8932,35201010.0,USA,325414.0,CA
6961865,332115,01,20191031,ARMP,ARMATA PHARMACEUTICALS INC,4.15,12.1622,35201010.0,USA,325414.0,CA
6961866,332115,01,20191130,ARMP,ARMATA PHARMACEUTICALS INC,4.38,5.5422,35201010.0,USA,325414.0,CA
6961867,332115,01,20191231,ARMP,ARMATA PHARMACEUTICALS INC,3.25,-25.7991,35201010.0,USA,325414.0,CA


In [44]:
stocks[['Date']] = stocks[['datadate']].applymap(str).applymap(lambda s: "{}/{}/{}".format(s[4:6],s[6:], s[0:4]))
stocks.index = pd.to_datetime(stocks.Date)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


In [45]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))
stocks_data = stocks
stocks_data['State'] = stocks_data['state'].map(abbrev_us_state)
stocks_data.State = stocks_data.State.str.upper()
stocks_data = stocks_data[['conm', 'gvkey', 'naics', 'prccm','trt1m', 'State']]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[name] = value


In [46]:
data.head()

Unnamed: 0_level_0,gvkey,conm,ROA,prccq,Market Cap,naics,State
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1981-11-30,1266,ALICO INC,0.055831,44.5,2905850.0,111310.0,FLORIDA
1982-08-31,1266,ALICO INC,0.017322,32.0,1660800.0,111310.0,FLORIDA
1982-11-30,1266,ALICO INC,0.009589,36.0,1839600.0,111310.0,FLORIDA
1983-02-28,1266,ALICO INC,0.062897,43.5,2705700.0,111310.0,FLORIDA
1983-05-31,1266,ALICO INC,0.025911,41.0,1205400.0,111310.0,FLORIDA


In [47]:
gvkeys = list(data.gvkey.unique())
stocks_data = stocks_data.loc[stocks_data.gvkey.isin(gvkeys)]

In [48]:
stocks_data

Unnamed: 0_level_0,conm,gvkey,naics,prccm,trt1m,State
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1971-01-31,A P S INC,1059,236210.0,17.1250,,TEXAS
1971-02-28,A P S INC,1059,236210.0,17.0000,-0.7299,TEXAS
1971-03-31,A P S INC,1059,236210.0,19.0000,11.7647,TEXAS
1971-04-30,A P S INC,1059,236210.0,22.0000,15.7895,TEXAS
1971-05-31,A P S INC,1059,236210.0,20.3750,-7.3864,TEXAS
...,...,...,...,...,...,...
2019-09-30,RENESOLA LTD,279431,237130.0,1.4900,54.7409,CONNECTICUT
2019-10-31,RENESOLA LTD,279431,237130.0,1.3149,-11.7517,CONNECTICUT
2019-11-30,RENESOLA LTD,279431,237130.0,1.4527,10.4799,CONNECTICUT
2019-12-31,RENESOLA LTD,279431,237130.0,1.4150,-2.5952,CONNECTICUT


In [49]:
stocks = []
for comp in list(stocks_data.conm.unique()):
    subdf = stocks_data.loc[stocks_data.conm==comp]
    subdf['Monthly Return'] = subdf.prccm.pct_change()
    subdf = subdf.loc[abs(subdf['Monthly Return']) < 10]
    stocks.append(subdf)
    
        

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [50]:
df = pd.concat([stocks[0], stocks[1]], axis=0)
for i in range(2, len(stocks)):
    df = pd.concat([df, stocks[i]], axis=0)
df

Unnamed: 0_level_0,conm,gvkey,naics,prccm,trt1m,State,Monthly Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1971-02-28,A P S INC,1059,236210.0,17.0000,-0.7299,TEXAS,-0.007299
1971-03-31,A P S INC,1059,236210.0,19.0000,11.7647,TEXAS,0.117647
1971-04-30,A P S INC,1059,236210.0,22.0000,15.7895,TEXAS,0.157895
1971-05-31,A P S INC,1059,236210.0,20.3750,-7.3864,TEXAS,-0.073864
1971-06-30,A P S INC,1059,236210.0,10.8750,6.7485,TEXAS,-0.466258
...,...,...,...,...,...,...,...
2019-09-30,RENESOLA LTD,279431,237130.0,1.4900,54.7409,CONNECTICUT,0.547409
2019-10-31,RENESOLA LTD,279431,237130.0,1.3149,-11.7517,CONNECTICUT,-0.117517
2019-11-30,RENESOLA LTD,279431,237130.0,1.4527,10.4799,CONNECTICUT,0.104799
2019-12-31,RENESOLA LTD,279431,237130.0,1.4150,-2.5952,CONNECTICUT,-0.025952


In [51]:
len(df.conm.unique())

709

In [52]:
min(df.index)

Timestamp('1962-02-28 00:00:00')

In [53]:
max(df.index)

Timestamp('2020-01-31 00:00:00')

In [54]:
len(data.conm.unique())

952

In [79]:
STORMS.groupby(['EPISODE_ID']).agg({'TOTAL_DAMAGES': sum}).nlargest(10, 'TOTAL_DAMAGES')

Unnamed: 0_level_0,TOTAL_DAMAGES
EPISODE_ID,Unnamed: 1_level_1
119753.0,38257530000.0
68471.0,24959090000.0
120357.0,18263750000.0
131864.0,17000000000.0
1198432.0,7390300000.0
109514.0,6983176000.0
130185.0,5700849000.0
1178038.0,5667000000.0
119746.0,5290005000.0
1181034.0,4923200000.0


In [82]:
STORMS.loc[STORMS.EPISODE_ID==119753].head()

Unnamed: 0,Date,STATE,DAMAGE_PROPERTY,DAMAGE_CROPS,TOTAL_DAMAGES,DEATHS_DIRECT,DEATHS_INDIRECT,EVENT_TYPE,EVENT_ID,EPISODE_ID
2017-08-25,2017-08-25,TEXAS,500000.0,0.0,500000.0,0,0,Tornado,720473,119753.0
2017-08-25,2017-08-25,TEXAS,100000.0,0.0,100000.0,0,0,Flash Flood,721321,119753.0
2017-08-25,2017-08-25,TEXAS,50000.0,0.0,50000.0,0,0,Tornado,720470,119753.0
2017-08-25,2017-08-25,TEXAS,500000.0,0.0,500000.0,0,0,Tornado,720461,119753.0
2017-08-25,2017-08-25,TEXAS,30000.0,0.0,30000.0,0,0,Tornado,720460,119753.0


Compare the performance of companies in Texas before and after 25/08/2017.

In [209]:
sectors = ['11']
df = stocks_data.loc[stocks_data.naics.astype('str').str.startswith(sectors[0])]
for sector in sectors[1:]:
    data = stocks_data.loc[stocks_data.naics.astype('str').str.startswith(sector)]
    df = pd.concat([df, data], axis=0)
df = df.loc[(df.State=='TEXAS')]

In [210]:
df = df[['conm', 'naics', 'prccm', 'trt1m']].loc[df.index.isin(['2016-09-30', '2017-07-31', '2017-09-30'])]

In [211]:
df

Unnamed: 0_level_0,conm,naics,prccm,trt1m
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-09-30,NATURALSHRIMP INC,112512.0,0.4685,-24.4355
2017-07-31,NATURALSHRIMP INC,112512.0,0.331,-6.7606
2017-09-30,NATURALSHRIMP INC,112512.0,0.145,-23.6842


In [212]:
fundamentals

Unnamed: 0,gvkey,datadate,fyearq,fqtr,indfmt,consol,popsrc,datafmt,tic,conm,...,datacqtr,datafqtr,atq,niq,costat,gsubind,loc,naics,state,Date
0,1000,19660331,1966,1,INDL,C,D,STD,AE.2,A & E PLASTIK PAK INC,...,1966Q1,1966Q1,,,I,,USA,,,1966-03-31
1,1000,19660630,1966,2,INDL,C,D,STD,AE.2,A & E PLASTIK PAK INC,...,1966Q2,1966Q2,,,I,,USA,,,1966-06-30
2,1000,19660930,1966,3,INDL,C,D,STD,AE.2,A & E PLASTIK PAK INC,...,1966Q3,1966Q3,,,I,,USA,,,1966-09-30
3,1000,19661231,1966,4,INDL,C,D,STD,AE.2,A & E PLASTIK PAK INC,...,1966Q4,1966Q4,,0.164,I,,USA,,,1966-12-31
4,1000,19670331,1967,1,INDL,C,D,STD,AE.2,A & E PLASTIK PAK INC,...,1967Q1,1967Q1,,,I,,USA,,,1967-03-31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1796721,331856,20191231,2019,4,INDL,C,D,STD,IMUX,IMMUNIC INC,...,2019Q4,2019Q4,65.955,-7.691,A,35201010.0,USA,325414.0,CA,2019-12-31
1796722,332115,20190331,2019,1,INDL,C,D,STD,ARMP,ARMATA PHARMACEUTICALS INC,...,2019Q1,2019Q1,,-3.739,A,35201010.0,USA,325414.0,CA,2019-03-31
1796723,332115,20190630,2019,2,INDL,C,D,STD,ARMP,ARMATA PHARMACEUTICALS INC,...,2019Q2,2019Q2,34.916,-4.199,A,35201010.0,USA,325414.0,CA,2019-06-30
1796724,332115,20190930,2019,3,INDL,C,D,STD,ARMP,ARMATA PHARMACEUTICALS INC,...,2019Q3,2019Q3,29.304,-6.955,A,35201010.0,USA,325414.0,CA,2019-09-30


In [None]:
prices = np.array(prices).reshape((int(len(prices)/5), 5))
prices = pd.DataFrame(prices, columns=['Company', 'NAICS code', 'Price on ' + start_date, 'Price on ' + mid_date, 'Price on ' + end_date])

In [None]:
prices

In [None]:
prices.loc[prices['Price on 2017-09-30'] < prices['Price on 2017-07-31']]

In [None]:
prices.loc[prices['Price on 2017-09-30'] < prices['Price on 2016-09-30']]

In [59]:
def convert_stock_data(fin_data, sectors, start_date, end_date):
    
    if len(sectors) == 1:
        data = fin_data.loc[fin_data.naics.astype('str').str.startswith(sectors)]
    else:
        data = fundamentals.loc[(fundamentals.naics.astype('str').str.startswith(sectors[0]))]
        for sector in sectors[1:]:
            df = fundamentals.loc[(fundamentals.naics.astype('str').str.startswith(sector))]
            data = pd.concat([data, df], axis= 0)
        
    data = data[['gvkey', 'fqtr', 'tic', 'conm', 'atq', 'niq', 'loc', 'state', 'naics', 'Date']]
    data.index = data.Date
    data = data.loc[data['loc'] == 'USA'] #keep only companies with headquarters in the US
    comp_names = list(data.conm.unique()) 
    comp_gvkey = list(data.gvkey.unique())
    data_states = list(data.state.unique())
    data_stocks = pd.read_csv('/Users/allanbellahsene/Desktop/THESIS/DATA/WHARTON_DATABASE/Monthly_stocks.csv')
    stocks = []
    for keys in comp_gvkey:
        stock = data_stocks.loc[data_stocks.gvkey==keys]
        stocks.append(stock)

    stocks_data = pd.concat([stocks[0], stocks[1]], axis=0)
    for i in range(2, len(stocks)):
        stocks_data = pd.concat([stocks_data, stocks[i]], axis=0)
    stocks_data[['Date']] = stocks_data[['datadate']].applymap(str).applymap(lambda s: "{}/{}/{}".format(s[4:6],s[6:], s[0:4]))
    stocks_data.index = pd.to_datetime(stocks_data.Date)
    
    us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
    }

    abbrev_us_state = dict(map(reversed, us_state_abbrev.items()))
    stocks_data['State'] = stocks_data['state'].map(abbrev_us_state)
    stocks_data.State = stocks_data.State.str.upper()
    stocks_data = stocks_data[['prccm', 'conm', 'trt1m', 'State']]
    stock_data = stocks_data.loc[(stocks_data.index>=start_date) & (stocks_data.index<=end_date)]
    
    return stock_data
    

In [60]:
def rankStockReturns(fin_data, sectors, start_date, end_date):
    stock_data = convert_stock_data(fin_data, sectors, start_date, end_date)
    States = list(stock_data.State.unique())
    rtn_per_state = []
    for state in States:
        df = stock_data.loc[stock_data.State==state]
        companies = list(df.conm.unique())
        n = len(companies)
        rtn_per_comp = []
        for comp in companies:
            subdf = df.loc[df.conm==comp]
            l = len(subdf)
            threshold = 0.1*l
            if len(subdf) - subdf.prccm.count() < threshold:
                log_r = np.log(subdf.prccm.shift(-1) / subdf.prccm).mean()
                rtn_per_comp.append(log_r)
            else:
                pass
        rtn_per_comp = [r for r in rtn_per_comp if abs(r) < 1000]
        r_per_state = np.mean(rtn_per_comp)
        rtn_per_state.extend((state, r_per_state, n))
    rtn_per_state = np.array(rtn_per_state).reshape((int(len(rtn_per_state)/3), 3))
    rtn_per_state = pd.DataFrame(rtn_per_state, columns=['State', 'Average Monthly Stock Return', 'Number of companies'])
    rtn_per_state['Average Monthly Stock Return'] = rtn_per_state['Average Monthly Stock Return'].astype('float')
    rtn_per_state['Number of companies'] = rtn_per_state['Number of companies'].astype('float')
    #rtn_per_state = rtn_per_state.loc[rtn_per_state['Number of companies'] >= 5]
    rtn_per_state = rtn_per_state.sort_values(by='Average Monthly Stock Return', ascending=False)
    rtn_per_state.reset_index(inplace=True)
    rtn_per_state.drop(columns=['index'], inplace=True)
    
    
    return rtn_per_state    

In [62]:
%%time 
rankStockReturns(fin_data=fundamentals, sectors=['11', '22', '23'], start_date='1996-01-01', end_date='2020-01-01')

  result = getattr(ufunc, method)(*inputs, **kwargs)


CPU times: user 49.4 s, sys: 7.3 s, total: 56.7 s
Wall time: 57.4 s


Unnamed: 0,State,Average Monthly Stock Return,Number of companies
0,NEW MEXICO,0.036181,3.0
1,RHODE ISLAND,0.015519,2.0
2,MAINE,0.010289,4.0
3,WASHINGTON,0.00876,9.0
4,LOUISIANA,0.008562,5.0
5,NEW HAMPSHIRE,0.006354,5.0
6,IOWA,0.005197,7.0
7,OHIO,0.005134,16.0
8,OREGON,0.004158,6.0
9,IDAHO,0.002683,3.0


In [64]:
rankStockReturns(fin_data=fundamentals, sectors=['11', '22', '23'], start_date='1996-01-01', end_date='1997-01-01')

Unnamed: 0,State,Average Monthly Stock Return,Number of companies
0,NEW MEXICO,0.072048,2.0
1,ALABAMA,0.026153,1.0
2,PUERTO RICO,0.026153,1.0
3,FLORIDA,0.023406,24.0
4,MISSISSIPPI,0.017626,3.0
5,OREGON,0.016774,4.0
6,TENNESSEE,0.016454,2.0
7,WASHINGTON,0.013541,8.0
8,IOWA,0.012689,6.0
9,SOUTH DAKOTA,0.011627,2.0


In [65]:
rank_states(data=STORMS, method='Total Loss', type_damage='TOTAL_DAMAGES', type_disaster='All', min_date='1996-01-01', max_date='1997-01-01', alpha=None)

Unnamed: 0,State,Total Loss
0,NORTH CAROLINA,920251000.0
1,TEXAS,543311100.0
2,WISCONSIN,189462700.0
3,VIRGINIA,185654000.0
4,OREGON,120200000.0
...,...,...
59,NEVADA,0.0
60,MASSACHUSETTS,0.0
61,UTAH,0.0
62,GUAM,0.0


    First, sort US states by their climate risk trend. Then, go long in the stocks of companies located in the top 5 best states and short in the stocks of companies located in the top 5 worst states. Re-allocate yearly, after re-ranking the states. For this strategy to work, one has to find companies for which natural disasters have critical importance. Agricultural companies can be a good example.