In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dask.dataframe as ddf
from pandas import Series, DataFrame
%matplotlib inline
import seaborn as sn

In [2]:
import geopandas as gpd
import pandas as pd

## Load daily temperature 

In [3]:
years = [year for year in range(1960, 2020)]

In [4]:
data_temp = []
for year in years:
    temp_year = pd.read_csv('/global/cfs/cdirs/m1532/Projects_MVP/geospatial/PRISM_Data/PRISM_daily_county_level/prism_daily_county_level_' + str(year) + '.csv', dtype = {'year': int, 'fips': str})
    temp_year = temp_year.loc[:, ~temp_year.columns.str.contains('^Unnamed')]
    data_temp.append(temp_year)

In [5]:
temp = pd.concat(data_temp)
temp.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year
0,1001,1960-01-01,5.417989,2.635663,8.200316,2.279021,1960
1,1001,1960-01-02,7.005736,4.201073,9.810398,10.412733,1960
2,1001,1960-01-03,9.061477,5.235675,12.887278,16.011329,1960
3,1001,1960-01-04,5.334175,-0.74457,11.412921,0.216112,1960
4,1001,1960-01-05,6.495222,3.006032,9.984412,0.274311,1960


In [6]:
temp['year'].unique()

array([1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970,
       1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981,
       1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992,
       1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
       2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
       2015, 2016, 2017, 2018, 2019])

In [7]:
import datetime

In [8]:
def convertTime(time):
    return datetime.strptime(time, '%Y-%m-%d')

In [9]:
def getMonth(date):
    return(date.strftime('%b'))

In [10]:
temp['date'] = pd.to_datetime(temp['date'])
temp['month'] = temp['date'].apply(getMonth)

In [11]:
month_replace = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10,
                'Nov': 11, 'Dec': 12}

In [12]:
temp['month'] = temp['month'].replace(month_replace)

In [13]:
import math

def water_vapor_pressure(temperature):
    e = 0.61094 * math.exp(17.625 * temperature / (temperature + 243.04))
    return e


def apparent_temperature(temperature, water_vapor_pressure):
    A = -1.3 + 0.92 * temperature + 2.2 * water_vapor_pressure
    return A


temperature = 28 
water_vapor_pressure = water_vapor_pressure(temperature) 
apparent_temp = apparent_temperature(temperature, water_vapor_pressure)
print(f"Apparent temperature at {temperature} degrees Celsius and {water_vapor_pressure:.2f} kPa water vapor pressure is {apparent_temp:.2f} °C")

Apparent temperature at 28 degrees Celsius and 3.77 kPa water vapor pressure is 32.76 °C


In [14]:
def findApparentTemp(temperature):
    e = 0.61094 * math.exp(17.625 * temperature / (temperature + 243.04))
    A = -1.3 + 0.92 * temperature + 2.2 * e
    return A

In [15]:
temp = temp.rename(columns = {'tMean': 'mean_temp', 'tMin': 'min_temp', 'tMax': 'max_temp'})

In [16]:
climate = temp[['year', 'month', 'date', 'fips', 'mean_temp', 'min_temp', 'max_temp']]
climate.head()

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp
0,1960,1,1960-01-01,1001,5.417989,2.635663,8.200316
1,1960,1,1960-01-02,1001,7.005736,4.201073,9.810398
2,1960,1,1960-01-03,1001,9.061477,5.235675,12.887278
3,1960,1,1960-01-04,1001,5.334175,-0.74457,11.412921
4,1960,1,1960-01-05,1001,6.495222,3.006032,9.984412


In [17]:
climate['AT_mean'] = climate['mean_temp'].apply(findApparentTemp)
climate['AT_min'] = climate['min_temp'].apply(findApparentTemp)
climate['AT_max'] = climate['max_temp'].apply(findApparentTemp)
climate.head()

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
0,1960,1,1960-01-01,1001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515
1,1960,1,1960-01-02,1001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789
2,1960,1,1960-01-03,1001,9.061477,5.235675,12.887278,9.569066,5.465944,13.82114
3,1960,1,1960-01-04,1001,5.334175,-0.74457,11.412921,5.569946,-0.711797,12.162972
4,1960,1,1960-01-05,1001,6.495222,3.006032,9.984412,6.802082,3.13256,10.580093


In [18]:
arizona = climate[(climate['fips'] == '04013') & (climate['month'] == 7)]
arizona

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
27266,1960,7,1960-07-01,04013,32.574107,22.326286,42.821929,39.459619,25.161506,56.934893
27267,1960,7,1960-07-02,04013,34.630369,26.586453,42.674284,42.667713,30.801159,56.653746
27268,1960,7,1960-07-03,04013,33.936788,26.412528,41.461047,41.570807,30.563185,54.379815
27269,1960,7,1960-07-04,04013,31.660556,22.925237,40.395875,38.075547,25.931752,52.435182
27270,1960,7,1960-07-05,04013,30.997479,22.093775,39.901183,37.086057,24.864364,51.547937
...,...,...,...,...,...,...,...,...,...,...
27217,2019,7,2019-07-27,04013,35.028518,27.783607,42.273429,43.304431,32.458485,55.895320
27218,2019,7,2019-07-28,04013,35.377315,28.138999,42.615630,43.866529,32.957144,56.542327
27219,2019,7,2019-07-29,04013,35.150217,28.151650,42.148783,43.500093,32.974953,55.660931
27220,2019,7,2019-07-30,04013,33.723849,26.492046,40.955653,41.237115,30.671899,53.451236


### After data imputation, 97% of the counties have daily apparent tempeature data from 1960 to 2020

In [19]:
fips_numbers = []
for year in years:
    dict_year = {}
    df_year = climate[climate['year'] == year]
    df_new = df_year.dropna(subset = 'AT_mean')
    numbers = len(df_new['fips'].unique())
    dict_year[year] = numbers
    fips_numbers.append(dict_year)

In [20]:
fips_numbers

[{1960: 3105},
 {1961: 3105},
 {1962: 3105},
 {1963: 3105},
 {1964: 3105},
 {1965: 3105},
 {1966: 3105},
 {1967: 3105},
 {1968: 3105},
 {1969: 3105},
 {1970: 3105},
 {1971: 3105},
 {1972: 3105},
 {1973: 3105},
 {1974: 3105},
 {1975: 3105},
 {1976: 3105},
 {1977: 3105},
 {1978: 3105},
 {1979: 3105},
 {1980: 3105},
 {1981: 3105},
 {1982: 3105},
 {1983: 3105},
 {1984: 3105},
 {1985: 3105},
 {1986: 3105},
 {1987: 3105},
 {1988: 3105},
 {1989: 3105},
 {1990: 3105},
 {1991: 3105},
 {1992: 3105},
 {1993: 3105},
 {1994: 3105},
 {1995: 3105},
 {1996: 3105},
 {1997: 3105},
 {1998: 3105},
 {1999: 3105},
 {2000: 3105},
 {2001: 3105},
 {2002: 3105},
 {2003: 3105},
 {2004: 3105},
 {2005: 3105},
 {2006: 3105},
 {2007: 3105},
 {2008: 3105},
 {2009: 3105},
 {2010: 3105},
 {2011: 3105},
 {2012: 3105},
 {2013: 3105},
 {2014: 3105},
 {2015: 3105},
 {2016: 3105},
 {2017: 3105},
 {2018: 3105},
 {2019: 3105}]

In [21]:
climate['AT_mean'].describe()

count    6.804608e+07
mean     1.405631e+01
std      1.204229e+01
min     -3.634401e+01
25%      5.119291e+00
50%      1.478848e+01
75%      2.386305e+01
max      5.131303e+01
Name: AT_mean, dtype: float64

In [22]:
climate['year'].unique()

array([1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970,
       1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981,
       1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992,
       1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
       2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
       2015, 2016, 2017, 2018, 2019])

In [23]:
climate = climate.sort_values(['fips', 'month', 'year'])
climate

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,9.569066,5.465944,13.821140
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,5.569946,-0.711797,12.162972
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,6.802082,3.132560,10.580093
...,...,...,...,...,...,...,...,...,...,...
1133320,2019,12,2019-12-27,56045,-3.423038,-9.134636,2.288561,-3.404296,-9.028568,2.389735
1133321,2019,12,2019-12-28,56045,-3.415311,-7.042524,0.211902,-3.396585,-6.984792,0.259813
1133322,2019,12,2019-12-29,56045,-4.982109,-7.380541,-2.583677,-4.954086,-7.316181,-2.564803
1133323,2019,12,2019-12-30,56045,-5.757468,-10.321875,-1.193062,-5.720491,-10.181061,-1.165475


### find temperature data during 1981 to 2010

In [24]:
year_temp = [year for year in range(1981, 2011)]

In [25]:
temp = climate[climate['year'].isin(year_temp)]
temp

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
0,1981,1,1981-01-01,01001,7.352787,-1.231505,15.937078,7.719810,-1.204307,17.338278
1,1981,1,1981-01-02,01001,6.394303,-1.310534,14.099139,6.694518,-1.284108,15.204017
2,1981,1,1981-01-03,01001,5.027573,-4.055402,14.110547,5.246482,-4.034347,15.217124
3,1981,1,1981-01-04,01001,5.861992,-2.187024,13.911007,6.128640,-2.166769,14.988108
4,1981,1,1981-01-05,01001,1.541635,-5.145511,8.228781,1.620299,-5.115832,8.664323
...,...,...,...,...,...,...,...,...,...,...
1133320,2010,12,2010-12-27,56045,0.174541,-5.311567,5.660649,0.221754,-5.280078,5.915240
1133321,2010,12,2010-12-28,56045,-3.112616,-9.886477,3.661245,-3.094259,-9.758986,3.814245
1133322,2010,12,2010-12-29,56045,-0.935533,-6.949906,5.078840,-0.905114,-6.893907,5.300514
1133323,2010,12,2010-12-30,56045,-7.037766,-17.145259,3.069727,-6.980124,-16.720898,3.198687


In [26]:
temp['mean_temp'].describe()

count    3.402148e+07
mean     1.258282e+01
std      1.059617e+01
min     -3.814622e+01
25%      5.043820e+00
50%      1.370560e+01
75%      2.127092e+01
max      3.976928e+01
Name: mean_temp, dtype: float64

In [27]:
fips_numbers = []
for year in year_temp:
    dict_year = {}
    df_year = temp[temp['year'] == year]
    numbers = len(df_year['fips'].unique())
    dict_year[year] = numbers
    fips_numbers.append(dict_year)

In [28]:
fips_numbers

[{1981: 3105},
 {1982: 3105},
 {1983: 3105},
 {1984: 3105},
 {1985: 3105},
 {1986: 3105},
 {1987: 3105},
 {1988: 3105},
 {1989: 3105},
 {1990: 3105},
 {1991: 3105},
 {1992: 3105},
 {1993: 3105},
 {1994: 3105},
 {1995: 3105},
 {1996: 3105},
 {1997: 3105},
 {1998: 3105},
 {1999: 3105},
 {2000: 3105},
 {2001: 3105},
 {2002: 3105},
 {2003: 3105},
 {2004: 3105},
 {2005: 3105},
 {2006: 3105},
 {2007: 3105},
 {2008: 3105},
 {2009: 3105},
 {2010: 3105}]

In [29]:
import datetime

temp['date'] = pd.to_datetime(temp['date'])
temp.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  temp['date'] = pd.to_datetime(temp['date'])


Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
0,1981,1,1981-01-01,1001,7.352787,-1.231505,15.937078,7.71981,-1.204307,17.338278
1,1981,1,1981-01-02,1001,6.394303,-1.310534,14.099139,6.694518,-1.284108,15.204017
2,1981,1,1981-01-03,1001,5.027573,-4.055402,14.110547,5.246482,-4.034347,15.217124
3,1981,1,1981-01-04,1001,5.861992,-2.187024,13.911007,6.12864,-2.166769,14.988108
4,1981,1,1981-01-05,1001,1.541635,-5.145511,8.228781,1.620299,-5.115832,8.664323


In [30]:
temp_valid = temp[(temp['month'] == 7) | (temp['month'] == 8)]
temp_valid

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
181,1981,7,1981-07-01,01001,25.258767,19.771108,30.746426,29.001928,21.950693,36.714649
182,1981,7,1981-07-02,01001,22.747374,19.327411,26.167336,25.702292,21.404888,30.228882
183,1981,7,1981-07-03,01001,23.346042,19.872791,26.819293,26.477125,22.076246,31.120841
184,1981,7,1981-07-04,01001,24.560164,19.250909,29.869419,28.070840,21.311116,35.430792
185,1981,7,1981-07-05,01001,25.964896,21.254444,30.675348,29.953901,23.800192,36.609815
...,...,...,...,...,...,...,...,...,...,...
1133198,2010,8,2010-08-27,56045,24.817030,14.288773,35.345287,28.411967,15.422117,43.814747
1133199,2010,8,2010-08-28,56045,23.812539,14.602777,33.022301,27.085878,15.784301,40.147719
1133200,2010,8,2010-08-29,56045,22.271928,13.423014,31.120841,25.091945,14.430186,37.269204
1133201,2010,8,2010-08-30,56045,21.234611,12.363376,30.105845,23.775204,13.228960,35.774844


In [31]:
temp_valid['mean_temp'].isna().sum()

0

In [32]:
temp_valid['max_temp'].isna().sum()

0

In [33]:
temp_valid['min_temp'].isna().sum()

0

In [34]:
min_AT_p85 = temp_valid.groupby(['fips'])['AT_min'].apply(lambda x: np.percentile(x.dropna(), 85)).reset_index()
min_AT_p85.columns = ['fips', 'AT_min_p85']
min_AT_p85

Unnamed: 0,fips,AT_min_p85
0,01001,25.319677
1,01003,26.775692
2,01005,24.736532
3,01007,24.910650
4,01009,24.220104
...,...,...
3100,56037,12.578402
3101,56039,7.031365
3102,56041,11.381369
3103,56043,15.771672


In [34]:
mean_temp_p85 = temp_valid.groupby(['fips'])['mean_temp'].apply(lambda x: np.percentile(x.dropna(), 85)).reset_index()
mean_temp_p85.columns = ['fips', 'mean_temp_p85']
mean_temp_p85

Unnamed: 0,fips,mean_temp_p85
0,01001,28.736536
1,01003,28.879616
2,01005,28.348567
3,01007,28.668666
4,01009,27.786085
...,...,...
3100,56037,20.731717
3101,56039,15.527622
3102,56041,19.461227
3103,56043,23.708815


In [35]:
mean_AT_p85 = temp_valid.groupby(['fips'])['AT_mean'].apply(lambda x: np.percentile(x.dropna(), 85)).reset_index()
mean_AT_p85.columns = ['fips', 'AT_mean_p85']
mean_AT_p85

Unnamed: 0,fips,AT_mean_p85
0,01001,33.802649
1,01003,34.006444
2,01005,33.252659
3,01007,33.706161
4,01009,32.461951
...,...,...
3100,56037,23.143976
3101,56039,16.858785
3102,56041,21.569150
3103,56043,26.950141


In [36]:
temp_p85 = mean_temp_p85.merge(mean_AT_p85, on = ['fips'], how = 'inner')
temp_p85.head()

Unnamed: 0,fips,mean_temp_p85,AT_mean_p85
0,1001,28.736536,33.802649
1,1003,28.879616,34.006444
2,1005,28.348567,33.252659
3,1007,28.668666,33.706161
4,1009,27.786085,32.461951


In [37]:
temp_p85

Unnamed: 0,fips,mean_temp_p85,AT_mean_p85
0,01001,28.736536,33.802649
1,01003,28.879616,34.006444
2,01005,28.348567,33.252659
3,01007,28.668666,33.706161
4,01009,27.786085,32.461951
...,...,...,...
3100,56037,20.731717,23.143976
3101,56039,15.527622,16.858785
3102,56041,19.461227,21.569150
3103,56043,23.708815,26.950141


In [38]:
climate

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,9.569066,5.465944,13.821140
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,5.569946,-0.711797,12.162972
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,6.802082,3.132560,10.580093
...,...,...,...,...,...,...,...,...,...,...
1133320,2019,12,2019-12-27,56045,-3.423038,-9.134636,2.288561,-3.404296,-9.028568,2.389735
1133321,2019,12,2019-12-28,56045,-3.415311,-7.042524,0.211902,-3.396585,-6.984792,0.259813
1133322,2019,12,2019-12-29,56045,-4.982109,-7.380541,-2.583677,-4.954086,-7.316181,-2.564803
1133323,2019,12,2019-12-30,56045,-5.757468,-10.321875,-1.193062,-5.720491,-10.181061,-1.165475


### Find the heatwave Count

In [39]:
def findList(fips, mean_temp_p85):
    temp_p85_list = []
    temp_p85_list.append(fips)
    temp_p85_list.append(mean_temp_p85)
    return temp_p85_list

In [40]:
def findATList(fips, mean_AT_p85):
    temp_p85_list = []
    temp_p85_list.append(fips)
    temp_p85_list.append(mean_AT_p85)
    return temp_p85_list

In [41]:
temp_p85['temp_p85_list'] = temp_p85.apply(lambda x: findList(x['fips'], x['mean_temp_p85']), axis=1)
temp_p85.head()

Unnamed: 0,fips,mean_temp_p85,AT_mean_p85,temp_p85_list
0,1001,28.736536,33.802649,"[01001, 28.736535868364225]"
1,1003,28.879616,34.006444,"[01003, 28.879616359170413]"
2,1005,28.348567,33.252659,"[01005, 28.34856679656289]"
3,1007,28.668666,33.706161,"[01007, 28.668665998632253]"
4,1009,27.786085,32.461951,"[01009, 27.78608468745617]"


In [42]:
temp_p85['AT_p85_list'] = temp_p85.apply(lambda x: findList(x['fips'], x['AT_mean_p85']), axis=1)
temp_p85.head()

Unnamed: 0,fips,mean_temp_p85,AT_mean_p85,temp_p85_list,AT_p85_list
0,1001,28.736536,33.802649,"[01001, 28.736535868364225]","[01001, 33.80264874006083]"
1,1003,28.879616,34.006444,"[01003, 28.879616359170413]","[01003, 34.006444382639664]"
2,1005,28.348567,33.252659,"[01005, 28.34856679656289]","[01005, 33.25265925266209]"
3,1007,28.668666,33.706161,"[01007, 28.668665998632253]","[01007, 33.70616084378674]"
4,1009,27.786085,32.461951,"[01009, 27.78608468745617]","[01009, 32.46195145922724]"


In [43]:
temp_p85_list = temp_p85['temp_p85_list'].tolist()
temp_p85_list[:5]

[['01001', 28.736535868364225],
 ['01003', 28.879616359170413],
 ['01005', 28.34856679656289],
 ['01007', 28.668665998632253],
 ['01009', 27.78608468745617]]

In [44]:
AT_p85_list = temp_p85['AT_p85_list'].tolist()
AT_p85_list[:5]

[['01001', 33.80264874006083],
 ['01003', 34.006444382639664],
 ['01005', 33.25265925266209],
 ['01007', 33.70616084378674],
 ['01009', 32.46195145922724]]

In [45]:
def findHeatwaveCount(temp, fips, temp_p85):
    df = temp[temp['fips'] == fips]
    c1 = df['AT_min'].ge(temp_p85)
    N = 2
    g = (c1 != c1.shift()).cumsum()
    df1 = df.assign(
        cnt=df.groupby(g).date.transform('count')
      , n=df.groupby(g).agg('cumcount')
      , g=g
    )
    c2 = df1.cnt.ge(N)
    c3 = df1.n.mod(N).eq(0) & df1.n.le(df1.cnt-N)
    df['flag'] = np.where(c1 & c2 & c3, 1, 0)
    del(df1)
    return df

In [35]:
climate

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,9.569066,5.465944,13.821140
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,5.569946,-0.711797,12.162972
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,6.802082,3.132560,10.580093
...,...,...,...,...,...,...,...,...,...,...
1133320,2019,12,2019-12-27,56045,-3.423038,-9.134636,2.288561,-3.404296,-9.028568,2.389735
1133321,2019,12,2019-12-28,56045,-3.415311,-7.042524,0.211902,-3.396585,-6.984792,0.259813
1133322,2019,12,2019-12-29,56045,-4.982109,-7.380541,-2.583677,-4.954086,-7.316181,-2.564803
1133323,2019,12,2019-12-30,56045,-5.757468,-10.321875,-1.193062,-5.720491,-10.181061,-1.165475


In [36]:
def findDecade(year):
    if year >= 1960 and year <= 1969:
        return '1960s'
    elif year >= 1970 and year <= 1979:
        return '1970s'
    elif year >= 1980 and year <= 1989:
        return '1980s'
    elif year >= 1990 and year <= 1999:
        return '1990s'
    elif year >= 2000 and year <= 2009:
        return '2000s'
    elif year >= 2010 and year <= 2019:
        return '2010s'
    else:
        return '2020s'

In [37]:
climate['decade'] = climate['year'].apply(findDecade)
climate

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max,decade
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515,1960s
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789,1960s
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,9.569066,5.465944,13.821140,1960s
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,5.569946,-0.711797,12.162972,1960s
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,6.802082,3.132560,10.580093,1960s
...,...,...,...,...,...,...,...,...,...,...,...
1133320,2019,12,2019-12-27,56045,-3.423038,-9.134636,2.288561,-3.404296,-9.028568,2.389735,2010s
1133321,2019,12,2019-12-28,56045,-3.415311,-7.042524,0.211902,-3.396585,-6.984792,0.259813,2010s
1133322,2019,12,2019-12-29,56045,-4.982109,-7.380541,-2.583677,-4.954086,-7.316181,-2.564803,2010s
1133323,2019,12,2019-12-30,56045,-5.757468,-10.321875,-1.193062,-5.720491,-10.181061,-1.165475,2010s


In [38]:
climate_1960s = climate[climate['decade'] == '1960s']
climate_1970s = climate[climate['decade'] == '1970s']
climate_1980s = climate[climate['decade'] == '1980s']
climate_1990s = climate[climate['decade'] == '1990s']
climate_2000s = climate[climate['decade'] == '2000s']
climate_2010s = climate[climate['decade'] == '2010s']

In [39]:
df_1960s = climate_1960s.merge(min_AT_p85, on='fips', how='left')

In [40]:
df_1970s = climate_1970s.merge(min_AT_p85, on='fips', how='left')
df_1980s = climate_1980s.merge(min_AT_p85, on='fips', how='left')
df_1990s = climate_1990s.merge(min_AT_p85, on='fips', how='left')
df_2000s = climate_2000s.merge(min_AT_p85, on='fips', how='left')
df_2010s = climate_2010s.merge(min_AT_p85, on='fips', how='left')

In [42]:
df_1960s

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max,decade,AT_min_p85
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515,1960s,25.319677
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789,1960s,25.319677
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,9.569066,5.465944,13.821140,1960s,25.319677
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,5.569946,-0.711797,12.162972,1960s,25.319677
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,6.802082,3.132560,10.580093,1960s,25.319677
...,...,...,...,...,...,...,...,...,...,...,...,...
11342560,1969,12,1969-12-27,56045,-9.454123,-15.065380,-3.842867,-9.339201,-14.740788,-3.822815,1960s,17.602207
11342561,1969,12,1969-12-28,56045,-11.951790,-16.824419,-7.079162,-11.755470,-16.416110,-7.020734,1960s,17.602207
11342562,1969,12,1969-12-29,56045,-14.616143,-22.877268,-6.355017,-14.311703,-22.131787,-6.309281,1960s,17.602207
11342563,1969,12,1969-12-30,56045,-7.781079,-12.244620,-3.317538,-7.708260,-12.037429,-3.298984,1960s,17.602207


In [41]:
# Function to count heatwaves
def count_heatwaves(series, threshold):
    # Create a boolean series where True represents exceeding the threshold
    is_above_threshold = series > threshold

    # Identify groups of consecutive days above threshold
    # The diff and cumsum methods identify groups of consecutive True values
    heatwave_groups = (is_above_threshold.diff(1) != 0).cumsum()

    # Count the length of each group
    group_lengths = heatwave_groups[is_above_threshold].value_counts()

    # Count the groups that are at least two days long
    heatwave_count = (group_lengths >= 2).sum()

    return heatwave_count

In [54]:
heatwave_counts_1960s_temp = df_1960s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['mean_temp_p85'].iloc[0]))
heatwave_counts_1960s_temp = heatwave_counts_1960s_temp.reset_index(name='heatwave_count')
heatwave_counts_1960s_temp

Unnamed: 0,year,month,fips,heatwave_count
0,1960,1,01001,0
1,1960,1,01003,0
2,1960,1,01005,0
3,1960,1,01007,0
4,1960,1,01009,0
...,...,...,...,...
372595,1969,12,56037,0
372596,1969,12,56039,0
372597,1969,12,56041,0
372598,1969,12,56043,0


In [55]:
heatwave_counts_1960s_temp['heatwave_count'].describe()

count    372600.000000
mean          0.003011
std           0.056907
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           3.000000
Name: heatwave_count, dtype: float64

In [56]:
df_1960s

Unnamed: 0,year,month,date,fips,mean_temp,min_temp,max_temp,AT_mean,AT_min,AT_max,decade,mean_temp_p85,AT_mean_p85,temp_p85_list,AT_p85_list
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,5.658506,2.748637,8.633515,1960s,28.736536,33.802649,"[01001, 28.736535868364225]","[01001, 33.80264874006083]"
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,7.347604,4.378348,10.388789,1960s,28.736536,33.802649,"[01001, 28.736535868364225]","[01001, 33.80264874006083]"
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,9.569066,5.465944,13.821140,1960s,28.736536,33.802649,"[01001, 28.736535868364225]","[01001, 33.80264874006083]"
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,5.569946,-0.711797,12.162972,1960s,28.736536,33.802649,"[01001, 28.736535868364225]","[01001, 33.80264874006083]"
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,6.802082,3.132560,10.580093,1960s,28.736536,33.802649,"[01001, 28.736535868364225]","[01001, 33.80264874006083]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11342560,1969,12,1969-12-27,56045,-9.454123,-15.065380,-3.842867,-9.339201,-14.740788,-3.822815,1960s,24.978560,28.627208,"[56045, 24.97855981861105]","[56045, 28.627207591381598]"
11342561,1969,12,1969-12-28,56045,-11.951790,-16.824419,-7.079162,-11.755470,-16.416110,-7.020734,1960s,24.978560,28.627208,"[56045, 24.97855981861105]","[56045, 28.627207591381598]"
11342562,1969,12,1969-12-29,56045,-14.616143,-22.877268,-6.355017,-14.311703,-22.131787,-6.309281,1960s,24.978560,28.627208,"[56045, 24.97855981861105]","[56045, 28.627207591381598]"
11342563,1969,12,1969-12-30,56045,-7.781079,-12.244620,-3.317538,-7.708260,-12.037429,-3.298984,1960s,24.978560,28.627208,"[56045, 24.97855981861105]","[56045, 28.627207591381598]"


In [43]:
heatwave_counts_1960s_AT = df_1960s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['AT_min_p85'].iloc[0]))
heatwave_counts_1960s_AT = heatwave_counts_1960s_AT.reset_index(name='heatwave_count')
heatwave_counts_1960s_AT

Unnamed: 0,year,month,fips,heatwave_count
0,1960,1,01001,0
1,1960,1,01003,0
2,1960,1,01005,0
3,1960,1,01007,0
4,1960,1,01009,0
...,...,...,...,...
372595,1969,12,56037,0
372596,1969,12,56039,0
372597,1969,12,56041,0
372598,1969,12,56043,0


In [44]:
heatwave_counts_1960s_AT['heatwave_count'].describe()

count    372600.000000
mean          0.131157
std           0.471694
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           7.000000
Name: heatwave_count, dtype: float64

In [59]:
heatwave_counts_1970s_temp = df_1970s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['mean_temp_p85'].iloc[0]))
heatwave_counts_1970s_temp = heatwave_counts_1970s_temp.reset_index(name='heatwave_count')
heatwave_counts_1970s_temp

Unnamed: 0,year,month,fips,heatwave_count
0,1970,1,01001,0
1,1970,1,01003,0
2,1970,1,01005,0
3,1970,1,01007,0
4,1970,1,01009,0
...,...,...,...,...
372595,1979,12,56037,0
372596,1979,12,56039,0
372597,1979,12,56041,0
372598,1979,12,56043,0


In [61]:
heatwave_counts_1970s_temp['heatwave_count'].describe()

count    372600.000000
mean          0.002568
std           0.053802
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           3.000000
Name: heatwave_count, dtype: float64

In [45]:
heatwave_counts_1970s_AT = df_1970s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['AT_min_p85'].iloc[0]))
heatwave_counts_1970s_AT = heatwave_counts_1970s_AT.reset_index(name='heatwave_count')
heatwave_counts_1970s_AT

Unnamed: 0,year,month,fips,heatwave_count
0,1970,1,01001,0
1,1970,1,01003,0
2,1970,1,01005,0
3,1970,1,01007,0
4,1970,1,01009,0
...,...,...,...,...
372595,1979,12,56037,0
372596,1979,12,56039,0
372597,1979,12,56041,0
372598,1979,12,56043,0


In [46]:
heatwave_counts_1970s_AT['heatwave_count'].describe()

count    372600.000000
mean          0.131444
std           0.461777
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           6.000000
Name: heatwave_count, dtype: float64

In [64]:
heatwave_counts_1980s_temp = df_1980s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['mean_temp_p85'].iloc[0]))
heatwave_counts_1980s_temp = heatwave_counts_1980s_temp.reset_index(name='heatwave_count')
heatwave_counts_1980s_temp

Unnamed: 0,year,month,fips,heatwave_count
0,1980,1,01001,0
1,1980,1,01003,0
2,1980,1,01005,0
3,1980,1,01007,0
4,1980,1,01009,0
...,...,...,...,...
372595,1989,12,56037,0
372596,1989,12,56039,0
372597,1989,12,56041,0
372598,1989,12,56043,0


In [66]:
heatwave_counts_1980s_temp['heatwave_count'].describe()

count    372600.000000
mean          0.006731
std           0.095989
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           4.000000
Name: heatwave_count, dtype: float64

In [47]:
heatwave_counts_1980s_AT = df_1980s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['AT_min_p85'].iloc[0]))
heatwave_counts_1980s_AT = heatwave_counts_1980s_AT.reset_index(name='heatwave_count')
heatwave_counts_1980s_AT

Unnamed: 0,year,month,fips,heatwave_count
0,1980,1,01001,0
1,1980,1,01003,0
2,1980,1,01005,0
3,1980,1,01007,0
4,1980,1,01009,0
...,...,...,...,...
372595,1989,12,56037,0
372596,1989,12,56039,0
372597,1989,12,56041,0
372598,1989,12,56043,0


In [48]:
heatwave_counts_1980s_AT['heatwave_count'].describe()

count    372600.000000
mean          0.198108
std           0.593176
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           6.000000
Name: heatwave_count, dtype: float64

In [69]:
heatwave_counts_1990s_temp = df_1990s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['mean_temp_p85'].iloc[0]))
heatwave_counts_1990s_temp = heatwave_counts_1990s_temp.reset_index(name='heatwave_count')
heatwave_counts_1990s_temp

Unnamed: 0,year,month,fips,heatwave_count
0,1990,1,01001,0
1,1990,1,01003,0
2,1990,1,01005,0
3,1990,1,01007,0
4,1990,1,01009,0
...,...,...,...,...
372595,1999,12,56037,0
372596,1999,12,56039,0
372597,1999,12,56041,0
372598,1999,12,56043,0


In [70]:
heatwave_counts_1990s_temp['heatwave_count'].describe()

count    372600.000000
mean          0.007525
std           0.099690
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           5.000000
Name: heatwave_count, dtype: float64

In [49]:
heatwave_counts_1990s_AT = df_1990s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['AT_min_p85'].iloc[0]))
heatwave_counts_1990s_AT = heatwave_counts_1990s_AT.reset_index(name='heatwave_count')
heatwave_counts_1990s_AT

Unnamed: 0,year,month,fips,heatwave_count
0,1990,1,01001,0
1,1990,1,01003,0
2,1990,1,01005,0
3,1990,1,01007,0
4,1990,1,01009,0
...,...,...,...,...
372595,1999,12,56037,0
372596,1999,12,56039,0
372597,1999,12,56041,0
372598,1999,12,56043,0


In [50]:
heatwave_counts_1990s_AT['heatwave_count'].describe()

count    372600.000000
mean          0.210776
std           0.612235
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           6.000000
Name: heatwave_count, dtype: float64

In [73]:
heatwave_counts_2000s_temp = df_2000s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['mean_temp_p85'].iloc[0]))
heatwave_counts_2000s_temp = heatwave_counts_2000s_temp.reset_index(name='heatwave_count')
heatwave_counts_2000s_temp

Unnamed: 0,year,month,fips,heatwave_count
0,2000,1,01001,0
1,2000,1,01003,0
2,2000,1,01005,0
3,2000,1,01007,0
4,2000,1,01009,0
...,...,...,...,...
372595,2009,12,56037,0
372596,2009,12,56039,0
372597,2009,12,56041,0
372598,2009,12,56043,0


In [74]:
heatwave_counts_2000s_temp['heatwave_count'].describe()

count    372600.000000
mean          0.007762
std           0.095181
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           4.000000
Name: heatwave_count, dtype: float64

In [51]:
heatwave_counts_2000s_AT = df_2000s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['AT_min_p85'].iloc[0]))
heatwave_counts_2000s_AT = heatwave_counts_2000s_AT.reset_index(name='heatwave_count')
heatwave_counts_2000s_AT

Unnamed: 0,year,month,fips,heatwave_count
0,2000,1,01001,0
1,2000,1,01003,0
2,2000,1,01005,0
3,2000,1,01007,0
4,2000,1,01009,0
...,...,...,...,...
372595,2009,12,56037,0
372596,2009,12,56039,0
372597,2009,12,56041,0
372598,2009,12,56043,0


In [52]:
heatwave_counts_2000s_AT['heatwave_count'].describe()

count    372600.000000
mean          0.237840
std           0.648596
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           6.000000
Name: heatwave_count, dtype: float64

In [77]:
heatwave_counts_2010s_temp = df_2010s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['mean_temp_p85'].iloc[0]))
heatwave_counts_2010s_temp = heatwave_counts_2010s_temp.reset_index(name='heatwave_count')
heatwave_counts_2010s_temp

Unnamed: 0,year,month,fips,heatwave_count
0,2010,1,01001,0
1,2010,1,01003,0
2,2010,1,01005,0
3,2010,1,01007,0
4,2010,1,01009,0
...,...,...,...,...
372595,2019,12,56037,0
372596,2019,12,56039,0
372597,2019,12,56041,0
372598,2019,12,56043,0


In [78]:
heatwave_counts_2010s_temp['heatwave_count'].describe()

count    372600.000000
mean          0.011989
std           0.128427
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           6.000000
Name: heatwave_count, dtype: float64

In [53]:
heatwave_counts_2010s_AT = df_2010s.groupby(['year', 'month', 'fips']).apply(lambda group: count_heatwaves(group['AT_min'], group['AT_min_p85'].iloc[0]))
heatwave_counts_2010s_AT = heatwave_counts_2010s_AT.reset_index(name='heatwave_count')
heatwave_counts_2010s_AT

Unnamed: 0,year,month,fips,heatwave_count
0,2010,1,01001,0
1,2010,1,01003,0
2,2010,1,01005,0
3,2010,1,01007,0
4,2010,1,01009,0
...,...,...,...,...
372595,2019,12,56037,0
372596,2019,12,56039,0
372597,2019,12,56041,0
372598,2019,12,56043,0


In [54]:
heatwave_counts_2010s_AT['heatwave_count'].describe()

count    372600.000000
mean          0.313001
std           0.767236
min           0.000000
25%           0.000000
50%           0.000000
75%           0.000000
max           7.000000
Name: heatwave_count, dtype: float64

In [55]:
count_AT = [heatwave_counts_1960s_AT,heatwave_counts_1970s_AT, heatwave_counts_1980s_AT, heatwave_counts_1990s_AT, heatwave_counts_2000s_AT,
              heatwave_counts_2010s_AT]

In [81]:
count_temp = [heatwave_counts_1960s_temp,heatwave_counts_1970s_temp, heatwave_counts_1980s_temp, heatwave_counts_1990s_temp, heatwave_counts_2000s_temp,
              heatwave_counts_2010s_temp]
count_AT = [heatwave_counts_1960s_AT,heatwave_counts_1970s_AT, heatwave_counts_1980s_AT, heatwave_counts_1990s_AT, heatwave_counts_2000s_AT,
              heatwave_counts_2010s_AT]

In [82]:
heatwave_count_temp = pd.concat(count_temp)
heatwave_count_temp

Unnamed: 0,year,month,fips,heatwave_count
0,1960,1,01001,0
1,1960,1,01003,0
2,1960,1,01005,0
3,1960,1,01007,0
4,1960,1,01009,0
...,...,...,...,...
372595,2019,12,56037,0
372596,2019,12,56039,0
372597,2019,12,56041,0
372598,2019,12,56043,0


In [56]:
heatwave_count_AT = pd.concat(count_AT)
heatwave_count_AT

Unnamed: 0,year,month,fips,heatwave_count
0,1960,1,01001,0
1,1960,1,01003,0
2,1960,1,01005,0
3,1960,1,01007,0
4,1960,1,01009,0
...,...,...,...,...
372595,2019,12,56037,0
372596,2019,12,56039,0
372597,2019,12,56041,0
372598,2019,12,56043,0


In [57]:
heatwave_count_AT.to_csv('heatwave_details/PRISM_min_AT_p85/heatwave_count_monthly_1960_2020.csv')

In [84]:
heatwave_count_temp.to_csv('heatwave_details/PRISM_mean_temp_p85/heatwave_count_monthly_1960_2020.csv')

In [85]:
heatwave_count_AT.to_csv('heatwave_details/PRISM_mean_AT_p85/heatwave_count_monthly_1960_2020.csv')

In [86]:
heatwave_count_temp['heatwave_count'].unique()

array([0, 1, 2, 3, 4, 5, 6])

In [87]:
heatwave_count_AT['heatwave_count'].unique()

array([0, 1])

### calculating heatwave_durations

In [104]:
df_data = [df_1960s, df_1970s, df_1980s, df_1990s, df_2000s, df_2010s, df_2020s]

In [None]:
df = pd.concat(df_data)
df

In [63]:
import pandas as pd
import numpy as np

# Assuming df is your DataFrame and has columns 'city', 'date', 'temperature'
# Assuming percentile_dict is your dictionary with 85th percentile temperatures for each city

# Sort by city and date
df = df.sort_values(['fips', 'month', 'date'])

In [64]:
years = [year for year in range(1960, 2021)]

In [65]:
# Define a function to calculate heatwave durations for each city
def calculate_heatwave(df, years):
    pattern = []
    for year in years:
        # Create a boolean series where True indicates a day is part of a heatwave
        city_df = df[df['year'] == year] 
        for fips in city_df['fips'].unique():
            df_fips = city_df[city_df['fips'] == fips]
            percentile = df_fips['mean_temp_p85']
            is_heatwave = df_fips['AT_min'] > percentile

            # Identify where True values change to False and vice versa,
            # and assign each group of consecutive True values a unique identifier
            heatwave_id = (is_heatwave.diff() != 0).cumsum()

            # Group by the identifiers and count the size of each group
            heatwave_lengths = heatwave_id[is_heatwave].value_counts()

            # Filter out heatwaves that are less than 2 days long
            heatwave_lengths = heatwave_lengths[heatwave_lengths >= 2]

            # Create a DataFrame with the start date of each heatwave and its duration
            heatwaves = pd.DataFrame({
                'fips': fips,
                'duration': heatwave_lengths.values,
                'year': year
            })
            pattern.append(heatwaves)
    
    return pattern

In [66]:
pattern = calculate_heatwave(df, years)
heatwave_durations = pd.concat(pattern)
heatwave_durations

Unnamed: 0,fips,duration,year
0,01045,2,1960
0,01047,5,1960
1,01047,3,1960
2,01047,2,1960
3,01047,2,1960
...,...,...,...
0,55025,2,2020
0,55029,2,2020
0,55043,2,2020
0,55051,2,2020


In [67]:
# Function to calculate heatwave durations
def calculate_heatwave_durations(series, threshold):
    # Create a boolean series where True represents exceeding the threshold
    is_above_threshold = series > threshold

    # Identify groups of consecutive days above threshold
    # The diff and cumsum methods identify groups of consecutive True values
    heatwave_groups = (is_above_threshold.diff(1) != 0).cumsum()

    # Count the length of each group
    group_lengths = heatwave_groups[is_above_threshold].value_counts()

    # Filter for groups that are at least two days long, and return their lengths
    heatwave_durations = group_lengths[group_lengths >= 2]

    return heatwave_durations

In [68]:
# Apply the function to each year and fips
heatwave_durations = df.groupby(['year', 'fips']).apply(lambda group: calculate_heatwave_durations(group['AT_min'], group['mean_temp_p85'].iloc[0]))
# This will result in a series where each element is a series of heat wave durations
# To convert this into a DataFrame where each row represents a heat wave, you can do:
heatwave_durations = heatwave_durations.explode().reset_index(name='heatwave_duration')
heatwave_durations

Unnamed: 0,year,fips,AT_min,heatwave_duration
0,1960,01045,6,2
1,1960,01047,4,5
2,1960,01047,18,3
3,1960,01047,8,2
4,1960,01047,10,2
...,...,...,...,...
41738,2020,55025,8,2
41739,2020,55029,2,2
41740,2020,55043,8,2
41741,2020,55051,4,2


In [69]:
fips_numbers = []
for year in years_30:
    dict_year = {}
    df_year = heatwave_durations[heatwave_durations['year'] == year]
    numbers = len(df_year['fips'].unique())
    dict_year[year] = numbers
    fips_numbers.append(dict_year)

In [70]:
fips_numbers

[{1960: 65},
 {1961: 70},
 {1962: 57},
 {1963: 70},
 {1964: 96},
 {1965: 61},
 {1966: 170},
 {1967: 38},
 {1968: 221},
 {1969: 192},
 {1970: 148},
 {1971: 64},
 {1972: 118},
 {1973: 179},
 {1974: 53},
 {1975: 105},
 {1976: 96},
 {1977: 245},
 {1978: 256},
 {1979: 155},
 {1980: 367},
 {1981: 278},
 {1982: 127},
 {1983: 367},
 {1984: 66},
 {1985: 97},
 {1986: 246},
 {1987: 286},
 {1988: 355},
 {1989: 172},
 {1990: 134},
 {1991: 203},
 {1992: 97},
 {1993: 261},
 {1994: 111},
 {1995: 413},
 {1996: 112},
 {1997: 249},
 {1998: 360},
 {1999: 709},
 {2000: 217},
 {2001: 468},
 {2002: 458},
 {2003: 248},
 {2004: 197},
 {2005: 677},
 {2006: 647},
 {2007: 614},
 {2008: 201},
 {2009: 287},
 {2010: 843},
 {2011: 1017},
 {2012: 542},
 {2013: 531},
 {2014: 196},
 {2015: 453},
 {2016: 756},
 {2017: 417},
 {2018: 656},
 {2019: 743},
 {2020: 504}]

In [71]:
heatwave_durations.to_csv('heatwave_1960_2020/heatwave_duration_1960_2020.csv')

In [88]:
df['date'] = pd.to_datetime(df['date'])

In [89]:
# Function to calculate heatwave dates
def calculate_heatwave_dates(series, threshold):
    # Create a boolean series where True represents exceeding the threshold
    is_above_threshold = series['AT_min'] > threshold

    # Identify groups of consecutive days above threshold
    # The diff and cumsum methods identify groups of consecutive True values
    heatwave_groups = (is_above_threshold.diff(1) != 0).cumsum()
    
    # Get the dates of each group
    group_dates = series.loc[is_above_threshold, 'date'].groupby(heatwave_groups)

    # Filter for groups that are at least two days long, and return their dates
    heatwave_dates = group_dates.filter(lambda dates: len(dates) >= 2)

    return heatwave_dates

In [None]:
# Apply the function to each year and fips
heatwave_dates = df.groupby(['year', 'month', 'fips']).apply(lambda group: calculate_heatwave_dates(group[['date', 'AT_min']], group['mean_temp_p85'].iloc[0]))
heatwave_dates

In [75]:
# The season length for each year and fips is the difference between the max and min date
season_lengths = heatwave_dates.groupby(level=['year','month','fips']).apply(lambda dates: dates.max() - dates.min())
season_lengths = season_lengths.explode().reset_index(name='heatwave_season_length')
season_lengths

Unnamed: 0,year,fips,heatwave_season_length
0,1960,01045,1 days
1,1960,01047,41 days
2,1960,01061,1 days
3,1960,01113,21 days
4,1960,05045,25 days
...,...,...,...
18136,2020,55025,1 days
18137,2020,55029,1 days
18138,2020,55043,1 days
18139,2020,55051,1 days


In [76]:
def findDaysNumber(days):
    days = str(days)
    return int(days.split()[0])+1

In [77]:
season_lengths['heatwave_length'] = season_lengths['heatwave_season_length'].apply(findDaysNumber)
season_lengths

Unnamed: 0,year,fips,heatwave_season_length,heatwave_length
0,1960,01045,1 days,2
1,1960,01047,41 days,42
2,1960,01061,1 days,2
3,1960,01113,21 days,22
4,1960,05045,25 days,26
...,...,...,...,...
18136,2020,55025,1 days,2
18137,2020,55029,1 days,2
18138,2020,55043,1 days,2
18139,2020,55051,1 days,2


In [78]:
season_lengths = season_lengths.drop('heatwave_season_length', axis = 1)
season_lengths = season_lengths.rename(columns = {'heatwave_length': 'heatwave_season_length'})
season_lengths

Unnamed: 0,year,fips,heatwave_season_length
0,1960,01045,2
1,1960,01047,42
2,1960,01061,2
3,1960,01113,22
4,1960,05045,26
...,...,...,...
18136,2020,55025,2
18137,2020,55029,2
18138,2020,55043,2
18139,2020,55051,2


In [79]:
fips_numbers = []
for year in years_30:
    dict_year = {}
    df_year = season_lengths[season_lengths['year'] == year]
    numbers = len(df_year['fips'].unique())
    dict_year[year] = numbers
    fips_numbers.append(dict_year)

In [80]:
fips_numbers

[{1960: 65},
 {1961: 70},
 {1962: 57},
 {1963: 70},
 {1964: 96},
 {1965: 61},
 {1966: 170},
 {1967: 38},
 {1968: 221},
 {1969: 192},
 {1970: 148},
 {1971: 64},
 {1972: 118},
 {1973: 179},
 {1974: 53},
 {1975: 105},
 {1976: 96},
 {1977: 245},
 {1978: 256},
 {1979: 155},
 {1980: 367},
 {1981: 278},
 {1982: 127},
 {1983: 367},
 {1984: 66},
 {1985: 97},
 {1986: 246},
 {1987: 286},
 {1988: 355},
 {1989: 172},
 {1990: 134},
 {1991: 203},
 {1992: 97},
 {1993: 261},
 {1994: 111},
 {1995: 413},
 {1996: 112},
 {1997: 249},
 {1998: 360},
 {1999: 709},
 {2000: 217},
 {2001: 468},
 {2002: 458},
 {2003: 248},
 {2004: 197},
 {2005: 677},
 {2006: 647},
 {2007: 614},
 {2008: 201},
 {2009: 287},
 {2010: 843},
 {2011: 1017},
 {2012: 542},
 {2013: 531},
 {2014: 196},
 {2015: 453},
 {2016: 756},
 {2017: 417},
 {2018: 656},
 {2019: 743},
 {2020: 504}]

In [81]:
season_lengths.to_csv('heatwave_1960_2020/heatwave_season_length_1960_2020.csv')

In [82]:
def celsius_to_fahrenheit(c):
    f = c * 9/5 + 32
    return f

In [83]:
df['AT_min'] = df['AT_min'].apply(celsius_to_fahrenheit)
df['mean_temp_p85'] = df['mean_temp_p85'].apply(celsius_to_fahrenheit)

In [75]:
# Function to calculate average exceeded temperatures above threshold during heatwaves
def calculate_heatwave_exceeded_temp_avgs(series, threshold):
    # Create a boolean series where True represents exceeding the threshold
    is_above_threshold = series['AT_min'] > threshold

    # Identify groups of consecutive days above threshold
    # The diff and cumsum methods identify groups of consecutive True values
    heatwave_groups = (is_above_threshold.diff(1) != 0).cumsum()

    # Get the mean of exceeded temperatures (temperature - threshold) above threshold in each group
    group_exceeded_temp_avgs = (series.loc[is_above_threshold, 'AT_min'] - threshold).groupby(heatwave_groups).mean()

    # Filter for groups that are at least two days long, and return their exceeded temperature averages
    heatwave_exceeded_temp_avgs = group_exceeded_temp_avgs[group_exceeded_temp_avgs.index.map(is_above_threshold.groupby(heatwave_groups).size() >= 2)]

    return heatwave_exceeded_temp_avgs

In [76]:
# Apply the function to each year and fips
heatwave_exceeded_temp_avgs = df.groupby(['year', 'fips']).apply(lambda group: calculate_heatwave_exceeded_temp_avgs(group[['AT_min']], group['mean_temp_p85'].iloc[0]))
heatwave_exceeded_temp_avgs 

year  fips   AT_min
1960  01045  6         1.126089
      01047  4         2.826390
             8         0.267139
             10        3.829449
             14        1.010165
                         ...   
2020  55025  8         0.621146
      55029  2         1.515584
      55043  8         0.777385
      55051  4         2.759159
      55061  2         1.067584
Name: AT_min, Length: 41836, dtype: float64

In [77]:
# This will result in a series where each element is a series of heat wave exceeded temperature averages
# To convert this into a DataFrame where each row represents a heat wave, you can do:
heatwave_exceeded_temp_avgs = heatwave_exceeded_temp_avgs.explode().reset_index(name='heatwave_exceeded_temp_avg')
heatwave_exceeded_temp_avgs

Unnamed: 0,year,fips,AT_min,heatwave_exceeded_temp_avg
0,1960,01045,6,1.126089
1,1960,01047,4,2.826390
2,1960,01047,8,0.267139
3,1960,01047,10,3.829449
4,1960,01047,14,1.010165
...,...,...,...,...
41831,2020,55025,8,0.621146
41832,2020,55029,2,1.515584
41833,2020,55043,8,0.777385
41834,2020,55051,4,2.759159


In [78]:
heatwave_exceeded_temp_avgs = heatwave_exceeded_temp_avgs.drop('AT_min', axis = 1)
heatwave_exceeded_temp_avgs = heatwave_exceeded_temp_avgs.rename(columns = {'heatwave_exceeded_temp_avg': 'heatwave_intensity'})
heatwave_exceeded_temp_avgs

Unnamed: 0,year,fips,heatwave_intensity
0,1960,01045,1.126089
1,1960,01047,2.826390
2,1960,01047,0.267139
3,1960,01047,3.829449
4,1960,01047,1.010165
...,...,...,...
41831,2020,55025,0.621146
41832,2020,55029,1.515584
41833,2020,55043,0.777385
41834,2020,55051,2.759159


In [79]:
fips_numbers = []
for year in years_30:
    dict_year = {}
    df_year = heatwave_exceeded_temp_avgs[heatwave_exceeded_temp_avgs['year'] == year]
    numbers = len(df_year['fips'].unique())
    dict_year[year] = numbers
    fips_numbers.append(dict_year)

In [80]:
fips_numbers

[{1960: 67},
 {1961: 70},
 {1962: 57},
 {1963: 70},
 {1964: 96},
 {1965: 61},
 {1966: 170},
 {1967: 38},
 {1968: 222},
 {1969: 192},
 {1970: 148},
 {1971: 65},
 {1972: 118},
 {1973: 179},
 {1974: 53},
 {1975: 105},
 {1976: 96},
 {1977: 245},
 {1978: 263},
 {1979: 160},
 {1980: 376},
 {1981: 278},
 {1982: 128},
 {1983: 373},
 {1984: 66},
 {1985: 97},
 {1986: 250},
 {1987: 291},
 {1988: 358},
 {1989: 173},
 {1990: 134},
 {1991: 204},
 {1992: 97},
 {1993: 263},
 {1994: 111},
 {1995: 414},
 {1996: 115},
 {1997: 253},
 {1998: 360},
 {1999: 712},
 {2000: 217},
 {2001: 469},
 {2002: 468},
 {2003: 253},
 {2004: 206},
 {2005: 681},
 {2006: 658},
 {2007: 617},
 {2008: 204},
 {2009: 289},
 {2010: 845},
 {2011: 1017},
 {2012: 542},
 {2013: 534},
 {2014: 196},
 {2015: 453},
 {2016: 757},
 {2017: 419},
 {2018: 660},
 {2019: 748},
 {2020: 504}]

In [81]:
heatwave_exceeded_temp_avgs.to_csv('heatwave_1960_2020/heatwave_intensity_1960_2020.csv')