In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import dask.dataframe as ddf
from pandas import Series, DataFrame
%matplotlib inline
import seaborn as sn

In [2]:
import geopandas as gpd
import pandas as pd

## Load daily temperature 

In [3]:
years = [year for year in range(1960, 2020)]

In [4]:
pattern = []
for year in years:
    temp_year = ddf.read_csv(r'/global/cfs/cdirs/m1532/Projects_MVP/geospatial/PRISM_Data/PRISM_daily_county_level/prism_daily_county_level_' + str(year) + '.csv', dtype = {'year': int, 'fips': str}).compute().drop(columns={'Unnamed: 0'})
    pattern.append(temp_year)

In [5]:
temp = pd.concat(pattern)
temp.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year
0,1001,1960-01-01,5.417989,2.635663,8.200316,2.279021,1960
1,1001,1960-01-02,7.005736,4.201073,9.810398,10.412733,1960
2,1001,1960-01-03,9.061477,5.235675,12.887278,16.011329,1960
3,1001,1960-01-04,5.334175,-0.74457,11.412921,0.216112,1960
4,1001,1960-01-05,6.495222,3.006032,9.984412,0.274311,1960


In [6]:
temp['year'].unique()

array([1960, 1961, 1962, 1963, 1964, 1965, 1966, 1967, 1968, 1969, 1970,
       1971, 1972, 1973, 1974, 1975, 1976, 1977, 1978, 1979, 1980, 1981,
       1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991, 1992,
       1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
       2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014,
       2015, 2016, 2017, 2018, 2019])

### Definition 3: A threshold equal to the 95th percentile of local (grid cell) daily maximum temperature for each extended summer month (May through September) in each year is used to define heat waves. Heat wave events must also last for at least three consecutive days, and events are considered temporally independent if separated by four or more days of below threshold temperatures. Link: https://link.springer.com/article/10.1007/s10584-022-03476-y#Tab1

In [7]:
import datetime

In [8]:
def convertTime(time):
    return datetime.strptime(time, '%Y-%m-%d')

In [9]:
def getMonth(date):
    return(date.strftime('%b'))

In [10]:
temp['date'] = pd.to_datetime(temp['date'])
temp['month'] = temp['date'].apply(getMonth)

In [11]:
month_replace = {'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4, 'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8, 'Sep': 9, 'Oct': 10,
                'Nov': 11, 'Dec': 12}

In [12]:
temp['month'] = temp['month'].replace(month_replace)
temp.head()

Unnamed: 0,fips,date,tMean,tMin,tMax,prec,year,month
0,1001,1960-01-01,5.417989,2.635663,8.200316,2.279021,1960,1
1,1001,1960-01-02,7.005736,4.201073,9.810398,10.412733,1960,1
2,1001,1960-01-03,9.061477,5.235675,12.887278,16.011329,1960,1
3,1001,1960-01-04,5.334175,-0.74457,11.412921,0.216112,1960,1
4,1001,1960-01-05,6.495222,3.006032,9.984412,0.274311,1960,1


In [32]:
climate = temp[['year', 'month', 'date', 'fips', 'tMean', 'tMin', 'tMax']]
climate.head()

Unnamed: 0,year,month,date,fips,tMean,tMin,tMax
0,1960,1,1960-01-01,1001,5.417989,2.635663,8.200316
1,1960,1,1960-01-02,1001,7.005736,4.201073,9.810398
2,1960,1,1960-01-03,1001,9.061477,5.235675,12.887278
3,1960,1,1960-01-04,1001,5.334175,-0.74457,11.412921
4,1960,1,1960-01-05,1001,6.495222,3.006032,9.984412


In [33]:
summer_months = [5, 6, 7, 8, 9]
df_summer = climate[climate['month'].isin(summer_months)]

In [34]:
percentile_95 = df_summer.groupby(['fips', 'year', 'month'])['tMax'].quantile(0.95).reset_index()
percentile_95.rename(columns={'tMax': 'threshold_95'}, inplace=True)
percentile_95

Unnamed: 0,fips,year,month,threshold_95
0,01001,1960,5,33.311427
1,01001,1960,6,34.884412
2,01001,1960,7,36.050825
3,01001,1960,8,34.327950
4,01001,1960,9,34.157964
...,...,...,...,...
931495,56045,2019,5,24.976680
931496,56045,2019,6,29.844211
931497,56045,2019,7,32.050554
931498,56045,2019,8,31.872316


In [35]:
climate = climate.merge(percentile_95, on=['fips', 'year', 'month'], how='left')
climate.head()

Unnamed: 0,year,month,date,fips,tMean,tMin,tMax,threshold_95
0,1960,1,1960-01-01,1001,5.417989,2.635663,8.200316,
1,1960,1,1960-01-02,1001,7.005736,4.201073,9.810398,
2,1960,1,1960-01-03,1001,9.061477,5.235675,12.887278,
3,1960,1,1960-01-04,1001,5.334175,-0.74457,11.412921,
4,1960,1,1960-01-05,1001,6.495222,3.006032,9.984412,


In [36]:
climate

Unnamed: 0,year,month,date,fips,tMean,tMin,tMax,threshold_95
0,1960,1,1960-01-01,01001,5.417989,2.635663,8.200316,
1,1960,1,1960-01-02,01001,7.005736,4.201073,9.810398,
2,1960,1,1960-01-03,01001,9.061477,5.235675,12.887278,
3,1960,1,1960-01-04,01001,5.334175,-0.744570,11.412921,
4,1960,1,1960-01-05,01001,6.495222,3.006032,9.984412,
...,...,...,...,...,...,...,...,...
68046070,2019,12,2019-12-27,56045,-3.423038,-9.134636,2.288561,
68046071,2019,12,2019-12-28,56045,-3.415311,-7.042524,0.211902,
68046072,2019,12,2019-12-29,56045,-4.982109,-7.380541,-2.583677,
68046073,2019,12,2019-12-30,56045,-5.757468,-10.321875,-1.193062,


In [37]:
climate['is_heatwave_day'] = climate['tMax'] > climate['threshold_95']
climate.head()

Unnamed: 0,year,month,date,fips,tMean,tMin,tMax,threshold_95,is_heatwave_day
0,1960,1,1960-01-01,1001,5.417989,2.635663,8.200316,,False
1,1960,1,1960-01-02,1001,7.005736,4.201073,9.810398,,False
2,1960,1,1960-01-03,1001,9.061477,5.235675,12.887278,,False
3,1960,1,1960-01-04,1001,5.334175,-0.74457,11.412921,,False
4,1960,1,1960-01-05,1001,6.495222,3.006032,9.984412,,False


In [39]:
def identify_heatwaves(group):
    #group = group.sort_values(by='date').reset_index(drop=True)
    
    # Identify consecutive heatwave days
    group['heatwave_event'] = (group['is_heatwave_day'].shift(1, fill_value=False) != group['is_heatwave_day']).cumsum()
    
    # Filter heatwave events that last at least three consecutive days
    heatwave_events = group[group['is_heatwave_day']].groupby('heatwave_event').filter(lambda x: len(x) >= 3)
    
    # Separate independent events
    heatwave_events['event_gap'] = (heatwave_events['date'].diff().dt.days > 3).cumsum()
    
    return heatwave_events

In [40]:
heatwave_events = climate.groupby('fips').apply(identify_heatwaves).reset_index(drop=True)

# Count the number of heatwave events in each month
heatwave_counts = heatwave_events.groupby(['fips', 'year', 'month', 'event_gap']).size().reset_index(name='heatwave_count')

heatwave_counts = heatwave_counts.sort_values(by=['year', 'fips']).reset_index(drop=True)
heatwave_counts

Unnamed: 0,fips,year,month,event_gap,heatwave_count
0,01051,1960,6,0,1
1,01051,1960,7,0,2
2,01087,1960,6,0,1
3,01087,1960,7,0,2
4,13009,1960,6,0,1
...,...,...,...,...,...
29483,49017,2019,9,6,2
29484,49025,2019,8,5,1
29485,49025,2019,9,5,2
29486,55113,2019,5,9,2


In [41]:
heatwave_counts = heatwave_counts.sort_values(by=['fips', 'year']).reset_index(drop=True)
heatwave_counts

Unnamed: 0,fips,year,month,event_gap,heatwave_count
0,01001,1964,8,0,2
1,01001,1964,9,0,2
2,01001,1968,6,1,1
3,01001,1968,7,1,2
4,01001,1978,6,2,2
...,...,...,...,...,...
29483,56041,2000,8,1,2
29484,56043,1990,6,0,1
29485,56043,1990,7,0,2
29486,56045,1990,6,0,1


In [42]:
heatwave_counts = heatwave_counts.drop('event_gap', axis = 1)
heatwave_counts

Unnamed: 0,fips,year,month,heatwave_count
0,01001,1964,8,2
1,01001,1964,9,2
2,01001,1968,6,1
3,01001,1968,7,2
4,01001,1978,6,2
...,...,...,...,...
29483,56041,2000,8,2
29484,56043,1990,6,1
29485,56043,1990,7,2
29486,56045,1990,6,1


In [43]:
heatwave_counts.to_csv('heatwave_definition3/heatwave_count_definition3.csv')