# start_pakistan_correlations
## HWDR_computeCorrelTriggers.ipynb
This notebook computes the Pearson correlation between every heatwave (HW) location and every drought (DR) location for the DRF, based on the yearly trigger counts at each location.

In [2]:
from pathlib import Path
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date, timedelta
import matplotlib.pyplot as plt
from scipy import stats
from scipy.stats.stats import pearsonr
import scipy.spatial.distance
import seaborn as sb

  from scipy.stats.stats import pearsonr


In [3]:
# Resolve the working folders: the current directory is the cross-peril folder,
# and the HW / DR data folders hang off its parent directory.
crossPerilPath = Path.cwd()
rootPath = crossPerilPath.parents[0]
hwPath = rootPath.joinpath('HW/data')
drPath = rootPath.joinpath('DR/data')

## Load the yearly trigger data for both perils

In [4]:
# Read the heatwave triggers and use the 'year' column as the row index
# (set_index removes the column from the data at the same time).
hwTriggers = pd.read_csv(hwPath.joinpath('city_triggers/triggerYears.csv')).set_index('year')
hwTriggers

Unnamed: 0_level_0,Jacobabad,Karachi_Jinnah_Airport,Lahore,Multan,Nawabshah,Sibi
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004,0.0,0.0,0.0,0.0,0.0,0.0
2005,0.0,1.0,1.0,1.0,0.0,0.0
2006,1.0,0.0,0.0,0.0,0.0,0.0
2007,0.0,1.0,1.0,1.0,0.0,0.0
2008,0.0,0.0,0.0,0.0,0.0,0.0
2009,1.0,0.0,1.0,0.0,0.0,0.0
2010,0.0,0.0,0.0,2.0,0.0,0.0
2011,0.0,0.0,0.0,1.0,0.0,0.0
2012,0.0,0.0,1.0,1.0,0.0,0.0
2013,0.0,1.0,2.0,0.0,0.0,0.0


In [5]:
# Read the drought triggers; the first CSV column becomes the row index
drTriggers = pd.read_csv(drPath.joinpath('triggers/allDetrendedTriggers.csv'), index_col=0)
# Cast the index labels to int
drTriggers.index = drTriggers.index.astype(int)
# Keep only the part of each column name before the first underscore
drTriggers = drTriggers.rename(columns=lambda label: label.split('_')[0])

In [6]:
# Display the drought trigger table (integer index, column names cut at the first underscore)
drTriggers

Unnamed: 0_level_0,Balochistan,Punjab,Sindh
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1983,,0.0,
1984,,1.0,
1985,,0.0,
1986,,0.0,
1987,,1.0,
1988,,0.0,
1989,,0.0,
1990,,0.0,
1991,,1.0,
1992,,0.0,


## Compute correlation of annual triggers for all pairs of HW-DR locations

In [21]:
# Site names are the column labels of each trigger table
hwSites = list(hwTriggers.columns)
drSites = list(drTriggers.columns)

In [22]:
# Create sitePairs: every heatwave site combined with every drought site.
# sites1 and sites2 are parallel lists, i.e. (sites1[k], sites2[k]) is the k-th pair;
# the heatwave site varies slowest and the drought site fastest.
# (The enumerate counters of the previous nested loop were never used.)
sitePairs = [(hwSite, drSite) for hwSite in hwSites for drSite in drSites]
sites1 = [hwSite for hwSite, _ in sitePairs]
sites2 = [drSite for _, drSite in sitePairs]

In [25]:
# Display the heatwave trigger series currently held in hwData.
# NOTE(review): in this file hwData is only assigned inside the correlation loops further
# down, so on a fresh top-to-bottom run this cell would hit a NameError — presumably it was
# executed after a loop run; confirm, or move it below the loop.
hwData

year
2004    0.0
2005    0.0
2006    1.0
2007    0.0
2008    0.0
2009    1.0
2010    0.0
2011    0.0
2012    0.0
2013    0.0
2014    1.0
2015    1.0
2016    4.0
2017    1.0
2018    3.0
2019    2.0
2020    1.0
2021    0.0
2022    0.0
Name: Jacobabad, dtype: float64

In [26]:
# Display the drought trigger series currently held in drData.
# NOTE(review): in this file drData is only assigned inside the correlation loops further
# down, so on a fresh top-to-bottom run this cell would hit a NameError — presumably it was
# executed after a loop run; confirm, or move it below the loop.
drData

Unnamed: 0
1983    NaN
1984    NaN
1985    NaN
1986    NaN
1987    NaN
1988    NaN
1989    NaN
1990    NaN
1991    NaN
1992    NaN
1993    NaN
1994    NaN
1995    NaN
1996    NaN
1997    NaN
1998    NaN
1999    NaN
2000    NaN
2001    NaN
2002    1.0
2003    0.0
2004    1.0
2005    0.0
2006    0.0
2007    0.0
2008    0.0
2009    0.0
2010    0.0
2011    0.0
2012    1.0
2013    0.0
2014    0.0
2015    0.0
2016    0.0
2017    0.0
2018    0.0
2019    0.0
2020    0.0
2021    1.0
Name: Balochistan, dtype: float64

Unnamed: 0,Balochistan,Jacobabad
2004,1.0,0.0
2005,0.0,0.0
2006,0.0,1.0
2007,0.0,0.0
2008,0.0,0.0
2009,0.0,1.0
2010,0.0,0.0
2011,0.0,0.0
2012,1.0,0.0
2013,0.0,0.0


In [32]:
# Loop through site pairs; for each, compute and record the correlation - use only years with overlapping data.
#
# Results:
#   correlsData        - one row per pair: hwSite, drSite, correl (pandas Series.corr)
#   correlsResultData  - one row per pair: hwSite, drSite, pearsonr, p (scipy pearsonr)
#
# Rows are collected in plain lists and converted to DataFrames once after the loop.
# Growing a DataFrame with pd.concat on every iteration re-copies all earlier rows each
# time and concatenates onto an empty frame, which newer pandas releases flag with a
# FutureWarning. Building the frames in one go also gives the numeric columns a float
# dtype instead of object.
correlRows = []        # (hwSite, drSite, correl)
correlResultRows = []  # (hwSite, drSite, pearsonr, p)

for hwSite, drSite in zip(sites1, sites2):

    # Get data for both sites
    hwData = hwTriggers[hwSite]
    drData = drTriggers[drSite]

    # Get matching years: inner join on the index, then drop years where either value is NaN
    mergeData = pd.DataFrame(drData).merge(pd.DataFrame(hwData), left_index=True, right_index=True)
    mergeData = mergeData.dropna()
    dr = mergeData.iloc[:, 0]  # drought column comes first because drData is the left frame
    hw = mergeData.iloc[:, 1]

    # Compute correlation
    correl = hw.corr(dr)
    if len(mergeData) < 2:
        # pearsonr rejects inputs with fewer than two observations; record NaN for such
        # pairs instead of aborting the whole loop (Series.corr already yields NaN here)
        correlResult = [np.nan, np.nan]
    else:
        correlResult = list(pearsonr(hw, dr))  # (Pearson's correlation coefficient, 2-tailed p-value)

    # Record the results for this pair
    correlRows.append((hwSite, drSite, correl))
    correlResultRows.append((hwSite, drSite, *correlResult))

correlsData = pd.DataFrame(correlRows, columns=['hwSite', 'drSite', 'correl'])
correlsResultData = pd.DataFrame(correlResultRows, columns=['hwSite', 'drSite', 'pearsonr', 'p'])

In [39]:
# Reshape the long results table into a matrix of Pearson coefficients:
# one row per heatwave site, one column per drought site
pearsonrMatrix = correlsResultData.pivot(
    index='hwSite',
    columns='drSite',
    values='pearsonr',
)
pearsonrMatrix

drSite,Balochistan,Punjab,Sindh
hwSite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jacobabad,-0.333333,0.485071,0.2
Karachi_Jinnah_Airport,-0.2,0.478091,0.0
Lahore,0.154672,-0.19868,-0.216541
Multan,-0.21693,0.108465,0.433861
Nawabshah,-0.260931,0.39736,0.130466
Sibi,0.16,0.468165,-0.08


In [40]:
# Reshape the long results table into a matrix of 2-tailed p-values:
# one row per heatwave site, one column per drought site
pMatrix = correlsResultData.pivot(
    index='hwSite',
    columns='drSite',
    values='p',
)
pMatrix

drSite,Balochistan,Punjab,Sindh
hwSite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jacobabad,0.176463,0.06684,0.426206
Karachi_Jinnah_Airport,0.426206,0.071455,1.0
Lahore,0.54,0.477795,0.388106
Multan,0.387232,0.700402,0.072039
Nawabshah,0.295645,0.142471,0.605858
Sibi,0.525953,0.078407,0.752344


In [42]:
# Save both matrices as csv under <crossPerilPath>/data (folder is created if missing)
outPath = crossPerilPath.joinpath('data')
outPath.mkdir(exist_ok=True)
pearsonrMatrix.to_csv(outPath.joinpath('hwdr_Pearsonr.csv'))
pMatrix.to_csv(outPath.joinpath('hwdr_pval.csv'))

### Repeat with at most one activation per HW location per season

In [44]:
# Loop through site pairs; for each, compute and record the correlation - use only years with overlapping data.
# Same procedure as the uncapped run, except that heatwave counts above 1 are capped at 1.
#
# Results:
#   correlsData        - one row per pair: hwSite, drSite, correl (pandas Series.corr)
#   correlsResultData  - one row per pair: hwSite, drSite, pearsonr, p (scipy pearsonr)
#
# Rows are collected in plain lists and converted to DataFrames once after the loop.
# Growing a DataFrame with pd.concat on every iteration re-copies all earlier rows each
# time and concatenates onto an empty frame, which newer pandas releases flag with a
# FutureWarning. Building the frames in one go also gives the numeric columns a float
# dtype instead of object.
correlRows = []        # (hwSite, drSite, correl)
correlResultRows = []  # (hwSite, drSite, pearsonr, p)

for hwSite, drSite in zip(sites1, sites2):

    # Get data for both sites.
    # Limit hw to max of one activation per loc per season. clip() returns a new Series, so
    # hwTriggers keeps its original counts. The previous in-place assignment
    # `hwData[hwData>1]=1` operated on a column selected from hwTriggers and could write
    # through to it, silently changing the input of any cell re-run afterwards.
    hwData = hwTriggers[hwSite].clip(upper=1)
    drData = drTriggers[drSite]

    # Get matching years: inner join on the index, then drop years where either value is NaN
    mergeData = pd.DataFrame(drData).merge(pd.DataFrame(hwData), left_index=True, right_index=True)
    mergeData = mergeData.dropna()
    dr = mergeData.iloc[:, 0]  # drought column comes first because drData is the left frame
    hw = mergeData.iloc[:, 1]

    # Compute correlation
    correl = hw.corr(dr)
    if len(mergeData) < 2:
        # pearsonr rejects inputs with fewer than two observations; record NaN for such
        # pairs instead of aborting the whole loop (Series.corr already yields NaN here)
        correlResult = [np.nan, np.nan]
    else:
        correlResult = list(pearsonr(hw, dr))  # (Pearson's correlation coefficient, 2-tailed p-value)

    # Record the results for this pair
    correlRows.append((hwSite, drSite, correl))
    correlResultRows.append((hwSite, drSite, *correlResult))

correlsData = pd.DataFrame(correlRows, columns=['hwSite', 'drSite', 'correl'])
correlsResultData = pd.DataFrame(correlResultRows, columns=['hwSite', 'drSite', 'pearsonr', 'p'])

In [45]:
# Reshape the current long results table into a matrix of Pearson coefficients:
# one row per heatwave site, one column per drought site
pearsonrMatrix = correlsResultData.pivot(
    index='hwSite',
    columns='drSite',
    values='pearsonr',
)
pearsonrMatrix

drSite,Balochistan,Punjab,Sindh
hwSite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jacobabad,-0.447214,0.188982,0.149071
Karachi_Jinnah_Airport,-0.149071,0.188982,-0.149071
Lahore,0.149071,-0.094491,-0.149071
Multan,-0.149071,-0.094491,0.149071
Nawabshah,-0.27735,0.213201,0.05547
Sibi,-0.050965,0.533002,-0.050965


In [46]:
# Reshape the current long results table into a matrix of 2-tailed p-values:
# one row per heatwave site, one column per drought site
pMatrix = correlsResultData.pivot(
    index='hwSite',
    columns='drSite',
    values='p',
)
pMatrix

drSite,Balochistan,Punjab,Sindh
hwSite,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jacobabad,0.062772,0.499964,0.554946
Karachi_Jinnah_Airport,0.554946,0.499964,0.554946
Lahore,0.554946,0.737652,0.554946
Multan,0.554946,0.737652,0.554946
Nawabshah,0.265168,0.445508,0.826953
Sibi,0.84083,0.040772,0.84083


In [47]:
# Save both matrices as csv under <crossPerilPath>/data (folder is created if missing);
# the _hwMax1 suffix keeps these files separate from the other output files
outPath = crossPerilPath.joinpath('data')
outPath.mkdir(exist_ok=True)
pearsonrMatrix.to_csv(outPath.joinpath('hwdr_Pearsonr_hwMax1.csv'))
pMatrix.to_csv(outPath.joinpath('hwdr_pval_hwMax1.csv'))