# start_pakistan_correlations
## HWFL_computeCorrelDailyTempsPrecip.ipynb
This notebook computes the Pearson correlation between the daily meteorology of Pakistan heatwaves (HW) and floods (FL), using the GFS 2-metre temperature data for the heatwave cities and the CHIRPS 95th-percentile daily precipitation over the entire Indus basin.

In [1]:
from pathlib import Path
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date, timedelta
import matplotlib.pyplot as plt
from scipy.stats.stats import pearsonr

  from scipy.stats.stats import pearsonr


In [2]:
# Project root directory; every input and output path below is built from it
rootPath = Path(
    'C:/Users/alexa/Documents/02_work/02_start/02_deliv/05_pk_correlation/'
)

## Get the HW and FL daily temperature and precipitation data

In [12]:
# Load the daily CHIRPS precipitation statistics (Indus basin, 1981-2022)
precipPath = rootPath/'fl/data/chirps'
precipData = pd.read_csv(precipPath/'CHIRPSDailyIndus1981-2022.csv')

# Parse the YYYYMMDD 'date' column in a single vectorised call instead of one
# to_datetime call per row. to_numpy() drops the Series name so the resulting
# DatetimeIndex stays unnamed.
precipData.index = pd.to_datetime(precipData['date'], format='%Y%m%d').to_numpy()
precipData = precipData.drop('date', axis=1)
precipData

Unnamed: 0,mean,median,p95
1981-01-01,0.000000,0.000000,0.000000
1981-01-02,2.616719,0.000036,13.866395
1981-01-03,3.128428,0.000000,15.615570
1981-01-04,4.383004,0.619525,16.365246
1981-01-05,0.728298,0.000000,5.691761
...,...,...,...
2022-12-27,0.000000,0.000000,0.000000
2022-12-28,0.114238,0.000000,0.000000
2022-12-29,1.790827,0.000000,6.558889
2022-12-30,0.000592,0.000000,0.000000


In [13]:
# Each heatwave site has its own sub-folder of extracts under hwPath
hwPath = rootPath/'hw/data/city_extracts_2023'

# Path.iterdir() yields entries in arbitrary order and also returns plain files.
# Keep directories only (a stray file would later be treated as a site) and sort
# so the site order is reproducible across machines.
sites = sorted(item.stem for item in hwPath.iterdir() if item.is_dir())

In [19]:
# Build one table of daily temperatures with one column per site

# Empty frame indexed by every day of the analysis period, so that days missing
# from a site file still appear in the merged table (as NaN)
startDate=date(2004,4,1)
endDate=date(2022,12,31)
allDailyData = pd.DataFrame(index=pd.date_range(startDate, endDate, freq='D', name='time'))
keepCols=['t2m_cel']

# Loop through sites
for site in sites:

    # Read the daily data
    siteFile = hwPath/'{0}/gfsanl_daily_{1}_allyears.csv'.format(site, site)
    siteData = pd.read_csv(siteFile)
    siteData.index = pd.to_datetime(siteData.time)

    # Name the temperature column after the site BEFORE merging. Merging several
    # frames that all carry a 't2m_cel' column makes pandas add _x/_y suffixes,
    # which collide from the fourth site onwards (a warning in older pandas,
    # a MergeError in pandas >= 2.0).
    siteData = siteData[keepCols].rename(columns={keepCols[0]: site})

    # Merge the site data into the allDailyData dataframe
    allDailyData = allDailyData.merge(siteData, left_index=True, right_index=True, how='outer')

# Columns are already named after the sites, in the order of `sites`
hwData=allDailyData.copy(deep=True)

  allDailyData = allDailyData.merge(siteData, left_index=True, right_index=True, how='outer')
  allDailyData = allDailyData.merge(siteData, left_index=True, right_index=True, how='outer')


In [20]:
# Preview the merged table of daily temperatures (one column per site)
hwData

Unnamed: 0_level_0,Jacobabad,Karachi_Jinnah_Airport,Lahore,Multan,Nawabshah,Sibi
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2004-04-01,31.0250,26.4750,26.3500,30.7500,29.8750,22.7750
2004-04-02,30.8500,27.1250,29.7250,28.5750,31.4000,23.3500
2004-04-03,31.8500,27.5500,28.4750,28.4500,32.4000,22.9500
2004-04-04,31.6750,28.2750,30.0250,29.7000,32.7500,24.1000
2004-04-05,31.7250,27.5000,29.8000,31.6000,32.2000,24.9000
...,...,...,...,...,...,...
2022-12-27,18.3225,21.7150,14.1325,16.7575,19.0175,12.8875
2022-12-28,19.7075,21.9275,14.9500,18.2525,21.3325,14.5625
2022-12-29,19.5150,23.4425,11.3700,15.8850,23.1975,14.2550
2022-12-30,18.1700,21.6725,12.8525,14.0400,19.0100,13.2950


## Compute correlations
Compute correlation for each city

In [38]:
# Calendar months used for the heatwave-season correlation: April to July
hwMonths = [4, 5, 6, 7]

In [42]:
# Calendar months used for the flood-season correlation: July to September
flMonths = [7, 8, 9]

In [24]:
# List the site names, i.e. the columns the correlation loop iterates over
hwData.columns

Index(['Jacobabad', 'Karachi_Jinnah_Airport', 'Lahore', 'Multan', 'Nawabshah',
       'Sibi'],
      dtype='object')

In [44]:
# Correlate each site's daily temperature with the chosen basin precipitation
# metric: over all days, over the heatwave months only, and over the flood months only
precipMetric='p95'
resultCols = ['HWsite','precipMetric','pearson-r','p', 'HWMonths_pearson-r','HWMonths_p', 'FLMonths_pearson-r','FLMonths_p']


def _pearsonForMonths(pairs, months=None):
    """Return [Pearson r, 2-tailed p-value] for the two columns of *pairs*.

    pairs  : DataFrame with a DatetimeIndex; column 0 and column 1 are correlated.
    months : optional iterable of calendar month numbers; when given, only rows
             whose index falls in those months are used.
    """
    if months is not None:
        pairs = pairs[pairs.index.month.isin(months)]
    return list(pearsonr(pairs.iloc[:,0], pairs.iloc[:,1]))


resultRows = []
for site in hwData.columns:

    # Pair the site's daily temps with the precip on the same dates (inner join
    # on the date index), built once and reused for all three windows.
    # Rows with a missing value on either side are dropped, because a single NaN
    # would turn the whole correlation into NaN.
    corrData = hwData[[site]].merge(precipData[precipMetric], left_index=True, right_index=True).dropna()

    # All days, heatwave months only, flood months only
    resultRows.append(
        [site, precipMetric]
        + _pearsonForMonths(corrData)
        + _pearsonForMonths(corrData, hwMonths)
        + _pearsonForMonths(corrData, flMonths)
    )

# Build the results table once instead of concatenating inside the loop; rows
# now get a unique 0..n-1 index rather than every row being labelled 0
correlsResultData = pd.DataFrame(resultRows, columns=resultCols)

In [45]:
# Display the per-site correlation results
correlsResultData

Unnamed: 0,HWsite,precipMetric,pearson-r,p,HWMonths_pearson-r,HWMonths_p,FLMonths_pearson-r,FLMonths_p
0,Jacobabad,p95,0.216514,0.0,0.078134,0.000166,0.094183,8e-05
0,Karachi_Jinnah_Airport,p95,0.139241,0.0,-0.052874,0.010895,0.02382,0.319574
0,Lahore,p95,0.196351,0.0,0.034255,0.099186,-0.016783,0.483149
0,Multan,p95,0.225015,0.0,0.118728,0.0,0.085248,0.00036
0,Nawabshah,p95,0.16668,0.0,-0.130267,0.0,-0.031763,0.184384
0,Sibi,p95,0.225637,0.0,0.124725,0.0,0.185498,0.0


In [46]:
# Save the results as a csv
outFile = rootPath/'cross-peril/hwfl_correlDailyTempsPrecip.csv'

# Create the output folder if it is missing; to_csv does not create parent
# directories and would otherwise fail on a fresh checkout
outFile.parent.mkdir(parents=True, exist_ok=True)
correlsResultData.to_csv(outFile, index=False)