# start_pakistan_correlations
## DR_calcTriggers.ipynb
This script calculates the number of annual triggers using the data for Pakistan drought, specifically the 20%ile threshold foer each district and the total Province 25% population threshold.

In [2]:
from pathlib import Path
import os
import sys
import pandas as pd
import numpy as np
from datetime import datetime
from datetime import date, timedelta
import matplotlib.pyplot as plt
from scipy import signal
from scipy import stats

In [3]:
# Set the root path
rootPath = Path('C:/Users/alexa/Documents/02_work/02_start/02_deliv/05_pk_correlation/dr/data/')

## Load the population data and the Province population thresholds

In [4]:
provinceThreshold=pd.read_csv(rootPath/'prep/ProvinceThreshold.csv')

In [5]:
provinceThreshold

Unnamed: 0,Province,Rural_population,Province_threshold
0,Balochistan,8740869,2185217.25
1,Punjab,70618306,17654576.5
2,Sindh,20195605,5048901.25


In [6]:
popData=pd.read_csv(rootPath/'emily/demographic_updated.csv')
popData=popData[['District_id','Rural_population']]

In [7]:
popData

Unnamed: 0,District_id,Rural_population
0,601.0,1393550
1,701.0,87584
2,801.0,1414138
3,602.0,2391743
4,603.0,2750310
...,...,...
84,823.0,829785
85,636.0,1658756
86,729.0,153877
87,730.0,264190


## Calculate triggers

In [8]:
provinces = list(provinceThreshold.index.values)
variables=['precip','yield','vhi']

In [9]:
outPath=rootPath/'triggers'
outPath.mkdir(exist_ok=True)

### Balochistan - precip

In [10]:
precipYears = pd.read_csv(rootPath/'prep/precipThreshold.csv')
precipYears=precipYears.set_index('District_id')
precipThresh=list(precipYears.pctile)
precipYears=precipYears.drop(columns=['pctile'])

In [11]:
triggerDF=pd.DataFrame(data=[],index=precipYears.columns)
for i, distID in enumerate(precipYears.index):
    thresh=precipThresh[i]
    pop=popData.Rural_population[popData.District_id==distID].values[0]
    varYears=precipYears[precipYears.index==distID]
    interDF=(varYears<=thresh).T
    interDF[interDF[distID]==True]=pop
    interDF[interDF[distID]==False]=0
    triggerDF = triggerDF.merge(interDF, left_index=True, right_index=True, how='outer')

In [12]:
provThresh = provinceThreshold[provinceThreshold.Province=='Balochistan'].Province_threshold[0]

In [13]:
triggerDF['total']=triggerDF.sum(axis=1)
triggerDF['trigger']=0
triggerDF['trigger'][triggerDF.total>=provThresh]=1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  triggerDF['trigger'][triggerDF.total>=provThresh]=1


In [98]:
triggerDF.to_csv(outPath/'precipTriggers.csv', index=True)

### Punjab - yield

In [15]:
yieldYears = pd.read_csv(rootPath/'prep/yieldThresholdDetrended.csv')
yieldYears=yieldYears.set_index('District_id')
yieldThresh=list(yieldYears.pctile)
yieldYears=yieldYears.drop(columns=['pctile'])

In [16]:
triggerDF=pd.DataFrame(data=[],index=yieldYears.columns)
for i, distID in enumerate(yieldYears.index):
    thresh=yieldThresh[i]
    pop=popData.Rural_population[popData.District_id==distID].values[0]
    varYears=yieldYears[yieldYears.index==distID]
    interDF=(varYears<=thresh).T
    interDF[interDF[distID]==True]=pop
    interDF[interDF[distID]==False]=0
    triggerDF = triggerDF.merge(interDF, left_index=True, right_index=True, how='outer')

In [17]:
provThresh = provinceThreshold[provinceThreshold.Province=='Punjab'].Province_threshold[1]

In [18]:
triggerDF['total']=triggerDF.sum(axis=1)
triggerDF['trigger']=0
triggerDF['trigger'][triggerDF.total>=provThresh]=1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  triggerDF['trigger'][triggerDF.total>=provThresh]=1


In [19]:
triggerDF.to_csv(outPath/'yieldDetrendedTriggers.csv', index=True)

In [20]:
triggerDF

Unnamed: 0,601,602,603,604,605,606,607,608,609,610,...,629,630,631,632,633,634,635,636,total,trigger
1983.0,0,0,0,0,0,0,2612543,0,0,0,...,0,0,0,0,0,0,0,0,2612543.0,0
1984.0,0,0,2750310,0,0,0,2612543,0,0,0,...,2258636,0,1110321,2324346,0,2357910,0,0,34875091.0,1
1985.0,0,0,0,1928714,0,0,2612543,0,0,868448,...,2258636,0,0,0,0,2357910,0,0,13481915.0,0
1986.0,0,0,0,0,0,947202,0,0,0,0,...,0,0,0,0,0,0,0,0,947202.0,0
1987.0,0,0,0,1928714,0,947202,0,754106,0,868448,...,2258636,0,1110321,2324346,1434910,2357910,991747,1658756,32957192.0,1
1988.0,1393550,0,0,0,1212042,0,0,754106,0,0,...,0,2360823,0,0,0,0,0,0,13462087.0,0
1989.0,0,0,0,0,0,0,0,0,0,0,...,0,2360823,0,0,0,0,0,0,4890940.0,0
1990.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1658756,1658756.0,0
1991.0,0,0,2750310,0,0,0,0,754106,0,0,...,0,0,1110321,0,1434910,0,991747,1658756,21112966.0,1
1992.0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,7524310.0,0


### Sindh - VHI

In [21]:
vhiYears = pd.read_csv(rootPath/'prep/vhiThreshold.csv')
vhiYears=vhiYears.set_index('District_id')
vhiThresh=list(vhiYears.pctile)
vhiYears=vhiYears.drop(columns=['pctile'])

In [22]:
triggerDF=pd.DataFrame(data=[],index=vhiYears.columns)
for i, distID in enumerate(vhiYears.index):
    thresh=vhiThresh[i]
    pop=popData.Rural_population[popData.District_id==distID].values[0]
    varYears=vhiYears[vhiYears.index==distID]
    interDF=(varYears<=thresh).T
    interDF[interDF[distID]==True]=pop
    interDF[interDF[distID]==False]=0
    triggerDF = triggerDF.merge(interDF, left_index=True, right_index=True, how='outer')

In [23]:
provThresh = provinceThreshold[provinceThreshold.Province=='Sindh'].Province_threshold[2]

In [24]:
triggerDF['total']=triggerDF.sum(axis=1)
triggerDF['trigger']=0
triggerDF['trigger'][triggerDF.total>=provThresh]=1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  triggerDF['trigger'][triggerDF.total>=provThresh]=1


In [115]:
triggerDF.to_csv(outPath/'vhiTriggers.csv', index=True)

In [25]:
triggerDF

Unnamed: 0,801,802,803,804,805,806,807,808,809,810,...,816,817,818,819,820,821,822,823,total,trigger
2002,1414138,1167097,1243701,366708,709170,558955,0,835556,1628484,822754,...,1123510,928232,767788,575094,535178,0,803759,829785,18678015.0,1
2003,1414138,1167097,1243701,366708,0,558955,0,835556,1628484,0,...,1123510,0,767788,575094,0,1517590,803759,829785,17200271.0,1
2004,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.0,0
2005,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.0,0
2006,1414138,1167097,1243701,0,709170,0,0,835556,1628484,822754,...,0,928232,767788,575094,535178,1517590,803759,829785,17066308.0,1
2007,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.0,0
2008,0,0,0,0,709170,0,0,0,0,822754,...,0,928232,0,0,0,0,0,0,2460156.0,0
2009,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1517590,0,829785,3816027.0,0
2010,0,1167097,1243701,0,709170,0,0,835556,1628484,822754,...,1123510,928232,767788,0,0,0,0,0,9226292.0,1
2011,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0.0,0


## Combine triggers for all Provinces

In [26]:
precipTriggers = pd.read_csv(rootPath/'triggers/precipTriggers.csv')
precipTriggers=pd.DataFrame(precipTriggers.set_index('Unnamed: 0')['trigger'])
precipTriggers.columns=['Balochistan_Precip']

In [27]:
yieldTriggers = pd.read_csv(rootPath/'triggers/yieldDetrendedTriggers.csv')
yieldTriggers=pd.DataFrame(yieldTriggers.set_index('Unnamed: 0')['trigger'])
yieldTriggers.columns=['Punjab_Yield']

In [28]:
vhiTriggers = pd.read_csv(rootPath/'triggers/vhiTriggers.csv')
vhiTriggers=pd.DataFrame(vhiTriggers.set_index('Unnamed: 0')['trigger'])
vhiTriggers.columns=['Sindh_VHI']

In [29]:
allTriggers = precipTriggers.merge(yieldTriggers, left_index=True, right_index=True, how='outer')
allTriggers = allTriggers.merge(vhiTriggers, left_index=True, right_index=True, how='outer')

In [30]:
allTriggers

Unnamed: 0_level_0,Balochistan_Precip,Punjab_Yield,Sindh_VHI
Unnamed: 0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1983.0,,0.0,
1984.0,,1.0,
1985.0,,0.0,
1986.0,,0.0,
1987.0,,1.0,
1988.0,,0.0,
1989.0,,0.0,
1990.0,,0.0,
1991.0,,1.0,
1992.0,,0.0,


In [31]:
allTriggers.to_csv(outPath/'allDetrendedTriggers.csv', index=True)