# Import libraries

In [1]:
import pandas as pd
import statsmodels.formula.api as sm
import numpy as np

# Read in the circulatory disease data

In [2]:
# Load the yearly all-circulatory table from its CSV export
circ_csv_path = r'D:\Annies_Dissertation\Analysis\Regression\HES\AllCirculatory_yearly_all.csv'
Circ_Yearly = pd.read_csv(circ_csv_path)

In [3]:
# Show the first five rows as a quick sanity check of the loaded table
Circ_Yearly.head()

Unnamed: 0.1,Unnamed: 0,Disease,year,lsoa11,n,DSR,lcl,ucl,score,rank,decile
0,0,All Circulatory,2009,E01015272,29.0,1379.819824,907.973938,2002.930054,11.43,22492,7
1,1,All Circulatory,2009,E01015273,24.0,2712.837646,1679.499756,4116.15918,33.619999,6665,3
2,2,All Circulatory,2009,E01015274,30.0,2070.800049,1386.778442,2969.841064,20.290001,13991,5
3,3,All Circulatory,2009,E01015275,37.0,3559.793457,2480.718994,4939.573242,19.780001,14447,5
4,4,All Circulatory,2009,E01015276,15.0,1460.698853,791.767883,2447.211182,26.4,10063,4


### Calculate the weights

In [4]:
# Uncertainty of each DSR, taken as the width of its interval: ucl minus lcl
# (presumably the upper and lower confidence limits -- confirm against the data source)
Circ_Yearly['uncertainty'] = Circ_Yearly['ucl'].sub(Circ_Yearly['lcl'])

In [5]:
# Calculate the inverse of the uncertainty (used later as the regression weight).
# Non-positive widths are masked to NaN before inverting: a zero width would
# otherwise produce an infinite weight (which dropna() does not remove) and a
# negative width a negative weight. The masked rows become NaN here and are
# discarded by the dropna() in the column-selection cell that follows.
ci_width = Circ_Yearly['uncertainty']
Circ_Yearly['inverse_uncertainty'] = 1 / ci_width.where(ci_width > 0)

In [6]:
# Restrict the table to the columns used downstream, then discard any row
# that has a missing value in one of them
circ_columns = ['Disease', 'lsoa11', 'n', 'year', 'DSR', 'lcl',
                'ucl', 'score', 'rank', 'decile', 'uncertainty', 'inverse_uncertainty']
Circ_Yearly = Circ_Yearly[circ_columns].dropna()

### Repeat the steps for respiratory disease

In [10]:
# Load the yearly all-respiratory table from its CSV export
resp_csv_path = r'D:\Annies_Dissertation\Analysis\Regression\HES\AllRespiratory_yearly_all.csv'
Resp_Yearly = pd.read_csv(resp_csv_path)

In [11]:
# Show the first five rows as a quick sanity check of the loaded table
Resp_Yearly.head()

Unnamed: 0.1,Unnamed: 0,Disease,year,lsoa11,n,DSR,lcl,ucl,score,rank,decile
0,0,All Respiratory,2009,E01015272,12.0,738.839172,367.731018,1312.295532,11.43,22492,7
1,1,All Respiratory,2009,E01015273,25.0,1500.134521,896.891479,2314.249512,33.619999,6665,3
2,2,All Respiratory,2009,E01015274,20.0,1518.648193,919.455261,2356.864258,20.290001,13991,5
3,3,All Respiratory,2009,E01015275,28.0,2429.869873,1573.156006,3566.883301,19.780001,14447,5
4,4,All Respiratory,2009,E01015276,32.0,2487.559082,1609.623413,3631.377686,26.4,10063,4


In [12]:
# Uncertainty of each DSR, taken as the width of its interval: ucl minus lcl
# (presumably the upper and lower confidence limits -- confirm against the data source)
Resp_Yearly['uncertainty'] = Resp_Yearly['ucl'].sub(Resp_Yearly['lcl'])

In [13]:
# Calculate the inverse of the uncertainty (used later as the regression weight).
# Non-positive widths are masked to NaN before inverting: a zero width would
# otherwise produce an infinite weight (which dropna() does not remove) and a
# negative width a negative weight. The masked rows become NaN here and are
# discarded by the dropna() in the column-selection cell that follows.
ci_width = Resp_Yearly['uncertainty']
Resp_Yearly['inverse_uncertainty'] = 1 / ci_width.where(ci_width > 0)

In [14]:
# Restrict the table to the columns used downstream, then discard any row
# that has a missing value in one of them
resp_columns = ['Disease', 'lsoa11', 'n', 'year', 'DSR', 'lcl',
                'ucl', 'score', 'rank', 'decile', 'uncertainty', 'inverse_uncertainty']
Resp_Yearly = Resp_Yearly[resp_columns].dropna()

## Read in the PM2.5 data

In [15]:
# Load the yearly PM2.5 table from its CSV export.
# The path is a raw string so the backslashes of the Windows path are taken
# literally: '\A', '\R' and '\Y' are invalid escape sequences in a normal
# string literal (deprecated, and a SyntaxWarning on recent Python versions).
# This also matches the raw-string paths used by the other read_csv calls.
AP_Yearly = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\Yearly_PM25_LSOA.csv')

In [16]:
# Align the column names with the admissions tables: 'LSOA' becomes 'lsoa11'
# (the key used for merging later) and the generic 'value' column becomes 'PM25'
pm25_column_names = {'LSOA': 'lsoa11', 'value': 'PM25'}
AP_Yearly = AP_Yearly.rename(columns=pm25_column_names)

In [17]:
# Show the first five rows to confirm the renamed columns
AP_Yearly.head()

Unnamed: 0.1,Unnamed: 0,time,lsoa11,PM25,year
0,0,2009-12-31,E01014869,11.05674,2009
1,1,2010-12-31,E01014869,9.198227,2010
2,2,2011-12-31,E01014869,9.872396,2011
3,3,2012-12-31,E01014869,9.418056,2012
4,4,2013-12-31,E01014869,9.172091,2013


In [18]:
# Keep only the columns used downstream (this drops the leftover 'Unnamed'
# index columns) and discard rows with missing values
pm25_columns = ['time', 'lsoa11', 'PM25', 'year']
AP_Yearly = AP_Yearly[pm25_columns].dropna()

## Merge PM2.5 and hospital admissions data

In [22]:
# Attach the PM2.5 value to every circulatory row with the same year and LSOA;
# the inner join keeps only (year, lsoa11) pairs present in both tables
Circul_AP_Y = Circ_Yearly.merge(AP_Yearly, how='inner', on=['year', 'lsoa11'])

In [23]:
# Show the first five merged rows
Circul_AP_Y.head()

Unnamed: 0,Disease,lsoa11,n,year,DSR,lcl,ucl,score,rank,decile,uncertainty,inverse_uncertainty,time,PM25
0,All Circulatory,E01015272,29.0,2009,1379.819824,907.973938,2002.930054,11.43,22492,7,1094.956116,0.000913,2009-12-31,15.233035
1,All Circulatory,E01015273,24.0,2009,2712.837646,1679.499756,4116.15918,33.619999,6665,3,2436.659424,0.00041,2009-12-31,15.614657
2,All Circulatory,E01015274,30.0,2009,2070.800049,1386.778442,2969.841064,20.290001,13991,5,1583.062622,0.000632,2009-12-31,15.820494
3,All Circulatory,E01015275,37.0,2009,3559.793457,2480.718994,4939.573242,19.780001,14447,5,2458.854248,0.000407,2009-12-31,15.245512
4,All Circulatory,E01015276,15.0,2009,1460.698853,791.767883,2447.211182,26.4,10063,4,1655.443299,0.000604,2009-12-31,15.811707


In [19]:
# Attach the PM2.5 value to every respiratory row with the same year and LSOA;
# the inner join keeps only (year, lsoa11) pairs present in both tables
Respir_AP_Y = Resp_Yearly.merge(AP_Yearly, how='inner', on=['year', 'lsoa11'])

In [20]:
# Show the first five merged rows
Respir_AP_Y.head()

Unnamed: 0,Disease,lsoa11,n,year,DSR,lcl,ucl,score,rank,decile,uncertainty,inverse_uncertainty,time,PM25
0,All Respiratory,E01015272,12.0,2009,738.839172,367.731018,1312.295532,11.43,22492,7,944.564514,0.001059,2009-12-31,15.233035
1,All Respiratory,E01015273,25.0,2009,1500.134521,896.891479,2314.249512,33.619999,6665,3,1417.358033,0.000706,2009-12-31,15.614657
2,All Respiratory,E01015274,20.0,2009,1518.648193,919.455261,2356.864258,20.290001,13991,5,1437.408997,0.000696,2009-12-31,15.820494
3,All Respiratory,E01015275,28.0,2009,2429.869873,1573.156006,3566.883301,19.780001,14447,5,1993.727295,0.000502,2009-12-31,15.245512
4,All Respiratory,E01015276,32.0,2009,2487.559082,1609.623413,3631.377686,26.4,10063,4,2021.754273,0.000495,2009-12-31,15.811707


In [None]:
# Set up a weighted least squares regression of DSR on PM25 for the circulatory
# data. Each row's weight is its inverse_uncertainty value, handed to
# statsmodels as a plain numpy array.
# NOTE(review): statsmodels documents WLS weights as proportional to the inverse
# *variance* of each observation; 1 / (interval width) is closer to
# 1 / standard error -- confirm this weighting is intended.
circ_weights = np.array(Circul_AP_Y['inverse_uncertainty'])
model = sm.wls("DSR ~ PM25", data=Circul_AP_Y, weights=circ_weights)

In [30]:
# Fit the WLS model currently bound to `model` and display its regression summary.
# NOTE: `model` is a shared name that the respiratory sm.wls(...) cell further
# down rebinds, so this cell must be run directly after the circulatory
# sm.wls(...) cell above; otherwise Circ_results will hold the wrong fit.
Circ_results = model.fit()
Circ_results.summary()

0,1,2,3
Dep. Variable:,DSR,R-squared:,0.007
Model:,WLS,Adj. R-squared:,0.006
Method:,Least Squares,F-statistic:,53.63
Date:,"Mon, 29 Aug 2016",Prob (F-statistic):,2.65e-13
Time:,20:22:33,Log-Likelihood:,-64207.0
No. Observations:,8160,AIC:,128400.0
Df Residuals:,8158,BIC:,128400.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
Intercept,1376.4600,45.467,30.274,0.000,1287.333 1465.587
PM25,25.3192,3.457,7.323,0.000,18.542 32.096

0,1,2,3
Omnibus:,260.001,Durbin-Watson:,1.512
Prob(Omnibus):,0.0,Jarque-Bera (JB):,422.032
Skew:,0.293,Prob(JB):,2.28e-92
Kurtosis:,3.947,Cond. No.,87.9


In [24]:
# Set up a weighted least squares regression of DSR on PM25 for the respiratory
# data. Each row's weight is its inverse_uncertainty value, handed to
# statsmodels as a plain numpy array.
# NOTE(review): statsmodels documents WLS weights as proportional to the inverse
# *variance* of each observation; 1 / (interval width) is closer to
# 1 / standard error -- confirm this weighting is intended.
resp_weights = np.array(Respir_AP_Y['inverse_uncertainty'])
model = sm.wls("DSR ~ PM25", data=Respir_AP_Y, weights=resp_weights)

In [25]:
# Fit the WLS model currently bound to `model` and display its regression summary.
# NOTE: `model` is the same name the circulatory sm.wls(...) cell assigns, so
# this cell must be run directly after the respiratory sm.wls(...) cell above;
# otherwise Resp_results will hold the wrong fit.
Resp_results = model.fit()
Resp_results.summary()

0,1,2,3
Dep. Variable:,DSR,R-squared:,0.002
Model:,WLS,Adj. R-squared:,0.002
Method:,Least Squares,F-statistic:,17.92
Date:,"Sat, 10 Sep 2016",Prob (F-statistic):,2.33e-05
Time:,22:37:25,Log-Likelihood:,-64870.0
No. Observations:,8184,AIC:,129700.0
Df Residuals:,8182,BIC:,129800.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
Intercept,1182.1285,47.796,24.733,0.000,1088.436 1275.821
PM25,15.3855,3.635,4.233,0.000,8.260 22.511

0,1,2,3
Omnibus:,564.564,Durbin-Watson:,1.433
Prob(Omnibus):,0.0,Jarque-Bera (JB):,824.487
Skew:,0.578,Prob(JB):,9.22e-180
Kurtosis:,4.04,Cond. No.,87.5
