In [1]:
import pandas as pd
from statsmodels.formula.api import ols

# Nomenclature
- DSR- Age-standardised rate for hospital admissions
- lcl- Lower Confidence limit
- ucl- Upper Confidence limit
- score- Index of Multiple Deprivation Score
- rank- Index of Multiple Deprivation Rank
- decile- Index of Multiple Deprivation Decile

# Merging HES with list of LSOA's that are not within 1.6km of coast
## Monthly

### Circulatory

In [2]:
Circulatory_monthly = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\HES\AllCirculatory_monthly_all.csv')

In [3]:
Circulatory_monthly = Circulatory_monthly[['Disease', 'year', 'month', 'n', 'DSR', 'lcl', 'ucl', 'score', 'rank', 'decile', 'lsoa11']]

In [33]:
Circulatory_monthly[:5]

Unnamed: 0,Disease,year,month,n,DSR,lcl,ucl,score,rank,decile,lsoa11
0,All Circulatory,2009,1,,69.247604,8.247776,250.557617,11.43,22492,7,E01015272
1,All Circulatory,2009,1,,147.841522,3.743035,823.720154,33.619999,6665,3,E01015273
2,All Circulatory,2009,1,,187.446503,38.15966,549.000244,20.290001,13991,5,E01015274
3,All Circulatory,2009,1,,309.000488,59.781487,912.577209,19.780001,14447,5,E01015275
4,All Circulatory,2009,1,,162.742996,15.881656,599.25946,26.4,10063,4,E01015276


In [5]:
Coastal = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\Coastal\Coastal_5.csv')

In [34]:
Coastal[:5]

Unnamed: 0,lsoa11,LSOA11NM
0,E01014869,South Gloucestershire 024B
1,E01014890,South Gloucestershire 024E
2,E01014891,South Gloucestershire 024F
3,E01015301,Bournemouth 001A
4,E01015302,Bournemouth 002A


In [7]:
Coastal = Coastal.rename(columns={'LSOA11CD':'lsoa11'})

In [8]:
Circ_Coastal = Circulatory_monthly.merge(Coastal, on='lsoa11', how = 'inner')

In [35]:
Circ_Coastal[:5]

Unnamed: 0,Disease,year,month,n,DSR,lcl,ucl,score,rank,decile,lsoa11,LSOA11NM
0,All Circulatory,2009,1,,117.585571,13.604174,426.649841,18.93,15111,5,E01015301,Bournemouth 001A
1,All Circulatory,2009,2,,200.200333,48.492283,525.5802,18.93,15111,5,E01015301,Bournemouth 001A
2,All Circulatory,2009,3,,250.110779,43.129189,751.39386,18.93,15111,5,E01015301,Bournemouth 001A
3,All Circulatory,2009,4,,37.453182,0.948231,208.675781,18.93,15111,5,E01015301,Bournemouth 001A
4,All Circulatory,2009,5,,85.490967,2.164445,476.325165,18.93,15111,5,E01015301,Bournemouth 001A


In [10]:
Circ_Coastal.to_csv(r'D:\Annies_Dissertation\Analysis\Regression\Coastal\Circ_Coastal_5.csv')

### Respiratory

In [11]:
Respiratory_monthly = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\HES\AllRespiratory_monthly_all.csv')

In [12]:
Respiratory_monthly = Respiratory_monthly[['Disease', 'year', 'month', 'n', 'DSR', 'lcl', 'ucl', 'score', 'rank', 'decile', 'lsoa11']]

In [36]:
Respiratory_monthly[:5]

Unnamed: 0,Disease,year,month,n,DSR,lcl,ucl,score,rank,decile,lsoa11
0,All Respiratory,2009,1,,78.530869,1.988227,437.54602,20.290001,13991,5,E01015274
1,All Respiratory,2009,1,,198.915909,20.564419,729.030701,19.780001,14447,5,E01015275
2,All Respiratory,2009,1,8.0,519.371643,196.146973,1071.320923,26.4,10063,4,E01015276
3,All Respiratory,2009,1,6.0,607.023254,197.484589,1368.225708,35.400002,5995,2,E01015277
4,All Respiratory,2009,1,5.0,469.725189,136.39975,1128.015015,45.439999,3096,1,E01015279


In [14]:
Resp_Coastal = Respiratory_monthly.merge(Coastal, on='lsoa11', how = 'inner')

In [37]:
Resp_Coastal[:5]

Unnamed: 0,Disease,year,month,n,DSR,lcl,ucl,score,rank,decile,lsoa11,LSOA11NM
0,All Respiratory,2009,1,,141.714142,28.508238,415.88443,20.549999,13802,5,E01015302,Bournemouth 002A
1,All Respiratory,2009,2,,58.520599,1.481616,326.055908,20.549999,13802,5,E01015302,Bournemouth 002A
2,All Respiratory,2009,3,,154.989273,30.814402,455.725006,20.549999,13802,5,E01015302,Bournemouth 002A
3,All Respiratory,2009,4,,96.468681,10.960583,350.624695,20.549999,13802,5,E01015302,Bournemouth 002A
4,All Respiratory,2009,5,,58.520599,1.481616,326.055908,20.549999,13802,5,E01015302,Bournemouth 002A


In [16]:
Resp_Coastal.to_csv(r'D:\Annies_Dissertation\Analysis\Regression\Coastal\Resp_Coastal_5.csv')

## Merging Coastal HES and AP
### Reading in LSOA AP data

In [17]:
AP_Monthly = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\Monthly_PM25_LSOA.csv')

In [18]:
AP_Monthly = AP_Monthly.rename(columns={'LSOA':'lsoa11', 'value':'PM25'})

In [19]:
AP_Monthly = AP_Monthly[['time', 'lsoa11', 'PM25', 'month', 'year']].dropna()

### Merging

In [20]:
Circ_AP = pd.merge(Circ_Coastal, AP_Monthly, on=['year', 'month','lsoa11'], how='inner')

In [21]:
Resp_AP = pd.merge(Resp_Coastal, AP_Monthly, on=['year', 'month','lsoa11'], how='inner')

In [22]:
Circ_AP.corr()

Unnamed: 0,year,month,n,DSR,lcl,ucl,score,rank,decile,PM25
year,1.0,-0.136313,-0.03194,-0.04168,-0.020893,-0.050614,-0.004544,0.004493,0.004173,-0.053956
month,-0.136313,1.0,-0.000418,0.006279,0.00279,0.008397,5.6e-05,0.000101,0.000417,-0.196038
n,-0.03194,-0.000418,1.0,0.364992,0.610957,0.169013,-0.02787,0.015435,0.013882,0.012438
DSR,-0.04168,0.006279,0.364992,1.0,0.7302,0.868549,0.105266,-0.105212,-0.105,0.001851
lcl,-0.020893,0.00279,0.610957,0.7302,1.0,0.354405,0.066347,-0.075361,-0.0746,0.003199
ucl,-0.050614,0.008397,0.169013,0.868549,0.354405,1.0,0.107624,-0.099629,-0.099881,0.001053
score,-0.004544,5.6e-05,-0.02787,0.105266,0.066347,0.107624,1.0,-0.967557,-0.962575,0.046325
rank,0.004493,0.000101,0.015435,-0.105212,-0.075361,-0.099629,-0.967557,1.0,0.992416,-0.03743
decile,0.004173,0.000417,0.013882,-0.105,-0.0746,-0.099881,-0.962575,0.992416,1.0,-0.035801
PM25,-0.053956,-0.196038,0.012438,0.001851,0.003199,0.001053,0.046325,-0.03743,-0.035801,1.0


In [23]:
Resp_AP.corr()

Unnamed: 0,year,month,n,DSR,lcl,ucl,score,rank,decile,PM25
year,1.0,-0.140934,0.008978,-0.002959,0.020757,-0.029983,-0.005205,0.004941,0.005144,-0.043221
month,-0.140934,1.0,0.076622,0.008164,0.021764,0.002734,-0.000442,0.000583,0.000856,-0.21151
n,0.008978,0.076622,1.0,0.407454,0.622319,0.2168,0.10138,-0.100153,-0.09989,-0.065204
DSR,-0.002959,0.008164,0.407454,1.0,0.667381,0.88874,0.174901,-0.163548,-0.161657,-0.05129
lcl,0.020757,0.021764,0.622319,0.667381,1.0,0.326997,0.190136,-0.180784,-0.178333,-0.063784
ucl,-0.029983,0.002734,0.2168,0.88874,0.326997,1.0,0.112422,-0.100767,-0.100577,-0.022357
score,-0.005205,-0.000442,0.10138,0.174901,0.190136,0.112422,1.0,-0.966882,-0.962399,0.055337
rank,0.004941,0.000583,-0.100153,-0.163548,-0.180784,-0.100767,-0.966882,1.0,0.992785,-0.04598
decile,0.005144,0.000856,-0.09989,-0.161657,-0.178333,-0.100577,-0.962399,0.992785,1.0,-0.044322
PM25,-0.043221,-0.21151,-0.065204,-0.05129,-0.063784,-0.022357,0.055337,-0.04598,-0.044322,1.0


In [24]:
model = ols("DSR ~  PM25", data=Circ_AP)
Circulatory_results = model.fit()
Circulatory_results.summary()

0,1,2,3
Dep. Variable:,DSR,R-squared:,0.0
Model:,OLS,Adj. R-squared:,-0.0
Method:,Least Squares,F-statistic:,0.1557
Date:,"Sat, 10 Sep 2016",Prob (F-statistic):,0.693
Time:,21:52:39,Log-Likelihood:,-290910.0
No. Observations:,45422,AIC:,581800.0
Df Residuals:,45420,BIC:,581800.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
Intercept,182.9921,1.931,94.774,0.000,179.208 186.776
PM25,0.0592,0.150,0.395,0.693,-0.235 0.353

0,1,2,3
Omnibus:,68874.487,Durbin-Watson:,1.718
Prob(Omnibus):,0.0,Jarque-Bera (JB):,183774921.374
Skew:,8.771,Prob(JB):,0.0
Kurtosis:,314.119,Cond. No.,36.4


In [25]:
model = ols("DSR ~  PM25", data=Resp_AP)
Respiratory_results = model.fit()
Respiratory_results.summary()

0,1,2,3
Dep. Variable:,DSR,R-squared:,0.003
Model:,OLS,Adj. R-squared:,0.003
Method:,Least Squares,F-statistic:,113.8
Date:,"Sat, 10 Sep 2016",Prob (F-statistic):,1.5700000000000001e-26
Time:,21:52:43,Log-Likelihood:,-272280.0
No. Observations:,43140,AIC:,544600.0
Df Residuals:,43138,BIC:,544600.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
Intercept,171.4410,1.788,95.910,0.000,167.937 174.945
PM25,-1.4898,0.140,-10.667,0.000,-1.764 -1.216

0,1,2,3
Omnibus:,67522.404,Durbin-Watson:,1.759
Prob(Omnibus):,0.0,Jarque-Bera (JB):,287381610.553
Skew:,9.22,Prob(JB):,0.0
Kurtosis:,402.423,Cond. No.,35.8


## Yearly

In [26]:
Circ_Yearly = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\HES\AllCirculatory_yearly_all.csv')

In [38]:
Circ_Yearly[:5]

Unnamed: 0.1,Unnamed: 0,Disease,year,lsoa11,n,DSR,lcl,ucl,score,rank,decile
0,0,All Circulatory,2009,E01015272,29.0,1379.819824,907.973938,2002.930054,11.43,22492,7
1,1,All Circulatory,2009,E01015273,24.0,2712.837646,1679.499756,4116.15918,33.619999,6665,3
2,2,All Circulatory,2009,E01015274,30.0,2070.800049,1386.778442,2969.841064,20.290001,13991,5
3,3,All Circulatory,2009,E01015275,37.0,3559.793457,2480.718994,4939.573242,19.780001,14447,5
4,4,All Circulatory,2009,E01015276,15.0,1460.698853,791.767883,2447.211182,26.4,10063,4


In [28]:
Circ_Coastal_Y = Circ_Yearly.merge(Coastal, on='lsoa11', how = 'inner')

In [39]:
Circ_Coastal_Y[:5]

Unnamed: 0.1,Unnamed: 0,Disease,year,lsoa11,n,DSR,lcl,ucl,score,rank,decile,LSOA11NM
0,27,All Circulatory,2009,E01015301,33.0,1912.189087,1283.139404,2728.40332,18.93,15111,5,Bournemouth 001A
1,27,All Circulatory,2010,E01015301,33.0,1932.944336,1300.952148,2752.976563,18.93,15111,5,Bournemouth 001A
2,27,All Circulatory,2011,E01015301,37.0,1981.868774,1385.45874,2744.480225,18.93,15111,5,Bournemouth 001A
3,27,All Circulatory,2012,E01015301,37.0,1923.57312,1335.414185,2675.634277,18.93,15111,5,Bournemouth 001A
4,27,All Circulatory,2013,E01015301,35.0,1822.383667,1261.149414,2545.063232,18.93,15111,5,Bournemouth 001A


In [40]:
Resp_Yearly = pd.read_csv(r'D:\Annies_Dissertation\Analysis\Regression\HES\AllRespiratory_yearly_all.csv')

In [41]:
Resp_Coastal_Y = Resp_Yearly.merge(Coastal, on='lsoa11', how = 'inner')

In [43]:
Resp_Coastal_Y[:5]

Unnamed: 0.1,Unnamed: 0,Disease,year,lsoa11,n,DSR,lcl,ucl,score,rank,decile,LSOA11NM
0,27,All Respiratory,2009,E01015301,16.0,930.146851,522.468079,1523.88623,18.93,15111,5,Bournemouth 001A
1,27,All Respiratory,2010,E01015301,15.0,816.386841,446.385162,1362.050415,18.93,15111,5,Bournemouth 001A
2,27,All Respiratory,2011,E01015301,19.0,1122.626465,656.222351,1780.884399,18.93,15111,5,Bournemouth 001A
3,27,All Respiratory,2012,E01015301,27.0,1651.603027,1052.388428,2451.063965,18.93,15111,5,Bournemouth 001A
4,27,All Respiratory,2013,E01015301,27.0,1634.310913,1047.59082,2417.102051,18.93,15111,5,Bournemouth 001A


In [44]:
AP_Yearly = pd.read_csv('D:\Annies_Dissertation\Analysis\Regression\Yearly_PM25_LSOA.csv')

In [45]:
AP_Yearly = AP_Yearly.rename(columns={'LSOA':'lsoa11', 'value':'PM25'})

In [46]:
AP_Yearly = AP_Yearly[['time', 'lsoa11', 'PM25', 'year']].dropna()

In [47]:
Circul_AP_Y = pd.merge(Circ_Coastal_Y, AP_Yearly, on=['year','lsoa11'], how='inner')

In [48]:
Respir_AP_Y = pd.merge(Resp_Coastal_Y, AP_Yearly, on=['year','lsoa11'], how='inner')

In [49]:
Circul_AP_Y.corr()

Unnamed: 0.1,Unnamed: 0,year,n,DSR,lcl,ucl,score,rank,decile,PM25
Unnamed: 0,1.0,1.075592e-05,-0.286843,-0.200932,-0.278363,-0.090194,-0.1330913,0.1236241,0.1228156,-0.07519
year,1.1e-05,1.0,-0.005728,-0.073505,-0.049062,-0.081358,9.616131999999998e-19,-7.645217e-20,-6.218046e-20,-0.119752
n,-0.286843,-0.005728298,1.0,0.503244,0.742008,0.213912,0.0266834,-0.06035588,-0.05810518,0.029801
DSR,-0.200932,-0.07350485,0.503244,1.0,0.86571,0.89816,0.2570406,-0.2705507,-0.2687671,0.061301
lcl,-0.278363,-0.04906229,0.742008,0.86571,1.0,0.563348,0.2432008,-0.2654969,-0.2626985,0.071436
ucl,-0.090194,-0.0813581,0.213912,0.89816,0.563348,1.0,0.2114543,-0.2148219,-0.2141354,0.040236
score,-0.133091,9.616131999999998e-19,0.026683,0.257041,0.243201,0.211454,1.0,-0.9679992,-0.9631367,0.139782
rank,0.123624,-7.645217e-20,-0.060356,-0.270551,-0.265497,-0.214822,-0.9679992,1.0,0.9925018,-0.113146
decile,0.122816,-6.218046e-20,-0.058105,-0.268767,-0.262698,-0.214135,-0.9631367,0.9925018,1.0,-0.109506
PM25,-0.07519,-0.1197516,0.029801,0.061301,0.071436,0.040236,0.1397821,-0.1131455,-0.1095062,1.0


In [50]:
Respir_AP_Y.corr()

Unnamed: 0.1,Unnamed: 0,year,n,DSR,lcl,ucl,score,rank,decile,PM25
Unnamed: 0,1.0,-9.406e-20,-0.104451,-0.079187,-0.114604,-0.040214,-0.1330909,0.1236237,0.1228155,-0.075205
year,-9.406e-20,1.0,0.092986,0.012628,0.053465,-0.022358,9.616131999999998e-19,-7.645217e-20,-6.218046e-20,-0.119752
n,-0.1044513,0.09298613,1.0,0.726193,0.852413,0.51511,0.4235726,-0.4193964,-0.4095976,0.066513
DSR,-0.0791867,0.01262783,0.726193,1.0,0.891997,0.938261,0.4652187,-0.4421815,-0.4352413,0.104269
lcl,-0.1146038,0.05346467,0.852413,0.891997,1.0,0.682293,0.5049498,-0.4830308,-0.4753163,0.111644
ucl,-0.04021448,-0.02235814,0.51511,0.938261,0.682293,1.0,0.3686941,-0.346838,-0.3417827,0.083641
score,-0.1330909,9.616131999999998e-19,0.423573,0.465219,0.50495,0.368694,1.0,-0.9679992,-0.9631367,0.139782
rank,0.1236237,-7.645217e-20,-0.419396,-0.442182,-0.483031,-0.346838,-0.9679992,1.0,0.9925018,-0.113146
decile,0.1228155,-6.218046e-20,-0.409598,-0.435241,-0.475316,-0.341783,-0.9631367,0.9925018,1.0,-0.109506
PM25,-0.07520456,-0.1197516,0.066513,0.104269,0.111644,0.083641,0.1397821,-0.1131455,-0.1095062,1.0


In [50]:
model = ols("DSR ~  PM25", data=Circul_AP_Y)
Circulatory_results = model.fit()
Circulatory_results.summary()

0,1,2,3
Dep. Variable:,DSR,R-squared:,0.004
Model:,OLS,Adj. R-squared:,0.004
Method:,Least Squares,F-statistic:,15.85
Date:,"Fri, 26 Aug 2016",Prob (F-statistic):,6.96e-05
Time:,16:34:47,Log-Likelihood:,-33515.0
No. Observations:,4205,AIC:,67030.0
Df Residuals:,4203,BIC:,67050.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
Intercept,1490.9333,79.847,18.672,0.000,1334.391 1647.476
PM25,25.7165,6.459,3.982,0.000,13.054 38.379

0,1,2,3
Omnibus:,1926.419,Durbin-Watson:,1.389
Prob(Omnibus):,0.0,Jarque-Bera (JB):,24812.68
Skew:,1.841,Prob(JB):,0.0
Kurtosis:,14.316,Cond. No.,92.0


In [51]:
model = ols("DSR ~  PM25", data=Respir_AP_Y)
Respiratory_results = model.fit()
Respiratory_results.summary()

0,1,2,3
Dep. Variable:,DSR,R-squared:,0.011
Model:,OLS,Adj. R-squared:,0.011
Method:,Least Squares,F-statistic:,46.2
Date:,"Fri, 26 Aug 2016",Prob (F-statistic):,1.22e-11
Time:,16:34:51,Log-Likelihood:,-33407.0
No. Observations:,4205,AIC:,66820.0
Df Residuals:,4203,BIC:,66830.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
Intercept,901.1561,77.817,11.580,0.000,748.593 1053.719
PM25,42.7831,6.295,6.797,0.000,30.442 55.124

0,1,2,3
Omnibus:,1831.15,Durbin-Watson:,1.185
Prob(Omnibus):,0.0,Jarque-Bera (JB):,15496.666
Skew:,1.866,Prob(JB):,0.0
Kurtosis:,11.632,Cond. No.,92.0
