In [None]:
%matplotlib inline

import re

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn; seaborn.set()
from sklearn.decomposition import PCA

import plotly.express as px  # (version 4.7.0 or higher)
import plotly.graph_objects as go
#from dash import Dash, dcc, html, Input, Output


## Read input data

In [None]:
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option('display.max_columns', None)  
def getHour(timestr):
    """Return the part of ``timestr`` that precedes its first ':'.

    A string without any ':' is returned unchanged, so the function can be
    applied repeatedly to the same column without altering it further.
    """
    hour_part, _separator, _remainder = timestr.partition(':')
    return hour_part
def getDate(datestr):
    """Return the second '/'-separated field of a date string.

    For a value such as ``'15/03/2020'`` this yields ``'03'``. Despite the
    function's name, that is the month if the input is day/month/year
    (assumed to be the format of the DfT CSV files - TODO confirm).

    Values that cannot be split are returned unchanged instead of raising:
    * non-string values (e.g. NaN for a missing date);
    * strings without a '/', such as a value already reduced by an earlier
      call, which makes the function safe to apply twice to one column.
    """
    if not isinstance(datestr, str):
        return datestr
    fields = datestr.split('/')
    if len(fields) < 2:
        return datestr
    return fields[1]


# Later cells call this helper under the name ``getMonth``; expose that name
# so those cells do not raise NameError on a fresh kernel.
getMonth = getDate
def correctTimestamp(timestamp):
    """Remove every 'T' character from a string value.

    Non-string values are returned unchanged. The previous check compared
    ``type(timestamp)`` with the string ``'str'``, which is never equal, so
    no value was ever cleaned; ``isinstance`` performs the intended test.
    """
    if isinstance(timestamp, str):
        return timestamp.replace('T', '')
    return timestamp

def cleanDF(dataframe):
    """Normalise the id, time and date columns of ``dataframe`` in place.

    Each column listed below is rewritten only when it is present:
    ``accident_index`` through ``correctTimestamp``, ``time`` through
    ``getHour`` and ``date`` through ``getDate``. The frame is modified
    directly and nothing is returned.
    """
    column_cleaners = (
        ('accident_index', correctTimestamp),
        ('time', getHour),
        ('date', getDate),
    )
    for column_name, cleaner in column_cleaners:
        if column_name in dataframe.columns:
            dataframe[column_name] = dataframe[column_name].apply(cleaner)

In [None]:
# Load the 2020 casualty records, normalise them in place with cleanDF,
# then display the resulting frame.
casualty_2020_df = pd.read_csv("data/dft-road-casualty-statistics-casualty-2020.csv")
cleanDF(casualty_2020_df)
casualty_2020_df

In [None]:
# Load the 2020 vehicle records, normalise them in place with cleanDF,
# then display the resulting frame.
vehicle_2020_df = pd.read_csv("data/dft-road-casualty-statistics-vehicle-2020.csv")
cleanDF(vehicle_2020_df)
vehicle_2020_df

In [None]:
# Load the 2020 accident records, normalise them in place with cleanDF,
# then display the resulting frame.
accident_2020_df = pd.read_csv("data/dft-road-casualty-statistics-accident-2020.csv")
cleanDF(accident_2020_df)
accident_2020_df

#### We have 91199 accidents but 167375 vehicles and 115584 casualties in the data
#### Need to determine the primary vehicle in the crash data

In [None]:
# Vehicle rows whose vehicle_reference equals 1.
vehicle_2020_df.loc[vehicle_2020_df['vehicle_reference'].eq(1)]

We have 91152 accidents where there is a primary vehicle.
That leaves 47 accidents where there is no primary vehicle.

In [None]:
# Vehicle rows whose vehicle_reference is anything other than 1.
vehicle_2020_df.loc[vehicle_2020_df['vehicle_reference'].ne(1)]

In [None]:
# Mean latitude over the accident records (Series.mean skips missing values by default).
accident_2020_df['latitude'].mean()

In [None]:
# Mean longitude over the accident records (Series.mean skips missing values by default).
accident_2020_df['longitude'].mean()

In [None]:
# Accident rows whose accident_severity code equals 1.
accident_2020_df.loc[accident_2020_df['accident_severity'].eq(1)]

In [None]:
# Display the accident_severity column on its own.
accident_2020_df['accident_severity']

In [None]:
from accidentdashboard import utils, accident_data_lookup

# Work on a copy of the row(s) for one accident so the decoding below does not
# touch accident_2020_df.
accident_data = accident_2020_df[accident_2020_df['accident_index'] == '2020340S03832'].copy()

# Replace coded values with their labels, column by column. Only columns that
# have an entry in the lookup table are considered, and a column is rewritten
# only when its first value is a known code.
# The guard skips the decoding when the index is not found: reading
# .values[0] from an empty column would raise IndexError.
if not accident_data.empty:
    code_tables = accident_data_lookup.accident_data_lookup
    for column in accident_data:
        if column in code_tables:
            lookup = code_tables[column]
            value = accident_data[column].values[0]
            if value in lookup:
                accident_data[column] = lookup[value]

print(accident_data.to_dict('records'))

In [None]:
# Read the Mapbox token with a context manager so the file handle is closed;
# strip() removes a trailing newline that would otherwise be part of the token.
with open(".mapbox_token") as token_file:
    token = token_file.read().strip()

crash_colours = ['yellow', 'orange', 'red']

# Severity is plotted as text so plotly treats it as a discrete category.
severity_labels = accident_2020_df['accident_severity'].astype(str)

# Order the codes from highest to lowest so the palette is assigned
# deterministically instead of by order of first appearance in the data.
# Assumes the STATS19 coding 1 = fatal, 2 = serious, 3 = slight, which gives
# slight -> yellow, serious -> orange, fatal -> red - TODO confirm against
# accident_data_lookup.
crash_categories = sorted(severity_labels.unique(), key=float, reverse=True)

# Severity label -> colour substitution dict.
crash_dict = dict(zip(crash_categories, crash_colours))

# Build a separate plotting frame rather than modifying accident_2020_df:
# the original cell turned accident_severity into strings in place, which made
# the cell non-repeatable and leaked string columns into later cells.
# The original replace(to_replace=crash_colours) call searched the column for
# colour names instead of applying crash_dict; map() performs the substitution.
accident_map_df = accident_2020_df.assign(
    accident_severity=severity_labels,
    color=severity_labels.map(crash_dict),
)

fig = px.scatter_mapbox(
    accident_map_df,
    lat="latitude",
    lon="longitude",
    hover_name="accident_severity",
    hover_data=["speed_limit", "number_of_vehicles"],
    custom_data=['accident_index'],
    color="accident_severity",
    # Explicit label -> colour mapping and legend order.
    color_discrete_map=crash_dict,
    category_orders={"accident_severity": crash_categories},
    zoom=4,
    # The original set height=800 here and then overrode it with 600.
    height=600,
    width=600,
)

fig.update_layout(
    mapbox_style="open-street-map",
    mapbox_accesstoken=token,
    margin={"r": 1, "t": 1, "l": 1, "b": 1},
)
fig.update_mapboxes(center_lat=55, center_lon=-3.5)
fig.show()


In [None]:
# Build the combined table from the three loaded frames. The original cell
# merged full_df with itself before full_df was ever assigned, so it failed on
# a fresh kernel and added another casualty join on every re-run.
# The accident -> vehicle -> casualty order reproduces the column suffixes that
# the next cell drops (accident_reference_x, accident_reference_y,
# accident_reference).
full_df = (
    accident_2020_df
    .merge(vehicle_2020_df, on='accident_index')
    .merge(casualty_2020_df, on='accident_index')
)


In [None]:
full_df

In [None]:
# Reference, location and make/model columns to leave out of no_loc_data.
columns_to_discard = [
    'accident_reference_x',
    'location_easting_osgr',
    'location_northing_osgr',
    'longitude',
    'latitude',
    'local_authority_ons_district',
    'local_authority_highway',
    'lsoa_of_accident_location',
    'accident_reference_y',
    'accident_reference',
    'generic_make_model',
]
no_loc_data = full_df.drop(columns=columns_to_discard)
no_loc_data

In [None]:
# Reduce 'time' to the text before the first ':' using getHour. Strings that
# contain no ':' come back unchanged, so already-reduced values are unaffected.
no_loc_data['time'] = no_loc_data['time'].apply(getHour)

In [None]:
# Rewrite 'date' with whatever getMonth returns for each value. getMonth must
# already exist in the kernel when this cell runs.
# NOTE(review): the 'date' column was presumably already reduced by cleanDF at
# load time - confirm that this second pass is intended.
no_loc_data['date'] = no_loc_data['date'].apply(getMonth)

In [None]:
no_loc_data  

In [None]:
# Move accident_index out of the columns and into the row index.
# Not re-runnable: a second run raises KeyError because the column is gone.
no_loc_data = no_loc_data.set_index('accident_index')

In [None]:
# Placeholder: the label list is empty, so nothing is dropped and the result
# has the same contents as before.
no_loc_data = no_loc_data.drop([], axis=1)

In [None]:
no_loc_data.head()

In [None]:
# Fit a 50-component PCA on no_loc_data, keep the projected rows, and show
# them as a DataFrame with default integer column labels.
clean_pca = PCA(n_components=50)
principalComponents = clean_pca.fit_transform(no_loc_data)
principalDf = pd.DataFrame(principalComponents)
principalDf

In [None]:
# Plot each component's share of explained variance from the fitted PCA on a
# log-scaled y axis.
pd.Series(clean_pca.explained_variance_ratio_).plot(logy=True)

In [None]:
# Inner-join accidents with their vehicles on accident_index (one row per vehicle).
factors_2020 = pd.merge(accident_2020_df, vehicle_2020_df, on='accident_index')

In [None]:
# Remove the reference, location and make/model columns from factors_2020.
factors_2020 = factors_2020.drop(
    columns=[
        'accident_reference_x',
        'location_easting_osgr',
        'location_northing_osgr',
        'longitude',
        'latitude',
        'local_authority_ons_district',
        'local_authority_highway',
        'lsoa_of_accident_location',
        'accident_reference_y',
        'generic_make_model',
    ]
)

In [None]:
factors_2020

In [None]:
# Reduce 'time' to the text before the first ':' using getHour, and rewrite
# 'date' with whatever getMonth returns. getMonth must already exist in the
# kernel when this cell runs.
# NOTE(review): both columns were presumably already reduced by cleanDF at
# load time - confirm that this second pass is intended.
factors_2020['time'] = factors_2020['time'].apply(getHour)
factors_2020['date'] = factors_2020['date'].apply(getMonth)

In [None]:
# Lift the column display cap (the same option is set earlier in the notebook),
# then preview the first rows of factors_2020.
pd.set_option('display.max_columns', None)  
factors_2020.head()

In [None]:
# Rows whose accident_index contains a 'T'; entries with no string value count as non-matching.
factors_2020[factors_2020['accident_index'].str.contains("T", na=False)]

In [None]:
factors_2020.head()

In [None]:
# Fit a 52-component PCA on factors_2020, keep the projected rows, and show
# them as a DataFrame with default integer column labels.
clean_pca = PCA(n_components=52)
principalComponents = clean_pca.fit_transform(factors_2020)
principalDf = pd.DataFrame(principalComponents)
principalDf

In [None]:
# Plot each component's share of explained variance from the fitted PCA on a
# log-scaled y axis.
pd.Series(clean_pca.explained_variance_ratio_).plot(logy=True)

In [None]:
# numerical_only_df was never assigned in this notebook, so the cell failed on
# a fresh kernel. Build it from the numeric columns of factors_2020.
# NOTE(review): the source frame is an assumption - confirm which frame the
# heatmap was meant to summarise.
numerical_only_df = factors_2020.select_dtypes(include='number')

# seaborn is imported under its full name in this notebook; the `sns` alias
# was never defined.
seaborn.heatmap(numerical_only_df.corr(), annot=True, cmap="coolwarm")

In [1]:
# Lookup table from local_authority_district code to district name.
# Names are stored without surrounding whitespace: the entries for 285, 292
# and 500 previously carried a trailing space, which breaks exact string
# comparisons and shows up in labels.
local_authority_district = {
    1: 'Westminster',
    2: 'Camden',
    3: 'Islington',
    4: 'Hackney',
    5: 'Tower Hamlets',
    6: 'Greenwich',
    7: 'Lewisham',
    8: 'Southwark',
    9: 'Lambeth',
    10: 'Wandsworth',
    11: 'Hammersmith and Fulham',
    12: 'Kensington and Chelsea',
    13: 'Waltham Forest',
    14: 'Redbridge',
    15: 'Havering',
    16: 'Barking and Dagenham',
    17: 'Newham',
    18: 'Bexley',
    19: 'Bromley',
    20: 'Croydon',
    21: 'Sutton',
    22: 'Merton',
    23: 'Kingston upon Thames',
    24: 'Richmond upon Thames',
    25: 'Hounslow',
    26: 'Hillingdon',
    27: 'Ealing',
    28: 'Brent',
    29: 'Harrow',
    30: 'Barnet',
    31: 'Haringey',
    32: 'Enfield',
    33: 'Hertsmere',
    38: 'Epsom and Ewell',
    40: 'Spelthorne',
    57: 'London Airport (Heathrow)',
    60: 'Allerdale',
    61: 'Barrow-in-Furness',
    62: 'Carlisle',
    63: 'Copeland',
    64: 'Eden',
    65: 'South Lakeland',
    70: 'Blackburn with Darwen',
    71: 'Blackpool',
    72: 'Burnley',
    73: 'Chorley',
    74: 'Fylde',
    75: 'Hyndburn',
    76: 'Lancaster',
    77: 'Pendle',
    79: 'Preston',
    80: 'Ribble Valley',
    82: 'Rossendale',
    83: 'South Ribble',
    84: 'West Lancashire',
    85: 'Wyre',
    90: 'Knowsley',
    91: 'Liverpool',
    92: 'St. Helens',
    93: 'Sefton',
    95: 'Wirral',
    100: 'Bolton',
    101: 'Bury',
    102: 'Manchester',
    104: 'Oldham',
    106: 'Rochdale',
    107: 'Salford',
    109: 'Stockport',
    110: 'Tameside',
    112: 'Trafford',
    114: 'Wigan',
    120: 'Chester',
    121: 'Congleton',
    122: 'Crewe and Nantwich',
    123: 'Ellesmere Port and Neston',
    124: 'Halton',
    126: 'Macclesfield',
    127: 'Vale Royal',
    128: 'Warrington',
    129: 'Cheshire East',
    130: 'Cheshire West and Chester',
    139: 'Northumberland',
    140: 'Alnwick',
    141: 'Berwick-upon-Tweed',
    142: 'Blyth Valley',
    143: 'Castle Morpeth',
    144: 'Tynedale',
    145: 'Wansbeck',
    146: 'Gateshead',
    147: 'Newcastle upon Tyne',
    148: 'North Tyneside',
    149: 'South Tyneside',
    150: 'Sunderland',
    160: 'Chester-le-Street',
    161: 'Darlington',
    162: 'Derwentside',
    163: 'Durham',
    164: 'Easington',
    165: 'Sedgefield',
    166: 'Teesdale',
    168: 'Wear Valley',
    169: 'County Durham',
    180: 'Craven',
    181: 'Hambleton',
    182: 'Harrogate',
    184: 'Richmondshire',
    185: 'Ryedale',
    186: 'Scarborough',
    187: 'Selby',
    189: 'York',
    200: 'Bradford',
    202: 'Calderdale',
    203: 'Kirklees',
    204: 'Leeds',
    206: 'Wakefield',
    210: 'Barnsley',
    211: 'Doncaster',
    213: 'Rotherham',
    215: 'Sheffield',
    228: 'Kingston upon Hull, City of',
    231: 'East Riding of Yorkshire',
    232: 'North Lincolnshire',
    233: 'North East Lincolnshire',
    240: 'Hartlepool',
    241: 'Redcar and Cleveland',
    243: 'Middlesbrough',
    245: 'Stockton-on-Tees',
    250: 'Cannock Chase',
    251: 'East Staffordshire',
    252: 'Lichfield',
    253: 'Newcastle-under-Lyme',
    254: 'South Staffordshire',
    255: 'Stafford',
    256: 'Staffordshire Moorlands',
    257: 'Stoke-on-Trent',
    258: 'Tamworth',
    270: 'Bromsgrove',
    273: 'Malvern Hills',
    274: 'Redditch',
    276: 'Worcester',
    277: 'Wychavon',
    278: 'Wyre Forest',
    279: 'Bridgnorth',
    280: 'North Shropshire',
    281: 'Oswestry',
    282: 'Shrewsbury and Atcham',
    283: 'South Shropshire',
    284: 'Telford and Wrekin',
    285: 'Herefordshire, County of',
    286: 'Shropshire',
    290: 'North Warwickshire',
    291: 'Nuneaton and Bedworth',
    292: 'Rugby',
    293: 'Stratford-upon-Avon',
    294: 'Warwick',
    300: 'Birmingham',
    302: 'Coventry',
    303: 'Dudley',
    305: 'Sandwell',
    306: 'Solihull',
    307: 'Walsall',
    309: 'Wolverhampton',
    320: 'Amber Valley',
    321: 'Bolsover',
    322: 'Chesterfield',
    323: 'Derby',
    324: 'Erewash',
    325: 'High Peak',
    327: 'North East Derbyshire',
    328: 'South Derbyshire',
    329: 'Derbyshire Dales',
    340: 'Ashfield',
    341: 'Bassetlaw',
    342: 'Broxtowe',
    343: 'Gedling',
    344: 'Mansfield',
    345: 'Newark and Sherwood',
    346: 'Nottingham',
    347: 'Rushcliffe',
    350: 'Boston',
    351: 'East Lindsey',
    352: 'Lincoln',
    353: 'North Kesteven',
    354: 'South Holland',
    355: 'South Kesteven',
    356: 'West Lindsey',
    360: 'Blaby',
    361: 'Hinckley and Bosworth',
    362: 'Charnwood',
    363: 'Harborough',
    364: 'Leicester',
    365: 'Melton',
    366: 'North West Leicestershire',
    367: 'Oadby and Wigston',
    368: 'Rutland',
    380: 'Corby',
    381: 'Daventry',
    382: 'East Northamptonshire',
    383: 'Kettering',
    384: 'Northampton',
    385: 'South Northamptonshire',
    386: 'Wellingborough',
    390: 'Cambridge',
    391: 'East Cambridgeshire',
    392: 'Fenland',
    393: 'Huntingdonshire',
    394: 'Peterborough',
    395: 'South Cambridgeshire',
    400: 'Breckland',
    401: 'Broadland',
    402: 'Great Yarmouth',
    404: 'Norwich',
    405: 'North Norfolk',
    406: 'South Norfolk',
    407: 'Kings Lynn and West Norfolk',
    410: 'Babergh',
    411: 'Forest Heath',
    412: 'Ipswich',
    413: 'Mid Suffolk',
    414: 'St. Edmundsbury',
    415: 'Suffolk Coastal',
    416: 'Waveney',
    420: 'Bedford',
    421: 'Luton',
    422: 'Mid Bedfordshire',
    423: 'South Bedfordshire',
    424: 'Central Bedfordshire',
    430: 'Broxbourne',
    431: 'Dacorum',
    432: 'East Hertfordshire',
    433: 'North Hertfordshire',
    434: 'St. Albans',
    435: 'Stevenage',
    436: 'Three Rivers',
    437: 'Watford',
    438: 'Welwyn Hatfield',
    450: 'Basildon',
    451: 'Braintree',
    452: 'Brentwood',
    453: 'Castle Point',
    454: 'Chelmsford',
    455: 'Colchester',
    456: 'Epping Forest',
    457: 'Harlow',
    458: 'Maldon',
    459: 'Rochford',
    460: 'Southend-on-Sea',
    461: 'Tendring',
    462: 'Thurrock',
    463: 'Uttlesford',
    470: 'Bracknell Forest',
    471: 'West Berkshire',
    472: 'Reading',
    473: 'Slough',
    474: 'Windsor and Maidenhead',
    475: 'Wokingham',
    476: 'Aylesbury Vale',
    477: 'South Bucks',
    478: 'Chiltern',
    479: 'Milton Keynes',
    480: 'Wycombe',
    481: 'Cherwell',
    482: 'Oxford',
    483: 'Vale of White Horse',
    484: 'South Oxfordshire',
    485: 'West Oxfordshire',
    490: 'Basingstoke and Deane',
    491: 'Eastleigh',
    492: 'Fareham',
    493: 'Gosport',
    494: 'Hart',
    495: 'Havant',
    496: 'New Forest',
    497: 'East Hampshire',
    498: 'Portsmouth',
    499: 'Rushmoor',
    500: 'Southampton',
    501: 'Test Valley',
    502: 'Winchester',
    505: 'Isle of Wight',
    510: 'Elmbridge',
    511: 'Guildford',
    512: 'Mole Valley',
    513: 'Reigate and Banstead',
    514: 'Runnymede',
    515: 'Surrey Heath',
    516: 'Tandridge',
    517: 'Waverley',
    518: 'Woking',
    530: 'Ashford',
    531: 'Canterbury',
    532: 'Dartford',
    533: 'Dover',
    535: 'Gravesham',
    536: 'Maidstone',
    538: 'Sevenoaks',
    539: 'Shepway',
    540: 'Swale',
    541: 'Thanet',
    542: 'Tonbridge and Malling',
    543: 'Tunbridge Wells',
    544: 'Medway',
    551: 'Eastbourne',
    552: 'Hastings',
    554: 'Lewes',
    555: 'Rother',
    556: 'Wealden',
    557: 'Adur',
    558: 'Arun',
    559: 'Chichester',
    560: 'Crawley',
    562: 'Horsham',
    563: 'Mid Sussex',
    564: 'Worthing',
    565: 'Brighton and Hove',
    570: 'City of London',
    580: 'East Devon',
    581: 'Exeter',
    582: 'North Devon',
    583: 'Plymouth',
    584: 'South Hams',
    585: 'Teignbridge',
    586: 'Mid Devon',
    587: 'Torbay',
    588: 'Torridge',
    589: 'West Devon',
    590: 'Caradon',
    591: 'Carrick',
    592: 'Kerrier',
    593: 'North Cornwall',
    594: 'Penwith',
    595: 'Restormel',
    596: 'Cornwall',
    601: 'Bristol, City of',
    605: 'North Somerset',
    606: 'Mendip',
    607: 'Sedgemoor',
    608: 'Taunton Deane',
    609: 'West Somerset',
    610: 'South Somerset',
    611: 'Bath and North East Somerset',
    612: 'South Gloucestershire',
    620: 'Cheltenham',
    621: 'Cotswold',
    622: 'Forest of Dean',
    623: 'Gloucester',
    624: 'Stroud',
    625: 'Tewkesbury',
    630: 'Kennet',
    631: 'North Wiltshire',
    632: 'Salisbury',
    633: 'Swindon',
    634: 'West Wiltshire',
    635: 'Wiltshire',
    640: 'Bournemouth',
    641: 'Christchurch',
    642: 'North Dorset',
    643: 'Poole',
    644: 'Purbeck',
    645: 'West Dorset',
    646: 'Weymouth and Portland',
    647: 'East Dorset',
    720: 'Isle of Anglesey',
    721: 'Conwy',
    722: 'Gwynedd',
    723: 'Denbighshire',
    724: 'Flintshire',
    725: 'Wrexham',
    730: 'Blaenau Gwent',
    731: 'Caerphilly',
    732: 'Monmouthshire',
    733: 'Newport',
    734: 'Torfaen',
    740: 'Bridgend',
    741: 'Cardiff',
    742: 'Merthyr Tydfil',
    743: 'Neath Port Talbot',
    744: 'Rhondda, Cynon, Taff',
    745: 'Swansea',
    746: 'The Vale of Glamorgan',
    750: 'Ceredigion',
    751: 'Carmarthenshire',
    752: 'Pembrokeshire',
    753: 'Powys',
    910: 'Aberdeen City',
    911: 'Aberdeenshire',
    912: 'Angus',
    913: 'Argyll and Bute',
    914: 'Scottish Borders',
    915: 'Clackmannanshire',
    916: 'West Dunbartonshire',
    917: 'Dumfries and Galloway',
    918: 'Dundee City',
    919: 'East Ayrshire',
    920: 'East Dunbartonshire',
    921: 'East Lothian',
    922: 'East Renfrewshire',
    923: 'Edinburgh, City of',
    924: 'Falkirk',
    925: 'Fife',
    926: 'Glasgow City',
    927: 'Highland',
    928: 'Inverclyde',
    929: 'Midlothian',
    930: 'Moray',
    931: 'North Ayrshire',
    932: 'North Lanarkshire',
    933: 'Orkney Islands',
    934: 'Perth and Kinross',
    935: 'Renfrewshire',
    936: 'Shetland Islands',
    937: 'South Ayrshire',
    938: 'South Lanarkshire',
    939: 'Stirling',
    940: 'West Lothian',
    941: 'Western Isles',
}

In [4]:
# Print every district code, each followed by an empty line.
for district_code in local_authority_district:
    print(f'{district_code}\n')

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

38

40

57

60

61

62

63

64

65

70

71

72

73

74

75

76

77

79

80

82

83

84

85

90

91

92

93

95

100

101

102

104

106

107

109

110

112

114

120

121

122

123

124

126

127

128

129

130

139

140

141

142

143

144

145

146

147

148

149

150

160

161

162

163

164

165

166

168

169

180

181

182

184

185

186

187

189

200

202

203

204

206

210

211

213

215

228

231

232

233

240

241

243

245

250

251

252

253

254

255

256

257

258

270

273

274

276

277

278

279

280

281

282

283

284

285

286

290

291

292

293

294

300

302

303

305

306

307

309

320

321

322

323

324

325

327

328

329

340

341

342

343

344

345

346

347

350

351

352

353

354

355

356

360

361

362

363

364

365

366

367

368

380

381

382

383

384

385

386

390

391

392

393

394

395

400

401

402

404

405

406

