# Libraries

In [77]:
import pandas as pd

# Reading the CSVs

In [78]:
# Create a list of all the individual DataFrames
dfs = [pd.read_csv(f"../../Data/Police_Data/Crime_Data/crime_2021_wLocations/clean_met_2021_{month:02}.csv") for month in range(1, 13)]

In [79]:
count = 0 
for df in dfs:
    count += df.shape[0]
    print(df.shape[0])

print("Total Instances:",count)

65852
64522
74507
72481
77888
79838
80119
75879
78039
81572
79268
72555
Total Instances: 902520


# Concatenating the dataframes into one

In [80]:
# Concatenate all the DataFrames in the list into one
met_2021 = pd.concat(dfs, ignore_index=True)
met_2021

Unnamed: 0,Month,Longitude,Latitude,Borough
0,2021-01,0.880386,51.219190,Ashford
1,2021-01,0.145888,51.593835,Barking and Dagenham
2,2021-01,0.134947,51.588063,Barking and Dagenham
3,2021-01,0.140192,51.582311,Barking and Dagenham
4,2021-01,0.141143,51.590873,Barking and Dagenham
...,...,...,...,...
902515,2021-12,-0.606664,51.485155,Windsor and Maidenhead
902516,2021-12,-0.608141,51.483968,Windsor and Maidenhead
902517,2021-12,-0.580030,51.482030,Windsor and Maidenhead
902518,2021-12,-0.610450,51.480497,Windsor and Maidenhead


# Dropping the instances with null values

In [81]:
met_2021.isna().sum()

Month           0
Longitude       0
Latitude        0
Borough      4556
dtype: int64

In [82]:
met_2021.dropna(subset=['Borough'], inplace=True)

In [83]:
met_2021

Unnamed: 0,Month,Longitude,Latitude,Borough
0,2021-01,0.880386,51.219190,Ashford
1,2021-01,0.145888,51.593835,Barking and Dagenham
2,2021-01,0.134947,51.588063,Barking and Dagenham
3,2021-01,0.140192,51.582311,Barking and Dagenham
4,2021-01,0.141143,51.590873,Barking and Dagenham
...,...,...,...,...
902515,2021-12,-0.606664,51.485155,Windsor and Maidenhead
902516,2021-12,-0.608141,51.483968,Windsor and Maidenhead
902517,2021-12,-0.580030,51.482030,Windsor and Maidenhead
902518,2021-12,-0.610450,51.480497,Windsor and Maidenhead


In [84]:
met_2021.isna().sum()

Month        0
Longitude    0
Latitude     0
Borough      0
dtype: int64

In [85]:
# drop long and lat columns
met_2021.drop(columns=['Longitude', 'Latitude'], inplace=True)
met_2021

Unnamed: 0,Month,Borough
0,2021-01,Ashford
1,2021-01,Barking and Dagenham
2,2021-01,Barking and Dagenham
3,2021-01,Barking and Dagenham
4,2021-01,Barking and Dagenham
...,...,...
902515,2021-12,Windsor and Maidenhead
902516,2021-12,Windsor and Maidenhead
902517,2021-12,Windsor and Maidenhead
902518,2021-12,Windsor and Maidenhead


# Checking what Locations have been identified by the location procassing

Apparently some locations identified by the script were not in London Boroughs. Therefore, only the London boroughs have to be retained.

In [86]:
boroughs = met_2021['Borough'].unique()
boroughs

array(['Ashford', 'Barking and Dagenham', 'Redbridge',
       'No postal code found', 'Havering', 'Barnet', 'Enfield', 'Harrow',
       'Haringey', 'Brent', 'Camden', 'Basildon', 'Bassetlaw', 'Bedford',
       'Bexley', 'Greenwich', 'Bromley', 'Birmingham', 'Bolton',
       'Bournemouth, Christchurch and Poole', 'Bracknell Forest',
       'Ealing', 'Westminster', 'Hammersmith and Fulham', 'Brentwood',
       'Bristol, City of', 'Lewisham', 'Southwark', 'Croydon',
       'Broxbourne', 'Cambridge', 'Islington', 'City of London',
       'Canterbury', 'Central Bedfordshire', 'Chelmsford', 'Cherwell',
       'Cheshire West and Chester', 'Tower Hamlets', 'Colchester',
       'Copeland', 'Coventry', 'Crawley', 'Sutton', 'Tandridge',
       'Dacorum', 'Dartford', 'Dover', 'Hillingdon', 'Hounslow',
       'East Cambridgeshire', 'East Hertfordshire', 'Elmbridge',
       'Waltham Forest', 'Epping Forest', 'Epsom and Ewell', 'Fenland',
       'Fylde', 'Gateshead', 'Gravesham', 'Guildford', 'Hackne

In [87]:
boroughs.shape[0]

312

In [88]:
met_2021[met_2021['Borough'] == 'Manchester']

Unnamed: 0,Month,Borough
181513,2021-03,Manchester
181514,2021-03,Manchester
254100,2021-04,Manchester
329995,2021-05,Manchester
329996,2021-05,Manchester
329997,2021-05,Manchester
329998,2021-05,Manchester
329999,2021-05,Manchester
488825,2021-07,Manchester
488826,2021-07,Manchester


In [89]:
correct_boroughs = ['Kensington and Chelsea', 'Hounslow', 'Ealing', 'Westminster',
       'Southwark', 'Wandsworth', 'Hillingdon', 'Bexley',
       'Haringey', 'Tower Hamlets', 'Camden', 'Lambeth', 'Barnet',
       'Bromley', 'Sutton', 'Brent', 'Greenwich', 'Merton', 'Enfield',
       'Redbridge', 'Richmond upon Thames', 'Croydon', 'Waltham Forest',
       'Newham', 'Lewisham', 'Kingston upon Thames', 'Harrow',
       'Islington', 'Hackney', 'Havering', 'Hammersmith and Fulham',
       'Barking and Dagenham']

In [90]:
# met_sas_2021[met_sas_2021['Borough'] == 'Slough']
met_2021_correct = met_2021.loc[met_2021['Borough'].isin(correct_boroughs)]
met_2021_correct

Unnamed: 0,Month,Borough
1,2021-01,Barking and Dagenham
2,2021-01,Barking and Dagenham
3,2021-01,Barking and Dagenham
4,2021-01,Barking and Dagenham
5,2021-01,Barking and Dagenham
...,...,...
902504,2021-12,Westminster
902505,2021-12,Westminster
902506,2021-12,Westminster
902507,2021-12,Westminster


In [91]:
correct_boroughs = met_2021_correct['Borough'].unique()
len(correct_boroughs)

32

In [92]:
met_2021_correct.shape[0]

891395

In [93]:
met_2021_correct

Unnamed: 0,Month,Borough
1,2021-01,Barking and Dagenham
2,2021-01,Barking and Dagenham
3,2021-01,Barking and Dagenham
4,2021-01,Barking and Dagenham
5,2021-01,Barking and Dagenham
...,...,...
902504,2021-12,Westminster
902505,2021-12,Westminster
902506,2021-12,Westminster
902507,2021-12,Westminster


In [94]:
# group by borough and borough and count
met_2021_correct['Month'] = met_2021_correct['Month'].astype('datetime64[ns]')
met_2021_correct['Month'] = met_2021_correct['Month'].dt.strftime('%Y')
met_2021_correct

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  met_2021_correct['Month'] = met_2021_correct['Month'].astype('datetime64[ns]')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  met_2021_correct['Month'] = met_2021_correct['Month'].dt.strftime('%Y')


Unnamed: 0,Month,Borough
1,2021,Barking and Dagenham
2,2021,Barking and Dagenham
3,2021,Barking and Dagenham
4,2021,Barking and Dagenham
5,2021,Barking and Dagenham
...,...,...
902504,2021,Westminster
902505,2021,Westminster
902506,2021,Westminster
902507,2021,Westminster


In [96]:
# count the number of instances for each borough and month and add to a new column called 'Count' 
met_2021_correct['Count'] = met_2021_correct.groupby(['Borough', 'Month'])['Borough'].transform('count')
met_2021_correct


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  met_2021_correct['Count'] = met_2021_correct.groupby(['Borough', 'Month'])['Borough'].transform('count')


Unnamed: 0,Month,Borough,Count
1,2021,Barking and Dagenham,20964
2,2021,Barking and Dagenham,20964
3,2021,Barking and Dagenham,20964
4,2021,Barking and Dagenham,20964
5,2021,Barking and Dagenham,20964
...,...,...,...
902504,2021,Westminster,53164
902505,2021,Westminster,53164
902506,2021,Westminster,53164
902507,2021,Westminster,53164


In [99]:
# drop duplicates
met_2021_correct.drop_duplicates(subset=['Borough', 'Month'], inplace=True)
met_2021_correct.reset_index(drop=True, inplace=True)
met_2021_correct

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  met_2021_correct.drop_duplicates(subset=['Borough', 'Month'], inplace=True)


Unnamed: 0,Month,Borough,Count
0,2021,Barking and Dagenham,20964
1,2021,Redbridge,27360
2,2021,Havering,19512
3,2021,Barnet,31669
4,2021,Enfield,33356
5,2021,Harrow,17434
6,2021,Haringey,32366
7,2021,Brent,31809
8,2021,Camden,31562
9,2021,Bexley,17729


# Exporting the concatenated dataframes to a CSV 

In [100]:
# export to csv
met_2021_correct.to_csv('../../Data/Police_Data/Crime_Data/crime_2021_wLocations/crime_cleaned.csv', index=False)