# GEOG5990M Final Project Assignment

Student ID number:  201777629 

# Project Introduce

In [1]:
# this notebook run on the 'spatial_env' environment kernel
# load required packages
import pandas 
import seaborn 
import numpy 
import matplotlib.pyplot as mplpplt
import os
import zipfile

import pyproj
import contextily as ctx
import geopandas 
import geoplot as gplt
import geoplot.crs as gcrs

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas


# Data Pre-Process

## load datas

### migrant indicator from 2021 census

In [2]:
## the migrant indicator data from 2021 Census Topic Summaries, TS019 - Migrant Indicator
# this file contains multipule rows with notes that are not part of data, if reading by default will result in a dataframe with structural errors
Migrant_Indicator_wrongread = pandas.read_csv('Datas/csnsus2021_TS019 - Migrant Indicator.csv',delimiter="\t")
Migrant_Indicator_wrongread
# obviously, pandas treated the title "TS019 - Migrant Indicator" as a column

Unnamed: 0,TS019 - Migrant Indicator
0,ONS Crown Copyright Reserved [from Nomis on 22...
1,"Population :,""All usual residents"""
2,"Units :,""Persons"""
3,"Date :,""2021"""
4,"2021 super output area - lower layer,""mnemonic..."
...,...
493,",""In order to protect against disclosure of pe..."
494,",""have been swapped between different geograph..."
495,",""by small amounts. Small counts at the lowest..."
496,",""affected."""


In [3]:
# skip the first 7 row to avoid that 
Migrant_Indicator_rightread = pandas.read_csv('Datas/csnsus2021_TS019 - Migrant Indicator.csv', skiprows=7)
Migrant_Indicator_rightread
# structure correctly read

Unnamed: 0,2021 super output area - lower layer,mnemonic,Total: All usual residents,Address one year ago is the same as the address of enumeration,Address one year ago is student term-time or boarding school address in the UK,Migrant from within the UK: Address one year ago was in the UK,Migrant from outside the UK: Address one year ago was outside the UK
0,Leeds 001A,E01011698,1448.0,1367.0,5.0,75.0,1.0
1,Leeds 001B,E01011699,1278.0,1184.0,3.0,87.0,4.0
2,Leeds 001C,E01011701,1356.0,1224.0,2.0,126.0,4.0
3,Leeds 001D,E01011702,1631.0,1390.0,9.0,203.0,29.0
4,Leeds 001E,E01011703,1219.0,1137.0,6.0,74.0,2.0
...,...,...,...,...,...,...,...
488,,In order to protect against disclosure of pers...,,,,,
489,,have been swapped between different geographic...,,,,,
490,,by small amounts. Small counts at the lowest g...,,,,,
491,,affected.,,,,,


### west yorkshire street crime from uk police

In [None]:
## Crime data of 2021.4 to 2021.12 of West Yorkshire street
#the zip file has a very long file name made by source, which may cause problem with windows file explorer, so rename it
os.rename('Datas/c3b4b2b813b856dd673e42b11b7438cbdd90a5db.zip','Datas/crimeLeeds202104to12.zip')

In [4]:
#.csv file of each month is in 1 layer of folders in zip file, make a loop to read them, this step can be skiped by manually copy them out from individual folders
CrimeWestyorkshire2021_List = [None]*9 #create a list to store dataframes
CrimeWestyorkshire2021_zip = zipfile.ZipFile('Datas/crimeLeeds202104to12.zip')
#the folder structure is zipfile/2021-xx/2021-xx-west-yorkshire-street.csv 
for readnum in range(9):# due to the uk police data website's limitation, only 9 months' rather than 12 months' data are accessable 
    if readnum < 6 : pathInCrimeZip = "2021-0"+str(readnum+4)+"/2021-0"+str(readnum+4)+"-west-yorkshire-street.csv"
    else : pathInCrimeZip = "2021-"+str(readnum+4)+"/2021-"+str(readnum+4)+"-west-yorkshire-street.csv"
    CrimeWestyorkshire2021_List[readnum] = pandas.read_csv(CrimeWestyorkshire2021_zip.open(pathInCrimeZip))


### LSOA shape file in 2021 census version

In [7]:
## shape file of 2021 census version of Lower layer Super Output Areas of England&Wales
LSOA_EW_2021 = geopandas.read_file('Datas/Lower_layer_Super_Output_Areas_2021_EW_BFC_V8_8154990398368723939.zip')

## remove irrelevant parts & null

### the migrant indicator data
#### remove irrelevant information
this .csv file has some notes and record cells, which need to be removed 

In [8]:
## the migrant indicator data
# during the read process the notes at head of the form has been removed, check tail
Migrant_Indicator_rightread.tail()

Unnamed: 0,2021 super output area - lower layer,mnemonic,Total: All usual residents,Address one year ago is the same as the address of enumeration,Address one year ago is student term-time or boarding school address in the UK,Migrant from within the UK: Address one year ago was in the UK,Migrant from outside the UK: Address one year ago was outside the UK
488,,In order to protect against disclosure of pers...,,,,,
489,,have been swapped between different geographic...,,,,,
490,,by small amounts. Small counts at the lowest g...,,,,,
491,,affected.,,,,,
492,,,,,,,


In [9]:
# this datafram still contain some personal privacy related notes, so drop them
Migrant_Indicator_dataonly = Migrant_Indicator_rightread.drop([488,489,490,491])
Migrant_Indicator_dataonly.tail()

Unnamed: 0,2021 super output area - lower layer,mnemonic,Total: All usual residents,Address one year ago is the same as the address of enumeration,Address one year ago is student term-time or boarding school address in the UK,Migrant from within the UK: Address one year ago was in the UK,Migrant from outside the UK: Address one year ago was outside the UK
484,Leeds 112A,E01011467,1966.0,1608.0,16.0,308.0,34.0
485,Leeds 112B,E01011468,1996.0,1775.0,3.0,197.0,21.0
486,Leeds 112C,E01011470,1262.0,1162.0,5.0,91.0,4.0
487,Leeds 112F,E01035054,2553.0,1379.0,76.0,967.0,131.0
492,,,,,,,


#### remove null
after remove notes, re move null rows

In [10]:
# looks like there is a null row, check the sum of null
Migrant_Indicator_dataonly.isna().sum()

2021 super output area - lower layer                                              1
mnemonic                                                                          1
Total: All usual residents                                                        1
Address one year ago is the same as the address of enumeration                    1
Address one year ago is student term-time or boarding school address in the UK    1
Migrant from within the UK: Address one year ago was in the UK                    1
Migrant from outside the UK: Address one year ago was outside the UK              1
dtype: int64

In [11]:
# only one row, which is obviously the row at the end
Migrant_Indicator_dataonly = Migrant_Indicator_dataonly.dropna()
Migrant_Indicator_dataonly.isna().sum()

2021 super output area - lower layer                                              0
mnemonic                                                                          0
Total: All usual residents                                                        0
Address one year ago is the same as the address of enumeration                    0
Address one year ago is student term-time or boarding school address in the UK    0
Migrant from within the UK: Address one year ago was in the UK                    0
Migrant from outside the UK: Address one year ago was outside the UK              0
dtype: int64

### west yorkshire street crime data
the street crime data contains the record of whole west yorkshire, but the study/research area is Leeds, so try to remove datas out of Leeds

In [12]:
# check the actually data in each conlums by show the first form (data of 2021/4) as an example
CrimeWestyorkshire2021_List[0]

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context
0,d9e3417dad8742d480aea5d30b11ae788ed6b7aa267a29...,2021-04,West Yorkshire Police,West Yorkshire Police,-1.905890,53.930295,On or near Brown Bank Lane,E01010646,Bradford 001A,Burglary,Investigation complete; no suspect identified,
1,bc4e6efed170af421d98c39e2854e91bf3b933cfdcb4b2...,2021-04,West Yorkshire Police,West Yorkshire Police,-1.877784,53.948129,On or near The Paddock,E01010646,Bradford 001A,Violence and sexual offences,Unable to prosecute suspect,
2,5fd3d231aad8778fa624377045059a8dc4aa3779c3b9c7...,2021-04,West Yorkshire Police,West Yorkshire Police,-1.877784,53.948129,On or near The Paddock,E01010646,Bradford 001A,Violence and sexual offences,Unable to prosecute suspect,
3,7f2d605e3e74585a07e14aa0b04d9a78221e128fe9f98d...,2021-04,West Yorkshire Police,West Yorkshire Police,-1.879359,53.946225,On or near The Acres,E01010646,Bradford 001A,Violence and sexual offences,Unable to prosecute suspect,
4,c525f6487edef046c82c0adb00492c9a8bc8de3ec29d71...,2021-04,West Yorkshire Police,West Yorkshire Police,-1.887983,53.945918,On or near Burns Hill,E01010647,Bradford 001B,Drugs,Local resolution,
...,...,...,...,...,...,...,...,...,...,...,...,...
26559,01c4ba97c4c9e4ddec7a778ec1bfda556cb6c530a0909c...,2021-04,West Yorkshire Police,West Yorkshire Police,,,No Location,,,Other crime,Investigation complete; no suspect identified,
26560,62553e361b5f53b1701b15fd17e6508759b69f459b41f3...,2021-04,West Yorkshire Police,West Yorkshire Police,,,No Location,,,Other crime,Unable to prosecute suspect,
26561,69bbd15b2c8a1b6a26bc1d6c54d31982a1676bd8903828...,2021-04,West Yorkshire Police,West Yorkshire Police,,,No Location,,,Other crime,Status update unavailable,
26562,60a8617763b6e44f26d971c110a9f4c02450ab78c0e956...,2021-04,West Yorkshire Police,West Yorkshire Police,,,No Location,,,Other crime,Status update unavailable,


in the migrant indicator, the '2021 super output area - lower layer' is lsoa name in 2021census, while 'mnemonic' is lsoa code. This form also have these two variables named as 'LSOA name' and 'LSOA code'

#### remove null

before starting filting, noticing taht some rows don't have information about location, latitude and longitude, and MOST IMPORTANT, LSOA code&name
the LSOA name is the key variable we used for scale the data to study area - Leeds, null data will prevent filting, so for this data, remove the null/nan/none first

In [28]:
# firstly, have a look of each month's data's null cell information
nullofcrimeWY = [None]*9
for index in range(9):
    nullofcrimeWY[index] = CrimeWestyorkshire2021_List[index].isna().sum()
    nullofcrimeWY[index]['total row'] = CrimeWestyorkshire2021_List[index].shape[0]# add another conlumn to have a total number to compare with
pandas.DataFrame(nullofcrimeWY)

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,total row
0,4000,0,0,0,529,529,0,529,529,0,4000,26564,26564
1,3564,0,0,0,589,589,0,589,589,0,3564,27377,27377
2,4048,0,0,0,632,632,0,632,632,0,4048,28971,28971
3,4168,0,0,0,612,612,0,612,612,0,4168,29547,29547
4,3629,0,0,0,641,641,0,641,641,0,3629,28107,28107
5,2939,0,0,0,671,671,0,671,671,0,2939,27980,27980
6,3044,0,0,0,620,620,0,620,620,0,3044,29381,29381
7,2218,0,0,0,585,585,0,585,585,0,2218,28041,28041
8,1632,0,0,0,493,493,0,493,493,0,1632,25793,25793


In [30]:
# create a new variable for no none west yorkshire crime data list
CrimeWestyorkshire2021_List_N = [None]*9
# drop the rows don't have the most important lsoa code
for dropcrimeindex in range(9):
    currentForm  = CrimeWestyorkshire2021_List[dropcrimeindex]
    CrimeWestyorkshire2021_List_N[dropcrimeindex] = currentForm.loc[currentForm['LSOA name'].notna()]

for index in range(9):
    nullofcrimeWY[index] = CrimeWestyorkshire2021_List_N[index].isna().sum()
    nullofcrimeWY[index]['total row'] = CrimeWestyorkshire2021_List_N[index].shape[0]# add another conlumn to have a total number to compare with
pandas.DataFrame(nullofcrimeWY)
# by removing null data in 'LSOA name', the null data in 'Longitude ''Latitude''Location'and'LSOA code'are also removed
# This indicates that these removed cases have consistency in the absence of these variables, so we could skip the steps of removing Nan of them

Unnamed: 0,Crime ID,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,Context,total row
0,3958,0,0,0,0,0,0,0,0,0,3958,26035,26035
1,3517,0,0,0,0,0,0,0,0,0,3517,26788,26788
2,3997,0,0,0,0,0,0,0,0,0,3997,28339,28339
3,4121,0,0,0,0,0,0,0,0,0,4121,28935,28935
4,3600,0,0,0,0,0,0,0,0,0,3600,27466,27466
5,2906,0,0,0,0,0,0,0,0,0,2906,27309,27309
6,3015,0,0,0,0,0,0,0,0,0,3015,28761,28761
7,2201,0,0,0,0,0,0,0,0,0,2201,27456,27456
8,1624,0,0,0,0,0,0,0,0,0,1624,25300,25300


For rest three variables contains lots of Nan rows: (removing irrelevant partly starts here too)
1. From the previous example, the 'Crime ID' variable contains the data of 64 characters consisted by integers and lowercase letters. This IDs are ambiguous and can't comtribute to this project since it's not data. It's whole conlumn is useless, So drop this conlumn instead of the Nan rows of it.
2. 'Last outcome category' have 13 kinds of reslut tpye, might useful for later analysis. So drop the rows with null cell of 'Last outcome category'.
3. 'Context' has same values with the total number of rows of the forms, which means this conlumn is completely empty. So drop it all.


In [None]:
# drop whole conlumn of 'Crime ID' and 'Context', also remove null rows in 'Last outcome category' by using subset parameter
for index in range(9):
    CrimeWestyorkshire2021_List_N[index].drop(columns=['Crime ID','Context'], inplace=True)
    CrimeWestyorkshire2021_List_N[index].dropna(subset=['Last outcome category'],inplace=True)

In [36]:
# check the null information again 
for index in range(9):
    nullofcrimeWY[index] = CrimeWestyorkshire2021_List_N[index].isna().sum()
    nullofcrimeWY[index]['total row'] = CrimeWestyorkshire2021_List_N[index].shape[0]# add another conlumn to have a total number to compare with
pandas.DataFrame(nullofcrimeWY)
# success, now the Nan are all removed

Unnamed: 0,Month,Reported by,Falls within,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,total row
0,0,0,0,0,0,0,0,0,0,0,22077
1,0,0,0,0,0,0,0,0,0,0,23271
2,0,0,0,0,0,0,0,0,0,0,24342
3,0,0,0,0,0,0,0,0,0,0,24814
4,0,0,0,0,0,0,0,0,0,0,23866
5,0,0,0,0,0,0,0,0,0,0,24403
6,0,0,0,0,0,0,0,0,0,0,25746
7,0,0,0,0,0,0,0,0,0,0,25255
8,0,0,0,0,0,0,0,0,0,0,23676


#### remove irrelevant
1. From the previous example, the 'Reported by' and 'Falls within' variable are character string. Because when downloading, the 'Forces' choice on https://data.police.uk/data/ is 'West Yorkshire Police' , so in this data these 2 variables only have one value - 'West Yorkshire Police'. So these two variables are also useless, remove/drop them.
2. For 'Longitude','Latitude','Location', they contains location information, might be useful in future, so keep them.	
3. For 'Crime type', useful for classfication, keep it.
4. For 'Month', not sure if it will be used, keep it too.


In [37]:
# drop whole conlumn of 'Reported by' and 'Falls within'
for index in range(9):
    CrimeWestyorkshire2021_List_N[index].drop(columns=['Reported by','Falls within'], inplace=True)
CrimeWestyorkshire2021_List_N[0]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CrimeWestyorkshire2021_List_N[index].drop(columns=['Reported by','Falls within'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CrimeWestyorkshire2021_List_N[index].drop(columns=['Reported by','Falls within'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  CrimeWestyorkshire2021_List_N[index].drop(columns=['Reported by','Falls within'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in th

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category
0,2021-04,-1.905890,53.930295,On or near Brown Bank Lane,E01010646,Bradford 001A,Burglary,Investigation complete; no suspect identified
1,2021-04,-1.877784,53.948129,On or near The Paddock,E01010646,Bradford 001A,Violence and sexual offences,Unable to prosecute suspect
2,2021-04,-1.877784,53.948129,On or near The Paddock,E01010646,Bradford 001A,Violence and sexual offences,Unable to prosecute suspect
3,2021-04,-1.879359,53.946225,On or near The Acres,E01010646,Bradford 001A,Violence and sexual offences,Unable to prosecute suspect
4,2021-04,-1.887983,53.945918,On or near Burns Hill,E01010647,Bradford 001B,Drugs,Local resolution
...,...,...,...,...,...,...,...,...
26030,2021-04,-1.323984,53.591239,On or near Grove Street,E01011872,Wakefield 045D,Vehicle crime,Status update unavailable
26031,2021-04,-1.331689,53.591255,On or near Holmsley Grove,E01011872,Wakefield 045D,Violence and sexual offences,Unable to prosecute suspect
26032,2021-04,-1.331445,53.590462,On or near Marion Close,E01011872,Wakefield 045D,Violence and sexual offences,Investigation complete; no suspect identified
26033,2021-04,-1.324140,53.589981,On or near Grove Mount,E01011872,Wakefield 045D,Violence and sexual offences,Unable to prosecute suspect


Start scaling data range

In [38]:
# this form also have lsoa name, use this by 'Leeds' to filte it
# create a new variable to store scaled data 
CrimeLeeds2021_List = [None]*9
for currentFormindex in range(9):
    currentForm = CrimeWestyorkshire2021_List_N[currentFormindex]
    CrimeLeeds2021_List[currentFormindex] = currentForm.loc[currentForm['LSOA name'].str.contains('Leeds')]

In [42]:
# have a look at new leeds street crime data
CrimeLeeds2021_List[0].info()
# well scaled

<class 'pandas.core.frame.DataFrame'>
Index: 7697 entries, 13172 to 22211
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Month                  7697 non-null   object 
 1   Longitude              7697 non-null   float64
 2   Latitude               7697 non-null   float64
 3   Location               7697 non-null   object 
 4   LSOA code              7697 non-null   object 
 5   LSOA name              7697 non-null   object 
 6   Crime type             7697 non-null   object 
 7   Last outcome category  7697 non-null   object 
dtypes: float64(2), object(6)
memory usage: 541.2+ KB


In [41]:
# also have a look on new dataframe's nullcell information and each month's total street crime number
nullofcrimeLe = [None]*9
for index in range(9):
    nullofcrimeLe[index] = CrimeLeeds2021_List[index].isna().sum()
    nullofcrimeLe[index]['total row'] = CrimeLeeds2021_List[index].shape[0]# add another conlumn to have a total number to compare with
pandas.DataFrame(nullofcrimeLe)

Unnamed: 0,Month,Longitude,Latitude,Location,LSOA code,LSOA name,Crime type,Last outcome category,total row
0,0,0,0,0,0,0,0,0,7697
1,0,0,0,0,0,0,0,0,8331
2,0,0,0,0,0,0,0,0,8657
3,0,0,0,0,0,0,0,0,8910
4,0,0,0,0,0,0,0,0,8844
5,0,0,0,0,0,0,0,0,8907
6,0,0,0,0,0,0,0,0,9523
7,0,0,0,0,0,0,0,0,9240
8,0,0,0,0,0,0,0,0,8623


### LSOA shape file
#### remove irrelevant
the originial file contains the LSOAs of ENgland and wales, since the study/analysis area is Leeds, remove unnecessary areas to scale it

In [43]:
# have a look of what kindof  data each conlumns actually contains
LSOA_EW_2021

Unnamed: 0,LSOA21CD,LSOA21NM,BNG_E,BNG_N,LONG,LAT,GlobalID,geometry
0,E01000001,City of London 001A,532123,181632,-0.097140,51.5182,ba0bf04c-03e6-4748-acf5-9dd1219c1860,"POLYGON ((532151.537 181867.433, 532152.500 18..."
1,E01000002,City of London 001B,532480,181715,-0.091970,51.5188,f4b205a5-5f68-4161-a014-5a2f51681597,"POLYGON ((532634.497 181926.016, 532632.048 18..."
2,E01000003,City of London 001C,532239,182033,-0.095320,51.5217,76c0a1fd-e446-4c40-9d5d-c98d689f23d5,"POLYGON ((532153.703 182165.155, 532158.250 18..."
3,E01000005,City of London 001E,533581,181283,-0.076270,51.5147,13d65a8b-5e13-443a-b355-5775794de964,"POLYGON ((533619.062 181402.364, 533639.868 18..."
4,E01000006,Barking and Dagenham 016A,544994,184274,0.089317,51.5387,06953962-0ae1-417a-8357-b314a1fb5716,"POLYGON ((545126.852 184310.838, 545145.213 18..."
...,...,...,...,...,...,...,...,...
35667,W01002036,Vale of Glamorgan 005G,317939,172435,-3.182170,51.4449,f1813c4c-3ba5-410d-8d38-3b3f07f8be6e,"POLYGON ((317808.500 172697.500, 317828.403 17..."
35668,W01002037,Vale of Glamorgan 005H,318527,172406,-3.173710,51.4448,05ee0cfe-ebd3-41ee-990e-bc789acf97d7,"POLYGON ((319033.579 172604.209, 319009.058 17..."
35669,W01002038,Vale of Glamorgan 014G,306491,167360,-3.345520,51.3975,145d894e-1553-44c7-91f0-c2f3e70ac846,"POLYGON ((306372.000 168726.000, 306363.000 16..."
35670,W01002039,Vale of Glamorgan 014H,306564,166023,-3.344110,51.3855,ca1b5894-4c3c-49fc-8dbf-ab0aa45d7d3b,"POLYGON ((306830.979 166442.608, 306834.000 16..."


In [29]:
# use LSOA21NM to filter the lsoas needed
# create a new variable with new name to distinguish it from the original variable
LSOA_leeds_2021 = LSOA_EW_2021.loc[LSOA_EW_2021['LSOA21NM'].str.contains('Leeds')]

In [32]:
LSOA_leeds_2021.info()
# the row number is 488, equals to 'Migrant_Indicator_dataonly', means it was well scaled

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 488 entries, 10719 to 33048
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype   
---  ------    --------------  -----   
 0   LSOA21CD  488 non-null    object  
 1   LSOA21NM  488 non-null    object  
 2   BNG_E     488 non-null    int64   
 3   BNG_N     488 non-null    int64   
 4   LONG      488 non-null    float64 
 5   LAT       488 non-null    float64 
 6   GlobalID  488 non-null    object  
 7   geometry  488 non-null    geometry
dtypes: float64(2), geometry(1), int64(2), object(3)
memory usage: 34.3+ KB


This shape file has 488 total rows with 488 non-null rows, means this data has no none. No need to clean null. 

# Data frame merge
## make crime data  to 1 table
The street crime data of Leeds is still stored as a list of dataframe which mapped to months from April to December.
This is not convenient for later analysis, so merge it to one dataframe. The 'Month' variable kept by last part also works for this part for better identification.

In [45]:
# use pandas.concat() to do this
CrimeLeeds2021 = pandas.concat(CrimeLeeds2021_List)

In [46]:
#have a look on the merged dataframe
CrimeLeeds2021.info()

<class 'pandas.core.frame.DataFrame'>
Index: 78732 entries, 13172 to 21476
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Month                  78732 non-null  object 
 1   Longitude              78732 non-null  float64
 2   Latitude               78732 non-null  float64
 3   Location               78732 non-null  object 
 4   LSOA code              78732 non-null  object 
 5   LSOA name              78732 non-null  object 
 6   Crime type             78732 non-null  object 
 7   Last outcome category  78732 non-null  object 
dtypes: float64(2), object(6)
memory usage: 5.4+ MB


# Analysising Process 

# Result - final visualisations 
repeat the code of two result visualisations for clear cognition

## non-spatial 

## spatial 