#### This notebook analyses the burglary records from the Metropolitan Police data files, aiming to provide a comprehensive overview of the data and to gain insight into its characteristics.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
import seaborn as sns
import geopandas as gpd

In [None]:
# Load the Metropolitan Police burglary records from the local parquet file
burglary_metropolitan_df = pd.read_parquet(path='burglary.parquet')

# Preview the first ten records
burglary_metropolitan_df.head(n=10)

In [None]:
# Number of rows in burglary_metropolitan_df (first entry of its shape)
length = burglary_metropolitan_df.shape[0]

# Show the row count
print(length)

In [None]:
# Print pandas' built-in summary of burglary_metropolitan_df
# (DataFrame.info(): column names, non-null counts, dtypes and memory usage)
burglary_metropolitan_df.info()

In [None]:
# Boolean mask marking the records that have no 'Crime ID'
missing_crime_id_mask = burglary_metropolitan_df['Crime ID'].isna()

# Keep only the records flagged by the mask
null_crime_id_rows = burglary_metropolitan_df.loc[missing_crime_id_mask]

# Show the records without a 'Crime ID'
null_crime_id_rows

In [None]:
# Count the null values in the 'Crime ID' column of burglary_metropolitan_df.
# Series.sum() adds up the boolean mask in vectorised code; the builtin sum()
# would iterate over every element in Python, which is much slower on large frames.
null_crime_id_count = burglary_metropolitan_df['Crime ID'].isnull().sum()

# Print the count of null values
print(null_crime_id_count)

In [None]:
# Boolean mask marking the records that have no 'Longitude'
missing_longitude_mask = burglary_metropolitan_df['Longitude'].isna()

# Keep only the records flagged by the mask
null_longitude_rows = burglary_metropolitan_df.loc[missing_longitude_mask]

# Show the records without a 'Longitude'
null_longitude_rows

In [None]:
# Display the null counts for the 'Longitude', 'Latitude' and 'LSOA code' columns.
# The counts are computed with the vectorised isnull().sum() instead of the
# builtin sum(), and returned as a tuple in the column order listed below so the
# notebook shows all three values as the cell output.
location_columns = ['Longitude', 'Latitude', 'LSOA code']
tuple(burglary_metropolitan_df[location_columns].isnull().sum())

In [None]:
# Take the 'Falls within' column and collect its distinct values
falls_within_column = burglary_metropolitan_df['Falls within']
police_centers = falls_within_column.unique()

# Show the distinct police center values
print(police_centers)

In [None]:
# Count the distinct 'Falls within' values held in police_centers
# NOTE(review): if police_centers comes from Series.unique(), a missing value
# (NaN/None) is counted as an entry of its own — confirm this is intended
num_police_centers = len(police_centers)

# Print the number of unique police centers
print(num_police_centers)

In [None]:
# Count the null values in the 'Falls within' column of burglary_metropolitan_df.
# Series.sum() adds up the boolean mask in vectorised code; the builtin sum()
# would iterate over every element in Python, which is much slower on large frames.
null_falls_within_count = burglary_metropolitan_df['Falls within'].isnull().sum()

# Print the count of null values
print(null_falls_within_count)

In [None]:
# Tally how often each distinct 'Falls within' value occurs
falls_within_column = burglary_metropolitan_df['Falls within']
falls_within_counts = falls_within_column.value_counts()

# Show the tally
print(falls_within_counts)

In [None]:
# Tally the 'Falls within' values and keep the 15 most frequent ones
value_counts = burglary_metropolitan_df['Falls within'].value_counts().iloc[:15]

# Start a large figure for the chart
plt.figure(figsize=(15, 10))

# One bar per 'Falls within' value, bar height = number of records
plt.bar(value_counts.index, value_counts.values)

# Title and axis labels
plt.title('The distribution of crimes within the police centers', fontsize=14)
plt.xlabel('Value', fontsize=12)
plt.ylabel('Frequency', fontsize=12)

# Write each frequency just above its bar (bars sit at x = 0, 1, 2, ...)
for bar_position, frequency in enumerate(value_counts.values):
    plt.text(
        bar_position,
        frequency,
        str(frequency),
        color='black',
        fontweight='bold',
        ha='center',
        fontsize=10,
    )

# Slant the x-axis labels so long names do not overlap
plt.xticks(rotation=45, ha='right')

# Render the chart
plt.show()

In [None]:
# Take the 'Crime type' column and collect its distinct values
crime_type_column = burglary_metropolitan_df['Crime type']
crime_types = crime_type_column.unique()

# Show the distinct crime types
print(crime_types)

In [None]:
# Count the distinct 'Crime type' values held in crime_types
# NOTE(review): if crime_types comes from Series.unique(), a missing value
# (NaN/None) is counted as an entry of its own — confirm this is intended
num_crime_types = len(crime_types)

# Print the number of unique crime types
print(num_crime_types)

In [None]:
# Count the null values in the 'Crime type' column of burglary_metropolitan_df.
# Series.sum() adds up the boolean mask in vectorised code; the builtin sum()
# would iterate over every element in Python, which is much slower on large frames.
null_crime_type_count = burglary_metropolitan_df['Crime type'].isnull().sum()

# Print the count of null values
print(null_crime_type_count)

In [None]:
# Flag every missing cell of burglary_metropolitan_df, then total the flags per column
missing_cells = burglary_metropolitan_df.isna()
null_value_counts = missing_cells.sum()

# Show the per-column totals
print(null_value_counts)

In [None]:
# Build 'cleaned_data' from burglary_metropolitan_df, discarding every row
# whose 'Longitude' is missing
cleaned_data = burglary_metropolitan_df.dropna(axis=0, subset=['Longitude'])

# Show the resulting DataFrame
cleaned_data

In [None]:
# Flag every missing cell of cleaned_data, then total the flags per column
missing_cells = cleaned_data.isna()
null_value_counts = missing_cells.sum()

# Show the per-column totals
print(null_value_counts)

In [None]:
# Take the 'LSOA code' column of cleaned_data and collect its distinct values
lsoa_code_column = cleaned_data['LSOA code']
unique_lsoa_codes = lsoa_code_column.unique()

# Show the distinct LSOA codes
print(unique_lsoa_codes)

In [None]:
# Work on an independent copy so the column assignments below modify
# cleaned_data itself and cannot raise SettingWithCopyWarning
cleaned_data = cleaned_data.copy()

# Parse the 'Month' column once; both derived columns come from this single parse.
# assumes 'Month' holds date strings (e.g. 'YYYY-MM') at this point — TODO confirm
# NOTE: this cell must run exactly once per load, because it overwrites 'Month'
# with month numbers, which a second run would mis-parse as timestamps.
month_as_datetime = pd.to_datetime(cleaned_data['Month'])

# Extracting the year from the Month column
cleaned_data['Year'] = month_as_datetime.dt.year

# Replacing the Month column with the month number (1-12); plain column
# assignment replaces the column so it gets an integer dtype
cleaned_data['Month'] = month_as_datetime.dt.month

In [None]:
# Build a KDE heat map of burglaries for one month and overlay it on a map of London.

# Filter the dataframe for a specific month and year
target_month = 5
target_year = 2022

filtered_df = cleaned_data[
    (cleaned_data['Month'] == target_month) &
    (cleaned_data['Year'] == target_year)
].copy()

# Group the filtered data by LSOA code and count the number of crimes in each area
crime_count = filtered_df.groupby('LSOA code').size().reset_index(name='Count')

# Merge the crime count with the filtered dataframe
# NOTE(review): every crime row is then weighted by its LSOA's total below, so
# busy LSOAs are emphasised twice (more rows and larger weights) — confirm intended
df_merged = pd.merge(filtered_df, crime_count, on='LSOA code')

# Create a base map centered around the Metropolitan Police Service area
map_heat = folium.Map(location=[51.5074, -0.1278], zoom_start=10)

# Draw the density on a dedicated figure whose axes fill the whole canvas.
# Hiding the axes and removing the margins means the saved image contains
# nothing but the density, so it can be stretched exactly onto map bounds.
heat_layer, heat_axes = plt.subplots(figsize=(10, 10))
sns.kdeplot(
    data=df_merged,
    x='Longitude',
    y='Latitude',
    weights='Count',
    fill=True,
    cmap='viridis',
    alpha=0.6,
    levels=150,
    ax=heat_axes,
)
heat_axes.set_axis_off()
heat_layer.subplots_adjust(left=0, right=1, bottom=0, top=1)

# The density extends beyond the outermost data points, so the geographic
# extent of the image is the axes limits, not the min/max of the data
lon_min, lon_max = (float(limit) for limit in heat_axes.get_xlim())
lat_min, lat_max = (float(limit) for limit in heat_axes.get_ylim())

# Save the heat map image (forward slashes work on every OS and avoid the
# invalid '\B' / '\L' escape sequences of a backslash path)
heatmap_path = 'Pictures/Barnet visual/London_burglary_heatmap.png'
heat_layer.savefig(heatmap_path, transparent=True)

# Create a Folium ImageOverlay from the image that was just saved
# (bounds are [[south, west], [north, east]])
image_overlay = folium.raster_layers.ImageOverlay(
    name='Crime Heat Map',
    image=heatmap_path,
    bounds=[[lat_min, lon_min], [lat_max, lon_max]],
    opacity=0.6,
)

# Add the image overlay to the map
image_overlay.add_to(map_heat)

# Save the map as an HTML file
map_heat.save('Pictures/Barnet visual/London_burglary_heatmap_over_map.html')

# Display the map
map_heat

In [None]:
# Number of crimes per LSOA code over the whole of cleaned_data
crimes_per_lsoa = cleaned_data.groupby('LSOA code').size()
crime_count = crimes_per_lsoa.reset_index(name='Count')

# Base map centred on the Metropolitan Police Service area
map_crimes = folium.Map(location=[51.5074, -0.1278], zoom_start=10)

# Build the choropleth layer: LSOA polygons shaded by their crime count
choropleth_layer = folium.Choropleth(
    geo_data='London.geojson',  # GeoJSON file with the LSOA boundaries
    name='Crime Count',
    data=crime_count,
    columns=['LSOA code', 'Count'],
    key_on='feature.properties.lsoa11cd',  # GeoJSON property matched against 'LSOA code'
    fill_color='OrRd',  # other colour schemes: 'BuPu', 'YlGn', ...
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Crimes',
)

# Attach the layer to the map
choropleth_layer.add_to(map_crimes)

# Write the map to an HTML file
map_crimes.save('Pictures/Barnet visual/London_burglary_choropleth.html')

# Show the map
map_crimes

In [None]:
# Month and year to visualise
target_month = 10
target_year = 2022

# Keep only the records that fall in the chosen month and year
month_matches = cleaned_data['Month'] == target_month
year_matches = cleaned_data['Year'] == target_year
filtered_df = cleaned_data[month_matches & year_matches]

# Number of crimes per LSOA code within the chosen period
crimes_per_lsoa = filtered_df.groupby('LSOA code').size()
crime_count = crimes_per_lsoa.reset_index(name='Count')

# Base map centred on the Metropolitan Police Service area
map_crimes = folium.Map(location=[51.5074, -0.1278], zoom_start=10)

# Build the choropleth layer: LSOA polygons shaded by their crime count
choropleth_layer = folium.Choropleth(
    geo_data='London.geojson',  # GeoJSON file with the LSOA boundaries
    name='Crime Count',
    data=crime_count,
    columns=['LSOA code', 'Count'],
    key_on='feature.properties.lsoa11cd',  # GeoJSON property matched against 'LSOA code'
    fill_color='OrRd',  # other colour schemes: 'BuPu', 'YlGn', ...
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Crimes',
)

# Attach the layer to the map
choropleth_layer.add_to(map_crimes)

# Show the map
map_crimes

In [None]:
# Read the London LSOA boundaries from the GeoJSON file
london_lsoa = gpd.read_file('London.geojson')

# Keep the LSOAs whose name ('lsoa11nm') contains 'Barnet'
name_mentions_barnet = london_lsoa['lsoa11nm'].str.contains('Barnet')
barnet_lsoa = london_lsoa[name_mentions_barnet]

# Show the LSOA codes selected for Barnet
print(barnet_lsoa['lsoa11cd'])

In [None]:
# Draw a choropleth of burglary counts per LSOA for Barnet and save it as HTML.

# Filter the data for Barnet (records whose LSOA code is one of the Barnet LSOAs)
barnet_data = cleaned_data[cleaned_data['LSOA code'].isin(barnet_lsoa['lsoa11cd'])]

# Group the filtered data by LSOA code and count the number of crimes in each area
crime_count = barnet_data.groupby('LSOA code').size().reset_index(name='Count')

# Create a base map centered around Barnet
map_crimes = folium.Map(location=[51.611, -0.207], zoom_start=11)  # Adjust the coordinates and zoom level as per your preference

# Add the Choropleth layer to the map
folium.Choropleth(
    geo_data='London.geojson',  # GeoJSON file containing LSOA boundaries
    name='Crime Count',
    data=crime_count,
    columns=['LSOA code', 'Count'],
    key_on='feature.properties.lsoa11cd',  # GeoJSON property matched against 'LSOA code'
    fill_color='OrRd',  # Change the color scheme, e.g., 'BuPu', 'YlGn', 'OrRd', etc.
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Crimes',
).add_to(map_crimes)

# Save the map as an HTML file. Forward slashes are used because they work on
# every OS, whereas the backslash form contains the invalid escape sequence '\B'
# and only resolves to the intended folder on Windows.
map_crimes.save('Pictures/Barnet visual/Barnet_burglary_choropleth.html')

# Display the map
map_crimes

In [None]:
# Month and year to visualise
target_month = 10
target_year = 2022

# Keep the records that lie in a Barnet LSOA and fall in the chosen month and year
in_barnet = cleaned_data['LSOA code'].isin(barnet_lsoa['lsoa11cd'])
month_matches = cleaned_data['Month'] == target_month
year_matches = cleaned_data['Year'] == target_year
barnet_data = cleaned_data[in_barnet & month_matches & year_matches]

# Number of crimes per LSOA code within the selection
crimes_per_lsoa = barnet_data.groupby('LSOA code').size()
crime_count = crimes_per_lsoa.reset_index(name='Count')

# Base map centred on Barnet (tune the coordinates and zoom level to taste)
map_crimes = folium.Map(location=[51.611, -0.207], zoom_start=11)

# Build the choropleth layer: LSOA polygons shaded by their crime count
choropleth_layer = folium.Choropleth(
    geo_data='London.geojson',  # GeoJSON file with the LSOA boundaries
    name='Crime Count',
    data=crime_count,
    columns=['LSOA code', 'Count'],
    key_on='feature.properties.lsoa11cd',  # GeoJSON property matched against 'LSOA code'
    fill_color='YlOrRd',  # colour scheme of the shading
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Number of Crimes',
)

# Attach the layer to the map
choropleth_layer.add_to(map_crimes)

# Show the map
map_crimes