In [None]:
import os
import pandas as pd
import geopandas as gpd
import folium

In [None]:
# --- Configuration ---
# Crime CSV, resolved against the current working directory when it is loaded.
csv_filename = 'data.csv'

# LSOA boundary file. The default is the original author's local copy; set the
# LSOA_GEOJSON_PATH environment variable to run the notebook on another machine
# without editing this cell.
geojson_path = os.environ.get(
    'LSOA_GEOJSON_PATH',
    r'C:\Users\20232645\Desktop\TUe\Y2\Q4\Data Challenge 2\4CBLW00-20-MD-CBL\LSOA boundries of feb 2025\combined_lsoa.geojson',
)

# Folder that receives one HTML map per year.
output_dir = 'choropleth_maps'

# Number of CSV rows parsed per chunk.
chunk_size = 1_500_000

# Boundary polygons. The 'Description' column is the key that the mapping step
# joins against the crime data's 'LSOA code' column.
gdf_lsoas = gpd.read_file(geojson_path)
london_lsoa_codes = set(gdf_lsoas['Description'])

# London's bounding box
lat_min, lat_max = 51.2867602, 51.6918741
lon_min, lon_max = -0.5103751, 0.3340155

# Only these CSV columns are read.
columns_to_keep = ['Month', 'Latitude', 'Longitude', 'Location', 'LSOA code', 'Crime type']

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# --- Handle bad lines ---
# Running tally of rows rejected by the CSV parser; reset every time this cell runs.
bad_line_counter = 0


def handle_bad_line(bad_line):
    """Count one malformed CSV row and tell the parser to skip it.

    Passed to ``pd.read_csv`` as ``on_bad_lines``. The row contents in
    ``bad_line`` are not inspected.

    Returns:
        None, always.
    """
    global bad_line_counter
    bad_line_counter = bad_line_counter + 1
    # Falling off the end returns None.

# --- Load and preprocess data in chunks ---
csv_path = os.path.join(os.getcwd(), csv_filename)

dfs = []
# A callable on_bad_lines is only supported by the python engine. The reader is
# used as a context manager so the file handle is closed even if a chunk fails.
with pd.read_csv(
    csv_path,
    usecols=columns_to_keep,
    chunksize=chunk_size,
    engine='python',
    on_bad_lines=handle_bad_line,
) as chunks:
    for i, chunk in enumerate(chunks):
        # Rows without coordinates or an LSOA code cannot be placed on the map.
        chunk = chunk.dropna(subset=['Latitude', 'Longitude', 'LSOA code'])

        # between() is inclusive on both ends, matching the >= / <= bounds.
        in_bbox = (
            chunk['Latitude'].between(lat_min, lat_max)
            & chunk['Longitude'].between(lon_min, lon_max)
        )
        # The bounding box is a rectangle that also covers areas outside the
        # boundary file; keep only rows whose LSOA code appears in it.
        in_boundary_file = chunk['LSOA code'].isin(london_lsoa_codes)

        chunk = chunk[in_bbox & in_boundary_file]
        print(f"Processed chunk {i+1}")
        dfs.append(chunk)

# pd.concat raises on an empty list, so fall back to an empty frame that still
# has the expected columns.
if dfs:
    df_london = pd.concat(dfs, ignore_index=True)
else:
    df_london = pd.DataFrame(columns=columns_to_keep)

# Unparseable months become NaT rather than raising.
df_london['Month'] = pd.to_datetime(df_london['Month'], errors='coerce')
print(f"Total valid rows: {len(df_london)} | Skipped bad rows: {bad_line_counter}")

Processed chunk 1
Processed chunk 2
Processed chunk 3
Processed chunk 4
Processed chunk 5
Processed chunk 6
Processed chunk 7
Processed chunk 8
Processed chunk 9
Processed chunk 10
Processed chunk 11
Processed chunk 12
Processed chunk 13
Processed chunk 14
Processed chunk 15
Processed chunk 16
Processed chunk 17
Processed chunk 18
Processed chunk 19
Processed chunk 20
Processed chunk 21
Processed chunk 22
Processed chunk 23
Processed chunk 24
Processed chunk 25
Processed chunk 26
Processed chunk 27
Processed chunk 28
Processed chunk 29
Processed chunk 30
Processed chunk 31
Processed chunk 32
Processed chunk 33
Processed chunk 34
Processed chunk 35
Processed chunk 36
Processed chunk 37
Processed chunk 38
Processed chunk 39
Processed chunk 40
Processed chunk 41
Processed chunk 42
Processed chunk 43
Processed chunk 44
Processed chunk 45
Processed chunk 46
Processed chunk 47
Processed chunk 48
Processed chunk 49
Processed chunk 50
Processed chunk 51
Processed chunk 52
Processed chunk 53
Pr

In [31]:
# --- Load LSOA GeoJSON ---
# Reuse the boundaries loaded in the configuration cell instead of parsing the
# same file again from a second hard-coded copy of its path. The copy keeps
# lsoa_geo independent of gdf_lsoas.
lsoa_geo = gdf_lsoas.copy()

# --- Generate choropleth maps by year ---
# Count crimes per (year, LSOA) in a single pass rather than re-filtering the
# full frame for every year. Rows whose 'Month' is NaT have a NaN year and are
# dropped by groupby.
crime_year = df_london['Month'].dt.year.rename('year')
counts_by_year = (
    df_london.groupby([crime_year, 'LSOA code'])
    .size()
    .reset_index(name='crime_count')
)
# .dt.year is float when the column contains NaT; cast so file names and
# legends read "2020" and not "2020.0".
counts_by_year['year'] = counts_by_year['year'].astype(int)

# Idempotent; lets this cell run even if the output folder was removed.
os.makedirs(output_dir, exist_ok=True)

# groupby iterates its keys in sorted order, so maps are produced oldest first.
for year, year_counts in counts_by_year.groupby('year'):
    print(f"Generating map for {year}...")
    lsoa_counts = year_counts[['LSOA code', 'crime_count']]

    # Left join keeps every boundary polygon; LSOAs with no recorded crime in
    # this year get a count of 0.
    merged = lsoa_geo.merge(lsoa_counts, left_on='Description', right_on='LSOA code', how='left')
    merged['crime_count'] = merged['crime_count'].fillna(0).astype(int)

    # Create folium map
    m = folium.Map(location=[51.5074, -0.1278], zoom_start=10)
    folium.Choropleth(
        geo_data=merged,
        name='choropleth',
        data=merged,
        columns=['Description', 'crime_count'],
        key_on='feature.properties.Description',
        fill_color='YlOrRd',
        fill_opacity=0.7,
        line_opacity=0.2,
        legend_name=f'Crime Count per LSOA ({year})'
    ).add_to(m)

    # # Optional: Add popups (fields must be columns of `merged`)
    # folium.GeoJsonTooltip(fields=['Description', 'crime_count'],
    #                       aliases=["LSOA Code:", "Crimes:"],
    #                       sticky=False).add_to(folium.GeoJson(merged).add_to(m))

    m.save(os.path.join(output_dir, f'crime_density_{year}.html'))

print("All maps generated.")

All maps generated.
