In [14]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [15]:
# Load the raw crime records; the path is relative to the notebook's working directory.
CRIMES_CSV = 'datasets//Chicago_Crimes.csv'
crimes = pd.read_csv(CRIMES_CSV)

In [18]:
# Print each column's dtype and non-null count for the loaded frame.
crimes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249123 entries, 0 to 249122
Data columns (total 22 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   ID                    249123 non-null  int64         
 1   Case Number           249123 non-null  object        
 2   Date                  249123 non-null  datetime64[ns]
 3   Block                 249123 non-null  object        
 4   IUCR                  249123 non-null  object        
 5   Primary Type          249123 non-null  object        
 6   Description           249123 non-null  object        
 7   Location Description  248266 non-null  object        
 8   Arrest                249123 non-null  bool          
 9   Domestic              249123 non-null  bool          
 10  Beat                  249123 non-null  int64         
 11  District              249123 non-null  int64         
 12  Ward                  249123 non-null  int64         
 13 

In [19]:
# Convert the Date column to datetimes; values that cannot be parsed become NaT.
parsed_dates = pd.to_datetime(crimes['Date'], errors='coerce')
crimes['Date'] = parsed_dates

# Re-inspect the schema after the conversion.
crimes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 249123 entries, 0 to 249122
Data columns (total 22 columns):
 #   Column                Non-Null Count   Dtype         
---  ------                --------------   -----         
 0   ID                    249123 non-null  int64         
 1   Case Number           249123 non-null  object        
 2   Date                  249123 non-null  datetime64[ns]
 3   Block                 249123 non-null  object        
 4   IUCR                  249123 non-null  object        
 5   Primary Type          249123 non-null  object        
 6   Description           249123 non-null  object        
 7   Location Description  248266 non-null  object        
 8   Arrest                249123 non-null  bool          
 9   Domestic              249123 non-null  bool          
 10  Beat                  249123 non-null  int64         
 11  District              249123 non-null  int64         
 12  Ward                  249123 non-null  int64         
 13 

In [20]:
# Derive the calendar year from the Date column (uses the .dt accessor, so Date must already be datetime).
# NOTE(review): the column listing printed further down shows 'Year' between 'Y Coordinate' and
# 'Updated On', which suggests the CSV already ships a Year column that this overwrites — confirm.
crimes['Year'] = crimes['Date'].dt.year

<h1>MOTOR VEHICLE THEFT</h1>
<h3>2024 - 2025</h3>

In [21]:
# List the distinct crime categories present in the data.
pd.unique(crimes['Primary Type'])

array(['THEFT', 'OTHER OFFENSE', 'MOTOR VEHICLE THEFT',
       'WEAPONS VIOLATION', 'BATTERY', 'ASSAULT',
       'CRIMINAL SEXUAL ASSAULT', 'CRIMINAL TRESPASS', 'CRIMINAL DAMAGE',
       'DECEPTIVE PRACTICE', 'SEX OFFENSE', 'ROBBERY', 'NARCOTICS',
       'HOMICIDE', 'INTERFERENCE WITH PUBLIC OFFICER', 'BURGLARY',
       'ARSON', 'OFFENSE INVOLVING CHILDREN', 'INTIMIDATION',
       'PUBLIC PEACE VIOLATION', 'CONCEALED CARRY LICENSE VIOLATION',
       'KIDNAPPING', 'STALKING', 'LIQUOR LAW VIOLATION', 'PROSTITUTION',
       'GAMBLING', 'OBSCENITY', 'PUBLIC INDECENCY', 'HUMAN TRAFFICKING',
       'OTHER NARCOTIC VIOLATION', 'NON-CRIMINAL'], dtype=object)

In [22]:
# Keep only the motor-vehicle-theft records, then show which columns are available.
is_vehicle_theft = crimes['Primary Type'].eq('MOTOR VEHICLE THEFT')
m_theft = crimes[is_vehicle_theft]
m_theft.columns

Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

In [23]:
# List the distinct years covered by the data.
pd.unique(crimes['Year'])

array([2024, 2025], dtype=int32)

In [24]:
import folium
from folium.plugins import HeatMap

# Heat map of motor-vehicle-theft incidents, weighted by how many incidents
# share the same coordinates.

# Aggregate incidents by location
aggregated_df = m_theft.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# Prepare heat_data as [lat, lon, weight] rows
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the base map on the incidents themselves. The previous fixed centre
# ([30, 40] at zoom 4) did not cover the plotted coordinates, so the heat layer
# was off-screen when the map opened.
m = folium.Map(location=[m_theft['Latitude'].mean(), m_theft['Longitude'].mean()], zoom_start=11)

# Add heatmap layer
HeatMap(heat_data).add_to(m)

# Save and display the map.
# NOTE(review): the file name looks like a leftover from another notebook; it is
# kept unchanged here so anything that reads this file keeps working — consider renaming.
m.save('terrorism_heatmap.html')
m

<h1>HEAT MAP FOR DIFFERENT PRIMARY TYPE</h1>

In [25]:
# Crime categories to map; one HTML heat map is written per category.
primary_types = [
    'THEFT',
    'OTHER OFFENSE',
    'MOTOR VEHICLE THEFT',
    'WEAPONS VIOLATION',
    'BATTERY',
    'ASSAULT',
    'CRIMINAL SEXUAL ASSAULT',
    'CRIMINAL TRESPASS',
    'CRIMINAL DAMAGE',
    'DECEPTIVE PRACTICE',
    'SEX OFFENSE',
    'ROBBERY',
    'NARCOTICS',
    'HOMICIDE',
    'INTERFERENCE WITH PUBLIC OFFICER',
    'BURGLARY',
    'ARSON',
    'OFFENSE INVOLVING CHILDREN',
    'INTIMIDATION',
    'PUBLIC PEACE VIOLATION',
    'CONCEALED CARRY LICENSE VIOLATION',
    'KIDNAPPING',
    'STALKING',
    'LIQUOR LAW VIOLATION',
    'PROSTITUTION',
    'GAMBLING',
    'OBSCENITY',
    'PUBLIC INDECENCY',
    'HUMAN TRAFFICKING',
    'OTHER NARCOTIC VIOLATION',
    'NON-CRIMINAL',
]

for crime in primary_types:
    # Rows of this category that carry usable coordinates.
    of_this_type = crimes['Primary Type'] == crime
    df = crimes[of_this_type].dropna(subset=['Latitude', 'Longitude'])

    if not df.empty:
        # Count incidents per distinct coordinate pair.
        agg = df.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')
        counts = agg['incident_count']
        min_c = counts.min()
        max_c = counts.max()

        # Min-max scale the counts; a zero range gets a uniform weight instead of a division by zero.
        if min_c == max_c:
            agg['normalized_weight'] = 1
        else:
            agg['normalized_weight'] = (counts - min_c) / (max_c - min_c)

        heat_data = agg[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

        # Centre the map on the mean coordinate of this category's incidents.
        centre = [df['Latitude'].mean(), df['Longitude'].mean()]
        m = folium.Map(location=centre, zoom_start=11)
        HeatMap(heat_data).add_to(m)

        # File name is the lower-cased category with spaces turned into underscores.
        out_name = crime.replace(' ', '_').lower()
        m.save(f"heatmap_{out_name}.html")
        print(f"Saved heatmap for {crime}")

Saved heatmap for THEFT
Saved heatmap for OTHER OFFENSE
Saved heatmap for MOTOR VEHICLE THEFT
Saved heatmap for WEAPONS VIOLATION
Saved heatmap for BATTERY
Saved heatmap for ASSAULT
Saved heatmap for CRIMINAL SEXUAL ASSAULT
Saved heatmap for CRIMINAL TRESPASS
Saved heatmap for CRIMINAL DAMAGE
Saved heatmap for DECEPTIVE PRACTICE
Saved heatmap for SEX OFFENSE
Saved heatmap for ROBBERY
Saved heatmap for NARCOTICS
Saved heatmap for HOMICIDE
Saved heatmap for INTERFERENCE WITH PUBLIC OFFICER
Saved heatmap for BURGLARY
Saved heatmap for ARSON
Saved heatmap for OFFENSE INVOLVING CHILDREN
Saved heatmap for INTIMIDATION
Saved heatmap for PUBLIC PEACE VIOLATION
Saved heatmap for CONCEALED CARRY LICENSE VIOLATION
Saved heatmap for KIDNAPPING
Saved heatmap for STALKING
Saved heatmap for LIQUOR LAW VIOLATION
Saved heatmap for PROSTITUTION
Saved heatmap for GAMBLING
Saved heatmap for OBSCENITY
Saved heatmap for PUBLIC INDECENCY
Saved heatmap for HUMAN TRAFFICKING
Saved heatmap for OTHER NARCOTIC VI

In [26]:
# When the cells are run in order, `m` was last assigned inside the per-type loop,
# so this displays only the map of the final crime type that had located incidents.
m

<h2>The heatmaps reveal the geographical hotspots where each crime type occurs most frequently.
For example, you might see that THEFT incidents cluster heavily in commercial or downtown areas, while BURGLARY hotspots might be more residential. The THEFT heat map might show a dense hotspot downtown, indicating a high concentration of theft incidents in that area. The area covered by this data, and therefore the area affected by these crimes, is the city of Chicago.</h2>

<h1>NARCOTICS HEATMAP</h1>

In [27]:
# Heat map of NARCOTICS incidents, weighted by how many incidents share the same coordinates.
narcotics = crimes[crimes['Primary Type'] == 'NARCOTICS']

# Count incidents per distinct (Latitude, Longitude) pair.
aggregated_df = narcotics.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# [lat, lon, weight] rows for the heat layer.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the map on the mean coordinate of the incidents.
m = folium.Map(location=[narcotics['Latitude'].mean(), narcotics['Longitude'].mean()], zoom_start=11)

HeatMap(heat_data).add_to(m)

# Write the map to disk, then display it as the cell output.
m.save('heatmap_narcotics.html')
m

<h2>This heat map is centered on the average latitude and longitude of all narcotics incidents, which means it focuses on the general area where narcotics crimes occur, improving visualization relevance. We can see that narcotics crimes are geographically concentrated. Areas with higher normalized weights represent hotspots with more frequent narcotics incidents. This helps identify neighborhoods or districts where narcotics enforcement efforts may need to be intensified.</h2>

<h1>ASSAULT HEATMAP</h1>

In [28]:
# Heat map of ASSAULT incidents, weighted by how many incidents share the same coordinates.
assault = crimes[crimes['Primary Type'] == 'ASSAULT']

# Count incidents per distinct (Latitude, Longitude) pair.
aggregated_df = assault.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# [lat, lon, weight] rows for the heat layer.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the map on the mean coordinate of the incidents.
m = folium.Map(location=[assault['Latitude'].mean(), assault['Longitude'].mean()], zoom_start=11)

HeatMap(heat_data).add_to(m)

# Write the map to disk, then display it as the cell output.
m.save('heatmap_assault.html')
m

<h2>The heatmap centers around the mean latitude and longitude of all assault incidents, ensuring the heatmap focuses on the core geographic area where assaults occur. Law enforcement can use this heatmap to prioritize patrols and interventions in the most affected areas. It may also help in planning community outreach or violence prevention programs targeted at high-risk neighborhoods. The normalization method depends on the range of incident counts; if counts are very close, the heatmap may show subtle differences. The heatmap also aggregates all assault incidents over the dataset's timeframe, so it doesn't reveal temporal trends (e.g., seasonal spikes or time-of-day patterns).</h2>

<h1>ROBBERY HEATMAP</h1>

In [29]:
# Heat map of ROBBERY incidents, weighted by how many incidents share the same coordinates.
robbery = crimes[crimes['Primary Type'] == 'ROBBERY']

# Count incidents per distinct (Latitude, Longitude) pair.
aggregated_df = robbery.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# [lat, lon, weight] rows for the heat layer.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the map on the mean coordinate of the incidents.
m = folium.Map(location=[robbery['Latitude'].mean(), robbery['Longitude'].mean()], zoom_start=11)

HeatMap(heat_data).add_to(m)

# Write the map to disk, then display it as the cell output.
m.save('heatmap_robbery.html')
m

<h2>The heatmap highlights locations where robbery incidents are most densely clustered. Areas with higher normalized weights indicate hotspots with frequent robberies, which could be commercial districts, transit hubs, or high-traffic pedestrian areas. This helps distinguish between areas with moderate robbery activity and those with significantly higher incident rates, even if absolute counts vary. Police departments can use this heatmap to prioritize patrols and surveillance in robbery hotspots, potentially reducing crime through increased presence. Community safety programs can target these areas for awareness campaigns or preventative measures.</h2>

<h1>HOMICIDE HEATMAP</h1>

In [30]:
# Heat map of HOMICIDE incidents, weighted by how many incidents share the same coordinates.
homicide = crimes[crimes['Primary Type'] == 'HOMICIDE']

# Count incidents per distinct (Latitude, Longitude) pair.
aggregated_df = homicide.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# [lat, lon, weight] rows for the heat layer.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the map on the mean coordinate of the incidents.
m = folium.Map(location=[homicide['Latitude'].mean(), homicide['Longitude'].mean()], zoom_start=11)

HeatMap(heat_data).add_to(m)

# Write the map to disk, then display it as the cell output.
m.save('heatmap_homicide.html')
m

<h2>In this heatmap, we can clearly see the geographic hotspots of homicides, with the highest densities around areas such as Gage Park, Greater Grand Crossing, and the Oakwood Cemetery area. These neighborhoods are known to have historically higher rates of violent crime, and the data confirms this spatial pattern. The normalization of incident counts allows us to see which locations have the most intense concentration of homicides relative to others. Hotspots with normalized weights close to 1 represent the most critical areas with frequent homicide incidents. City planners and policymakers can use this information to address underlying factors such as poverty, lack of opportunities, and community infrastructure.</h2>

<h1>BURGLARY HEATMAP</h1>

In [31]:
# Heat map of BURGLARY incidents, weighted by how many incidents share the same coordinates.
burglary = crimes[crimes['Primary Type'] == 'BURGLARY']

# Count incidents per distinct (Latitude, Longitude) pair.
aggregated_df = burglary.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# [lat, lon, weight] rows for the heat layer.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the map on the mean coordinate of the incidents.
m = folium.Map(location=[burglary['Latitude'].mean(), burglary['Longitude'].mean()], zoom_start=11)

HeatMap(heat_data).add_to(m)

# Write the map to disk, then display it as the cell output.
m.save('heatmap_burglary.html')
m

<h2>This heatmap centers on the average latitude and longitude of burglary incidents, focusing the visualization on the general area affected by burglaries. Since burglary incidents are few, the heatmap may appear more diffuse or show only mild concentrations. The observation that the area has only slight burglary activity suggests that burglary incidents are relatively low or dispersed across the area being analyzed. This means there are no strong or dense burglary hotspots, indicating burglary may not be a major issue in this region compared to other crime types. Burglary incidents with missing location data are excluded, so ensure data completeness for accurate mapping.</h2>

<h1>PROSTITUTION HEATMAP</h1>

In [35]:
# Heat map of PROSTITUTION incidents, weighted by how many incidents share the same coordinates.
prostitute = crimes[crimes['Primary Type'] == 'PROSTITUTION']

# Count incidents per distinct (Latitude, Longitude) pair.
aggregated_df = prostitute.groupby(['Latitude', 'Longitude']).size().reset_index(name='incident_count')

# Min-max normalise the counts to [0, 1]. When every location has the same
# count the range is zero and the division would yield NaN weights, which the
# HeatMap layer cannot use, so fall back to a uniform weight in that case.
min_count = aggregated_df['incident_count'].min()
max_count = aggregated_df['incident_count'].max()
if max_count > min_count:
    aggregated_df['normalized_weight'] = (aggregated_df['incident_count'] - min_count) / (max_count - min_count)
else:
    aggregated_df['normalized_weight'] = 1.0

# [lat, lon, weight] rows for the heat layer.
heat_data = aggregated_df[['Latitude', 'Longitude', 'normalized_weight']].values.tolist()

# Centre the map on the mean coordinate of the incidents.
m = folium.Map(location=[prostitute['Latitude'].mean(), prostitute['Longitude'].mean()], zoom_start=11)

HeatMap(heat_data).add_to(m)

# Write the map to disk, then display it as the cell output.
m.save('heatmap_prostitute.html')
m

<h2>The heatmap clearly shows that prostitution incidents are relatively rare in the Chicago area, with only a small number of reported cases scattered throughout. This visualization confirms that prostitution is not a widespread issue in this region. Since prostitution incidents tend to be fewer and more localized, the heatmap shows small clusters or sparse points rather than widespread hotspots. Locations with higher normalized weights indicate areas with more frequent prostitution-related incidents. Prostitution crimes may be underreported or hidden, so the heatmap might not capture the full extent.</h2>