In [1]:
# Custom Modules
from utils.data_loader import load_crime_dataset
from utils.maps import ChicagoMap


# OS modules
from pathlib import Path

# Dependencies
from sodapy import Socrata
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import geopandas as gpd
import plotly.express as px

# Visual configuration: seaborn "ticks" theme with color_codes turned on
sns.set_theme(color_codes=True, style="ticks")

# Misc: mute every warning category for the rest of the session
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
# Expected location of the locally cached crime CSV
dataset_path = Path('../datasets/chicago-crime-data.csv')

# Only call the loader when no local copy exists; otherwise report the hit
if not dataset_path.exists():
    load_crime_dataset()
else:
    print(f"File found: {dataset_path.name}")

File found: chicago-crime-data.csv


In [3]:
# Read the cached CSV and parse its 'date' column to datetime in one pass
crime_df = pd.read_csv(dataset_path).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Quick sanity check: dimensions and column names, then a peek at four rows
print(crime_df.shape, crime_df.columns, sep='\n')
crime_df.head(4)

(1404754, 22)
Index(['id', 'case_number', 'date', 'block', 'iucr', 'primary_type',
       'description', 'location_description', 'arrest', 'domestic', 'beat',
       'district', 'ward', 'community_area', 'fbi_code', 'x_coordinate',
       'y_coordinate', 'year', 'updated_on', 'latitude', 'longitude',
       'location'],
      dtype='object')


Unnamed: 0,id,case_number,date,block,iucr,primary_type,description,location_description,arrest,domestic,...,ward,community_area,fbi_code,x_coordinate,y_coordinate,year,updated_on,latitude,longitude,location
0,11662417,JC232642,2019-04-21 12:30:00,009XX E 80TH ST,031A,ROBBERY,ARMED - HANDGUN,RESIDENCE,False,False,...,8.0,44.0,3,1184044.0,1852159.0,2019,2023-09-14T15:41:59.000,41.7495,-87.601157,"{'latitude': '41.749500329', 'longitude': '-87..."
1,12990873,JG161829,2019-08-17 13:14:00,008XX N KARLOV AVE,1751,OFFENSE INVOLVING CHILDREN,CRIMINAL SEXUAL ABUSE BY FAMILY MEMBER,RESIDENCE,True,True,...,37.0,23.0,17,1148899.0,1905351.0,2019,2023-09-14T15:41:59.000,41.896215,-87.728572,"{'latitude': '41.89621515', 'longitude': '-87...."
2,11630496,JC193727,2019-03-16 11:35:00,045XX N LINCOLN AVE,0890,THEFT,FROM BUILDING,BAR OR TAVERN,False,False,...,47.0,4.0,6,,,2019,2019-03-23T16:03:16.000,,,
3,11632505,JC196841,2019-03-20 01:00:00,013XX W HOOD AVE,0810,THEFT,OVER $500,OTHER,False,False,...,48.0,77.0,6,,,2019,2019-03-27T16:10:11.000,,,


In [4]:
# Per-community summary table (one row per community, see the display below)
com_trend = pd.read_csv(filepath_or_buffer="../assets/maps/community_trend.csv")

In [5]:
# Show the community summary table; the rendered output elides the middle rows
com_trend

Unnamed: 0,Community,Most Frequent Crime,total_crimes,total_arrests,Arrest Rate
0,ALBANY PARK,THEFT,12735,1417,11.13
1,ARCHER HEIGHTS,THEFT,5080,448,8.82
2,ARMOUR SQUARE,THEFT,5949,727,12.22
3,ASHBURN,THEFT,12340,977,7.92
4,AUBURN GRESHAM,BATTERY,38318,6196,16.17
...,...,...,...,...,...
72,WEST LAWN,THEFT,9264,944,10.19
73,WEST PULLMAN,BATTERY,21179,3194,15.08
74,WEST RIDGE,THEFT,20413,1924,9.43
75,WEST TOWN,THEFT,39241,3419,8.71


In [6]:
# Community-area reference table; list its columns to find the join key
community_df = pd.read_csv(filepath_or_buffer='../assets/maps/comm-areas.csv')
community_df.columns

Index(['the_geom', 'PERIMETER', 'AREA', 'COMAREA_', 'COMAREA_ID', 'AREA_NUMBE',
       'COMMUNITY', 'AREA_NUM_1', 'SHAPE_AREA', 'SHAPE_LEN'],
      dtype='object')

In [7]:
# Rename the reference table's columns so both frames share the
# 'community_number' key and the area label is exposed as 'community_name'.
community_df = community_df.rename(
    columns={'AREA_NUM_1': 'community_number', 'COMMUNITY': 'community_name'}
)

# Left join: every crime row is kept and gains the name of its community area
# (rows whose number has no match in the reference table get a missing name).
crime_df = pd.merge(
    crime_df.rename(columns={'community_area': 'community_number'}),
    community_df.loc[:, ['community_name', 'community_number']],
    how='left',
    on='community_number',
)

In [8]:
# Attach the per-community summary columns to each crime row by community name.
# how='right' keeps every com_trend row and discards crime rows whose
# community_name has no match in com_trend.
# NOTE(review): confirm that dropping unmatched crime rows is intended; a left
# join would keep them.
crime_df = pd.merge(
    crime_df,
    com_trend,
    how='right',
    left_on='community_name',
    right_on="Community",
)

In [9]:
# 'community_name' was only the join key for the previous merge; the merged
# 'Community' column remains, so remove the helper column.
crime_df = crime_df.drop(columns=['community_name'])

In [10]:
# Inspect the final column set after both merges and the helper-column drop
crime_df.columns

Index(['id', 'case_number', 'date', 'block', 'iucr', 'primary_type',
       'description', 'location_description', 'arrest', 'domestic', 'beat',
       'district', 'ward', 'community_number', 'fbi_code', 'x_coordinate',
       'y_coordinate', 'year', 'updated_on', 'latitude', 'longitude',
       'location', 'Community', 'Most Frequent Crime', 'total_crimes',
       'total_arrests', 'Arrest Rate'],
      dtype='object')

In [11]:
# Persist the enriched crime table for downstream use.
# The destination is relative to the notebook (the same ../datasets folder the
# raw CSV is read from) rather than a machine-specific absolute drive path, so
# the cell runs on any checkout. The folder is created first if it is missing.
output_path = Path('../datasets/crime_df.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False: the row index carries no information and is left out of the file
crime_df.to_csv(output_path, index=False)