In [4]:
# OS modules
from pathlib import Path

# Dependencies
from sodapy import Socrata
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import geopandas as gpd
import plotly.express as px

# Misc: suppress all warnings for the remainder of the kernel session
import warnings
warnings.filterwarnings(action='ignore')

# Visual configuration: seaborn "ticks" theme with color codes enabled
sns.set_theme(color_codes=True, style="ticks")

In [5]:
# Expected on-disk location of the Chicago crime CSV
dataset_path = Path('../datasets/chicago-crime-data.csv')

# Report the file when it already exists; otherwise delegate to the loader.
# NOTE(review): load_crime_dataset is not defined in the visible cells —
# confirm it is available before running this on a machine without the CSV.
if not dataset_path.exists():
    load_crime_dataset()
else:
    print(f"File found: {dataset_path.name}")

File found: chicago-crime-data.csv


In [6]:
# Read the crime CSV and convert the `date` column to datetime in one chain
crime_df = pd.read_csv(dataset_path).assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

# Quick summary: dimensions, column labels, then the first four rows
for summary in (crime_df.shape, crime_df.columns):
    print(summary)
crime_df.head(4)

(1408934, 22)
Index(['id', 'case_number', 'date', 'block', 'iucr', 'primary_type',
       'description', 'location_description', 'arrest', 'domestic', 'beat',
       'district', 'ward', 'community_area', 'fbi_code', 'year', 'updated_on',
       'x_coordinate', 'y_coordinate', 'latitude', 'longitude', 'location'],
      dtype='object')


Unnamed: 0,id,case_number,date,block,iucr,primary_type,description,location_description,arrest,domestic,...,ward,community_area,fbi_code,year,updated_on,x_coordinate,y_coordinate,latitude,longitude,location
0,11641644,JC207234,2019-03-31 09:30:00,005XX N ogden ave,890,THEFT,FROM BUILDING,RESTAURANT,False,False,...,1.0,24.0,6,2019,2019-04-07T16:05:59.000,,,,,
1,11642710,JC209088,2019-03-31 01:00:00,015XX N WELLS ST,890,THEFT,FROM BUILDING,BAR OR TAVERN,False,False,...,27.0,8.0,6,2019,2019-04-07T16:05:59.000,,,,,
2,11642709,JC208560,2019-03-31 11:00:00,0000X E MONROE ST,890,THEFT,FROM BUILDING,HOTEL/MOTEL,False,False,...,42.0,32.0,6,2019,2019-04-07T16:05:59.000,,,,,
3,11641640,JC207466,2019-03-31 12:30:00,037XX W Ogden Ave,820,THEFT,$500 AND UNDER,ATHLETIC CLUB,False,False,...,24.0,29.0,6,2019,2019-04-07T16:05:59.000,,,,,


In [7]:
# Per-community summary table (most frequent crime, totals, arrest rate)
community_trend_csv = "../assets/maps/community_trend.csv"
com_trend = pd.read_csv(community_trend_csv)

In [8]:
# Show the community trend table as this cell's output
com_trend

Unnamed: 0,Community,Most Frequent Crime,total_crimes,total_arrests,Arrest Rate
0,ALBANY PARK,THEFT,12779,1423,11.14
1,ARCHER HEIGHTS,THEFT,5095,451,8.85
2,ARMOUR SQUARE,THEFT,5971,732,12.26
3,ASHBURN,THEFT,12383,981,7.92
4,AUBURN GRESHAM,BATTERY,38415,6210,16.17
...,...,...,...,...,...
72,WEST LAWN,THEFT,9308,950,10.21
73,WEST PULLMAN,BATTERY,21227,3202,15.08
74,WEST RIDGE,THEFT,20482,1936,9.45
75,WEST TOWN,THEFT,39384,3425,8.70


In [9]:
# Community-area table; list its columns to locate the name/number fields
community_areas_csv = '../assets/maps/comm-areas.csv'
community_df = pd.read_csv(community_areas_csv)
community_df.columns

Index(['the_geom', 'PERIMETER', 'AREA', 'COMAREA_', 'COMAREA_ID', 'AREA_NUMBE',
       'COMMUNITY', 'AREA_NUM_1', 'SHAPE_AREA', 'SHAPE_LEN'],
      dtype='object')

In [10]:
# Give both frames a shared `community_number` key and a readable name column
community_df = community_df.rename(
    columns={'COMMUNITY': 'community_name', 'AREA_NUM_1': 'community_number'}
)
community_lookup = community_df[['community_name', 'community_number']]

# Left join: every crime row is kept; rows without a matching community
# number get a missing `community_name`.
crime_df = pd.merge(
    crime_df.rename(columns={'community_area': 'community_number'}),
    community_lookup,
    how='left',
    on='community_number',
)

In [11]:
# Right join on the community name: every `com_trend` row is kept, while crime
# rows whose `community_name` has no match in `com_trend['Community']` are dropped.
crime_df = pd.merge(
    crime_df,
    com_trend,
    how='right',
    left_on='community_name',
    right_on='Community',
)

In [12]:
# `Community` from com_trend already carries the name, so drop the join helper
crime_df = crime_df.drop(columns=['community_name'])

In [13]:
# List the columns of the merged frame to confirm the trend fields were added
crime_df.columns

Index(['id', 'case_number', 'date', 'block', 'iucr', 'primary_type',
       'description', 'location_description', 'arrest', 'domestic', 'beat',
       'district', 'ward', 'community_number', 'fbi_code', 'year',
       'updated_on', 'x_coordinate', 'y_coordinate', 'latitude', 'longitude',
       'location', 'Community', 'Most Frequent Crime', 'total_crimes',
       'total_arrests', 'Arrest Rate'],
      dtype='object')

In [14]:
# Save the enriched crime table next to the raw dataset instead of a
# machine-specific absolute drive path, so the notebook runs on any checkout.
output_path = dataset_path.with_name('crime_df.csv')

# Make sure the target directory exists; to_csv does not create it.
output_path.parent.mkdir(parents=True, exist_ok=True)

# index=False keeps the positional index out of the exported file
crime_df.to_csv(output_path, index=False)