In [12]:
import numpy as np
import pandas as pd

In [13]:
crimes = pd.read_csv('./csv/chicago_crimes_2021.csv')
weather = pd.read_csv('./csv/chicago_temps_2021.csv')
community = pd.read_csv('./csv/chicago_areas.csv')

In [14]:
crimes.sample()

Unnamed: 0,crime_date,city_block,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,community_area_id,latitude,longitude
166087,2021-10-25 17:00:00,bittersweet pl,theft,$500 and under,street,False,False,3,41.955683,-87.648044


Let's split the timestamp into seperate columns of 'date' and 'time'.  Then drop the original 'timestamp' and reorder the columns.

In [15]:
# Create new date and time column by extracting from timestamp
crimes['date'] = pd.to_datetime(crimes['crime_date']).dt.date
crimes['time'] = pd.to_datetime(crimes['crime_date']).dt.time
# Drop the original timestamp
crimes = crimes.drop(['crime_date'], axis=1)
# Reorder columns
crimes = crimes[['date', 'time', 'city_block', 'crime_primary_type',	'crime_primary_description', 'crime_location_description',	'arrest', 'domestic', 'community_area_id',	'latitude', 'longitude']]

We will also capitilize the first letter of every word.

In [16]:
# Capitilize the first letter of every word
crimes['city_block'] = crimes['city_block'].str.title()
crimes['crime_primary_type'] = crimes['crime_primary_type'].str.title()
crimes['crime_primary_description'] = crimes['crime_primary_description'].str.title()
crimes['crime_location_description'] = crimes['crime_location_description'].str.title()

In [17]:
crimes['arrest']=crimes['arrest'].replace([True, False], ['Yes', 'No'])
crimes['domestic']=crimes['arrest'].replace([True, False], ['Yes', 'No'])

In [18]:
crimes.sample(10)

Unnamed: 0,date,time,city_block,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,community_area_id,latitude,longitude
46732,2021-04-08,12:46:00,Ogden Ave,Deceptive Practice,Credit Card Fraud,Bank,Yes,Yes,28,41.879682,-87.668201
9473,2021-01-20,18:48:00,Champlain Ave,Battery,Domestic Battery Simple,Residence,No,No,50,41.686553,-87.607078
172584,2021-11-05,20:26:00,Lamon Ave,Weapons Violation,Unlawful Possession - Handgun,Sidewalk,Yes,Yes,25,41.883853,-87.747847
168313,2021-08-24,12:00:00,Hoyne Ave,Deceptive Practice,Illegal Use Cash Card,Alley,No,No,59,41.835039,-87.67787
121130,2021-08-14,12:25:00,14Th Pl,Assault,Simple,Street,No,No,28,41.862416,-87.662205
23358,2021-02-21,13:58:00,Jarvis Ave,Battery,Simple,Sidewalk,No,No,1,42.015962,-87.671939
79088,2021-06-06,15:25:00,Keeler Ave,Battery,Domestic Battery Simple,Apartment,No,No,57,41.803247,-87.72835
110924,2021-07-28,17:54:00,Highland Ave,Other Offense,Violate Order Of Protection,Residence,Yes,Yes,77,41.997305,-87.66633
158023,2021-10-12,16:00:00,Damen Ave,Criminal Damage,To Vehicle,Street,No,No,31,41.851949,-87.675833
44793,2021-04-04,21:47:00,88Th St,Weapons Violation,Unlawful Possession - Handgun,Street,Yes,Yes,71,41.733948,-87.664636


Chicago neighborhoods are specifically named and have their own COMMUNITY AREA ID.  <br>
Below is a dataframe with the id's,  community names, population, area (sq. miles) and density (population / area)

In [19]:
community.sample(5)

Unnamed: 0,community_area_id,name,population,area_sq_mi,density
62,63,gage park,39540,2.2,17972.73
70,71,auburn gresham,44878,3.77,11903.98
65,66,chicago lawn,55931,3.53,15844.48
2,3,uptown,57182,2.32,24647.41
53,54,riverdale,7262,3.53,2057.22


In [20]:
merged = pd.merge(crimes, community, on='community_area_id')
crimes = merged

In [21]:
crimes.sample(5)

Unnamed: 0,date,time,city_block,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,community_area_id,latitude,longitude,name,population,area_sq_mi,density
7504,2021-07-27,18:02:00,Ingleside Ave,Battery,Aggravated - Handgun,Sidewalk,No,No,44,41.748984,-87.601482,chatham,31710,2.95,10749.15
169461,2021-04-15,02:00:00,Union Ave,Battery,Domestic Battery Simple,Residence,No,No,73,41.726423,-87.640921,washington heights,25065,2.86,8763.99
126057,2021-04-26,09:00:00,Lowe Ave,Deceptive Practice,Credit Card Fraud,Residence,No,No,49,41.69176,-87.63873,roseland,38816,4.82,8053.11
193489,2021-01-29,16:00:00,Cicero Ave,Deceptive Practice,Theft Of Lost / Mislaid Property,Department Store,No,No,65,41.754593,-87.741529,west lawn,33662,2.95,11410.85
118166,2021-05-10,12:20:00,Campbell Ave,Other Offense,Gun Offender - Duty To Register,Residence,Yes,Yes,66,41.771113,-87.685857,chicago lawn,55931,3.53,15844.48


In [22]:
crimes.rename(columns={'name':'community_name'})
crimes = crimes.drop(['community_area_id'], axis=1)

In [23]:
crimes.sample()

Unnamed: 0,date,time,city_block,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,latitude,longitude,name,population,area_sq_mi,density
141401,2021-11-24,12:30:00,Washtenaw Ave,Criminal Damage,To Vehicle,Street,No,No,41.990896,-87.69698,west ridge,77122,3.53,21847.59


We will also add the day of the week, teperature high's, low's and if there was any precipitation that day.

In [24]:
weather.sample()

Unnamed: 0,day_of_week,date,high_temp_f,low_temp_f,precipitation_in
77,fri,2021-10-15 0:00:00,62,51,0.02


In [25]:
weather['date'] = pd.to_datetime(weather['date']).dt.date

In [26]:
weather.sample()

Unnamed: 0,day_of_week,date,high_temp_f,low_temp_f,precipitation_in
355,sun,2021-01-10,29,24,


In [27]:
merged = pd.merge(crimes, weather, on='date')
crimes = merged

In [28]:
crimes.sample()

Unnamed: 0,date,time,city_block,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,latitude,longitude,name,population,area_sq_mi,density,day_of_week,high_temp_f,low_temp_f,precipitation_in
77412,2021-05-28,17:30:00,Franklin Blvd,Deceptive Practice,Financial Identity Theft Over $ 300,Other (Specify),No,No,41.890002,-87.712581,humboldt park,54165,3.6,15045.83,fri,49,44,0.29


Replace 'NaN' with 'No' if there wasnt any precipitation that day.

In [29]:
crimes['precipitation_in']=crimes['precipitation_in'].fillna('No')

In [30]:
crimes.sample()

Unnamed: 0,date,time,city_block,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,latitude,longitude,name,population,area_sq_mi,density,day_of_week,high_temp_f,low_temp_f,precipitation_in
53620,2021-04-17,00:01:00,Michigan Ave,Battery,Aggravated - Handgun,Street,No,No,41.764621,-87.62167,greater grand crossing,31471,3.55,8865.07,sat,56,35,No


Reorder the columns

In [31]:
crimes = crimes[['date', 'time',  'day_of_week', 'high_temp_f', 'low_temp_f', 'precipitation_in', 'crime_primary_type', 'crime_primary_description', 'crime_location_description', 'arrest', 'domestic', 'city_block', 'name', 'population', 'area_sq_mi', 'density', 'latitude', 'longitude']]

In [32]:
crimes.sample()

Unnamed: 0,date,time,day_of_week,high_temp_f,low_temp_f,precipitation_in,crime_primary_type,crime_primary_description,crime_location_description,arrest,domestic,city_block,name,population,area_sq_mi,density,latitude,longitude
103605,2021-07-13,15:45:00,tue,80,68,No,Theft,$500 And Under,Street,No,No,Artesian Ave,beverly,20027,3.18,6297.8,41.711419,-87.683054


Rename the columns

In [33]:
crimes.rename(columns = 
 	{'date' : 'Date',
 	'time' : 'Time',
 	'day_of_week' : 'Weekday',
 	'high_temp_f' : 'Hi (f)',
 	'low_temp_f' : 'Lo (f)',
 	'precipitation_in' : 'Precipitation',
 	'crime_primary_type' : 'Type',
 	'crime_primary_description' : 'Description',
 	'crime_location_description' : 'Location',
 	'arrest' : 'Arrest',
 	'domestic' : 'Domestic',
 	'city_block' : 'Street',
 	'name' : 'Community',
 	'population' : 'Population',
 	'area_sq_mi' : 'Area',
 	'density' : 'Density',
 	'latitude' : 'Latitude',
 	'longitude' : 'Longitude'},
	inplace=True)

In [34]:
crimes.sample()

Unnamed: 0,Date,Time,Weekday,Hi (f),Lo (f),Precipitation,Type,Description,Location,Arrest,Domestic,Street,Community,Population,Area,Density,Latitude,Longitude
183893,2021-11-20,19:46:00,sat,50,36,No,Criminal Damage,To Property,Apartment,No,No,Sheffield Ave,lake view,103050,3.12,33028.85,41.944801,-87.654255


In [36]:
crimes['Community'] = crimes['Community'].str.title()
crimes['Weekday'] = crimes['Weekday'].str.title()

In [39]:
crimes.sample(5)

Unnamed: 0,Date,Time,Weekday,Hi (f),Lo (f),Precipitation,Type,Description,Location,Arrest,Domestic,Street,Community,Population,Area,Density,Latitude,Longitude
165937,2021-10-21,16:26:00,Thu,65,44,No,Battery,Simple,Residence,No,No,Corliss Ave,Pullman,6820,2.12,3216.98,41.703254,-87.60389
186155,2021-11-25,15:00:00,Thu,48,23,No,Assault,Aggravated - Handgun,Street,No,No,Austin Blvd,Austin,96557,7.15,13504.48,41.887869,-87.774906
60234,2021-04-30,08:16:00,Fri,56,42,No,Deceptive Practice,Fraud Or Confidence Game,Apartment,No,No,Illinois St,Near North Side,105481,2.74,38496.72,41.890733,-87.638
166432,2021-11-19,00:18:00,Fri,40,26,No,Criminal Damage,Criminal Defacement,Apartment,No,No,Clark St,Lake View,103050,3.12,33028.85,41.940578,-87.651183
183559,2021-11-20,13:30:00,Sat,50,36,No,Criminal Damage,To Property,Apartment,No,No,74Th St,Greater Grand Crossing,31471,3.55,8865.07,41.76012,-87.621137


In [87]:
crimes['Type'].value_counts().iloc[: 20]

Battery                             39988
Theft                               39758
Criminal Damage                     24716
Assault                             20086
Deceptive Practice                  15710
Other Offense                       13588
Motor Vehicle Theft                 10410
Weapons Violation                    8865
Robbery                              7813
Burglary                             6546
Narcotics                            4072
Criminal Trespass                    3367
Offense Involving Children           1839
Criminal Sexual Assault              1428
Sex Offense                          1036
Homicide                              803
Public Peace Violation                596
Arson                                 515
Stalking                              356
Interference With Public Officer      307
Name: Type, dtype: int64

In [92]:
crimes['Description'].value_counts().iloc[: 25]

Simple                                     24283
Domestic Battery Simple                    19367
$500 And Under                             15654
Over $500                                  12630
To Property                                12208
To Vehicle                                 12004
Automobile                                  9484
Aggravated - Handgun                        7257
Retail Theft                                6085
Unlawful Possession - Handgun               5481
Financial Identity Theft $300 And Under     5104
From Building                               3734
Forcible Entry                              3678
Telephone Threat                            3649
Financial Identity Theft Over $ 300         2731
Aggravated - Other Dangerous Weapon         2705
Armed - Handgun                             2568
Reckless Firearm Discharge                  2457
Harassment By Telephone                     2307
Unlawful Entry                              2266
Harassment By Electr

In [88]:
crimes['Location'].value_counts().iloc[: 20]

Street                                    51310
Apartment                                 43253
Residence                                 31081
Sidewalk                                  11687
Parking Lot / Garage (Non Residential)     6324
Small Retail Store                         5300
Alley                                      4694
Restaurant                                 3650
Residence - Porch / Hallway                2932
Gas Station                                2921
Commercial / Business Office               2804
Other (Specify)                            2719
Vehicle Non-Commercial                     2585
Department Store                           2402
Residence - Yard (Front / Back)            2349
Residence - Garage                         2310
Grocery Food Store                         1948
Park Property                              1463
Hotel / Motel                              1199
Bar Or Tavern                              1136
Name: Location, dtype: int64

In [93]:
crimes['Date'].value_counts().iloc[: 20]

2021-07-31    779
2021-10-01    752
2021-06-06    715
2021-08-01    713
2021-06-19    710
2021-09-19    707
2021-07-29    704
2021-01-01    701
2021-10-02    695
2021-06-20    692
2021-10-03    685
2021-06-27    685
2021-09-13    680
2021-07-04    679
2021-09-01    676
2021-06-08    674
2021-09-16    670
2021-05-01    670
2021-07-01    668
2021-06-13    668
Name: Date, dtype: int64