Loading the data and setting things up:

In [3]:
import pandas as pd
from dfply import *
# Read the crime CSV (path is relative to the notebook's working directory)
# into the DataFrame that the cells below filter and aggregate.
crimedata = pd.read_csv(filepath_or_buffer="chicago_summer_2018_crime_data.csv")

Taking a peek at the dataset

In [4]:
# Preview the first rows to check the column names and value formats.
crimedata.head()

Unnamed: 0,month,day,year,day_of_week,description,location_description,block,primary_type,district,ward,arrest,domestic,latitude,longitude
0,8,4,2018,Saturday,FROM BUILDING,APARTMENT,039XX W WASHINGTON BLVD,THEFT,11,28.0,False,False,,
1,7,26,2018,Thursday,POCKET-PICKING,RESTAURANT,005XX W MADISON ST,THEFT,1,42.0,False,False,,
2,6,24,2018,Sunday,BOGUS CHECK,GROCERY FOOD STORE,004XX E 34TH ST,DECEPTIVE PRACTICE,2,4.0,False,False,,
3,6,13,2018,Wednesday,SIMPLE,RESIDENCE,098XX S EXCHANGE AVE,ASSAULT,4,10.0,False,True,,
4,6,14,2018,Thursday,TO VEHICLE,STREET,001XX S WALLER AVE,CRIMINAL DAMAGE,15,29.0,False,False,,


Creating a new data frame that contains only the crimes from the month of August.

In [6]:
# Keep only the rows whose 'month' value is 8 (August), then preview the result.
crimeaugust = crimedata.loc[crimedata['month'].eq(8)]
crimeaugust.head()

Unnamed: 0,month,day,year,day_of_week,description,location_description,block,primary_type,district,ward,arrest,domestic,latitude,longitude
0,8,4,2018,Saturday,FROM BUILDING,APARTMENT,039XX W WASHINGTON BLVD,THEFT,11,28.0,False,False,,
11,8,15,2018,Wednesday,$500 AND UNDER,RESIDENCE,074XX S PEORIA ST,THEFT,7,17.0,False,False,,
12,8,11,2018,Saturday,SIMPLE,STREET,107XX S RHODES AVE,BATTERY,5,9.0,False,False,,
13,8,25,2018,Saturday,OVER $500,RESTAURANT,002XX N MICHIGAN AVE,THEFT,1,42.0,False,False,,
14,8,16,2018,Thursday,ILLEGAL USE CASH CARD,BANK,070XX S ASHLAND AVE,DECEPTIVE PRACTICE,7,17.0,False,False,,


Now we will use the groupby method to count how many times each crime type occurred on each day of August.

In [87]:
# Count the August rows for every (primary_type, day) pair, then flatten the
# grouped result into a frame with columns primary_type, day and 'occurence'.
augcrimetype = (
    crimeaugust
    .groupby(['primary_type', 'day'])['primary_type']
    .count()
    .reset_index(name='occurence')
)

In [88]:
# Show the per-type, per-day counts using the notebook's rich DataFrame
# rendering (a bare last expression) rather than plain-text print().
augcrimetype

          primary_type  day  occurence
0                ARSON    2          2
1                ARSON    3          1
2                ARSON    4          1
3                ARSON    5          2
4                ARSON    6          3
..                 ...  ...        ...
681  WEAPONS VIOLATION   27         15
682  WEAPONS VIOLATION   28         13
683  WEAPONS VIOLATION   29         12
684  WEAPONS VIOLATION   30         22
685  WEAPONS VIOLATION   31         23

[686 rows x 3 columns]


Now we will use the pivot_table method to reshape the table so that crime types are rows and days are columns, with 0 filled in for missing values. We also add a new column holding the sum of all occurrences of each crime type for the month.

In [99]:
# Reshape the long counts into a wide table: one row per crime type, one
# column per day, with 0 where a type has no entry for a day. Each
# (primary_type, day) pair appears once in augcrimetype, so the pivot's
# aggregation leaves the counts unchanged. A trailing 'sum' column then holds
# each crime type's total across all days.
augtable = (
    augcrimetype
    .pivot_table(
        index='primary_type',
        columns='day',
        values='occurence',
        fill_value=0,
    )
    .assign(sum=lambda wide: wide.sum(axis=1))
)

augtable

day,1,2,3,4,5,6,7,8,9,10,...,23,24,25,26,27,28,29,30,31,sum
primary_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARSON,0,2,1,1,2,3,0,3,1,0,...,0,0,1,0,0,0,1,2,2,37
ASSAULT,65,50,52,61,77,51,46,62,49,65,...,60,52,53,62,55,62,63,57,65,1826
BATTERY,155,150,157,202,198,130,122,125,147,137,...,124,133,168,207,144,138,104,127,128,4605
BURGLARY,38,41,30,48,29,48,34,27,44,41,...,36,42,33,22,51,46,38,40,46,1199
CONCEALED CARRY LICENSE VIOLATION,0,0,1,1,0,0,1,0,0,2,...,0,2,2,2,0,0,0,0,1,17
CRIM SEXUAL ASSAULT,9,2,8,11,7,6,4,1,4,7,...,3,2,5,8,2,3,4,5,2,143
CRIMINAL DAMAGE,81,82,74,87,97,74,70,69,75,74,...,87,78,100,93,88,94,78,86,75,2627
CRIMINAL TRESPASS,20,23,14,17,16,21,26,26,13,24,...,19,21,12,23,21,22,18,25,23,595
DECEPTIVE PRACTICE,71,54,78,36,42,62,56,49,41,57,...,40,58,43,27,53,50,46,50,65,1570
GAMBLING,6,3,1,0,0,0,3,1,1,3,...,1,1,1,0,0,1,1,0,2,39


Now we will create a new data frame in which each day's occurrence count is divided by that crime type's monthly sum, and then round the result to 2 decimal places.

In [116]:
# Divide every column, row by row, by that row's 'sum' column (the last
# column of augtable), so each day holds its share of the crime type's
# monthly total; the 'sum' column itself becomes 1.0. Round to 2 decimals.
AugCrimeRate = augtable.div(augtable['sum'], axis='index').round(2)

Here is the final data frame for the month of August.

In [118]:
# Display the table of daily counts divided by each crime type's monthly sum.
AugCrimeRate

day,1,2,3,4,5,6,7,8,9,10,...,23,24,25,26,27,28,29,30,31,sum
primary_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARSON,0.0,0.05,0.03,0.03,0.05,0.08,0.0,0.08,0.03,0.0,...,0.0,0.0,0.03,0.0,0.0,0.0,0.03,0.05,0.05,1.0
ASSAULT,0.04,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,0.04,...,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.04,1.0
BATTERY,0.03,0.03,0.03,0.04,0.04,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.04,0.04,0.03,0.03,0.02,0.03,0.03,1.0
BURGLARY,0.03,0.03,0.03,0.04,0.02,0.04,0.03,0.02,0.04,0.03,...,0.03,0.04,0.03,0.02,0.04,0.04,0.03,0.03,0.04,1.0
CONCEALED CARRY LICENSE VIOLATION,0.0,0.0,0.06,0.06,0.0,0.0,0.06,0.0,0.0,0.12,...,0.0,0.12,0.12,0.12,0.0,0.0,0.0,0.0,0.06,1.0
CRIM SEXUAL ASSAULT,0.06,0.01,0.06,0.08,0.05,0.04,0.03,0.01,0.03,0.05,...,0.02,0.01,0.03,0.06,0.01,0.02,0.03,0.03,0.01,1.0
CRIMINAL DAMAGE,0.03,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.04,0.04,0.03,0.04,0.03,0.03,0.03,1.0
CRIMINAL TRESPASS,0.03,0.04,0.02,0.03,0.03,0.04,0.04,0.04,0.02,0.04,...,0.03,0.04,0.02,0.04,0.04,0.04,0.03,0.04,0.04,1.0
DECEPTIVE PRACTICE,0.05,0.03,0.05,0.02,0.03,0.04,0.04,0.03,0.03,0.04,...,0.03,0.04,0.03,0.02,0.03,0.03,0.03,0.03,0.04,1.0
GAMBLING,0.15,0.08,0.03,0.0,0.0,0.0,0.08,0.03,0.03,0.08,...,0.03,0.03,0.03,0.0,0.0,0.03,0.03,0.0,0.05,1.0
