In [1]:
## Program to reformat crime data and present the proportion of a particular crime on each day of a month

## Importing required libraries 
import pandas as pd

In [2]:
## Load the crime records from the CSV file (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer="chicago_summer_2018_crime_data.csv")

## Announce the preview, then let the notebook render the first ten rows
## (the bare trailing expression is what produces the table output)
print("Displaying the head of the data : ")
data.head(n=10)

Displaying the head of the data : 


Unnamed: 0,month,day,year,day_of_week,description,location_description,block,primary_type,district,ward,arrest,domestic,latitude,longitude
0,8,4,2018,Saturday,FROM BUILDING,APARTMENT,039XX W WASHINGTON BLVD,THEFT,11,28.0,False,False,,
1,7,26,2018,Thursday,POCKET-PICKING,RESTAURANT,005XX W MADISON ST,THEFT,1,42.0,False,False,,
2,6,24,2018,Sunday,BOGUS CHECK,GROCERY FOOD STORE,004XX E 34TH ST,DECEPTIVE PRACTICE,2,4.0,False,False,,
3,6,13,2018,Wednesday,SIMPLE,RESIDENCE,098XX S EXCHANGE AVE,ASSAULT,4,10.0,False,True,,
4,6,14,2018,Thursday,TO VEHICLE,STREET,001XX S WALLER AVE,CRIMINAL DAMAGE,15,29.0,False,False,,
5,7,2,2018,Monday,CREDIT CARD FRAUD,RESIDENCE,083XX S JUSTINE ST,DECEPTIVE PRACTICE,6,21.0,False,False,,
6,6,1,2018,Friday,PREDATORY,RESIDENCE,087XX S COLFAX AVE,CRIM SEXUAL ASSAULT,4,7.0,False,False,,
7,7,25,2018,Wednesday,OVER $500,RESIDENCE,046XX S LAKE PARK AVE,THEFT,2,4.0,False,False,,
8,7,27,2018,Friday,CRIM SEX ABUSE BY FAM MEMBER,RESIDENCE,004XX E 40TH ST,OFFENSE INVOLVING CHILDREN,2,3.0,False,False,,
9,7,24,2018,Tuesday,FINANCIAL IDENTITY THEFT OVER $ 300,RESIDENCE,053XX S CORNELL AVE,DECEPTIVE PRACTICE,2,5.0,False,False,,


In [15]:
## Data-frame containing the number of incidents of each crime type on each day

## Assuming each row of `data` is one reported incident, the size of every
## (primary_type, month, day) group is that day's incident count.
## A crime type with no incidents on a given day gets no row here at all.
day_data = (data
            .groupby(["primary_type", "month", "day"])  ## One group per crime type per calendar day
            .size()  ## Rows in the group = incidents of that type on that day
            .reset_index(name="CountEachDay"))  ## Group keys back to columns; name the count column

print("Printing the data frame : ")
print(day_data.head(10))




Printing the data frame : 
  primary_type  month  day  CountEachDay
0        ARSON      6    1             2
1        ARSON      6    2             1
2        ARSON      6    3             2
3        ARSON      6    4             1
4        ARSON      6    5             1
5        ARSON      6    6             1
6        ARSON      6    7             2
7        ARSON      6    8             1
8        ARSON      6   10             1
9        ARSON      6   11             3


In [16]:
## Data-frame containing the number of incidents of each crime type in each month

## Assuming each row of `data` is one reported incident, the size of every
## (primary_type, month) group is that crime type's monthly incident total.
month_data = (data
              .groupby(["primary_type", "month"])  ## One group per crime type per month
              .size()  ## Rows in the group = incidents of that type in that month
              .reset_index(name="Count_Month"))  ## Group keys back to columns; name the count column

print("Printing the data frame : ")
print(month_data.head(10))



Printing the data frame : 
  primary_type  month  Count_Month
0        ARSON      6           36
1        ARSON      7           39
2        ARSON      8           37
3      ASSAULT      6         1872
4      ASSAULT      7         1937
5      ASSAULT      8         1826
6      BATTERY      6         4635
7      BATTERY      7         4871
8      BATTERY      8         4605
9     BURGLARY      6         1056


In [25]:
## Attach each crime type's monthly total to its per-day rows, then turn the
## daily count into that day's share of the month (a fraction rounded to 2 d.p.)
joined_data = (
    pd.merge(day_data, month_data, how="left", on=["primary_type", "month"])
    .assign(PercentofCrime=lambda frame: (frame["CountEachDay"] / frame["Count_Month"]).round(2))
    .drop(columns=["CountEachDay", "Count_Month"])  ## Raw counts are no longer needed once the share exists
)

print("Printing joined data : ")
print(joined_data.head(10))




Printing joined data : 
  primary_type  month  day  PercentofCrime
0        ARSON      6    1            0.06
1        ARSON      6    2            0.03
2        ARSON      6    3            0.06
3        ARSON      6    4            0.03
4        ARSON      6    5            0.03
5        ARSON      6    6            0.03
6        ARSON      6    7            0.06
7        ARSON      6    8            0.03
8        ARSON      6   10            0.03
9        ARSON      6   11            0.08


In [26]:
## Reshape to one row per (crime type, month) and one column per day of the month;
## combinations that have no entry in joined_data are filled with 0
transformed_data = pd.pivot_table(
    joined_data,
    index=["primary_type", "month"],
    columns="day",
    values="PercentofCrime",
    fill_value=0,
)

print("The created data-frame is : ")
transformed_data





The created data-frame is : 


Unnamed: 0_level_0,day,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,31
primary_type,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
ARSON,6,0.06,0.03,0.06,0.03,0.03,0.03,0.06,0.03,0.00,0.03,...,0.06,0.03,0.14,0.00,0.03,0.03,0.00,0.03,0.03,0.00
ARSON,7,0.05,0.00,0.00,0.00,0.03,0.05,0.08,0.03,0.03,0.03,...,0.08,0.00,0.03,0.00,0.03,0.00,0.08,0.10,0.00,0.03
ARSON,8,0.00,0.05,0.03,0.03,0.05,0.08,0.00,0.08,0.03,0.00,...,0.00,0.00,0.00,0.03,0.00,0.00,0.00,0.03,0.05,0.05
ASSAULT,6,0.04,0.04,0.03,0.04,0.03,0.04,0.03,0.03,0.03,0.02,...,0.03,0.03,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.00
ASSAULT,7,0.04,0.04,0.03,0.03,0.03,0.04,0.03,0.04,0.03,0.03,...,0.03,0.03,0.04,0.03,0.03,0.03,0.02,0.04,0.02,0.04
ASSAULT,8,0.04,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,0.04,...,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.04
BATTERY,6,0.04,0.04,0.04,0.03,0.03,0.03,0.03,0.03,0.03,0.04,...,0.02,0.03,0.04,0.04,0.03,0.03,0.03,0.03,0.04,0.00
BATTERY,7,0.04,0.03,0.03,0.05,0.03,0.03,0.04,0.05,0.03,0.03,...,0.04,0.03,0.03,0.03,0.03,0.02,0.03,0.04,0.03,0.03
BATTERY,8,0.03,0.03,0.03,0.04,0.04,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.03,0.04,0.04,0.03,0.03,0.02,0.03,0.03
BURGLARY,6,0.04,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.02,0.03,...,0.04,0.03,0.04,0.04,0.04,0.05,0.04,0.03,0.03,0.00
