### Coding Discussion 3: Chicago Summer 2018 Crime Data
#### Zixun Hao 

#### Preparation 

In [9]:
import pandas as pd
# Location of the Chicago summer-2018 crime extract (CSV).
# NOTE(review): this is an absolute path on one machine; the notebook will not
# run elsewhere until it is pointed at a local copy of the file.
crime_csv = '/Users/zixunhao/Desktop/Fall_2020_GU/Data_Science_I/coding_discussions_ppol564_fall2020/03_coding_discussion/chicago_summer_2018_crime_data.csv'
# Read the extract into a data frame.
dta = pd.read_csv(crime_csv)
# Preview the first five rows to check the columns loaded as expected.
dta.head()

Unnamed: 0,month,day,year,day_of_week,description,location_description,block,primary_type,district,ward,arrest,domestic,latitude,longitude
0,8,4,2018,Saturday,FROM BUILDING,APARTMENT,039XX W WASHINGTON BLVD,THEFT,11,28.0,False,False,,
1,7,26,2018,Thursday,POCKET-PICKING,RESTAURANT,005XX W MADISON ST,THEFT,1,42.0,False,False,,
2,6,24,2018,Sunday,BOGUS CHECK,GROCERY FOOD STORE,004XX E 34TH ST,DECEPTIVE PRACTICE,2,4.0,False,False,,
3,6,13,2018,Wednesday,SIMPLE,RESIDENCE,098XX S EXCHANGE AVE,ASSAULT,4,10.0,False,True,,
4,6,14,2018,Thursday,TO VEHICLE,STREET,001XX S WALLER AVE,CRIMINAL DAMAGE,15,29.0,False,False,,


#### Tidy the data, focus on crime types and days of the month

In [20]:
# Keep only the columns relevant to the daily breakdown. DataFrame.filter
# returns a new frame and leaves dta untouched, so the result has to be
# assigned for the selection to have any effect.
dta_crime_days = dta.filter(['primary_type', 'month', 'day'])
# Count incidents for each (crime type, day of the month) pair. Month is not a
# grouping key, so counts for the same day number are pooled across months.
dta_dailycrime = dta_crime_days.groupby(["primary_type", "day"]).size()
# Show the resulting counts (a Series indexed by crime type and day).
dta_dailycrime

primary_type       day
ARSON              1       4
                   2       3
                   3       3
                   4       2
                   5       4
                          ..
WEAPONS VIOLATION  27     46
                   28     51
                   29     66
                   30     56
                   31     39
Length: 802, dtype: int64

In [67]:
# Share of each crime type's incidents that happened on each day of the month:
# the per-(type, day) counts divided by the total count for that crime type.
crimes_per_type = dta.groupby(["primary_type"]).size()
percent = dta_dailycrime / crimes_per_type
# Readable column headers; the unnamed value column is labelled 0 after
# reset_index().
# NOTE(review): the "Percentage" column holds fractions (e.g. 0.04), not
# values multiplied by 100.
column_labels = {"primary_type": "Primary Crime Type", "day": "Day of the Month", 0: "Percentage"}
# Flatten the index into ordinary columns, round to two decimal places,
# then apply the readable headers.
dta_percent = percent.reset_index().round(2).rename(columns=column_labels)
# Show the data frame.
dta_percent

Unnamed: 0,Primary Crime Type,Day of the Month,Percentage
0,ARSON,1,0.04
1,ARSON,2,0.03
2,ARSON,3,0.03
3,ARSON,4,0.02
4,ARSON,5,0.04
...,...,...,...
797,WEAPONS VIOLATION,27,0.03
798,WEAPONS VIOLATION,28,0.03
799,WEAPONS VIOLATION,29,0.04
800,WEAPONS VIOLATION,30,0.03


#### Reshape the data from long to wide format

In [68]:
# Reshape long -> wide: one row per crime type, one column per day of the
# month, cells holding the daily share. Crime type / day combinations with no
# recorded incidents are shown as 0 instead of NaN.
pd.pivot_table(
    dta_percent,
    index='Primary Crime Type',
    columns='Day of the Month',
    values='Percentage',
    fill_value=0,
)


Day of the Month,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,31
Primary Crime Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARSON,0.04,0.03,0.03,0.02,0.04,0.05,0.04,0.04,0.02,0.02,...,0.04,0.01,0.05,0.01,0.02,0.01,0.03,0.05,0.03,0.03
ASSAULT,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.04,0.03,0.03,0.03,0.03,0.03,0.03,0.02
BATTERY,0.04,0.04,0.03,0.04,0.03,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.02
BURGLARY,0.04,0.03,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,...,0.04,0.03,0.04,0.03,0.03,0.04,0.03,0.03,0.03,0.02
CONCEALED CARRY LICENSE VIOLATION,0.05,0.02,0.05,0.05,0.02,0.05,0.05,0.0,0.02,0.05,...,0.02,0.0,0.05,0.07,0.07,0.02,0.02,0.0,0.02,0.05
CRIM SEXUAL ASSAULT,0.06,0.02,0.04,0.05,0.04,0.04,0.03,0.04,0.03,0.03,...,0.03,0.03,0.02,0.03,0.05,0.03,0.03,0.03,0.03,0.01
CRIMINAL DAMAGE,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,...,0.04,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.03,0.02
CRIMINAL TRESPASS,0.04,0.03,0.03,0.03,0.03,0.03,0.03,0.04,0.04,0.03,...,0.03,0.04,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.02
DECEPTIVE PRACTICE,0.04,0.04,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,...,0.03,0.03,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03
GAMBLING,0.07,0.03,0.02,0.01,0.03,0.02,0.03,0.03,0.05,0.04,...,0.02,0.02,0.01,0.04,0.03,0.01,0.02,0.03,0.03,0.03
