# Coding Discussion 02
### Ella Zhang

## Preparation

In [1]:
# Load the package
import pandas as pd

# Read in the data
dat = pd.read_csv("chicago_summer_2018_crime_data.csv")

# Explore the data
dat.sample(5)

Unnamed: 0,month,day,year,day_of_week,description,location_description,block,primary_type,district,ward,arrest,domestic,latitude,longitude
9445,8,22,2018,Wednesday,TO PROPERTY,RESIDENCE,090XX S MUSKEGON AVE,CRIMINAL DAMAGE,4,10.0,False,False,41.730865,-87.555424
60667,6,18,2018,Monday,RETAIL THEFT,DRUG STORE,047XX N KEDZIE AVE,THEFT,17,33.0,False,False,41.96757,-87.708449
72969,6,1,2018,Friday,FINANCIAL IDENTITY THEFT OVER $ 300,,086XX S CARPENTER ST,DECEPTIVE PRACTICE,6,21.0,False,False,41.737261,-87.64973
62234,6,16,2018,Saturday,OBSTRUCTING IDENTIFICATION,STREET,082XX S MORGAN ST,INTERFERENCE WITH PUBLIC OFFICER,6,21.0,True,False,41.744505,-87.648719
53484,6,27,2018,Wednesday,TO STATE SUP LAND,CTA TRAIN,046XX N WESTERN AVE,CRIMINAL TRESPASS,19,47.0,True,False,41.966108,-87.688828


## Task

#### Using the data wrangling methods, create a new data frame where:
- the unit of observation is the crime type (i.e. primary_type),
- the column variables corresponds with the day of the month, and
- each cell is populated by the proportion of times that crime type was committed over all days of the month

### Count the number of crime types committed on different days

In [2]:
dat_small = (dat
             .filter(['primary_type','day'])  # Select primary_type and day
             .groupby(['primary_type','day']) # Group by two variables
             .size()                          # Count the number of observations
             .to_frame('count')               # Save counts to a new column
             .reset_index()
            )

# Explore dat_small
dat_small.sample(5)

Unnamed: 0,primary_type,day,count
101,BURGLARY,9,107
31,ASSAULT,1,207
609,PROSTITUTION,20,1
13,ARSON,14,3
56,ASSAULT,26,174


### Calculate the proportion of times that crime type was committed over all days of the month

In [3]:
def prop(x):
    """ Calculte the proportion of a variable over its sum """
    return x / x.sum()

proportion = (dat_small
              .groupby('primary_type') # Group by primary_type
              ['count']                # Select count
              .transform(prop)         # Perform prop function across the group
              .round(2)                # Round to the second decimal place
             )

# Save the proportion result to a new column
dat_small['proportion'] = proportion

# Explore updated dat_small
dat_small.sample(5)

Unnamed: 0,primary_type,day,count,proportion
542,OFFENSE INVOLVING CHILDREN,14,21,0.04
775,WEAPONS VIOLATION,5,64,0.04
484,NARCOTICS,11,83,0.03
359,INTERFERENCE WITH PUBLIC OFFICER,22,14,0.04
426,LIQUOR LAW VIOLATION,10,3,0.04


In [4]:
# Alter the structure from long to wide
newdf = (dat_small
         .pivot_table
         (values = 'proportion',
          index = 'primary_type',
          columns = 'day',
          fill_value = 0) # Fill in missing values with zeros
        )

# Print the final data frame
newdf

day,1,2,3,4,5,6,7,8,9,10,...,22,23,24,25,26,27,28,29,30,31
primary_type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ARSON,0.04,0.03,0.03,0.02,0.04,0.05,0.04,0.04,0.02,0.02,...,0.04,0.01,0.05,0.01,0.02,0.01,0.03,0.05,0.03,0.03
ASSAULT,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.04,0.03,0.03,0.03,0.03,0.03,0.03,0.02
BATTERY,0.04,0.04,0.03,0.04,0.03,0.03,0.03,0.03,0.03,0.03,...,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.02
BURGLARY,0.04,0.03,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,...,0.04,0.03,0.04,0.03,0.03,0.04,0.03,0.03,0.03,0.02
CONCEALED CARRY LICENSE VIOLATION,0.05,0.02,0.05,0.05,0.02,0.05,0.05,0.0,0.02,0.05,...,0.02,0.0,0.05,0.07,0.07,0.02,0.02,0.0,0.02,0.05
CRIM SEXUAL ASSAULT,0.06,0.02,0.04,0.05,0.04,0.04,0.03,0.04,0.03,0.03,...,0.03,0.03,0.02,0.03,0.05,0.03,0.03,0.03,0.03,0.01
CRIMINAL DAMAGE,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,0.03,...,0.04,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.03,0.02
CRIMINAL TRESPASS,0.04,0.03,0.03,0.03,0.03,0.03,0.03,0.04,0.04,0.03,...,0.03,0.04,0.04,0.03,0.03,0.04,0.04,0.03,0.03,0.02
DECEPTIVE PRACTICE,0.04,0.04,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03,...,0.03,0.03,0.03,0.03,0.03,0.04,0.03,0.03,0.03,0.03
GAMBLING,0.07,0.03,0.02,0.01,0.03,0.02,0.03,0.03,0.05,0.04,...,0.02,0.02,0.01,0.04,0.03,0.01,0.02,0.03,0.03,0.03
