# Coding Discussion 02
### Ella Zhang

## Preparation

In [1]:
# Load the package
import pandas as pd

# Read the crime records CSV into a DataFrame
dat = pd.read_csv(filepath_or_buffer="chicago_summer_2018_crime_data.csv")

# Preview five randomly drawn rows
dat.sample(n=5)

Unnamed: 0,month,day,year,day_of_week,description,location_description,block,primary_type,district,ward,arrest,domestic,latitude,longitude
65247,6,13,2018,Wednesday,DOMESTIC BATTERY SIMPLE,APARTMENT,120XX S LA SALLE ST,BATTERY,5,9.0,True,True,41.675289,-87.626187
69993,6,5,2018,Tuesday,$500 AND UNDER,SIDEWALK,038XX W OHIO ST,THEFT,11,27.0,False,False,41.891741,-87.72172
22725,8,5,2018,Sunday,FROM BUILDING,BAR OR TAVERN,0000X W HUBBARD ST,THEFT,18,42.0,False,False,41.890052,-87.628914
13777,8,16,2018,Thursday,CREDIT CARD FRAUD,AIRPORT BUILDING NON-TERMINAL - SECURE AREA,005XX W OHARE ST,DECEPTIVE PRACTICE,16,41.0,True,False,41.985413,-87.883781
14208,8,9,2018,Thursday,SIMPLE,OTHER,001XX N STATE ST,BATTERY,1,42.0,False,False,41.883475,-87.627877


## Task

#### Using the data wrangling methods, create a new data frame where:
- the unit of observation is the crime type (i.e. primary_type),
- the column variables correspond to the days of the month, and
- each cell is populated by the proportion of times that crime type was committed over all days of the month

### Count the number of crimes of each type committed on each day of the month

In [2]:
# Keep only the two grouping columns
type_and_day = dat[['primary_type', 'day']]

# Number of rows for every (primary_type, day) combination
type_day_counts = type_and_day.groupby(['primary_type', 'day']).size()

# Turn the group keys back into ordinary columns and store the
# tallies in a column named 'count'
dat_small = type_day_counts.reset_index(name='count')

# Explore dat_small
dat_small.sample(5)

Unnamed: 0,primary_type,day,count
759,THEFT,20,629
642,PUBLIC PEACE VIOLATION,18,13
120,BURGLARY,28,113
39,ASSAULT,9,161
700,SEX OFFENSE,14,8


### Calculate the proportion of times that crime type was committed over all days of the month

In [3]:
def prop(x):
    """Return every element of x divided by the sum of all elements of x."""
    total = x.sum()
    return x / total

# Share of each crime type's total count that falls on each day of the month.
# Full precision is kept on purpose: rounding to two decimals at this stage
# collapses small non-zero shares to 0.00 (indistinguishable from the zeros
# filled in for missing days when the table is pivoted) and the shares of a
# crime type would no longer add up to 1.
proportion = (dat_small
              .groupby('primary_type') # Group by primary_type
              ['count']                # Select count
              .transform(prop)         # Divide each count by its group's total
             )

# Save the proportion result to a new column
dat_small['proportion'] = proportion

# Sanity check: the daily shares of every crime type add up to 1
share_totals = dat_small.groupby('primary_type')['proportion'].sum()
assert share_totals.sub(1).abs().lt(1e-9).all(), \
    "daily shares should sum to 1 for every crime type"

# Explore updated dat_small (rounded for display only; the stored
# column keeps full precision)
dat_small.sample(5).round(2)

Unnamed: 0,primary_type,day,count,proportion
733,STALKING,23,5,0.09
380,INTIMIDATION,15,2,0.04
509,NON-CRIMINAL,29,1,0.12
245,DECEPTIVE PRACTICE,2,171,0.04
584,OTHER OFFENSE,25,151,0.03


In [4]:
# Alter the structure from long to wide: one row per crime type, one column
# per day of the month, each cell holding the 'proportion' value.
# dat_small has a single row per (primary_type, day) pair, so pivot_table's
# default aggregation leaves the values unchanged.
newdf = dat_small.pivot_table(
    values='proportion',
    index='primary_type',
    columns='day',
    fill_value=0,  # Days on which a crime type has no row get 0
)

# Show the final data frame. Ending the cell with the expression (rather than
# print) lets the notebook render it as a table; round(2) returns a rounded
# copy for display and leaves newdf itself untouched.
newdf.round(2)

day                                  1     2     3     4     5     6     7   \
primary_type                                                                  
ARSON                              0.04  0.03  0.03  0.02  0.04  0.05  0.04   
ASSAULT                            0.04  0.03  0.03  0.04  0.04  0.03  0.03   
BATTERY                            0.04  0.04  0.03  0.04  0.03  0.03  0.03   
BURGLARY                           0.04  0.03  0.03  0.03  0.03  0.04  0.03   
CONCEALED CARRY LICENSE VIOLATION  0.05  0.02  0.05  0.05  0.02  0.05  0.05   
CRIM SEXUAL ASSAULT                0.06  0.02  0.04  0.05  0.04  0.04  0.03   
CRIMINAL DAMAGE                    0.03  0.03  0.03  0.03  0.03  0.03  0.03   
CRIMINAL TRESPASS                  0.04  0.03  0.03  0.03  0.03  0.03  0.03   
DECEPTIVE PRACTICE                 0.04  0.04  0.03  0.03  0.03  0.04  0.03   
GAMBLING                           0.07  0.03  0.02  0.01  0.03  0.02  0.03   
HOMICIDE                           0.02  0.00  0.03 