In [1]:
import csv, os

In [2]:
from datetime import datetime 

In [3]:
import pandas as pd 

In [4]:
from collections import defaultdict

In [5]:
import altair as alt

In [6]:
# Locate the Portland crime export inside the directory named by the DATA_DIR
# environment variable (a KeyError is raised if the variable is not set).
data_dir = os.environ['DATA_DIR']
csv_file = os.path.join(data_dir, 'PortlandData.csv')

# Read the CSV with OccurDate parsed as a datetime, then keep only offenses that
# occurred on or after January 1, 2020. reset_index() is called without drop=True,
# so the original row labels are retained in an "index" column.
portland_crimes = (
    pd.read_csv(csv_file, parse_dates=["OccurDate"])
    .loc[lambda frame: frame["OccurDate"] >= datetime.strptime('1/1/2020', '%m/%d/%Y')]
    .reset_index()
)

In [121]:
# Split the offenses at the Measure 110 enactment date into two data frames:
# rows dated strictly before it are "pre", rows dated on or after it are "post".
enactment_date = datetime.strptime('2/1/2021', "%m/%d/%Y")

# Pre-enactment rows and their per-category offense tallies.
pre_crimes = portland_crimes.loc[portland_crimes["OccurDate"].lt(enactment_date)]
pre_enactment_crime_count = pre_crimes["OffenseCategory"].value_counts()

# Post-enactment rows and their per-category offense tallies.
post_crimes = portland_crimes.loc[portland_crimes["OccurDate"].ge(enactment_date)]
post_enactment_crime_count = post_crimes["OffenseCategory"].value_counts()

In [122]:
# Display the per-OffenseCategory counts for offenses dated before enactment_date.
pre_enactment_crime_count

Larceny Offenses                24640
Vandalism                        8967
Assault Offenses                 8498
Motor Vehicle Theft              7062
Burglary                         5915
Fraud Offenses                   3903
Robbery                          1085
Drug/Narcotic Offenses           1018
Weapon Law Violations             876
Arson                             560
Counterfeiting/Forgery            532
Sex Offenses                      490
Pornography/Obscene Material       89
Kidnapping/Abduction               72
Stolen Property Offenses           63
Homicide Offenses                  60
Embezzlement                       58
Extortion/Blackmail                33
Animal Cruelty Offenses            29
Prostitution Offenses              27
Human Trafficking Offenses         22
Name: OffenseCategory, dtype: int64

In [8]:
def create_default_dict(ddict, pre_count_by_crime, post_count_by_crime):
    """Collect pre- and post-period counts per category into ``ddict``.

    For every category found in either input, two values are appended to
    ``ddict[category]``: the pre-period count followed by the post-period
    count. A category missing from one period contributes 0 for that period,
    so every list stays aligned as ``[pre, post]``.

    Parameters:
        ddict: mapping of category -> list; it is updated in place.
        pre_count_by_crime: mapping-like object (dict or pandas Series)
            exposing ``keys()`` and ``get(key, default)``.
        post_count_by_crime: same kind of object for the post period.

    Returns:
        The same ``ddict`` object that was passed in.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order: every
    # pre-period category first, then categories that only appear post-period.
    categories = dict.fromkeys(
        [*pre_count_by_crime.keys(), *post_count_by_crime.keys()]
    )
    for category in categories:
        # setdefault works for a defaultdict(list) and for a plain dict alike.
        counts = ddict.setdefault(category, [])
        counts.append(pre_count_by_crime.get(category, 0))
        counts.append(post_count_by_crime.get(category, 0))
    return ddict
    

In [9]:
# Calculate the percent change in crime from the period before Measure 110 (2020 up to
# enactment) to the period after it (enactment to date)
def calculate_per_change(values):
    """Append a formatted percent change to each ``[pre, post]`` list.

    Parameters:
        values: mapping of category -> list whose first two items are the
            pre-period count and the post-period count.

    Returns:
        The same ``values`` mapping. Each list has gained one trailing string:
        the change relative to the pre-period count formatted as a percentage
        with two decimals (e.g. ``"50.00%"``), or ``"N/A"`` when the
        pre-period count is 0 and a relative change is undefined.
    """
    for counts in values.values():
        baseline, current = counts[0], counts[1]
        if baseline == 0:
            # A relative change from a zero baseline is undefined; dividing
            # would raise ZeroDivisionError or produce "inf%"/"nan%".
            counts.append("N/A")
        else:
            counts.append("{0:.2%}".format((current - baseline) / baseline))
    return values
    

In [10]:
def count_by_offense_type(dd, per_change):
    """Build the pre/post comparison table from the per-category lists.

    Each key of ``dd`` becomes a row label and its three-item list fills the
    "Pre-Measure 110", "Post-Measure 110" and "Percent Change" columns.
    ``per_change`` is accepted but not read by this function.
    """
    column_names = ["Pre-Measure 110", "Post-Measure 110", "Percent Change"]
    table = pd.DataFrame.from_dict(dd, orient='index', columns=column_names)
    # Label the rows explicitly with the dictionary keys.
    table.index = list(dd.keys())
    return table

In [11]:
# Offense counts by category: before vs. after Measure 110

pre_vs_post_dict = create_default_dict(
    ddict=defaultdict(list),
    pre_count_by_crime=pre_enactment_crime_count,
    post_count_by_crime=post_enactment_crime_count,
)
# calculate_per_change appends to the lists in place and returns the same mapping,
# so percent_change and pre_vs_post_dict refer to one object.
percent_change = calculate_per_change(values=pre_vs_post_dict)
pre_vs_post = count_by_offense_type(dd=pre_vs_post_dict, per_change=percent_change)
# TODO: add a "Total" summary row (sum of each period's counts plus the overall percent change).

In [123]:
# Count offenses per MonthYear label on each side of the enactment date, for all
# offenses and for drug/narcotic offenses only.
# NOTE(review): MonthYear is not created in any visible cell -- presumably it is a
# column of the loaded CSV; confirm.

# Pre-enactment: re-split with a fresh positional index, then tally by month.
pre_crimes = portland_crimes.loc[portland_crimes["OccurDate"].lt(enactment_date)].reset_index()
pre_crimes_by_month = pre_crimes["MonthYear"].value_counts().reset_index()
pre_drug_crimes = pre_crimes.loc[pre_crimes["OffenseCategory"].eq('Drug/Narcotic Offenses')]
pre_drug_crimes_by_month = pre_drug_crimes["MonthYear"].value_counts().reset_index()

# Post-enactment: same steps for rows dated on or after the enactment date.
post_crimes = portland_crimes.loc[portland_crimes["OccurDate"].ge(enactment_date)].reset_index()
post_crimes_by_month = post_crimes["MonthYear"].value_counts().reset_index()
post_drug_crimes = post_crimes.loc[post_crimes["OffenseCategory"].eq('Drug/Narcotic Offenses')]
post_drug_crimes_by_month = post_drug_crimes["MonthYear"].value_counts().reset_index()

In [134]:
# Display the pre-enactment per-month offense counts.
# NOTE(review): the saved output for this cell has CrimeCount and MonthDate columns, but
# the visible assignment of pre_crimes_by_month (MonthYear.value_counts().reset_index())
# creates neither -- the output probably comes from a cell that was later edited or
# removed; confirm with Restart Kernel -> Run All.
pre_crimes_by_month

Unnamed: 0,MonthYear,CrimeCount,MonthDate
0,"January, 2020",5278,2020-01-01
1,"February, 2020",4942,2020-02-01
2,"March, 2020",4570,2020-03-01
3,"April, 2020",4552,2020-04-01
4,"May, 2020",4719,2020-05-01
5,"June, 2020",4115,2020-06-01
6,"July, 2020",4882,2020-07-01
7,"August, 2020",5445,2020-08-01
8,"September, 2020",5385,2020-09-01
9,"October, 2020",5607,2020-10-01


In [218]:
def sort_by_month(crime_by_month_df):
    """Count rows per ``MonthYear`` label and return them in calendar order.

    Parameters:
        crime_by_month_df: DataFrame with a ``MonthYear`` column whose values
            look like ``"January, 2020"`` (parsed with ``"%B, %Y"``).

    Returns:
        A DataFrame with a fresh 0..n-1 index, sorted by ``MonthDate``
        ascending, with these columns in order:
            level_0    -- position of the month when ranked by count
                          (0 = most rows); kept because merge_crime_df
                          drops ``level_0_x`` / ``level_0_y`` after merging
            MonthYear  -- the month label
            TotalCount -- number of rows carrying that label
            MonthDate  -- the label parsed to a timestamp
    """
    counts = crime_by_month_df["MonthYear"].value_counts()
    # Build the frame from the counts' index and values directly rather than
    # via value_counts().reset_index(): the column names that call produces
    # changed in pandas 2.0 ("index"/"MonthYear" became "MonthYear"/"count"),
    # which made the old ['index'] lookup raise KeyError.
    by_month = pd.DataFrame(
        {
            "level_0": range(len(counts)),
            "MonthYear": counts.index,
            "TotalCount": counts.to_numpy(),
        }
    )
    by_month["MonthDate"] = pd.to_datetime(by_month["MonthYear"], format="%B, %Y")
    return by_month.sort_values("MonthDate", ascending=True).reset_index(drop=True)

In [252]:
def merge_crime_df(total_crime, drug_crime):
    """Join monthly total counts with monthly drug-offense counts.

    Parameters:
        total_crime: DataFrame with ``MonthYear``, ``MonthDate`` and
            ``TotalCount`` columns (all offenses per month).
        drug_crime: DataFrame with the same columns, for drug offenses only.

    Returns:
        A DataFrame with one row per month of ``total_crime`` and the columns
        ``MonthYear``, ``Total Crime``, ``Drug Crime`` and
        ``Percentage of Drug Related Crime (%)``.
    """
    # Left join: a month with no drug offenses must stay in the table with a
    # count of 0 instead of being dropped, as the default inner join would do.
    merged = pd.merge(total_crime, drug_crime, on=['MonthYear', 'MonthDate'], how='left')
    # Helper columns are removed only if present, so inputs that lack the
    # level_0 column do not raise KeyError.
    merged = merged.drop(
        columns=['level_0_x', 'level_0_y', 'level_0', 'MonthDate'], errors='ignore'
    )
    # The 'index' entry only takes effect if an input carries an 'index' column.
    merged = merged.rename(columns={'index': 'Month By Year', 'TotalCount_x': 'Total Crime', 'TotalCount_y': 'Drug Crime'})
    # Unmatched months come back as NaN (which makes the column float); fill
    # with 0 and restore the original count dtype.
    merged['Drug Crime'] = merged['Drug Crime'].fillna(0).astype(drug_crime['TotalCount'].dtype)
    merged['Percentage of Drug Related Crime (%)'] = (merged['Drug Crime'] / merged['Total Crime']) * 100
    return merged

In [254]:
# Pre-enactment: monthly totals, monthly drug/narcotic totals, and the merged table.
# Note that pre_drug_crimes is bound here to the monthly summary frame.
pre_measure_crimes_by_month = sort_by_month(crime_by_month_df=pre_crimes)
pre_drug_crimes = sort_by_month(
    crime_by_month_df=pre_crimes.loc[pre_crimes["OffenseCategory"].eq('Drug/Narcotic Offenses')]
)
merged_pre = merge_crime_df(total_crime=pre_measure_crimes_by_month, drug_crime=pre_drug_crimes)

# Post-enactment: the same three steps.
post_measure_crimes_by_month = sort_by_month(crime_by_month_df=post_crimes)
post_drug_crimes = sort_by_month(
    crime_by_month_df=post_crimes.loc[post_crimes["OffenseCategory"].eq('Drug/Narcotic Offenses')]
)
merged_post = merge_crime_df(total_crime=post_measure_crimes_by_month, drug_crime=post_drug_crimes)

Unnamed: 0,MonthYear,Total Crime,Drug Crime,Percentage of Drug Related Crime (%)
0,"January, 2020",5278,138,2.614627
1,"February, 2020",4942,146,2.95427
2,"March, 2020",4570,108,2.363239
3,"April, 2020",4552,139,3.053603
4,"May, 2020",4719,188,3.983895
5,"June, 2020",4115,20,0.486027
6,"July, 2020",4882,40,0.819336
7,"August, 2020",5445,28,0.514233
8,"September, 2020",5385,38,0.705664
9,"October, 2020",5607,50,0.891742
