In [1]:
import csv, os

In [2]:
from datetime import datetime 

In [3]:
import pandas as pd 

In [4]:
from collections import defaultdict

In [5]:
import altair as alt

In [44]:
data_dir = os.environ['DATA_DIR'] 
csv_file = os.path.join(data_dir, 'PortlandData.csv') 
portland_crimes = pd.read_csv(csv_file, parse_dates = ["OccurDate"])
portland_crimes = portland_crimes[portland_crimes.OccurDate >= datetime.strptime('1/1/2020', '%m/%d/%Y')].reset_index()
portland_crimes['DrugCrime'] = portland_crimes.OffenseCategory == 'Drug/Narcotic Offenses'
portland_crimes["MonthDate"] = pd.to_datetime(portland_crimes['MonthYear'], format = "%B, %Y")


In [45]:
#Separate crimes before and after Measure 110 into two separate data frames
enactment_date = datetime.strptime('2/1/2021', "%m/%d/%Y")
pre_crimes = portland_crimes[portland_crimes.OccurDate < enactment_date]
post_crimes = portland_crimes[portland_crimes.OccurDate >= enactment_date]
pre_enactment_crime_count = pre_crimes.OffenseCategory.value_counts()
post_enactment_crime_count = post_crimes.OffenseCategory.value_counts()
pre_and_post = pd.concat([pre_crimes, post_crimes])

In [130]:
#Figure out crime breakdown by crime against type
pre_count_by_crimesagainst = pd.DataFrame(pre_crimes.CrimeAgainst.value_counts())
post_count_by_crimesagainst = pd.DataFrame(post_crimes.CrimeAgainst.value_counts())
pre_vs_post_crimesagainst = pd.merge(pre_count_by_crimesagainst, post_count_by_crimesagainst, on = pre_count_by_crimesagainst.index, suffixes = ('_pre', '_post'))
pre_vs_post_crimesagainst = pre_vs_post_crimesagainst.rename(columns={"key_0": "CrimeType", "CrimeAgainst_pre": "Pre Measure 110", "CrimeAgainst_post" : "Post Measure 110"})

In [131]:
pre_vs_post_crimesagainst

Unnamed: 0,CrimeType,Pre Measure 110,Post Measure 110
0,Property,52818,54216
1,Person,9142,8933
2,Society,2039,1412


In [129]:
#Pie Chart seeing change in crime against categories pre and post measure 110 
barchart = alt.Chart(pre_crimes).mark_bar().encode(
    x = alt.X("CrimeAgainst:N"),
    y = alt.Y('sum(CaseNumber'),
    color = 'CrimeType:N',
)
barchart

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000). For information on how to plot larger datasets in Altair, see the documentation

alt.Chart(...)

In [123]:
def create_default_dict(ddict, pre_count_by_crime, post_count_by_crime):
    for d in (pre_count_by_crime, post_count_by_crime):
        for key, value in d.items():
            ddict[key].append(value)
    return ddict
    

In [9]:
#Calculate percent change in crime comparing 2020 to measure 110 enactment, and measure 110 enactment to YTD 
def calculate_per_change(values): 
    for key, value in values.items():
            percentage = ((value[1] - value[0]) / value[0])
            values[key].append("{0:.2%}".format(percentage))
    return values
    

In [10]:
def count_by_offense_type(dd, per_change):
    df = pd.DataFrame.from_dict(dd, orient = 'index',
                                             columns = ["Pre-Measure 110", "Post-Measure 110", "Percent Change"] ) 
    df.index=dd.keys()
    return df 

In [11]:
#Crime By Count pre vs post measure 110

pre_vs_post_dict = create_default_dict(defaultdict(list), pre_enactment_crime_count, post_enactment_crime_count) 
percent_change = calculate_per_change(pre_vs_post_dict) 
pre_vs_post = count_by_offense_type(pre_vs_post_dict, percent_change)
#summary_line = pd.DataFrame[pre_vs_post["Pre-Measure 110"].sum(), pre_vs_post["Post-Measure 110"].sum() , ((pre_vs_post["Post-Measure 110"].sum())/pre_vs_post["Pre-Measure 110"].sum())/pre_vs_post["Pre-Measure 110"].sum(), index = ["Total"]]

In [12]:
def sort_by_month(crime_by_month_df): 
    crime_by_month_df = crime_by_month_df.MonthYear.value_counts().reset_index()
    crime_by_month_df["MonthDate"] = pd.to_datetime(crime_by_month_df['index'], format = "%B, %Y")
    crime_by_month_df = crime_by_month_df.sort_values("MonthDate", ascending = True).reset_index()
    #crime_by_month_df = pre_crimes_by_month = crime_by_month_df.drop('level_0', 1)
    crime_by_month_df = crime_by_month_df.rename( columns = {"index" : "MonthYear", "MonthYear" : "TotalCount"}) 
    
    return crime_by_month_df

In [13]:
def merge_crime_df(total_crime, drug_crime):
    merged = pd.merge(total_crime, drug_crime, on = ['MonthYear', 'MonthDate'])
    merged = merged.drop(['level_0_x', 'level_0_y'], axis=1)
    merged = merged.rename(columns = {'index' : 'Month By Year', 'TotalCount_x' : 'Total Crime' , 'TotalCount_y' : 'Drug Crime'})
    per_of_crime = (merged['Drug Crime'] / merged['Total Crime']) * 100
    merged['Non-Drug Crime'] = merged['Total Crime'] - merged['Drug Crime']
    merged['Percentage of Drug Related Crime (%)'] = per_of_crime
    return merged 

In [14]:
#Merge tables 
pre_measure_crimes_by_month = sort_by_month(pre_crimes) 
post_measure_crimes_by_month = sort_by_month(post_crimes)
pre_drug_crimes = sort_by_month(pre_crimes[pre_crimes.OffenseCategory == 'Drug/Narcotic Offenses'])
post_drug_crimes = sort_by_month(post_crimes[post_crimes.OffenseCategory == 'Drug/Narcotic Offenses'])
merged_pre = merge_crime_df(pre_measure_crimes_by_month, pre_drug_crimes)
merged_post = merge_crime_df(post_measure_crimes_by_month, post_drug_crimes)

Unnamed: 0,level_0,MonthYear,TotalCount,MonthDate
0,3,"January, 2020",5278,2020-01-01
1,5,"February, 2020",4942,2020-02-01
2,10,"March, 2020",4570,2020-03-01
3,11,"April, 2020",4552,2020-04-01
4,8,"May, 2020",4719,2020-05-01
5,12,"June, 2020",4115,2020-06-01
6,6,"July, 2020",4882,2020-07-01
7,1,"August, 2020",5445,2020-08-01
8,2,"September, 2020",5385,2020-09-01
9,0,"October, 2020",5607,2020-10-01


In [55]:
merged = pd.concat([merged_pre, merged_post], axis = 0).reset_index()
merged

Unnamed: 0,index,MonthYear,Total Crime,MonthDate,Drug Crime,Non-Drug Crime,Percentage of Drug Related Crime (%)
0,0,"January, 2020",5278,2020-01-01,138,5140,2.614627
1,1,"February, 2020",4942,2020-02-01,146,4796,2.95427
2,2,"March, 2020",4570,2020-03-01,108,4462,2.363239
3,3,"April, 2020",4552,2020-04-01,139,4413,3.053603
4,4,"May, 2020",4719,2020-05-01,188,4531,3.983895
5,5,"June, 2020",4115,2020-06-01,20,4095,0.486027
6,6,"July, 2020",4882,2020-07-01,40,4842,0.819336
7,7,"August, 2020",5445,2020-08-01,28,5417,0.514233
8,8,"September, 2020",5385,2020-09-01,38,5347,0.705664
9,9,"October, 2020",5607,2020-10-01,50,5557,0.891742


In [54]:
bar = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Total Crime",
    color = "Drug
)

bar2 = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Drug Crime",
    color = 'red'
)
bar3 = alt.layer(bar, bar2)

In [25]:
nondrug_bar = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Non-Drug Crime",
)

drug_bar = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Drug Crime",
    color = "Drug Crime"
)



In [26]:
nondrug_bar + drug_bar