In [4]:
import csv, os

In [5]:
from datetime import datetime 

In [6]:
import pandas as pd 

In [7]:
from collections import defaultdict

In [8]:
import altair as alt

In [12]:
data_dir = os.environ['DATA_DIR'] 
csv_file = os.path.join(data_dir, 'PortlandData.csv') 
portland_crimes = pd.read_csv(csv_file, parse_dates = ["OccurDate"])
portland_crimes = portland_crimes[portland_crimes.OccurDate >= datetime.strptime('1/1/2020', '%m/%d/%Y')].reset_index()
portland_crimes['DrugCrime'] = portland_crimes.OffenseCategory == 'Drug/Narcotic Offenses'
portland_crimes["MonthDate"] = pd.to_datetime(portland_crimes['MonthYear'], format = "%B, %Y")


In [169]:
#Separate crimes before and after Measure 110 into two separate data frames
enactment_date = datetime.strptime('2/1/2021', "%m/%d/%Y")
pre_crimes = portland_crimes[portland_crimes.OccurDate < enactment_date]
post_crimes = portland_crimes[portland_crimes.OccurDate >= enactment_date]
pre_enactment_crime_count = pre_crimes.OffenseCategory.value_counts()
post_enactment_crime_count = post_crimes.OffenseCategory.value_counts()
pre_and_post = pd.concat([pre_crimes, post_crimes])
pre_enactment_crime_count

Larceny Offenses                24640
Vandalism                        8967
Assault Offenses                 8498
Motor Vehicle Theft              7062
Burglary                         5915
Fraud Offenses                   3903
Robbery                          1085
Drug/Narcotic Offenses           1018
Weapon Law Violations             876
Arson                             560
Counterfeiting/Forgery            532
Sex Offenses                      490
Pornography/Obscene Material       89
Kidnapping/Abduction               72
Stolen Property Offenses           63
Homicide Offenses                  60
Embezzlement                       58
Extortion/Blackmail                33
Animal Cruelty Offenses            29
Prostitution Offenses              27
Human Trafficking Offenses         22
Name: OffenseCategory, dtype: int64

In [170]:
pre_offensecategory_bar = alt.Chart(pre_enactment_crime_count).mark_bar().encode(
    x = alt.X('OffenseCategory:Q', title = 'Crime Count'),
    y = alt.Y('index:N', title = "Offense Category"),
    color=alt.condition(
        alt.datum.index == 'Drug/Narcotic Offenses', 
        alt.value('red'), 
        alt.value('lightgrey')),
).properties(title = "Pre Measure 110")
pre_offensecategory_bar

post_offensecategory_bar = alt.Chart(post_enactment_crime_count).mark_bar().encode(
    x = alt.X('OffenseCategory:Q', title = 'Crime Count'),
    y = alt.Y('index:N', title = "Offense Category"),
     color=alt.condition(
        alt.datum.index == 'Drug/Narcotic Offenses', 
        alt.value('red'), 
        alt.value('lightgrey')),
).properties(title = "Post Measure 110")
alt.hconcat(pre_offensecategory_bar,  post_offensecategory_bar)




SchemaValidationError: Invalid specification

        altair.vegalite.v4.schema.channels.ColorValue, validating 'additionalProperties'

        Additional properties are not allowed ('test' was unexpected)
        

alt.HConcatChart(...)

In [171]:
#Figure out crime breakdown by crime against type
pre_count_by_crimesagainst = pd.DataFrame(pre_crimes.CrimeAgainst.value_counts())
pre_count_by_crimesagainst = pre_count_by_crimesagainst.reset_index()
pre_count_by_crimesagainst = pre_count_by_crimesagainst.rename( columns = {"index" : "CrimeType" , "CrimeAgainst" : "CrimeCount"})
post_count_by_crimesagainst = pd.DataFrame(post_crimes.CrimeAgainst.value_counts())
post_count_by_crimesagainst = post_count_by_crimesagainst.reset_index()
post_count_by_crimesagainst = post_count_by_crimesagainst.rename( columns = {"index" : "CrimeType" , "CrimeAgainst" : "CrimeCount"})
pre_vs_post_crimesagainst = pd.merge(pre_count_by_crimesagainst, post_count_by_crimesagainst, on = ["CrimeType"] , suffixes = ('_pre', '_post'))
pre_vs_post_crimesagainst = pre_vs_post_crimesagainst.rename(columns={"key_0": "CrimeType", "CrimeAgainst_pre": "Pre Measure 110", "CrimeAgainst_post" : "Post Measure 110"})
pre_count_by_crimesagainst

Unnamed: 0,CrimeType,CrimeCount
0,Property,52818
1,Person,9142
2,Society,2039


In [210]:
base_pre = alt.Chart(pre_count_by_crimesagainst).mark_arc().encode(
    theta = alt.Theta(field = "CrimeCount", type = "quantitative"),
    color = alt.Color(field = "CrimeType", type = "nominal"),
).properties(title = "Pre Measure 110 Crimes by CrimeAgainst")

pie_pre = base_pre.mark_arc(outerRadius=120)
text_pre = base_pre.mark_text(radius=140, size=15).encode(text="CrimeCount:Q")

pre_piechart = pie_pre + text_pre

base_post = alt.Chart(post_count_by_crimesagainst).mark_arc().encode(
    theta = alt.Theta(field = "CrimeCount", type = "quantitative"),
    color = alt.Color(field = "CrimeType", type = "nominal"),
).properties(title = "Post Measure 110 Crimes by CrimeAgainst")

pie_post = base_post.mark_arc(outerRadius=120)
text_post = base_post.mark_text(radius=140, size=15).encode(text="CrimeCount:Q")

post_piechart = pie_post + text_post

pre_post_piecharts = alt.hconcat(pre_piechart, post_piechart)
pre_post_piecharts

In [173]:
def create_default_dict(ddict, pre_count_by_crime, post_count_by_crime):
    for d in (pre_count_by_crime, post_count_by_crime):
        for key, value in d.items():
            ddict[key].append(value)
    return ddict
    

In [174]:
#Calculate percent change in crime comparing 2020 to measure 110 enactment, and measure 110 enactment to YTD 
def calculate_per_change(values): 
    for key, value in values.items():
            percentage = ((value[1] - value[0]) / value[0])
            values[key].append("{0:.2%}".format(percentage))
    return values
    

In [175]:
def count_by_offense_type(dd, per_change):
    df = pd.DataFrame.from_dict(dd, orient = 'index',
                                             columns = ["Pre-Measure 110", "Post-Measure 110", "Percent Change"] ) 
    df.index=dd.keys()
    return df 

In [176]:
pre_enactment_crime_count, post_enactment_crime_count

(Larceny Offenses                24640
 Vandalism                        8967
 Assault Offenses                 8498
 Motor Vehicle Theft              7062
 Burglary                         5915
 Fraud Offenses                   3903
 Robbery                          1085
 Drug/Narcotic Offenses           1018
 Weapon Law Violations             876
 Arson                             560
 Counterfeiting/Forgery            532
 Sex Offenses                      490
 Pornography/Obscene Material       89
 Kidnapping/Abduction               72
 Stolen Property Offenses           63
 Homicide Offenses                  60
 Embezzlement                       58
 Extortion/Blackmail                33
 Animal Cruelty Offenses            29
 Prostitution Offenses              27
 Human Trafficking Offenses         22
 Name: OffenseCategory, dtype: int64,
 Larceny Offenses                24474
 Vandalism                       10141
 Motor Vehicle Theft              9504
 Assault Offenses         

In [186]:
#Crime By Count pre vs post measure 110

pre_vs_post_dict = create_default_dict(defaultdict(list), pre_enactment_crime_count, post_enactment_crime_count) 
percent_change = calculate_per_change(pre_vs_post_dict) 
pre_vs_post = count_by_offense_type(pre_vs_post_dict, percent_change)
pre_vs_post

Unnamed: 0,Pre-Measure 110,Post-Measure 110,Percent Change
Larceny Offenses,24640,24474,-0.67%
Vandalism,8967,10141,13.09%
Assault Offenses,8498,8294,-2.40%
Motor Vehicle Theft,7062,9504,34.58%
Burglary,5915,5022,-15.10%
Fraud Offenses,3903,2947,-24.49%
Robbery,1085,1261,16.22%
Drug/Narcotic Offenses,1018,365,-64.15%
Weapon Law Violations,876,841,-4.00%
Arson,560,429,-23.39%


In [183]:
def sort_by_month(crime_by_month_df): 
    crime_by_month_df = crime_by_month_df.MonthYear.value_counts().reset_index()
    crime_by_month_df["MonthDate"] = pd.to_datetime(crime_by_month_df['index'], format = "%B, %Y")
    crime_by_month_df = crime_by_month_df.sort_values("MonthDate", ascending = True).reset_index()
    #crime_by_month_df = pre_crimes_by_month = crime_by_month_df.drop('level_0', 1)
    crime_by_month_df = crime_by_month_df.rename( columns = {"index" : "MonthYear", "MonthYear" : "TotalCount"}) 
    
    return crime_by_month_df

In [157]:
def merge_crime_df(total_crime, drug_crime):
    merged = pd.merge(total_crime, drug_crime, on = ['MonthYear', 'MonthDate'])
    merged = merged.drop(['level_0_x', 'level_0_y'], axis=1)
    merged = merged.rename(columns = {'index' : 'Month By Year', 'TotalCount_x' : 'Total Crime' , 'TotalCount_y' : 'Drug Crime'})
    per_of_crime = (merged['Drug Crime'] / merged['Total Crime']) * 100
    merged['Non-Drug Crime'] = merged['Total Crime'] - merged['Drug Crime']
    merged['Percentage of Drug Related Crime (%)'] = per_of_crime
    return merged 

In [158]:
#Merge tables 
pre_measure_crimes_by_month = sort_by_month(pre_crimes) 
post_measure_crimes_by_month = sort_by_month(post_crimes)
pre_drug_crimes = sort_by_month(pre_crimes[pre_crimes.OffenseCategory == 'Drug/Narcotic Offenses'])
post_drug_crimes = sort_by_month(post_crimes[post_crimes.OffenseCategory == 'Drug/Narcotic Offenses'])
merged_pre = merge_crime_df(pre_measure_crimes_by_month, pre_drug_crimes)
merged_post = merge_crime_df(post_measure_crimes_by_month, post_drug_crimes)

In [159]:
merged = pd.concat([merged_pre, merged_post], axis = 0).reset_index()
merged

Unnamed: 0,index,MonthYear,Total Crime,MonthDate,Drug Crime,Non-Drug Crime,Percentage of Drug Related Crime (%)
0,0,"January, 2020",5278,2020-01-01,138,5140,2.614627
1,1,"February, 2020",4942,2020-02-01,146,4796,2.95427
2,2,"March, 2020",4570,2020-03-01,108,4462,2.363239
3,3,"April, 2020",4552,2020-04-01,139,4413,3.053603
4,4,"May, 2020",4719,2020-05-01,188,4531,3.983895
5,5,"June, 2020",4115,2020-06-01,20,4095,0.486027
6,6,"July, 2020",4882,2020-07-01,40,4842,0.819336
7,7,"August, 2020",5445,2020-08-01,28,5417,0.514233
8,8,"September, 2020",5385,2020-09-01,38,5347,0.705664
9,9,"October, 2020",5607,2020-10-01,50,5557,0.891742


In [145]:
bar = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Total Crime",
)

bar2 = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Drug Crime",
    color = 'red'
)
bar3 = alt.layer(bar, bar2)
bar3

ValueError: red encoding field is specified without a type; the type cannot be inferred because it does not match any column in the data.

alt.LayerChart(...)

In [154]:
nondrug_bar = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = ["Non-Drug Crime", "Drug Crime"],
)
nondrug_bar

drug_bar = alt.Chart(merged).mark_bar().encode(
    x = alt.X("MonthDate"),
    y = "Drug Crime",
)


In [139]:
drug_bar

In [140]:
bar