# San Francisco crimes analysis

## Imports and reading data set

In [128]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Load the summer-2014 incident records; the "Date" column is parsed into
# datetimes while reading so it can be used directly as a time axis.
ds = pd.read_csv(
    "data/sanfrancisco_incidents_summer_2014.csv",
    parse_dates=["Date"],
)

## Number of crimes and arrests per day

In [None]:
# Daily totals: count incident rows for each calendar date.
crimes_per_day = ds.pivot_table("IncidntNum", aggfunc="count", index="Date")

# Keep only incidents whose resolution is an arrest (booked or cited).
# `arrests` is reused by the per-category analysis further down.
arrests = ds[ds.Resolution.isin(["ARREST, BOOKED", "ARREST, CITED"])]
arrests_per_day = arrests.pivot_table("IncidntNum", aggfunc="count", index="Date")

# Two stacked panels sharing the same date axis.
fig, ax = plt.subplots(2, 1, figsize=(16, 8), sharex=True)
fig.subplots_adjust(hspace=0.1)

# The 'axes.color_cycle' rcParam no longer exists in matplotlib; the default
# colours are now read from 'axes.prop_cycle'.
color_cycle = plt.rcParams["axes.prop_cycle"].by_key()["color"]

# `pd.plot_params` moved to `pd.plotting.plot_params`. The 'x_compat' option
# suppresses pandas' own date-tick adjustment for time-series plots.
with pd.plotting.plot_params.use("x_compat", True):
    crimes_per_day.plot(ax=ax[0], title="Number of crimes", ylim=(200, 450), color=color_cycle[0])
    arrests_per_day.plot(ax=ax[1], title="Number of arrests", ylim=(0, 150), color=color_cycle[1])

fig.savefig('figs/daily_trend.png', bbox_inches='tight')

## Percentage of arrests for the 10 most common crime categories

In [127]:
# Incident and arrest counts per category. groupby(...).count() returns a
# Series, so the two results can be combined column-wise into one frame
# (a pivot_table result may be a DataFrame, which cannot be used as a dict
# value when constructing a DataFrame).
crimes_per_cat = ds.groupby("Category")["IncidntNum"].count()
arrests_per_cat = arrests.groupby("Category")["IncidntNum"].count()

per_cat = pd.DataFrame({"crimes_per_cat": crimes_per_cat, "arrests_per_cat": arrests_per_cat})
# Categories without any arrest are missing from arrests_per_cat and show up
# as NaN after index alignment; treat them as zero arrests.
per_cat = per_cat.fillna(0)

# Restrict to the 10 categories with the most incidents
# (`DataFrame.sort` was removed from pandas; `sort_values` replaces it).
per_cat = per_cat.sort_values("crimes_per_cat", ascending=False).head(10)
per_cat["arrests_perc"] = per_cat.arrests_per_cat * 100 / per_cat.crimes_per_cat
per_cat = per_cat.sort_values("arrests_perc", ascending=True)

# Only the percentage column is plotted; drop the raw counts by keyword
# because the positional `axis` argument is no longer accepted.
per_cat = per_cat.drop(columns=["crimes_per_cat", "arrests_per_cat"])

ax = per_cat.plot(kind="bar", alpha=0.7)
# A single series is plotted, so the legend adds nothing.
ax.legend().set_visible(False)

plt.xlabel('Top 10 common crimes')
plt.ylabel('Percentage of arrests [%]')
plt.savefig('figs/arrests_perc_per_cat.png', bbox_inches='tight')




## Number of crimes per day of week for missing person and vehicle theft

In [135]:
# Select every incident (not only arrests) in the two categories of interest.
top_cat = ds[ds.Category.isin(["MISSING PERSON", "VEHICLE THEFT"])]

# Rows: day of week, columns: category, values: number of incident rows.
crimes_per_day_of_week = top_cat.pivot_table(
    "IncidntNum", columns=["Category"], aggfunc="count", index="DayOfWeek"
)

# pivot_table orders the index alphabetically; put the days in calendar order.
weekdays = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
crimes_per_day_of_week = crimes_per_day_of_week.reindex(index=weekdays)

ax = crimes_per_day_of_week.plot(kind="bar", alpha=0.7)
ax.legend(loc='upper center', bbox_to_anchor=(0.5, 1.05),
          ncol=3, fancybox=True, shadow=True)

plt.xlabel('Day of week')
# The counts come from `ds` (all incidents), so the axis shows crimes,
# not arrests.
plt.ylabel('Number of crimes')

plt.savefig('figs/top_crimes_per_week.png', bbox_inches='tight')
