# C S 329E Group Project

## Essential Imports and Data Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
import plotly.express as px
import plotly.io as pio

In [None]:
# Load the crime records from a CSV file resolved relative to the notebook's
# working directory; every later cell reads from this `df`.
df = pd.read_csv('austin_crime_v1.csv')

## Sanity Checks

In [None]:
# Preview the first rows (head() defaults to 5) to confirm the columns parsed as expected.
df.head()

Unnamed: 0,Offense,Category,Year,Time of Day,Occurred DateTime,Report DateTime,Location Type,Census Block Group,Latitude,Longitude,APD Sector,APD District,Council District
0,FAMILY DISTURBANCE,family,2003,Morning,2003-01-29 05:30:00,2002-11-29 05:30:00,RESIDENCE / HOME,4530341000.0,30.440748,-97.744242,Adam,3,6.0
1,DEADLY CONDUCT,aggravated assault,2003,Night,2003-01-01 00:01:00,2003-01-01 00:01:00,RESIDENCE / HOME,4530021000.0,30.309733,-97.680095,Ida,4,4.0
2,BURGLARY NON RESIDENCE,burglary,2003,Night,2003-01-01 00:02:00,2003-01-01 00:02:00,COMMERCIAL / OFFICE BUILDING,4530024000.0,30.20925,-97.78459,David,3,2.0
3,DEADLY CONDUCT,aggravated assault,2003,Night,2003-01-01 00:03:00,2003-01-01 00:03:00,RESIDENCE / HOME,4530402000.0,30.325781,-97.688573,Ida,2,4.0
4,RESISTING ARREST OR SEARCH,obstruction,2003,Night,2003-01-01 00:06:00,2003-01-01 00:06:00,RESIDENCE / HOME,4530009000.0,30.260928,-97.706736,Charlie,2,3.0


In [None]:
# Print the index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 127088 entries, 0 to 2453275
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Offense             127087 non-null  object 
 1   Category            127088 non-null  object 
 2   Year                127087 non-null  float64
 3   Time of Day         127087 non-null  object 
 4   Occurred DateTime   127087 non-null  object 
 5   Report DateTime     127087 non-null  object 
 6   Location Type       127087 non-null  object 
 7   Census Block Group  119371 non-null  float64
 8   Latitude            119371 non-null  float64
 9   Longitude           119371 non-null  float64
 10  APD Sector          127086 non-null  object 
 11  APD District        127086 non-null  object 
 12  Council District    127086 non-null  float64
dtypes: float64(5), object(8)
memory usage: 17.6+ MB


In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(127087, 13)

In [None]:
# Count missing values per column (isnull() flags NaN/None, sum() totals them column-wise).
df.isnull().sum()

Unnamed: 0,0
Offense,0
Category,0
Year,0
Time of Day,0
Occurred DateTime,0
Report DateTime,0
Location Type,0
Census Block Group,7716
Latitude,7716
Longitude,7716


## Visualization 2: Linked Histograms

In [None]:
# Persist a reproducible 1,000-row sample (fixed seed) alongside the notebook.
# The index is dropped so the CSV contains only the data columns.
sample_df = df.sample(1000, random_state=42)
sample_df.to_csv("austin_crime_sample.csv", index=False)

# Display order for the time-of-day buckets.
TODorder = ["Morning", "Afternoon", "Evening", "Night"]

# One row per (Category, Offense, Time of Day) combination with its number of
# occurrences. `reset_index(name='count')` names the count column explicitly,
# so this does not depend on what the installed pandas version calls the
# result of `value_counts()` (older releases leave it unnamed).
test = (
    df[['Category', 'Offense', 'Time of Day']]
    .value_counts()
    .reset_index(name='count')
)

# Total occurrences per Category, later attached to every row of `test`.
category_sum = (
    test.groupby('Category', as_index=False)['count']
    .sum()
    .rename(columns={'count': 'cat_count'})
)

# Ordered categorical so the buckets keep the order given in TODorder;
# any value not listed in TODorder becomes NaN.
test['Time of Day'] = pd.Categorical(test['Time of Day'], categories=TODorder, ordered=True)

# Left merge keeps every row of `test` and adds its category-wide total.
test = test.merge(category_sum, on='Category', how='left')
test

Unnamed: 0,Category,Offense,Time of Day,count,cat_count
0,theft,BURGLARY OF VEHICLE,Night,8455,38909
1,theft,BURGLARY OF VEHICLE,Evening,4461,38909
2,traffic,DWI,Night,4065,5417
3,theft,THEFT,Afternoon,3798,38909
4,property,CRIMINAL MISCHIEF,Night,3779,9597
...,...,...,...,...,...
958,traffic,DUI - AGE 16 AND UNDER,Evening,1,5417
959,traffic,DWI - DRUG RECOGNITION EXPERT,Evening,1,5417
960,traffic,DWI - DRUG RECOGNITION EXPERT,Morning,1,5417
961,fraud,BANK KITING,Morning,1,3590


In [None]:
# Click selection on the Category field. `empty=True` means that while nothing
# is clicked every category is treated as selected.
# (`selection_point` / `add_params` replace `selection_single` / `add_selection`,
# which are deprecated since Altair 5.0.)
selection = alt.selection_point(
    fields=['Category'],
    empty=True
)

# Encodings shared by both charts, defined once so the two stay in sync.
tod_domain = ["Morning", "Afternoon", "Evening", "Night"]
tod_colors = ["#F1C40F", "#5DADE2", "#4C78A8", "#2C3E50"]

# Same field type (ordinal) in both charts so they share one colour scale and legend.
tod_color = alt.Color(
    'Time of Day:O',
    scale=alt.Scale(domain=tod_domain, range=tod_colors),
)

# Full opacity for rows matching the selection, dimmed otherwise.
highlight = alt.condition(selection, alt.value(1), alt.value(0.3))

bar_tooltip = [
    alt.Tooltip('cat_count:N', title="Total Count"),
    alt.Tooltip('Offense:N', title="Offense"),
    alt.Tooltip('count:Q', title="Count"),
    alt.Tooltip('Category:N', title="Crime Category"),
]

# Top chart: counts per crime category, coloured by time of day.
# This chart owns the selection, so clicking one of its bars drives the filter below.
frequency = alt.Chart(test).mark_bar().encode(
    alt.X('Category:N', title="Crime Category"),
    alt.Y('count:Q', title="Frequency"),
    tod_color,
    opacity=highlight,
    tooltip=bar_tooltip,
).properties(
    title='Austin Crime Frequency Distribution',
    width=500,
).add_params(
    selection
)

# Bottom chart: counts per time of day, restricted to the category selected
# above (all categories while nothing is selected).
time = alt.Chart(test).mark_bar().encode(
    alt.X('Time of Day:O', title="Time of Day", sort=tod_domain),
    alt.Y('count:Q', title="Frequency"),
    tod_color,
    opacity=highlight,
    tooltip=bar_tooltip,
).transform_filter(
    selection
).properties(
    title='Time of Day Distribution',
    width=500,
)

# Stack the two charts vertically.
frequency & time

Deprecated since `altair=5.0.0`. Use selection_point instead.
  selection = alt.selection_single(
Deprecated since `altair=5.0.0`. Use add_params instead.
  ).add_selection(
