# First Project: Impact of Weather on Crime in Austin

In [1]:
# Dependencies 
import matplotlib.pyplot as plt
import pandas as pd
import datetime as dt
from classes.weather import Weather
from classes.crime import Crime
from classes.database import Database
import data

# Date window passed to the crime and weather loaders
start_str, end_str = "2018-01-01", "2019-12-31"

# Data-source switches:
#   crime_previously_saved / weather_previously_saved -> reload the CSV instead of calling the API
#   in_db -> the merged result is already stored, so the database save is skipped
crime_previously_saved = True
weather_previously_saved = True
in_db = True

# CSV locations used for both caching API results and reloading them
crime_path, weather_path = "data/crime.csv", "data/weather.csv"

ModuleNotFoundError: No module named 'data2'

## Retrieve Austin Crime Data (data.austintexas.gov)

In [None]:
# Obtain the crime records: on a fresh run call the API and cache the result
# to CSV; otherwise reuse the cached CSV.
if not crime_previously_saved:
    df_crime = data.get_crime_data(start_str, end_str)
    df_crime.to_csv(crime_path, index=False)
else:
    df_crime = pd.read_csv(crime_path)

df_crime

## Perform API Call - Weather Data (Dark Sky)

In [None]:
# Obtain the weather records: on a fresh run call the API and cache the result
# to CSV; otherwise reuse the cached CSV.
if not weather_previously_saved:
    df_weather = data.get_weather_data(start_str, end_str)
    df_weather.to_csv(weather_path, index=False)
else:
    df_weather = pd.read_csv(weather_path)

df_weather

## Merge Data Sources

In [None]:
# Outer-join weather rows (keyed by "Date") with crime rows (keyed by "Epoch")
# so that rows without a match on either side are kept.
df_result = df_weather.merge(df_crime, how="outer", left_on="Date", right_on="Epoch")
df_result

## Store in a Database

In [None]:
# Persist the merged frame only when the in_db flag says it has not been stored yet.
if not in_db:
    obj_database = Database()
    obj_database.df_save(df_result)

## Data Visualizations

#### Crime Rate vs. Temperature by Crime Type

In [None]:
# Columns needed for the temperature analysis.
# Take an explicit copy: a column is added to this frame in a later cell, and
# adding it to a bare slice of df_result raises SettingWithCopyWarning.
temp_df = df_result[["Temperature", "Crime Type", "Date"]].copy()
temp_df

In [None]:
# Five most frequent crime types: count occurrences, order by count
# (largest first) and keep the first five entries.
top5_crime_types = (
    temp_df["Crime Type"]
    .value_counts()
    .sort_values(ascending=False)
    .head(5)
)
top5_crime_types

In [None]:
# Bucket temperatures (F) into 10-degree groups.
# pd.cut builds right-closed bins, so the first bin covers 0 through 9.99 and
# the last one everything above 119.99; the labels are named accordingly.
bins = [0, 9.99, 19.99, 29.99, 39.99, 49.99, 59.99, 69.99, 79.99, 89.99, 99.99, 109.99, 119.99, 9999]
temp_ranges = ["0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60-69", "70-79", "80-89", "90-99", "100-109", "110-119", "120+"]

# Work on a copy so the new column is not written into a slice of df_result.
temp_df = temp_df.copy()
# include_lowest=True keeps a reading of exactly 0 in the first bin instead of
# turning it into NaN. Readings below 0 still fall outside every bin.
temp_df["Temperature Group"] = pd.cut(
    temp_df["Temperature"], bins, labels=temp_ranges, include_lowest=True
)

In [None]:
# Keep only rows whose crime type is one of the five most frequent.
df_top5_crime_types = temp_df[temp_df["Crime Type"].isin(top5_crime_types.index)]

# Number of distinct "Date" values (hours) in each temperature bin.
# Counted on the unfiltered frame: counting after the top-5 filter would drop
# every hour in which none of those crimes occurred and inflate the rates.
# observed=False keeps empty bins in the result regardless of the pandas default.
by_temp = temp_df.groupby("Temperature Group", observed=False)
hour_counts = by_temp["Date"].nunique()

# Number of crimes for every (crime type, temperature bin) pair.
df_group1 = df_top5_crime_types.groupby(
    ["Crime Type", "Temperature Group"], observed=False
)["Date"]
crime_counts = df_group1.count()

# Crime rate = crimes / hours within each temperature bin
# (the division aligns on the shared "Temperature Group" index level).
crime_rates = crime_counts / hour_counts

# Flatten to a frame with one row per (crime type, temperature bin).
crime_rates_df = crime_rates.to_frame().reset_index().rename(columns={"Date": "Crime Rate"})
crime_rates_df

In [None]:
# Distinct crime types, in order of first appearance, for use as legend labels
legend_values = crime_rates_df.drop_duplicates(subset="Crime Type")["Crime Type"]
legend_values

In [None]:
# Reshape to one row per temperature group and one column per crime type;
# combinations without a rate are filled with 0.
df_pivot1 = crime_rates_df.pivot_table(
    index=["Temperature Group"],
    columns=["Crime Type"],
    values=["Crime Rate"],
).fillna(0)

df_pivot1

In [None]:
# Line chart of crime rate per temperature group, one line per crime type.
# The series are drawn once; drawing them a second time on the same axes only
# stacked identical lines on top of each other.
ax = df_pivot1.plot(figsize=(12, 8), grid=True)
ax.set_title("Temperature Impact On Crime Rate By Crime Type", fontsize=20)
# Legend labels come from legend_values (the previously referenced name
# `legend1` is never defined and raised NameError).
ax.legend(list(legend_values), fontsize=12)
ax.set_xlabel("Temperature (F)", fontsize=20)
ax.set_ylabel("Crimes Per Hour", fontsize=20)
ax.tick_params(labelsize=18)

#### Crime Rate vs. Moon Phase by Crime Type

In [None]:
# Columns needed for the moon-phase analysis.
# Take an explicit copy: the "Moon Phase" column is rewritten in a later cell,
# and writing into a bare slice of df_result raises SettingWithCopyWarning.
df_2 = df_result[["Moon Phase", "Crime Type", "Date"]].copy()
df_2

In [None]:
# Change moon phases to categories
# NOTE(review): get_moon_description is defined in classes/weather.py; it
# presumably maps each phase value to a text label -- confirm there.
obj_weather = Weather()

# Replace the whole column on a copy instead of writing through .loc[:, col]:
# the .loc form writes into the existing column on a slice of df_result, which
# triggers SettingWithCopyWarning and, if the mapped values have a different
# dtype than the original column, an incompatible-dtype warning.
df_2 = df_2.copy()
df_2["Moon Phase"] = df_2["Moon Phase"].apply(obj_weather.get_moon_description)
df_2

In [None]:
# Show how many rows carry each moon-phase value
df_2["Moon Phase"].value_counts()

In [None]:
# Keep only rows whose crime type is one of the five most frequent.
df_top5_crime_types = df_2[df_2["Crime Type"].isin(top5_crime_types.index)]

# Number of distinct "Date" values (hours) for each moon phase.
# Counted on the unfiltered frame: counting after the top-5 filter would drop
# every hour in which none of those crimes occurred and inflate the rates.
by_phase = df_2.groupby("Moon Phase")
hour_counts = by_phase["Date"].nunique()

# Number of crimes for every (crime type, moon phase) pair.
df_group2 = df_top5_crime_types.groupby(["Crime Type", "Moon Phase"])["Date"]
crime_counts = df_group2.count()

# Crime rate = crimes / hours within each moon phase
# (the division aligns on the shared "Moon Phase" index level).
crime_rates2 = crime_counts / hour_counts

# Flatten to a frame with one row per (crime type, moon phase).
df_crime_rates2 = crime_rates2.to_frame().reset_index().rename(columns={"Date": "Crime Rate"})
df_crime_rates2

In [None]:
# Distinct crime types, in order of first appearance, for use as legend labels
legend2 = df_crime_rates2.drop_duplicates(subset="Crime Type")["Crime Type"]
legend2

In [None]:
# Reshape to one row per moon phase and one column per crime type;
# combinations without a rate are filled with 0.
df_pivot2 = df_crime_rates2.pivot_table(
    index=["Moon Phase"],
    columns=["Crime Type"],
    values=["Crime Rate"],
).fillna(0)

df_pivot2

In [None]:
# Line chart of crime rate per moon phase, one line per crime type.
# The series are drawn once; drawing them a second time on the same axes only
# stacked identical lines on top of each other and doubled the line artists.
ax = df_pivot2.plot(figsize=(12, 8), grid=True)
ax.set_title("Moon Phase Impact on Crime Rate by Crime Type", fontsize=20)
ax.legend(list(legend2), fontsize=12)
# The moon-phase names on the ticks are self-explanatory, so no axis label.
ax.set_xlabel("")
ax.set_ylabel("Crimes Per Hour", fontsize=20)
ax.tick_params(labelsize=18)

#### Crime Rate vs. Precipitation Intensity by Crime Location

In [None]:
# Columns needed for the precipitation analysis.
# Take an explicit copy: the "Precipitation Intensity" column is rewritten in a
# later cell, and writing into a bare slice of df_result raises
# SettingWithCopyWarning.
df_3 = df_result[["Precipitation Intensity", "Location Type", "Date"]].copy()
df_3

In [None]:
# Five most frequent crime location types: count occurrences, order by count
# (largest first) and keep the first five entries.
top5_crime_locations = (
    df_3["Location Type"]
    .value_counts()
    .sort_values(ascending=False)
    .head(5)
)
top5_crime_locations

In [None]:
# View precipitation intensity range.
# Series.min()/max() skip missing values; the builtin min()/max() compare
# element by element and give order-dependent results when NaN is present
# (the outer merge can leave rows without weather data).
df_3["Precipitation Intensity"].min(), df_3["Precipitation Intensity"].max()

In [None]:
# Split precipitation intensity into 10 equal-width bins.
precip_binned = pd.cut(df_3["Precipitation Intensity"], 10)

# Build the labels from the bin edges pd.cut actually computed, so they stay
# correct when the data range changes (hard-coded labels only fit one range).
# pd.cut pads the lowest edge slightly below the minimum; clamp it back so the
# first label starts at the real minimum.
lowest = df_3["Precipitation Intensity"].min()
labels = [
    f"{max(interval.left, lowest):.3g}-{interval.right:.3g}"
    for interval in precip_binned.cat.categories
]

# Replace the column on a copy so nothing is written into a slice of df_result.
df_3 = df_3.copy()
df_3["Precipitation Intensity"] = precip_binned.cat.rename_categories(labels)
df_3["Precipitation Intensity"].drop_duplicates()

In [None]:
# Keep only rows whose location type is one of the five most frequent.
df_top5_location_types = df_3[df_3["Location Type"].isin(top5_crime_locations.index)]

# Number of distinct "Date" values (hours) in each precipitation-intensity bin.
# Counted on the unfiltered frame: counting after the top-5 filter would drop
# every hour without a crime at those locations and inflate the rates.
# observed=False keeps empty bins in the result regardless of the pandas default.
by_precip = df_3.groupby("Precipitation Intensity", observed=False)
hour_counts = by_precip["Date"].nunique()

# Number of crimes for every (location type, precipitation bin) pair.
df_group3 = df_top5_location_types.groupby(
    ["Location Type", "Precipitation Intensity"], observed=False
)["Date"]
crime_counts = df_group3.count()

# Crime rate = crimes / hours within each precipitation-intensity bin
# (the division aligns on the shared "Precipitation Intensity" index level).
crime_rates3 = crime_counts / hour_counts

# Flatten to a frame with one row per (location type, precipitation bin).
df_crime_rates3 = crime_rates3.to_frame().reset_index().rename(columns={"Date": "Crime Rate"})
df_crime_rates3

In [None]:
# Distinct location types, in order of first appearance, for use as legend labels
legend3 = df_crime_rates3.drop_duplicates(subset="Location Type")["Location Type"]
legend3

In [None]:
# Reshape to one row per precipitation-intensity bin and one column per
# location type; combinations without a rate are filled with 0.
df_pivot3 = df_crime_rates3.pivot_table(
    index=["Precipitation Intensity"],
    columns=["Location Type"],
    values=["Crime Rate"],
).fillna(0)

df_pivot3

In [None]:
# Line chart of crime rate per precipitation-intensity bin, one line per
# location type.
# The series are drawn once; drawing them a second time on the same axes only
# stacked identical lines on top of each other and doubled the line artists.
ax = df_pivot3.plot(figsize=(12, 8), grid=True)
ax.set_title("Precipitation Impact on Crime Rate by Location Type", fontsize=20)
ax.legend(list(legend3), fontsize=12)
ax.set_xlabel("Precipitation Intensity (in/h)", fontsize=20)
ax.set_ylabel("Crimes Per Hour", fontsize=20)
# Smaller tick labels than the other charts: the bin labels are long.
ax.tick_params(labelsize=10)

## Written Analysis and Conclusions

Refer to the README for the written analysis and conclusions.