In [None]:
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import sqlalchemy as db

# Database setup
# Connection settings are read from environment variables so the password is
# not stored in the notebook. The non-secret settings default to the values
# this notebook was written against; the password deliberately has no default.
db_password = os.environ.get("CRIME_DB_PASSWORD")
if db_password is None:
    raise RuntimeError(
        "Set the CRIME_DB_PASSWORD environment variable before running this notebook."
    )
db_url = "postgresql://{user}:{password}@{host}:{port}/{name}".format(
    user=os.environ.get("CRIME_DB_USER", "postgres"),
    # URL-encode the password so characters such as '@' or '/' cannot break URL parsing
    password=quote_plus(db_password),
    host=os.environ.get("CRIME_DB_HOST", "127.0.0.1"),
    port=os.environ.get("CRIME_DB_PORT", "5432"),
    name=os.environ.get("CRIME_DB_NAME", "crime_corona"),
)
engine = db.create_engine(db_url)
connection = engine.connect()
metadata = db.MetaData()
# autoload_with on its own triggers reflection of the table definition; the
# separate autoload=True flag is deprecated in SQLAlchemy 1.4 and removed in 2.0.
crimes = db.Table('crimes', metadata, autoload_with=engine)

# Linear Regression dependencies
from pathlib import Path
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from sklearn.linear_model import LinearRegression


In [None]:
# Show the column names reflected for the crimes table (.c is the short alias of .columns)
print(crimes.c.keys())

In [None]:
# Fetch every row of the crimes table.
# Table.select() selects all columns of the table, the same statement the
# legacy list form db.select([crimes]) produced; the list form is no longer
# accepted by SQLAlchemy 2.0, while Table.select() works on old and new versions.
query = crimes.select()
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
# Preview the first three rows
ResultSet[:3]

In [None]:
# Build the DataFrame with column names taken from the result's own metadata
# instead of from ResultSet[0], so an empty result produces an empty,
# correctly labelled frame rather than raising IndexError.
crime_df = pd.DataFrame(ResultSet, columns=list(ResultProxy.keys()))
crime_df.head(3)

In [None]:
# Group By Week
# Series.dt.week is deprecated (pandas 1.1) and removed in pandas 2.0;
# dt.isocalendar().week returns the same ISO week number.
iso_week = pd.to_datetime(crime_df['dateReported']).dt.isocalendar().week
# isocalendar() yields the nullable UInt32 dtype. Cast back to what dt.week
# produced (int64, or float64 with NaN when some dates are missing) so later
# cells can keep calling .values.reshape(...) on a plain NumPy array.
crime_df['weekNumber'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')
crime_df.head(3)

Linear Regression - All Crimes

In [None]:
# Weekly incident totals across all offenses -- the input for the regression below.
# Counts the non-null 'Number' values per week, then turns weekNumber back into a column.
crime_countweek_df = (
    crime_df
    .groupby('weekNumber')[['Number']]
    .count()
    .reset_index()
)
crime_countweek_df.head(3)

In [None]:
# Feature matrix: the week numbers as a single-column 2-D array
X = crime_countweek_df['weekNumber'].to_numpy().reshape(-1, 1)
# Target: the incident count for each week
y = crime_countweek_df['Number']

# Create the estimator and train it in one step (fit returns the estimator itself)
model = LinearRegression().fit(X, y)

# Fitted values for the same weeks the model was trained on
y_pred = model.predict(X)
print(y_pred.shape)

In [None]:
# Plot the weekly counts and the fitted regression line for all crimes
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(X, y_pred, color='red')
ax.set_xlabel('Week Number')
ax.set_ylabel('Number of Incidents')
ax.set_xticks(np.arange(0, 20, 2))
ax.set_title('All Crime - By Week')

# savefig raises FileNotFoundError when the target folder is missing, so create it first
Path('ML_Images').mkdir(parents=True, exist_ok=True)
fig.savefig('ML_Images/allCrimePlt.png')

plt.show()

Linear Regression - Domestic Violence

In [None]:
# Keep only the offense codes this notebook treats as domestic violence:
# the two 273.5 PC variants and 243 (E)(1) PC
dv_offense_codes = ["273.5 (A) PC", "273.5 (F)(1) PC", "243 (E)(1) PC"]
is_dv_offense = crime_df['offenseCode'].isin(dv_offense_codes)
crime_dv_df = crime_df.loc[is_dv_offense]
print(crime_dv_df.shape)

In [None]:
# Weekly incident totals for the domestic-violence subset -- regression input.
# Counts the non-null 'Number' values per week, then turns weekNumber back into a column.
crime_countweek_dv_df = (
    crime_dv_df
    .groupby('weekNumber')[['Number']]
    .count()
    .reset_index()
)
crime_countweek_dv_df.head(3)

In [None]:
# Feature matrix: the week numbers of the DV counts as a single-column 2-D array
X = crime_countweek_dv_df['weekNumber'].to_numpy().reshape(-1, 1)
# Target: the DV incident count for each week
y = crime_countweek_dv_df['Number']

# Create the estimator and train it in one step (fit returns the estimator itself)
model = LinearRegression().fit(X, y)

# Fitted values for the same weeks the model was trained on
y_pred = model.predict(X)
print(y_pred.shape)

In [None]:
# Plot the weekly counts and the fitted regression line for domestic violence
fig, ax = plt.subplots()
ax.scatter(X, y)
ax.plot(X, y_pred, color='red')
ax.set_xlabel('Week Number')
ax.set_ylabel('Number of Incidents')
ax.set_xticks(np.arange(0, 20, 2))
# The title previously read 'All Crime - By Week', copied from the all-crimes
# plot; this figure shows the domestic-violence subset only.
ax.set_title('Domestic Violence - By Week')

# savefig raises FileNotFoundError when the target folder is missing, so create it first
Path('ML_Images').mkdir(parents=True, exist_ok=True)
fig.savefig('ML_Images/DVSctrPlt.png')

plt.show()