In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import session
from sqlalchemy import create_engine, func
import psycopg2
from psycopg2 import OperationalError, errorcodes, errors
import numpy as np

In [5]:
file_path = 'complete_2020.csv'
covid_crime_df= pd.read_csv(file_path)
covid_crime_df.head(10)

Unnamed: 0,zipcode,Mar20,Apr20,May20,Jun20,Jul20,Aug20,Sep20,Oct20,Nov20,Dec20,incident_number,offense_description,family_violence,date,location_type,category_description
0,78704,62,1523,3098,5391,17394,24714,25147,30246,36122,51696,20205000000.0,ROBBERY BY ASSAULT,N,1/1/20,STREETS / HWY / ROAD / ALLEY,Robbery
1,78756,12,106,190,392,1742,2529,2465,3161,4016,5891,20205000000.0,THEFT FROM BUILDING,N,1/1/20,RESIDENCE / HOME,Theft
2,78759,34,446,649,1413,7266,12513,12781,15712,18367,25876,20205000000.0,THEFT,N,1/1/20,RESIDENCE / HOME,Theft
3,78723,19,576,2009,4303,14693,21775,21202,25520,28927,36150,202021300.0,AUTO THEFT,N,1/1/20,PARKING LOTS / GARAGE,Auto Theft
4,78759,34,446,649,1413,7266,12513,12781,15712,18367,25876,202031300.0,BURGLARY OF RESIDENCE,N,1/1/20,RESIDENCE / HOME,Burglary
5,78758,29,1441,4605,9574,29238,44361,43162,51030,55571,67401,20206900000.0,BURGLARY OF VEHICLE,N,1/1/20,PARKING LOTS / GARAGE,Theft
6,78704,62,1523,3098,5391,17394,24714,25147,30246,36122,51696,20206900000.0,BURGLARY OF VEHICLE,N,1/1/20,RESIDENCE / HOME,Theft
7,78758,29,1441,4605,9574,29238,44361,43162,51030,55571,67401,20208000000.0,BURGLARY OF VEHICLE,N,1/1/20,PARKING LOTS / GARAGE,Theft
8,78741,56,2205,7412,12483,35322,49771,48667,57925,63387,77183,20205000000.0,THEFT OF AUTO PARTS,N,1/1/20,RESIDENCE / HOME,Theft
9,78702,44,505,1119,2927,12372,18154,17971,21526,25382,33378,20205000000.0,THEFT,N,1/1/20,RESIDENCE / HOME,Theft


In [6]:
# Check data types
covid_crime_df.dtypes

zipcode                   int64
Mar20                     int64
Apr20                     int64
May20                     int64
Jun20                     int64
Jul20                     int64
Aug20                     int64
Sep20                     int64
Oct20                     int64
Nov20                     int64
Dec20                     int64
incident_number         float64
offense_description      object
family_violence          object
date                     object
location_type            object
category_description     object
dtype: object

In [7]:
covid_crime_df["rates"] = covid_crime_df["Mar20"] + covid_crime_df["Apr20"] + covid_crime_df["May20"] + covid_crime_df["Jun20"] + covid_crime_df["Jul20"] + covid_crime_df["Aug20"] + covid_crime_df["Sep20"] + covid_crime_df["Oct20"] + covid_crime_df["Nov20"] + covid_crime_df["Dec20"]

In [8]:
covid_crime_df.head()

Unnamed: 0,zipcode,Mar20,Apr20,May20,Jun20,Jul20,Aug20,Sep20,Oct20,Nov20,Dec20,incident_number,offense_description,family_violence,date,location_type,category_description,rates
0,78704,62,1523,3098,5391,17394,24714,25147,30246,36122,51696,20205000000.0,ROBBERY BY ASSAULT,N,1/1/20,STREETS / HWY / ROAD / ALLEY,Robbery,195393
1,78756,12,106,190,392,1742,2529,2465,3161,4016,5891,20205000000.0,THEFT FROM BUILDING,N,1/1/20,RESIDENCE / HOME,Theft,20504
2,78759,34,446,649,1413,7266,12513,12781,15712,18367,25876,20205000000.0,THEFT,N,1/1/20,RESIDENCE / HOME,Theft,95057
3,78723,19,576,2009,4303,14693,21775,21202,25520,28927,36150,202021300.0,AUTO THEFT,N,1/1/20,PARKING LOTS / GARAGE,Auto Theft,155174
4,78759,34,446,649,1413,7266,12513,12781,15712,18367,25876,202031300.0,BURGLARY OF RESIDENCE,N,1/1/20,RESIDENCE / HOME,Burglary,95057


In [9]:
#Drop columns
covid_crime_df = covid_crime_df.drop(columns=['Mar20', 'Apr20', 'May20', 'Jun20', 'Jul20', 'Aug20', 'Sep20', 'Oct20','Nov20', 'Dec20'], axis=1)

In [10]:
# Drop additional columns
covid_crime_df = covid_crime_df.drop(columns=['offense_description', 'family_violence', 'date', 'location_type', 'category_description'])

In [11]:
covid_crime_df

Unnamed: 0,zipcode,incident_number,rates
0,78704,2.020500e+10,195393
1,78756,2.020500e+10,20504
2,78759,2.020500e+10,95057
3,78723,2.020213e+08,155174
4,78759,2.020313e+08,95057
...,...,...,...
40153,78665,,1387
40154,78642,,31
40155,78605,,8
40156,76574,,75


In [12]:
covid_crime_df.dropna()

Unnamed: 0,zipcode,incident_number,rates
0,78704,2.020500e+10,195393
1,78756,2.020500e+10,20504
2,78759,2.020500e+10,95057
3,78723,2.020213e+08,155174
4,78759,2.020313e+08,95057
...,...,...,...
40131,78759,2.021207e+08,95057
40132,78701,2.020366e+10,56747
40133,78758,2.020366e+10,306412
40134,78759,2.020505e+10,95057


In [13]:
final_df = covid_crime_df[np.isfinite(covid_crime_df).all(1)]

In [14]:
final_df = final_df.reset_index()

In [15]:
y = final_df['incident_number']
X = final_df.drop(columns= 'incident_number')

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30, random_state=75)

In [17]:
classifier = LogisticRegression(solver='lbfgs', random_state=1)
classifier

LogisticRegression(random_state=1)

In [18]:
LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
   intercept_scaling=1, max_iter=100, multi_class='warn', penalty='12',
   random_state=1, solver='lbfgs', tol=0.0001, warm_start=False)

LogisticRegression(multi_class='warn', penalty='12', random_state=1)

In [None]:
classifier.fit(X_train, y_train)

In [None]:
predictions = classifier.predict(X_test)
pd.DataFrame({"Prediction": predictions, "Actual": y_test})

In [None]:
accuracy_score(y_test, predictions)