## Model Training

#### 1.1 Import Data and Required Packages
##### Importing the Pandas, NumPy, Matplotlib, Seaborn and Warnings libraries.

In [10]:
# Basic Import
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns
import os
# Modelling
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler, LabelEncoder
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
import warnings
warnings.filterwarnings('ignore')


#### Import the CSV Data as Pandas DataFrame

In [43]:
# Resolve the dataset location relative to the current working directory
# and read it into the main DataFrame used throughout the notebook.
file_path = os.path.join('data', 'filtered_data.csv')
df = pd.read_csv(file_path)

#### Show Top 5 Records

In [44]:
# Preview the first five rows of the loaded data.
df.head(n=5)

Unnamed: 0,Location,Latitude,Longitude,Area_ID,Reporting_District_no,Part 1-2,Victim_Age,Victim_Sex,Victim_Descent,Premise_Code,...,Status,Crime_Category,Hours_Occurred,Reported_Year,Reported_Month,Reported_Weekday,Occurred_Day,Occurred_Weekday,Modus_Operandi_num_code,Time_Difference_Log
0,4500 CARPENTER AV,34.1522,-118.391,15,1563,1,75,M,W,101,...,IC,Property Crimes,18.0,2020,3,0,6,4,1,4.290459
1,45TH ST,34.0028,-118.2391,13,1367,1,41,M,H,216,...,IC,Property Crimes,13.75,2020,2,3,27,3,8,0.0
2,600 E MARTIN LUTHER KING JR BL,34.0111,-118.2653,13,1343,2,67,M,B,501,...,IC,Property Crimes,6.083333,2020,8,4,21,4,2,0.0
3,14900 ORO GRANDE ST,34.2953,-118.459,19,1924,1,61,M,H,101,...,IC,Property Crimes,18.0,2020,11,6,6,4,2,3.89182
4,7100 S VERMONT AV,33.9787,-118.2918,12,1245,1,0,X,X,401,...,IC,Property Crimes,11.5,2020,2,1,25,1,6,0.0


In [45]:
# Print a structural summary of the frame: row count, column names,
# non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19903 entries, 0 to 19902
Data columns (total 21 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Location                 19903 non-null  object 
 1   Latitude                 19903 non-null  float64
 2   Longitude                19903 non-null  float64
 3   Area_ID                  19903 non-null  int64  
 4   Reporting_District_no    19903 non-null  int64  
 5   Part 1-2                 19903 non-null  int64  
 6   Victim_Age               19903 non-null  int64  
 7   Victim_Sex               19903 non-null  object 
 8   Victim_Descent           19903 non-null  object 
 9   Premise_Code             19903 non-null  int64  
 10  Weapon_Used_Code         19903 non-null  float64
 11  Status                   19903 non-null  object 
 12  Crime_Category           19903 non-null  object 
 13  Hours_Occurred           19903 non-null  float64
 14  Reported_Year         

### Convert already encoded labels into integers

In [49]:
# Columns that already hold numeric category codes. Cast them all to a
# plain integer dtype (e.g. Weapon_Used_Code is loaded as float64).
encoded_features = [
    'Area_ID',
    'Reporting_District_no',
    'Part 1-2',
    'Premise_Code',
    'Weapon_Used_Code',
    'Modus_Operandi_num_code',
]
df[encoded_features] = df[encoded_features].astype(int)

#### Preparing X and Y variables

In [50]:
# Feature matrix: every column except the target label 'Crime_Category'.
X = df.drop(columns=['Crime_Category'])

In [51]:
# Summarise the feature frame to check that 'Crime_Category' is gone and
# to inspect the dtypes of the remaining columns.
X.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19903 entries, 0 to 19902
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Location                 19903 non-null  object 
 1   Latitude                 19903 non-null  float64
 2   Longitude                19903 non-null  float64
 3   Area_ID                  19903 non-null  int32  
 4   Reporting_District_no    19903 non-null  int32  
 5   Part 1-2                 19903 non-null  int32  
 6   Victim_Age               19903 non-null  int64  
 7   Victim_Sex               19903 non-null  object 
 8   Victim_Descent           19903 non-null  object 
 9   Premise_Code             19903 non-null  int32  
 10  Weapon_Used_Code         19903 non-null  int32  
 11  Status                   19903 non-null  object 
 12  Hours_Occurred           19903 non-null  float64
 13  Reported_Year            19903 non-null  int64  
 14  Reported_Month        