In [1]:
# Initial Library Imports
import pandas as pd
import numpy as np
from pathlib import Path
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Import Data from csv
oracle_path = Path('dataset/fraud_oracle.csv')
oracle_data = pd.read_csv(oracle_path)

In [3]:
# View Oracle Dataframe
oracle_data.head()

Unnamed: 0,Month,WeekOfMonth,DayOfWeek,Make,AccidentArea,DayOfWeekClaimed,MonthClaimed,WeekOfMonthClaimed,Sex,MaritalStatus,...,AgeOfVehicle,AgeOfPolicyHolder,PoliceReportFiled,WitnessPresent,AgentType,NumberOfSuppliments,AddressChange_Claim,NumberOfCars,Year,BasePolicy
0,Dec,5,Wednesday,Honda,Urban,Tuesday,Jan,1,Female,Single,...,3 years,26 to 30,No,No,External,none,1 year,3 to 4,1994,Liability
1,Jan,3,Wednesday,Honda,Urban,Monday,Jan,4,Male,Single,...,6 years,31 to 35,Yes,No,External,none,no change,1 vehicle,1994,Collision
2,Oct,5,Friday,Honda,Urban,Thursday,Nov,2,Male,Married,...,7 years,41 to 50,No,No,External,none,no change,1 vehicle,1994,Collision
3,Jun,2,Saturday,Toyota,Rural,Friday,Jul,1,Male,Married,...,more than 7,51 to 65,Yes,No,External,more than 5,no change,1 vehicle,1994,Liability
4,Jan,5,Monday,Honda,Urban,Tuesday,Feb,2,Female,Single,...,5 years,31 to 35,No,No,External,none,no change,1 vehicle,1994,Collision


In [4]:
# List all columns
oracle_data.columns

Index(['Month', 'WeekOfMonth', 'DayOfWeek', 'Make', 'AccidentArea',
       'DayOfWeekClaimed', 'MonthClaimed', 'WeekOfMonthClaimed', 'Sex',
       'MaritalStatus', 'Age', 'Fault', 'PolicyType', 'VehicleCategory',
       'VehiclePrice', 'FraudFound_P', 'PolicyNumber', 'RepNumber',
       'Deductible', 'DriverRating', 'Days_Policy_Accident',
       'Days_Policy_Claim', 'PastNumberOfClaims', 'AgeOfVehicle',
       'AgeOfPolicyHolder', 'PoliceReportFiled', 'WitnessPresent', 'AgentType',
       'NumberOfSuppliments', 'AddressChange_Claim', 'NumberOfCars', 'Year',
       'BasePolicy'],
      dtype='object')

In [5]:
# Check columns we're unsure of
oracle_data['AgeOfPolicyHolder'].value_counts()

31 to 35    5593
36 to 40    4043
41 to 50    2828
51 to 65    1392
26 to 30     613
over 65      508
16 to 17     320
21 to 25     108
18 to 20      15
Name: AgeOfPolicyHolder, dtype: int64

In [6]:
# Check columns we're unsure of
oracle_data['DayOfWeekClaimed'].value_counts()

Monday       3757
Tuesday      3375
Wednesday    2951
Thursday     2660
Friday       2497
Saturday      127
Sunday         52
0               1
Name: DayOfWeekClaimed, dtype: int64

In [7]:
# Drop single row where 'DayOfWeekClaimed' equals 0
nodayofweek = oracle_data[(oracle_data['DayOfWeekClaimed'] == '0')].index
oracle_data.drop(nodayofweek , inplace=True)

In [8]:
# Check that the row was removed
oracle_data['DayOfWeekClaimed'].value_counts()

Monday       3757
Tuesday      3375
Wednesday    2951
Thursday     2660
Friday       2497
Saturday      127
Sunday         52
Name: DayOfWeekClaimed, dtype: int64

In [9]:
# Check columns we're unsure of
oracle_data['WeekOfMonthClaimed'].value_counts()

2    3720
3    3583
1    3449
4    3433
5    1234
Name: WeekOfMonthClaimed, dtype: int64

In [10]:
# Check columns we're unsure of
oracle_data['AgeOfVehicle'].value_counts()

7 years        5807
more than 7    3981
6 years        3448
5 years        1357
new             372
4 years         229
3 years         152
2 years          73
Name: AgeOfVehicle, dtype: int64

In [11]:
# Drop columns we don't have enough informnation about or need
oracle_data = oracle_data.drop(columns=['PolicyNumber', 'RepNumber', 'Days_Policy_Accident',
       'Days_Policy_Claim', 'NumberOfSuppliments','Make', 'DriverRating', 'Age', 'PolicyType',
                                        'Month', 'WeekOfMonth', 'DayOfWeek', 'Year'])
oracle_data.head()

Unnamed: 0,AccidentArea,DayOfWeekClaimed,MonthClaimed,WeekOfMonthClaimed,Sex,MaritalStatus,Fault,VehicleCategory,VehiclePrice,FraudFound_P,Deductible,PastNumberOfClaims,AgeOfVehicle,AgeOfPolicyHolder,PoliceReportFiled,WitnessPresent,AgentType,AddressChange_Claim,NumberOfCars,BasePolicy
0,Urban,Tuesday,Jan,1,Female,Single,Policy Holder,Sport,more than 69000,0,300,none,3 years,26 to 30,No,No,External,1 year,3 to 4,Liability
1,Urban,Monday,Jan,4,Male,Single,Policy Holder,Sport,more than 69000,0,400,none,6 years,31 to 35,Yes,No,External,no change,1 vehicle,Collision
2,Urban,Thursday,Nov,2,Male,Married,Policy Holder,Sport,more than 69000,0,400,1,7 years,41 to 50,No,No,External,no change,1 vehicle,Collision
3,Rural,Friday,Jul,1,Male,Married,Third Party,Sport,20000 to 29000,0,400,1,more than 7,51 to 65,Yes,No,External,no change,1 vehicle,Liability
4,Urban,Tuesday,Feb,2,Female,Single,Third Party,Sport,more than 69000,0,400,none,5 years,31 to 35,No,No,External,no change,1 vehicle,Collision


In [12]:
# Make Age of Vehicle column numerical
oracle_data.loc[oracle_data['AgeOfVehicle'] == 'new', 'AgeOfVehicle'] = '1'
oracle_data.loc[oracle_data['AgeOfVehicle'] == '2 years', 'AgeOfVehicle'] = '2'
oracle_data.loc[oracle_data['AgeOfVehicle'] == '3 years', 'AgeOfVehicle'] = '3'
oracle_data.loc[oracle_data['AgeOfVehicle'] == '4 years', 'AgeOfVehicle'] = '4'
oracle_data.loc[oracle_data['AgeOfVehicle'] == '5 years', 'AgeOfVehicle'] = '5'
oracle_data.loc[oracle_data['AgeOfVehicle'] == '6 years', 'AgeOfVehicle'] = '6'
oracle_data.loc[oracle_data['AgeOfVehicle'] == '7 years', 'AgeOfVehicle'] = '7'
oracle_data.loc[oracle_data['AgeOfVehicle'] == 'more than 7', 'AgeOfVehicle'] = '8'

oracle_data.head()

Unnamed: 0,AccidentArea,DayOfWeekClaimed,MonthClaimed,WeekOfMonthClaimed,Sex,MaritalStatus,Fault,VehicleCategory,VehiclePrice,FraudFound_P,Deductible,PastNumberOfClaims,AgeOfVehicle,AgeOfPolicyHolder,PoliceReportFiled,WitnessPresent,AgentType,AddressChange_Claim,NumberOfCars,BasePolicy
0,Urban,Tuesday,Jan,1,Female,Single,Policy Holder,Sport,more than 69000,0,300,none,3,26 to 30,No,No,External,1 year,3 to 4,Liability
1,Urban,Monday,Jan,4,Male,Single,Policy Holder,Sport,more than 69000,0,400,none,6,31 to 35,Yes,No,External,no change,1 vehicle,Collision
2,Urban,Thursday,Nov,2,Male,Married,Policy Holder,Sport,more than 69000,0,400,1,7,41 to 50,No,No,External,no change,1 vehicle,Collision
3,Rural,Friday,Jul,1,Male,Married,Third Party,Sport,20000 to 29000,0,400,1,8,51 to 65,Yes,No,External,no change,1 vehicle,Liability
4,Urban,Tuesday,Feb,2,Female,Single,Third Party,Sport,more than 69000,0,400,none,5,31 to 35,No,No,External,no change,1 vehicle,Collision


In [13]:
# View the 'MonthClaimed' column
oracle_data['MonthClaimed'].value_counts()

Jan    1446
May    1411
Mar    1348
Oct    1339
Jun    1293
Feb    1287
Nov    1285
Apr    1271
Sep    1242
Jul    1225
Dec    1146
Aug    1126
Name: MonthClaimed, dtype: int64

In [14]:
# Make MonthClaimed column numerical
oracle_data.loc[oracle_data['MonthClaimed'] == 'Jan', 'MonthClaimed'] = '1'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Feb', 'MonthClaimed'] = '2'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Mar', 'MonthClaimed'] = '3'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Apr', 'MonthClaimed'] = '4'
oracle_data.loc[oracle_data['MonthClaimed'] == 'May', 'MonthClaimed'] = '5'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Jun', 'MonthClaimed'] = '6'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Jul', 'MonthClaimed'] = '7'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Aug', 'MonthClaimed'] = '8'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Sep', 'MonthClaimed'] = '9'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Oct', 'MonthClaimed'] = '10'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Nov', 'MonthClaimed'] = '11'
oracle_data.loc[oracle_data['MonthClaimed'] == 'Dec', 'MonthClaimed'] = '12'

oracle_data.head()

Unnamed: 0,AccidentArea,DayOfWeekClaimed,MonthClaimed,WeekOfMonthClaimed,Sex,MaritalStatus,Fault,VehicleCategory,VehiclePrice,FraudFound_P,Deductible,PastNumberOfClaims,AgeOfVehicle,AgeOfPolicyHolder,PoliceReportFiled,WitnessPresent,AgentType,AddressChange_Claim,NumberOfCars,BasePolicy
0,Urban,Tuesday,1,1,Female,Single,Policy Holder,Sport,more than 69000,0,300,none,3,26 to 30,No,No,External,1 year,3 to 4,Liability
1,Urban,Monday,1,4,Male,Single,Policy Holder,Sport,more than 69000,0,400,none,6,31 to 35,Yes,No,External,no change,1 vehicle,Collision
2,Urban,Thursday,11,2,Male,Married,Policy Holder,Sport,more than 69000,0,400,1,7,41 to 50,No,No,External,no change,1 vehicle,Collision
3,Rural,Friday,7,1,Male,Married,Third Party,Sport,20000 to 29000,0,400,1,8,51 to 65,Yes,No,External,no change,1 vehicle,Liability
4,Urban,Tuesday,2,2,Female,Single,Third Party,Sport,more than 69000,0,400,none,5,31 to 35,No,No,External,no change,1 vehicle,Collision


In [15]:
# Make DayofWeekClaimed column numerical
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Monday', 'DayOfWeekClaimed'] = '1'
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Tuesday', 'DayOfWeekClaimed'] = '2'
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Wednesday', 'DayOfWeekClaimed'] = '3'
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Thursday', 'DayOfWeekClaimed'] = '4'
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Friday', 'DayOfWeekClaimed'] = '5'
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Saturday', 'DayOfWeekClaimed'] = '6'
oracle_data.loc[oracle_data['DayOfWeekClaimed'] == 'Sunday', 'DayOfWeekClaimed'] = '7'

oracle_data.head()

Unnamed: 0,AccidentArea,DayOfWeekClaimed,MonthClaimed,WeekOfMonthClaimed,Sex,MaritalStatus,Fault,VehicleCategory,VehiclePrice,FraudFound_P,Deductible,PastNumberOfClaims,AgeOfVehicle,AgeOfPolicyHolder,PoliceReportFiled,WitnessPresent,AgentType,AddressChange_Claim,NumberOfCars,BasePolicy
0,Urban,2,1,1,Female,Single,Policy Holder,Sport,more than 69000,0,300,none,3,26 to 30,No,No,External,1 year,3 to 4,Liability
1,Urban,1,1,4,Male,Single,Policy Holder,Sport,more than 69000,0,400,none,6,31 to 35,Yes,No,External,no change,1 vehicle,Collision
2,Urban,4,11,2,Male,Married,Policy Holder,Sport,more than 69000,0,400,1,7,41 to 50,No,No,External,no change,1 vehicle,Collision
3,Rural,5,7,1,Male,Married,Third Party,Sport,20000 to 29000,0,400,1,8,51 to 65,Yes,No,External,no change,1 vehicle,Liability
4,Urban,2,2,2,Female,Single,Third Party,Sport,more than 69000,0,400,none,5,31 to 35,No,No,External,no change,1 vehicle,Collision


In [16]:
#View Column types
oracle_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15419 entries, 0 to 15419
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   AccidentArea         15419 non-null  object
 1   DayOfWeekClaimed     15419 non-null  object
 2   MonthClaimed         15419 non-null  object
 3   WeekOfMonthClaimed   15419 non-null  int64 
 4   Sex                  15419 non-null  object
 5   MaritalStatus        15419 non-null  object
 6   Fault                15419 non-null  object
 7   VehicleCategory      15419 non-null  object
 8   VehiclePrice         15419 non-null  object
 9   FraudFound_P         15419 non-null  int64 
 10  Deductible           15419 non-null  int64 
 11  PastNumberOfClaims   15419 non-null  object
 12  AgeOfVehicle         15419 non-null  object
 13  AgeOfPolicyHolder    15419 non-null  object
 14  PoliceReportFiled    15419 non-null  object
 15  WitnessPresent       15419 non-null  object
 16  Agen

In [17]:
# Convert newly numerical columns to integers
oracle_data['DayOfWeekClaimed'] = oracle_data['DayOfWeekClaimed'].astype('int')
oracle_data['MonthClaimed'] = oracle_data['MonthClaimed'].astype('int')
oracle_data['AgeOfVehicle'] = oracle_data['AgeOfVehicle'].astype('int')
oracle_data.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15419 entries, 0 to 15419
Data columns (total 20 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   AccidentArea         15419 non-null  object
 1   DayOfWeekClaimed     15419 non-null  int32 
 2   MonthClaimed         15419 non-null  int32 
 3   WeekOfMonthClaimed   15419 non-null  int64 
 4   Sex                  15419 non-null  object
 5   MaritalStatus        15419 non-null  object
 6   Fault                15419 non-null  object
 7   VehicleCategory      15419 non-null  object
 8   VehiclePrice         15419 non-null  object
 9   FraudFound_P         15419 non-null  int64 
 10  Deductible           15419 non-null  int64 
 11  PastNumberOfClaims   15419 non-null  object
 12  AgeOfVehicle         15419 non-null  int32 
 13  AgeOfPolicyHolder    15419 non-null  object
 14  PoliceReportFiled    15419 non-null  object
 15  WitnessPresent       15419 non-null  object
 16  Agen

In [18]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

In [19]:
# Create a list of the columns with categorical variables
categorical_variables = ['AccidentArea', 'Sex', 'MaritalStatus', 'Fault', 'VehicleCategory', 'VehiclePrice', 
                        'PastNumberOfClaims', 'AgeOfPolicyHolder', 'PoliceReportFiled', 'WitnessPresent', 
                        'AgentType', 'AddressChange_Claim', 'NumberOfCars', 'BasePolicy']

# Use the fit_transform method from the OneHotEncoder to encode the categorical variables
encoded_data = enc.fit_transform(oracle_data[categorical_variables])

In [20]:
# Create a DataFrame with the encoded variables
encoded_variables = pd.DataFrame(encoded_data,
    columns = enc.get_feature_names(categorical_variables))

encoded_variables.head()



Unnamed: 0,AccidentArea_Rural,AccidentArea_Urban,Sex_Female,Sex_Male,MaritalStatus_Divorced,MaritalStatus_Married,MaritalStatus_Single,MaritalStatus_Widow,Fault_Policy Holder,Fault_Third Party,...,AddressChange_Claim_no change,AddressChange_Claim_under 6 months,NumberOfCars_1 vehicle,NumberOfCars_2 vehicles,NumberOfCars_3 to 4,NumberOfCars_5 to 8,NumberOfCars_more than 8,BasePolicy_All Perils,BasePolicy_Collision,BasePolicy_Liability
0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0
1,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
3,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,...,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [21]:
# Seperate the numerical columns and concat with encoded features
numerical_variables = oracle_data[['DayOfWeekClaimed', 'MonthClaimed', 'WeekOfMonthClaimed', 
                                  'Deductible', 'AgeOfVehicle', 'FraudFound_P']]
encoded_oracle = pd.concat([encoded_variables, numerical_variables], axis=1)
encoded_oracle.dropna(inplace=True)
encoded_oracle.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15418 entries, 0 to 15418
Data columns (total 57 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   AccidentArea_Rural                  15418 non-null  float64
 1   AccidentArea_Urban                  15418 non-null  float64
 2   Sex_Female                          15418 non-null  float64
 3   Sex_Male                            15418 non-null  float64
 4   MaritalStatus_Divorced              15418 non-null  float64
 5   MaritalStatus_Married               15418 non-null  float64
 6   MaritalStatus_Single                15418 non-null  float64
 7   MaritalStatus_Widow                 15418 non-null  float64
 8   Fault_Policy Holder                 15418 non-null  float64
 9   Fault_Third Party                   15418 non-null  float64
 10  VehicleCategory_Sedan               15418 non-null  float64
 11  VehicleCategory_Sport               15418

In [22]:
# Assign X and y variables
y = encoded_oracle['FraudFound_P']
X = encoded_oracle.copy()
X.drop(columns='FraudFound_P', inplace=True)
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15418 entries, 0 to 15418
Data columns (total 56 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   AccidentArea_Rural                  15418 non-null  float64
 1   AccidentArea_Urban                  15418 non-null  float64
 2   Sex_Female                          15418 non-null  float64
 3   Sex_Male                            15418 non-null  float64
 4   MaritalStatus_Divorced              15418 non-null  float64
 5   MaritalStatus_Married               15418 non-null  float64
 6   MaritalStatus_Single                15418 non-null  float64
 7   MaritalStatus_Widow                 15418 non-null  float64
 8   Fault_Policy Holder                 15418 non-null  float64
 9   Fault_Third Party                   15418 non-null  float64
 10  VehicleCategory_Sedan               15418 non-null  float64
 11  VehicleCategory_Sport               15418

In [23]:
# Splitting into Train and Test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [24]:
# Create instance for scaler
scaler = StandardScaler()

In [25]:
# Fit the scaler with X training info
X_scaler = scaler.fit(X_train)

In [26]:
# Transform the X data with scaler
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [27]:
# Imports for Running LR Model and reports
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.linear_model import LogisticRegression

In [28]:
# Create model instance
logistic_regression_model = LogisticRegression()

In [29]:
# Fit model
lr_model = logistic_regression_model.fit(X_train_scaled, y_train)

In [30]:
# Predict for Training Values
training_predictions = lr_model.predict(X_train_scaled)

In [31]:
# Create Dataframe to hold info
y_train_predictions = pd.DataFrame({'Predictions':training_predictions, 'Actual': y_train})
y_train_predictions.head()

Unnamed: 0,Predictions,Actual
9570,0.0,0.0
2863,0.0,1.0
6561,0.0,0.0
12879,0.0,0.0
7433,0.0,0.0


In [32]:
# View Confusion Matrix for Model compared to Training Data
train_conf_matrix = confusion_matrix(y_train, training_predictions)
train_conf_matrix

array([[10855,     1],
       [  705,     2]], dtype=int64)

In [33]:
# Put Confusion matrix into Dataframe
train_matrix = pd.DataFrame(train_conf_matrix, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
train_matrix

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,10855,1
Actual 1,705,2


In [34]:
# View Classification Report for Training Predictions
print(classification_report(y_train, training_predictions))

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97     10856
         1.0       0.67      0.00      0.01       707

    accuracy                           0.94     11563
   macro avg       0.80      0.50      0.49     11563
weighted avg       0.92      0.94      0.91     11563



In [35]:
# Predict for testing values
testing_predictions = lr_model.predict(X_test_scaled)

# Review the predictions
testing_predictions

array([0., 0., 0., ..., 0., 0., 0.])

In [36]:
# Put Testing Predictions into Dataframe
y_test_predictions = pd.DataFrame({'Predictions':testing_predictions, 'Actual': y_test})
y_test_predictions.head()

Unnamed: 0,Predictions,Actual
11445,0.0,0.0
9417,0.0,0.0
8077,0.0,0.0
12321,0.0,0.0
2142,0.0,1.0


In [37]:
# View Confusion Matrix for Model compared to test Data
test_conf_matrix = confusion_matrix(y_test, testing_predictions)
test_conf_matrix

array([[3640,    0],
       [ 214,    1]], dtype=int64)

In [38]:
# Put Confusion matrix into Dataframe
test_matrix = pd.DataFrame(test_conf_matrix, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
test_matrix

Unnamed: 0,Predicted 0,Predicted 1
Actual 0,3640,0
Actual 1,214,1


In [39]:
# View Classification Report for Testing Predictions
print(classification_report(y_test, testing_predictions))

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97      3640
         1.0       1.00      0.00      0.01       215

    accuracy                           0.94      3855
   macro avg       0.97      0.50      0.49      3855
weighted avg       0.95      0.94      0.92      3855



In [44]:
# View Model Accuracy Scores
print(f'Training Data Accuracy Score: {lr_model.score(X_train_scaled, y_train)}')
print(f'Testing Data Accuracy Score: {lr_model.score(X_test_scaled, y_test)}')

Training Data Accuracy Score: 0.9389431808354234
Testing Data Accuracy Score: 0.9444876783398184
