In [183]:
# Import the data
import pandas as pd
import numpy as np
from sklearn_pandas import DataFrameMapper
from sklearn.model_selection import train_test_split
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.svm import SVC 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import AdaBoostClassifier
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import TomekLinks
from imblearn.combine import SMOTETomek
from sklearn.metrics import classification_report



# from ml_utils import train_test_split_marketing,\
#     fill_missing,\
#     build_encoders,\
#     encode_categorical,\
#     build_target_encoder,\
#     encode_target

df = pd.read_csv('vehicle_insurance_claim_fraud.csv').rename(columns={'FraudFound_P':'y'})
display(df.head())
display(df.info())

df_columns = df.columns

Unnamed: 0,Month,WeekOfMonth,DayOfWeek,Make,AccidentArea,DayOfWeekClaimed,MonthClaimed,WeekOfMonthClaimed,Sex,MaritalStatus,...,AgeOfVehicle,AgeOfPolicyHolder,PoliceReportFiled,WitnessPresent,AgentType,NumberOfSuppliments,AddressChange_Claim,NumberOfCars,Year,BasePolicy
0,Dec,5,Wednesday,Honda,Urban,Tuesday,Jan,1,Female,Single,...,3 years,26 to 30,No,No,External,none,1 year,3 to 4,1994,Liability
1,Jan,3,Wednesday,Honda,Urban,Monday,Jan,4,Male,Single,...,6 years,31 to 35,Yes,No,External,none,no change,1 vehicle,1994,Collision
2,Oct,5,Friday,Honda,Urban,Thursday,Nov,2,Male,Married,...,7 years,41 to 50,No,No,External,none,no change,1 vehicle,1994,Collision
3,Jun,2,Saturday,Toyota,Rural,Friday,Jul,1,Male,Married,...,more than 7,51 to 65,Yes,No,External,more than 5,no change,1 vehicle,1994,Liability
4,Jan,5,Monday,Honda,Urban,Tuesday,Feb,2,Female,Single,...,5 years,31 to 35,No,No,External,none,no change,1 vehicle,1994,Collision


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15420 entries, 0 to 15419
Data columns (total 33 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Month                 15420 non-null  object
 1   WeekOfMonth           15420 non-null  int64 
 2   DayOfWeek             15420 non-null  object
 3   Make                  15420 non-null  object
 4   AccidentArea          15420 non-null  object
 5   DayOfWeekClaimed      15420 non-null  object
 6   MonthClaimed          15420 non-null  object
 7   WeekOfMonthClaimed    15420 non-null  int64 
 8   Sex                   15420 non-null  object
 9   MaritalStatus         15420 non-null  object
 10  Age                   15420 non-null  int64 
 11  Fault                 15420 non-null  object
 12  PolicyType            15420 non-null  object
 13  VehicleCategory       15420 non-null  object
 14  VehiclePrice          15420 non-null  object
 15  y                     15420 non-null

None

In [184]:
# Split the data into training and testing sets
X = df.drop(columns='y')
y = df['y'].values.reshape(-1,1)

# Show the X_train dataset
X_train, X_test, y_train, y_test = train_test_split(X,y)
display(X_train.describe())

# How balanced is the data?
values, counts = np.unique(y, return_counts=True)
print(f"There are {counts[0]} non-fraudulent rows, and {counts[1]} fraud rows.")


Unnamed: 0,WeekOfMonth,WeekOfMonthClaimed,Age,PolicyNumber,RepNumber,Deductible,DriverRating,Year
count,11565.0,11565.0,11565.0,11565.0,11565.0,11565.0,11565.0,11565.0
mean,2.773973,2.694423,39.87981,7709.675054,8.508863,407.661046,2.480761,1994.865283
std,1.289179,1.258045,13.505911,4432.210002,4.594057,44.013094,1.121277,0.800818
min,1.0,1.0,0.0,1.0,1.0,300.0,1.0,1994.0
25%,2.0,2.0,31.0,3884.0,5.0,400.0,1.0,1994.0
50%,3.0,3.0,38.0,7717.0,9.0,400.0,2.0,1995.0
75%,4.0,4.0,49.0,11536.0,12.0,400.0,3.0,1996.0
max,5.0,5.0,80.0,15419.0,16.0,700.0,4.0,1996.0


There are 14497 non-fraudulent rows, and 923 fraud rows.


In [185]:
# Test for null values, should be 15420 rows
X.dropna().count().head(1)


Month    15420
dtype: int64

## Encoding the Data

In [186]:
# Encode the object columns, scale the numeric columns

def encode_feature(dataframe, feature, Model) :
    return pd.Series(np.ravel(Model.fit_transform(dataframe[feature].values.reshape(-1,1))), name=feature)

def encode_multi(dataframe, feature, Model) :
    encoder = Model.fit(dataframe[feature].values.reshape(-1, 1))
    return pd.DataFrame(encoder.transform(dataframe[feature].values.reshape(-1, 1)), columns=OneHotColumnNames(feature, encoder))

def OneHotColumnNames(feature, column) : 
    column_names = column.get_feature_names_out()
    output = []
    for column_name in column_names : 
        output.append(feature+" "+column_name)
    return output

    

In [187]:

def encode_fraud_data( unencoded_dataframe ) :

    ORD = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=-1)
    SSC = StandardScaler()
    OHE = OneHotEncoder(handle_unknown='ignore', sparse_output=False)

    output = pd.DataFrame()

    output = pd.concat([output, encode_feature(unencoded_dataframe, "Month", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "WeekOfMonth", SSC)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "DayOfWeek", ORD)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "Make", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "AccidentArea", OHE)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "DayOfWeekClaimed", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "MonthClaimed", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "WeekOfMonthClaimed", SSC)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "Sex", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "MaritalStatus", OHE)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "Age", SSC)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "Fault", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "PolicyType", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "VehicleCategory", OHE)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "VehiclePrice", ORD)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "RepNumber", OHE)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "Deductible", SSC)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "DriverRating", SSC)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "Days_Policy_Accident", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "Days_Policy_Claim", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "PastNumberOfClaims", OHE)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "AgeOfVehicle", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "AgeOfPolicyHolder", ORD)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "PoliceReportFiled", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "WitnessPresent", OHE)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "AgentType", OHE)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "NumberOfSuppliments", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "AddressChange_Claim", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "NumberOfCars", ORD)], axis="columns")
    output = pd.concat([output, encode_feature(unencoded_dataframe, "Year", SSC)], axis="columns")
    output = pd.concat([output, encode_multi(unencoded_dataframe, "BasePolicy", OHE)], axis="columns")

    return output



In [188]:

# Encode the data
X_train_encoded = pd.DataFrame()
X_train_encoded = encode_fraud_data(X_train)
X_test_encoded = encode_fraud_data(X_test)

# Sometimes the train and test columns don't quite match up because of the 
# extra columns generated after a OneHotEncoding, so I have to do this to get 
# them to match.
X_test_encoded = X_test_encoded.reindex(columns=X_train_encoded.columns)
X_test_encoded = X_test_encoded.fillna(0)

# Flatten the y series out.  It's already "encoded" because its just 1 or 0.
y_train_flattened = np.ravel(y_train)
y_test_flattened = np.ravel(y_test)


## Examining Correlation to Determine Valuable Features

In [189]:
# Compute the correlation matrix
corr_matrix = X_train_encoded.corr()

# Unstack the correlation matrix
corr_unstacked = corr_matrix.unstack()

# Filter out self-correlations
corr_unstacked = corr_unstacked[corr_unstacked != 1]

# Sort the correlations by absolute value
sorted_corr = corr_unstacked.abs().sort_values(ascending=True)

# Display the top correlations
top_correlations = sorted_corr.head(25)

print("Top correlations:")
print(top_correlations)

Top correlations:
DayOfWeek                   RepNumber x0_5                0.000010
RepNumber x0_5              DayOfWeek                     0.000010
MonthClaimed                Sex x0_Male                   0.000014
Sex x0_Male                 MonthClaimed                  0.000014
MonthClaimed                Sex x0_Female                 0.000014
Sex x0_Female               MonthClaimed                  0.000014
RepNumber x0_15             MaritalStatus x0_Divorced     0.000022
MaritalStatus x0_Divorced   RepNumber x0_15               0.000022
VehicleCategory x0_Utility  NumberOfCars                  0.000027
NumberOfCars                VehicleCategory x0_Utility    0.000027
RepNumber x0_16             AddressChange_Claim           0.000030
AddressChange_Claim         RepNumber x0_16               0.000030
Make x0_VW                  RepNumber x0_14               0.000041
RepNumber x0_14             Make x0_VW                    0.000041
Fault x0_Third Party        RepNumber x0_11 

## Balancing the Datasets

In [190]:
# Now we need to balance the datasets

# Oversampling: Apply SMOTE to the training data to expand the fraud cases
smote = SMOTE()

# Undersampling: Apply Tomek Links, removing samples from the majority class 
# that are closest to the minority class.
tomek = TomekLinks()

# Hybrid Sampling: SMOTETomek - the combination of both methods above
smote_tomek = SMOTETomek()

X_train_smote, y_train_smote = smote.fit_resample(X_train_encoded, y_train_flattened)
X_train_tomek, y_train_tomek = tomek.fit_resample(X_train_encoded, y_train_flattened)
X_train_smote_tomek, y_train_smote_tomek = smote_tomek.fit_resample(X_train_encoded, y_train_flattened)

# How balanced is the data?
values, counts = np.unique(y_train_smote, return_counts=True)
print(f"After using SMOTE balancing, there are {counts[0]} non-fraudulent rows, and {counts[1]} fraud rows.")
values, counts = np.unique(y_train_tomek, return_counts=True)
print(f"After using Tomek Links balancing, there are {counts[0]} non-fraudulent rows, and {counts[1]} fraud rows.")
values, counts = np.unique(y_train_smote_tomek, return_counts=True)
print(f"After using SMOTETomek Hybrid balancing, there are {counts[0]} non-fraudulent rows, and {counts[1]} fraud rows.")


After using SMOTE balancing, there are 10869 non-fraudulent rows, and 10869 fraud rows.
After using Tomek Links balancing, there are 10607 non-fraudulent rows, and 696 fraud rows.
After using SMOTETomek Hybrid balancing, there are 10869 non-fraudulent rows, and 10869 fraud rows.


## Running the data through an unbalanced model for reference.

In [191]:

Models = [SVC, 
          KNeighborsClassifier,
          DecisionTreeClassifier,
          RandomForestClassifier,
          ExtraTreesClassifier,
          GradientBoostingClassifier,
          AdaBoostClassifier]

for Model in Models :
    
    model_unbalanced = Model().fit(X_train_encoded, y_train_flattened)
    y_pred_unbalanced = model_unbalanced.predict(X_test_encoded)
    print(f"{str(Model.__name__)} - No Balancing")
    print("==========================================================")
    print( "balanced_accuracy: ", balanced_accuracy_score(y_test_flattened, y_pred_unbalanced))
    print( "train_accuracy:", model_unbalanced.score(X_train_encoded, y_train_flattened))
    print( "test_accuracy:", model_unbalanced.score(X_test_encoded, y_test_flattened))
    print()    
    print(classification_report(y_test_flattened, y_pred_unbalanced, labels=[0,1]))
    print()    



SVC - No Balancing
balanced_accuracy:  0.5
train_accuracy: 0.9398184176394293
test_accuracy: 0.9411154345006485

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.00      0.00      0.00       227

    accuracy                           0.94      3855
   macro avg       0.47      0.50      0.48      3855
weighted avg       0.89      0.94      0.91      3855


KNeighborsClassifier - No Balancing
balanced_accuracy:  0.5006866564022362


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


train_accuracy: 0.9418071768266321
test_accuracy: 0.9385214007782101

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.08      0.00      0.01       227

    accuracy                           0.94      3855
   macro avg       0.51      0.50      0.49      3855
weighted avg       0.89      0.94      0.91      3855


DecisionTreeClassifier - No Balancing
balanced_accuracy:  0.5696261092141882
train_accuracy: 1.0
test_accuracy: 0.8972762645914397

              precision    recall  f1-score   support

           0       0.95      0.94      0.95      3628
           1       0.17      0.20      0.19       227

    accuracy                           0.90      3855
   macro avg       0.56      0.57      0.57      3855
weighted avg       0.90      0.90      0.90      3855


RandomForestClassifier - No Balancing
balanced_accuracy:  0.5
train_accuracy: 1.0
test_accuracy: 0.9411154345006485

              precision  

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier - No Balancing
balanced_accuracy:  0.5038540184274051
train_accuracy: 1.0
test_accuracy: 0.9405966277561608

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.33      0.01      0.02       227

    accuracy                           0.94      3855
   macro avg       0.64      0.50      0.49      3855
weighted avg       0.91      0.94      0.91      3855


GradientBoostingClassifier - No Balancing
balanced_accuracy:  0.5104619479428235
train_accuracy: 0.9430177258971033
test_accuracy: 0.9413748378728923

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.56      0.02      0.04       227

    accuracy                           0.94      3855
   macro avg       0.75      0.51      0.51      3855
weighted avg       0.92      0.94      0.92      3855






AdaBoostClassifier - No Balancing
balanced_accuracy:  0.5030246880600713
train_accuracy: 0.935408560311284
test_accuracy: 0.9351491569390402

              precision    recall  f1-score   support

           0       0.94      0.99      0.97      3628
           1       0.10      0.01      0.02       227

    accuracy                           0.94      3855
   macro avg       0.52      0.50      0.49      3855
weighted avg       0.89      0.94      0.91      3855




## Testing the models with SMOTE Oversampling

In [192]:

for Model in Models :

    model_smote = Model().fit(X_train_smote, y_train_smote)
    y_pred_smote = model_smote.predict(X_test_encoded)
    print(f"{str(Model.__name__)} - SMOTE Oversampling")
    print("==========================================================")
    print( "balanced_accuracy: ", balanced_accuracy_score(y_test_flattened, y_pred_smote))
    print( "train_accuracy:", model_smote.score(X_train_smote, y_train_smote))
    print( "test_accuracy:", model_smote.score(X_test_encoded, y_test_flattened))
    print()    
    print(classification_report(y_test_flattened, y_pred_smote, labels=[0,1]))
    print()    


SVC - SMOTE Oversampling
balanced_accuracy:  0.7494129118117043
train_accuracy: 0.8254209218879381
test_accuracy: 0.6915693904020752

              precision    recall  f1-score   support

           0       0.98      0.68      0.81      3628
           1       0.14      0.81      0.24       227

    accuracy                           0.69      3855
   macro avg       0.56      0.75      0.52      3855
weighted avg       0.93      0.69      0.77      3855


KNeighborsClassifier - SMOTE Oversampling
balanced_accuracy:  0.6343096280034388
train_accuracy: 0.8736774312264238
test_accuracy: 0.6536964980544747

              precision    recall  f1-score   support

           0       0.96      0.66      0.78      3628
           1       0.10      0.61      0.17       227

    accuracy                           0.65      3855
   macro avg       0.53      0.63      0.48      3855
weighted avg       0.91      0.65      0.75      3855


DecisionTreeClassifier - SMOTE Oversampling
balanced_accura



AdaBoostClassifier - SMOTE Oversampling
balanced_accuracy:  0.6605531378558349
train_accuracy: 0.9512834667402705
test_accuracy: 0.7963683527885862

              precision    recall  f1-score   support

           0       0.96      0.81      0.88      3628
           1       0.15      0.51      0.23       227

    accuracy                           0.80      3855
   macro avg       0.55      0.66      0.55      3855
weighted avg       0.92      0.80      0.84      3855




## Testing the models with Tomek Links Undersampling.

In [193]:

for Model in Models :

    model_tomek = Model().fit(X_train_tomek, y_train_tomek)
    y_pred_tomek = model_tomek.predict(X_test_encoded)
    print(f"{str(Model.__name__)} - Tomek Undersampling")
    print("==========================================================")
    print( "balanced_accuracy: ", balanced_accuracy_score(y_test_flattened, y_pred_tomek))
    print( "train_accuracy:", model_tomek.score(X_train_tomek, y_train_tomek))
    print( "test_accuracy:", model_tomek.score(X_test_encoded, y_test_flattened))
    print()    
    print(classification_report(y_test_flattened, y_pred_tomek, labels=[0,1]))
    print()    


SVC - Tomek Undersampling
balanced_accuracy:  0.5
train_accuracy: 0.9384234274086526
test_accuracy: 0.9411154345006485

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.00      0.00      0.00       227

    accuracy                           0.94      3855
   macro avg       0.47      0.50      0.48      3855
weighted avg       0.89      0.94      0.91      3855


KNeighborsClassifier - Tomek Undersampling
balanced_accuracy:  0.5024758486368869


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


train_accuracy: 0.9417853667168008
test_accuracy: 0.9380025940337224

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.12      0.01      0.02       227

    accuracy                           0.94      3855
   macro avg       0.53      0.50      0.49      3855
weighted avg       0.89      0.94      0.91      3855


DecisionTreeClassifier - Tomek Undersampling
balanced_accuracy:  0.572104386344098
train_accuracy: 1.0
test_accuracy: 0.8980544747081712

              precision    recall  f1-score   support

           0       0.95      0.94      0.95      3628
           1       0.18      0.20      0.19       227

    accuracy                           0.90      3855
   macro avg       0.56      0.57      0.57      3855
weighted avg       0.90      0.90      0.90      3855


RandomForestClassifier - Tomek Undersampling
balanced_accuracy:  0.5
train_accuracy: 1.0
test_accuracy: 0.9411154345006485

            

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


ExtraTreesClassifier - Tomek Undersampling
balanced_accuracy:  0.5060566615992113
train_accuracy: 1.0
test_accuracy: 0.9408560311284047

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.43      0.01      0.03       227

    accuracy                           0.94      3855
   macro avg       0.69      0.51      0.50      3855
weighted avg       0.91      0.94      0.91      3855


GradientBoostingClassifier - Tomek Undersampling
balanced_accuracy:  0.5125267741355779
train_accuracy: 0.9417853667168008
test_accuracy: 0.9413748378728923

              precision    recall  f1-score   support

           0       0.94      1.00      0.97      3628
           1       0.55      0.03      0.05       227

    accuracy                           0.94      3855
   macro avg       0.74      0.51      0.51      3855
weighted avg       0.92      0.94      0.92      3855






AdaBoostClassifier - Tomek Undersampling
balanced_accuracy:  0.5028868710810194
train_accuracy: 0.9347075997522781
test_accuracy: 0.9348897535667964

              precision    recall  f1-score   support

           0       0.94      0.99      0.97      3628
           1       0.10      0.01      0.02       227

    accuracy                           0.93      3855
   macro avg       0.52      0.50      0.49      3855
weighted avg       0.89      0.93      0.91      3855




## Testing the models with SMOTETomek Hybrid Balancing

In [194]:

for Model in Models :

    model_smote_tomek = Model().fit(X_train_smote_tomek, y_train_smote_tomek)
    y_pred_smote_tomek = model_smote_tomek.predict(X_test_encoded)

    print(f"{str(Model.__name__)} - SMOTETomek Hybrid Balancing")
    print("==========================================================")
    print( "balanced_accuracy: ", balanced_accuracy_score(y_test_flattened, y_pred_smote_tomek))
    print( "train_accuracy:", model_smote_tomek.score(X_train_smote_tomek, y_train_smote_tomek))
    print( "test_accuracy:", model_smote_tomek.score(X_test_encoded, y_test_flattened))
    print()    
    print(classification_report(y_test_flattened, y_pred_smote_tomek, labels=[0,1]))
    print()    


SVC - SMOTETomek Hybrid Balancing
balanced_accuracy:  0.7462479782795584
train_accuracy: 0.8264329745146748
test_accuracy: 0.6933852140077821

              precision    recall  f1-score   support

           0       0.98      0.69      0.81      3628
           1       0.14      0.81      0.24       227

    accuracy                           0.69      3855
   macro avg       0.56      0.75      0.52      3855
weighted avg       0.93      0.69      0.77      3855


KNeighborsClassifier - SMOTETomek Hybrid Balancing
balanced_accuracy:  0.6220560593329416
train_accuracy: 0.8745974790689116
test_accuracy: 0.650064850843061

              precision    recall  f1-score   support

           0       0.96      0.65      0.78      3628
           1       0.10      0.59      0.17       227

    accuracy                           0.65      3855
   macro avg       0.53      0.62      0.47      3855
weighted avg       0.91      0.65      0.74      3855


DecisionTreeClassifier - SMOTETomek Hybrid



AdaBoostClassifier - SMOTETomek Hybrid Balancing
balanced_accuracy:  0.6256131944882922
train_accuracy: 0.9452571533719754
test_accuracy: 0.835538261997406

              precision    recall  f1-score   support

           0       0.96      0.86      0.91      3628
           1       0.15      0.39      0.22       227

    accuracy                           0.84      3855
   macro avg       0.55      0.63      0.56      3855
weighted avg       0.91      0.84      0.87      3855


