In [1]:
from data_handler import DataHandler
from models import RandomForestModel, LogisticRegressionModel, KNNModel, SVMModel, BaggingModel, AdaBoostModel, XGBoostModel
from trainer import Trainer
import config
import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# Build the data handler from the two paths defined in config (DATA_PATH and
# DATA_PATH_TEST), then run its pipeline in order: load -> split -> preprocess.
# Every model cell below reads data_handler.X_train / y_train / X_test / y_test,
# so this cell must run before any of them.
data_handler = DataHandler(config.DATA_PATH, config.DATA_PATH_TEST)
data_handler.load_data()
# NOTE(review): splitting before preprocessing presumably keeps the test split
# out of any fitted transforms; confirm in data_handler.py.
data_handler.split_data()
data_handler.preprocess_data()

## Random Forest Model

In [4]:
# Random forest: hyper-parameters are taken from config.MODEL_PARAMS and
# unpacked as keyword arguments into the project's RandomForestModel wrapper.
rand_f_model = RandomForestModel(**config.MODEL_PARAMS["random_forest"])
rand_f_trainer = Trainer(rand_f_model)
# Fit on the training split exposed by data_handler.
rand_f_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [5]:
# Score the trained random forest on the test split. The value returned by
# evaluate() is the cell output (a dict of precision / recall / f1 / accuracy /
# roc_auc in the recorded run).
rand_f_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.8227604800490858,
 'recall': 0.8241999999999999,
 'f1': 0.8200326046256479,
 'accuracy': 0.8242,
 'roc_auc': 0.9023333333333332}

#### Evaluating the model on the train data

In [6]:
# Same metrics on the training split, to compare against the test scores and
# gauge how much the random forest overfits.
rand_f_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 0.8613971446019754,
 'recall': 0.86065,
 'f1': 0.8583732400619779,
 'accuracy': 0.86065,
 'roc_auc': 0.9225833333333332}

## Logistic Regression Model

In [7]:
# Logistic regression: hyper-parameters are taken from config.MODEL_PARAMS and
# unpacked as keyword arguments into the project's LogisticRegressionModel wrapper.
log_reg_model = LogisticRegressionModel(**config.MODEL_PARAMS["logistic_regression"])
log_reg_trainer = Trainer(log_reg_model)
# Fit on the training split exposed by data_handler.
log_reg_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [8]:
# Score the trained logistic regression on the test split; the returned
# metrics dict is the cell output.
log_reg_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.8570096053780674,
 'recall': 0.8584999999999999,
 'f1': 0.8576099108558919,
 'accuracy': 0.8585,
 'roc_auc': 0.9213888888888888}

#### Evaluating the model on the train data

In [9]:
# Same metrics on the training split, for a train-vs-test comparison.
log_reg_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 0.8648031893880063,
 'recall': 0.8658333333333333,
 'f1': 0.8650701022548735,
 'accuracy': 0.8658333333333333,
 'roc_auc': 0.9254629629629629}

## KNN Model

In [10]:
# k-nearest neighbours: hyper-parameters are taken from config.MODEL_PARAMS and
# unpacked as keyword arguments into the project's KNNModel wrapper.
knn_model = KNNModel(**config.MODEL_PARAMS["knn"])
knn_trainer = Trainer(knn_model)
# Fit on the training split exposed by data_handler.
knn_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [11]:
# Score the trained KNN model on the test split; the returned metrics dict is
# the cell output.
knn_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.8710089050994853,
 'recall': 0.8696999999999999,
 'f1': 0.8697556138054349,
 'accuracy': 0.8697,
 'roc_auc': 0.9276111111111109}

#### Evaluating the model on the train data

In [12]:
# Same metrics on the training split.
# NOTE(review): the recorded run shows 1.0 on every train metric versus ~0.87
# on test. Presumably each training point is matched to itself (e.g.
# n_neighbors=1 or distance weighting); confirm against
# config.MODEL_PARAMS['knn'].
knn_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'accuracy': 1.0, 'roc_auc': 1.0}

## SVM Model

In [32]:
# Support vector machine: hyper-parameters are taken from config.MODEL_PARAMS
# and unpacked as keyword arguments into the project's SVMModel wrapper.
svm_model = SVMModel(**config.MODEL_PARAMS["svm"])
svm_trainer = Trainer(svm_model)
# Fit on the training split exposed by data_handler.
svm_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [33]:
# Score the trained SVM on the test split; the returned metrics dict is the
# cell output.
svm_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.8726269840201184,
 'recall': 0.8576,
 'f1': 0.8595499950477622,
 'accuracy': 0.8576,
 'roc_auc': 0.9208888888888888}

#### Evaluating the model on the train data

In [34]:
# Same metrics on the training split.
# NOTE(review): the recorded run shows ~0.9999 train accuracy versus ~0.858 on
# test, a gap that suggests the SVM is overfitting; the settings in
# config.MODEL_PARAMS['svm'] may need more regularisation. Confirm.
svm_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 0.9999666722212964,
 'recall': 0.9999666666666667,
 'f1': 0.999966665277199,
 'accuracy': 0.9999666666666667,
 'roc_auc': 0.9999814814814816}

## Bagging Model

In [16]:
# Bagging ensemble: hyper-parameters are taken from config.MODEL_PARAMS and
# unpacked as keyword arguments into the project's BaggingModel wrapper.
bagging_model = BaggingModel(**config.MODEL_PARAMS["bagging"])
bagging_trainer = Trainer(bagging_model)
# Fit on the training split exposed by data_handler.
bagging_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [17]:
# Score the trained bagging ensemble on the test split; the returned metrics
# dict is the cell output.
bagging_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.8707589105057192,
 'recall': 0.8695,
 'f1': 0.8695077848638734,
 'accuracy': 0.8695,
 'roc_auc': 0.9274999999999999}

#### Evaluating the model on the train data

In [18]:
# Same metrics on the training split.
# NOTE(review): the recorded run shows ~0.994 train accuracy versus ~0.870 on
# test, so the ensemble fits the training data far more closely than it
# generalises.
bagging_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 0.9937876915574944,
 'recall': 0.9937833333333332,
 'f1': 0.993780478722709,
 'accuracy': 0.9937833333333334,
 'roc_auc': 0.9965462962962963}

## AdaBoost Model

In [19]:
# AdaBoost: hyper-parameters are taken from config.MODEL_PARAMS and unpacked as
# keyword arguments into the project's AdaBoostModel wrapper.
ada_boost_model = AdaBoostModel(**config.MODEL_PARAMS["adaboost"])
ada_boost_trainer = Trainer(ada_boost_model)
# Fit on the training split exposed by data_handler.
ada_boost_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [25]:
# Score the trained AdaBoost model on the test split; the returned metrics
# dict is the cell output.
ada_boost_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.6046877847377514,
 'recall': 0.5828,
 'f1': 0.5541333917987312,
 'accuracy': 0.5828,
 'roc_auc': 0.7682222222222221}

#### Evaluating the model on the train data

In [26]:
# Same metrics on the training split.
# NOTE(review): in the recorded run both train and test accuracy sit at ~0.58,
# far below every other model in this notebook (0.82-0.89 on test). That
# pattern looks like underfitting rather than overfitting; the settings in
# config.MODEL_PARAMS['adaboost'] probably need revisiting. Confirm.
ada_boost_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 0.6022987020995517,
 'recall': 0.5805333333333333,
 'f1': 0.5545004110732009,
 'accuracy': 0.5805333333333333,
 'roc_auc': 0.766962962962963}

## XGBoost Model

In [28]:
# XGBoost: hyper-parameters are taken from config.MODEL_PARAMS and unpacked as
# keyword arguments into the project's XGBoostModel wrapper.
xgboost_model = XGBoostModel(**config.MODEL_PARAMS["xgboost"])
xgboost_trainer = Trainer(xgboost_model)
# Fit on the training split exposed by data_handler.
xgboost_trainer.train(
    data_handler.X_train,
    data_handler.y_train,
)

#### Evaluating the model on the test data

In [29]:
# Score the trained XGBoost model on the test split; the returned metrics dict
# is the cell output.
xgboost_trainer.evaluate(
    data_handler.X_test,
    data_handler.y_test,
)

{'precision': 0.8843190686844322,
 'recall': 0.8850000000000001,
 'f1': 0.8843818201671407,
 'accuracy': 0.885,
 'roc_auc': 0.9361111111111112}

#### Evaluating the model on the train data

In [30]:
# Same metrics on the training split.
# NOTE(review): the recorded run shows 1.0 on every train metric versus ~0.885
# on test, i.e. the booster fits the training data perfectly. Consider whether
# the settings in config.MODEL_PARAMS['xgboost'] need more regularisation.
xgboost_trainer.evaluate(
    data_handler.X_train,
    data_handler.y_train,
)

{'precision': 1.0, 'recall': 1.0, 'f1': 1.0, 'accuracy': 1.0, 'roc_auc': 1.0}