# SFA - BDS Experiment

This experiment uses a balanced dataset, obtained with the random undersampling technique, containing 1,652 records (826 positive cases and 826 negative cases).

In [None]:
import os
import sys
from dotenv import load_dotenv

# Load the .env file; override=True lets its values replace variables that
# are already set in the process environment.
load_dotenv(override=True)
print('ENV variables loaded successfully!')

# Make the directory three levels above the working directory importable so
# the `lib.*` imports in later cells resolve.
module_path = os.path.abspath('../../..')
if module_path not in sys.path:
    sys.path.append(module_path)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from lib.env_var_keys import EnvVarKeys

# Name of the environment variable that stores the CSV location.
dataset_path_env_key = EnvVarKeys.PRE_PROCESSED_DATASET_PATH_KEY.value
pre_processed_dataset_path = os.getenv(dataset_path_env_key)

# Fail early with an actionable message: os.getenv returns None for an unset
# variable, and passing that to pd.read_csv produces an unrelated-looking error.
if not pre_processed_dataset_path:
    raise RuntimeError(
        f'Environment variable {dataset_path_env_key!r} is not set; '
        'cannot locate the pre-processed dataset.'
    )

# low_memory=False makes pandas read the file in one pass rather than in
# chunks, which avoids mixed-dtype inference per chunk.
df = pd.read_csv(pre_processed_dataset_path, sep=',', low_memory=False)

print(f'Pre-processed dataset shape: {df.shape}')

# Seed shared by the undersampler and the train/test split in later cells.
RANDOM_STATE = 28

In [None]:
from lib.dataframe_helper import vdrl_count

# Presumably reports how many records fall into each VDRL_RESULT class, here
# on the data as loaded (before any column dropping or undersampling) —
# confirm the exact output in lib.dataframe_helper.
vdrl_count(df)

In [None]:
from sklearn.model_selection import train_test_split
from imblearn.under_sampling import RandomUnderSampler

from lib.dataframe_helper import fill_nan

# Drop the 'empty_count' and 'empty_columns' columns so they are not used as
# features.
df = df.drop(columns=['empty_count', 'empty_columns'])
print(f'Shape: {df.shape}')

vdrl_count(df)

df = fill_nan(df)

# Separate the predictors from the VDRL_RESULT target. The feature frame is
# built once and reused for both the matrix and the column names.
features_df = df.drop(columns='VDRL_RESULT')
feature_names = features_df.columns.to_list()
X = np.array(features_df)
y = np.array(df['VDRL_RESULT'])

# Randomly undersample every class except the minority one.
undersampler = RandomUnderSampler(sampling_strategy='not minority', random_state=RANDOM_STATE)
X, y = undersampler.fit_resample(X, y)

# The "+ 1" adds the target column back so the printed shape is comparable
# with the DataFrame shapes printed above.
n_rows, n_features = X.shape
print(f'\nShape after undersampling: ({n_rows}, {n_features + 1})')

# Hold out 20% of the balanced data for testing.
# NOTE(review): the split is not stratified, so the per-class counts printed
# below may deviate slightly from an exact 50/50 balance.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# np.unique returns the labels sorted, so index 0 / 1 below line up with the
# "0 =" / "1 =" captions (assumes the target holds exactly the labels 0 and 1).
train_unique, train_counts = np.unique(y_train, return_counts=True)
test_unique, test_counts = np.unique(y_test, return_counts=True)
print(
    f'Shape after splitting: train={X_train.shape} [0 = {train_counts[0]}, 1 = {train_counts[1]}]'
    f' | test={X_test.shape} [0 = {test_counts[0]}, 1 = {test_counts[1]}]'
)

## Telegram bot:

In [None]:
import telebot

from lib.classifier_helper import ClassifierHelper

# Names of the environment variables that hold Bot A's token and chat id.
telegram_bot_token_key = EnvVarKeys.TELEGRAM_TOKEN_KEY_A.value
telegram_chat_id_key = EnvVarKeys.TELEGRAM_CHAT_ID_KEY_A.value

# NOTE(review): the two positional True flags are defined by ClassifierHelper;
# check its signature in lib.classifier_helper for their meaning.
clf_helper = ClassifierHelper(
    X_train,
    X_test,
    y_train,
    y_test,
    feature_names,
    True,
    True,
    telegram_bot_token_key,
    telegram_chat_id_key,
)

# The bot itself needs the token value, not the variable name.
telegram_token = os.getenv(telegram_bot_token_key)
bot = telebot.TeleBot(telegram_token)

# Identification shown by the /info command.
exp_num = '4.2'
server_name = ''
server_id = f'Bot A - {server_name} - {exp_num}'

# Name of the classifier currently executing; the string 'None' means idle.
running_clf = 'None'


@bot.message_handler(commands=['info'])
def print_info(message):
    """Reply to /info with the server identifier and the running classifier.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    status_text = f'Server - {server_id} | Executando {running_clf}'
    bot.reply_to(message, status_text)


@bot.message_handler(commands=['random_forest'])
def run_random_forest(message):
    """Handle /random_forest: run the Random Forest experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_random_forest()`` executes, and marks the server idle
    again afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting Random Forest...')
    running_clf = 'Random Forest'
    try:
        clf_helper.exec_random_forest()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['knn'])
def run_knn(message):
    """Handle /knn: run the KNN experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_knn()`` executes, and marks the server idle again
    afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting KNN...')
    running_clf = 'KNN'
    try:
        clf_helper.exec_knn()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['decision_tree'])
def run_decision_tree(message):
    """Handle /decision_tree: run the Decision Tree experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_decision_tree()`` executes, and marks the server idle
    again afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting Decision Tree...')
    running_clf = 'Decision Tree'
    try:
        clf_helper.exec_decision_tree()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['ada'])
def run_ada(message):
    """Handle /ada: run the AdaBoost experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_ada_boost()`` executes, and marks the server idle again
    afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting AdaBoost...')
    running_clf = 'AdaBoost'
    try:
        clf_helper.exec_ada_boost()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['gradient'])
def run_gradient(message):
    """Handle /gradient: run the Gradient Boosting experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_gradient_boosting()`` executes, and marks the server
    idle again afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting Gradient Boosting...')
    running_clf = 'Gradient Boosting'
    try:
        clf_helper.exec_gradient_boosting()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['svm'])
def run_svm(message):
    """Handle /svm: run the SVM experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_svm()`` executes, and marks the server idle again
    afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    # The acknowledgement previously read 'SMV'; it now matches the /svm
    # command and the name stored in running_clf.
    bot.reply_to(message, 'Starting SVM...')
    running_clf = 'SVM'
    try:
        clf_helper.exec_svm()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['mlp'])
def run_mlp(message):
    """Handle /mlp: run the MLP experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_mlp()`` executes, and marks the server idle again
    afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting MLP...')
    running_clf = 'MLP'
    try:
        clf_helper.exec_mlp()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['logistic'])
def run_logistic(message):
    """Handle /logistic: run the Logistic Regression experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_logistic_regression()`` executes, and marks the server
    idle again afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting Logistic Regression...')
    running_clf = 'Logistic Regression'
    try:
        clf_helper.exec_logistic_regression()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['xgboost'])
def run_xgboost(message):
    """Handle /xgboost: run the XGBoost experiment.

    Acknowledges the command, records the classifier name in the module-level
    ``running_clf`` (the value reported by /info) while
    ``clf_helper.exec_xgboost()`` executes, and marks the server idle again
    afterwards.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    global running_clf
    bot.reply_to(message, 'Starting XGBoost...')
    running_clf = 'XGBoost'
    try:
        clf_helper.exec_xgboost()
    finally:
        # Reset even when the run raises; otherwise /info would keep
        # reporting a classifier that is no longer running.
        running_clf = 'None'


@bot.message_handler(commands=['start'])
def send_menu(message):
    """Reply to /start with the list of available classifier commands.

    Args:
        message: Incoming Telegram message that triggered the command.
    """
    # Plain literal (it has no placeholders); the text is sent exactly as
    # laid out here, including the leading spaces on the indented lines.
    bot.reply_to(message, '''
    Hello, which ML technique do you want to execute?

    /random_forest - Random Forest
    /knn - KNN
    /decision_tree - Decision Tree
    /ada - AdaBoost
    /gradient - Gradient Boosting
    /svm - SVM
    /mlp - MLP
    /logistic - Logistic Regression
    /xgboost - XGBoost

To check which ML technique is running, type /info''')


# Start receiving the Telegram commands registered above.
# NOTE(review): infinity_polling() is expected to block this cell until the
# kernel is interrupted, so the cells below will not run while the bot is
# active — confirm against the pyTelegramBotAPI documentation.
bot.infinity_polling()


## Local:

In [None]:
from lib.classifier_helper import ClassifierHelper

# Names of the environment variables that hold the local bot's token and
# chat id. This rebinds `telegram_bot_token_key` and `clf_helper` from the
# Telegram-bot cell above.
telegram_bot_token_key = EnvVarKeys.TELEGRAM_TOKEN_KEY_LOCAL.value
telegram_chat_id_key = EnvVarKeys.TELEGRAM_CHAT_ID_KEY_LOCAL.value

# NOTE(review): the two positional True flags are defined by ClassifierHelper;
# check its signature in lib.classifier_helper for their meaning.
clf_helper = ClassifierHelper(
    X_train,
    X_test,
    y_train,
    y_test,
    feature_names,
    True,
    True,
    telegram_bot_token_key,
    telegram_chat_id_key,
)

# Alternative runs — uncomment the one(s) to execute locally:
# clf_helper.exec_random_forest()
# clf_helper.exec_knn()
# clf_helper.exec_decision_tree()
# clf_helper.exec_ada_boost()
# clf_helper.exec_gradient_boosting()
# clf_helper.exec_svm()
# clf_helper.exec_mlp()
# clf_helper.exec_logistic_regression()
# clf_helper.exec_xgboost()

# Currently selected run.
clf_helper.exec_xgboost_gpu()