# Machine Learning

## Imports

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import time

## Code

### Prepar [Credit card dataset](https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud)

In [4]:
data = pd.read_csv('../data/creditcard.csv')

In [5]:
# Separar características e rótulo
X = data.drop('Class', axis=1)
y = data['Class']

In [6]:

# Balanceamento das classes usando SMOTE
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X, y)

### Dividing the dataset

In [7]:

# Divisão dos dados em conjuntos de treino e teste
X_train, X_test, y_train, y_test = train_test_split(X_res, y_res, test_size=0.3, random_state=42)

# Normalizar os dados
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Training and predict

In [15]:
# Dicionário para armazenar os resultados
results = {}

# Avaliação dos modelos
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(),
    #'Random Forest': RandomForestClassifier(),
    #'Gradient Boosting': GradientBoostingClassifier(),
    #'SVM': SVC()
}

In [None]:

# Função para treinar e avaliar os modelos
def evaluate_model(model, X_train, X_test, y_train, y_test):
    start_time = time.time()
    model.fit(X_train, y_train)
    end_time = time.time()

    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    execution_time = end_time - start_time

    return accuracy, precision, recall, f1, execution_time

In [16]:
for name, model in models.items():
    accuracy, precision, recall, f1, exec_time = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Execution Time': exec_time
    }

# Exibir os resultados
results_df = pd.DataFrame(results).T
results_df

Unnamed: 0,Accuracy,Precision,Recall,F1 Score,Execution Time
Logistic Regression,0.980485,0.991089,0.969757,0.980307,2.262498
Decision Tree,0.998107,0.997243,0.998982,0.998111,46.811181
