# Machine Learning

## Imports

### Python imports

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from imblearn.over_sampling import SMOTE
import time

### Google drive mount

In [4]:
# Mount Google Drive when running inside Google Colab. Outside Colab the
# `google.colab` package cannot be imported, so the mount is skipped instead
# of aborting the notebook.
try:
    from google.colab import drive
except ImportError:
    drive = None
    print('google.colab is not available - skipping Google Drive mount')
else:
    drive.mount('/content/drive')

Mounted at /content/drive


## Code

### Prepare the [Credit card dataset](https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud)

In [5]:
# Candidate locations of the dataset: the Google Drive copy is tried first,
# then the relative local copy.
google_drive_path = '/content/drive/MyDrive/creditcard.csv'
local_path = '../data/creditcard.csv'

# Fall back to the local file when the Drive file is not present. If neither
# file exists, the FileNotFoundError of the local read propagates.
try:
    data = pd.read_csv(google_drive_path)
except FileNotFoundError:
    data = pd.read_csv(local_path)

In [6]:
# Split the frame into the label column ('Class') and the feature columns
y = data['Class']
X = data.drop(columns='Class')

In [7]:
# Balance the classes by oversampling with SMOTE (seeded for repeatability).
# NOTE(review): this resamples the full dataset, i.e. before any train/test
# split; data used for evaluation should not contain SMOTE-generated rows.
smote = SMOTE(random_state=42)
X_res, y_res = smote.fit_resample(X=X, y=y)

In [8]:
# Split the ORIGINAL data first so the test set contains only real rows;
# `stratify=y` keeps the class ratio the same in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Oversample on the training split only. Resampling before the split puts
# synthetic rows in the test set and lets rows interpolated from test samples
# reach training, which inflates every reported metric.
X_train, y_train = SMOTE(random_state=42).fit_resample(X_train, y_train)

# Standardise the features: statistics are fitted on the training split and
# reused unchanged for the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

### Main function

In [10]:
# Train a model and evaluate it on the held-out split
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model
        Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train, y_train
        Training features and labels passed to ``model.fit``.
    X_test, y_test
        Held-out features passed to ``model.predict`` and the labels the
        predictions are compared against.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, execution_time)``.
        ``execution_time`` is the number of seconds spent inside
        ``model.fit`` only; prediction and scoring are not timed.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() differences can.
    fit_started = time.perf_counter()
    model.fit(X_train, y_train)
    execution_time = time.perf_counter() - fit_started

    y_pred = model.predict(X_test)

    # The scorers are called with their default arguments.
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    return accuracy, precision, recall, f1, execution_time

### Test

In [11]:
# Dictionary that accumulates the metrics of every evaluated model
results = {}

# Models to evaluate, keyed by the label used in the results table.
# The tree-based estimators draw random numbers while fitting, so they are
# seeded to make the reported metrics repeatable across runs.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'Gradient Boosting': GradientBoostingClassifier(random_state=42),
    'SVM': SVC()
}

In [None]:
# Evaluate every configured model and store its metrics under the model name
metric_names = ('Accuracy', 'Precision', 'Recall', 'F1 Score', 'Execution Time')
for name, model in models.items():
    scores = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = dict(zip(metric_names, scores))

# Show the results: one row per model, one column per metric
results_df = pd.DataFrame(results).T
results_df

### Separate runs (one model per cell)

In [None]:
# Evaluate Logistic Regression on its own and refresh the results table
models = {'Logistic Regression': LogisticRegression()}

for name, model in models.items():
    scores = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = dict(zip(
        ('Accuracy', 'Precision', 'Recall', 'F1 Score', 'Execution Time'),
        scores,
    ))

# Show the accumulated results, one row per model
results_df = pd.DataFrame(results).T
results_df

In [None]:
# Evaluate the Decision Tree on its own and refresh the results table.
# The estimator is seeded so repeated runs of this cell give the same metrics.
models = {
    'Decision Tree': DecisionTreeClassifier(random_state=42)
}

for name, model in models.items():
    accuracy, precision, recall, f1, exec_time = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Execution Time': exec_time
    }

# Show the accumulated results, one row per model
results_df = pd.DataFrame(results).T
results_df

In [None]:
# Evaluate the Random Forest on its own and refresh the results table.
# The estimator is seeded so repeated runs of this cell give the same metrics.
models = {
    'Random Forest': RandomForestClassifier(random_state=42)
}

for name, model in models.items():
    accuracy, precision, recall, f1, exec_time = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Execution Time': exec_time
    }

# Show the accumulated results, one row per model
results_df = pd.DataFrame(results).T
results_df

In [None]:
# Evaluate Gradient Boosting on its own and refresh the results table.
# The estimator is seeded so repeated runs of this cell give the same metrics.
models = {
    'Gradient Boosting': GradientBoostingClassifier(random_state=42)
}

for name, model in models.items():
    accuracy, precision, recall, f1, exec_time = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Execution Time': exec_time
    }

# Show the accumulated results, one row per model
results_df = pd.DataFrame(results).T
results_df

In [None]:
# Evaluate the SVM on its own and refresh the results table
models = {'SVM': SVC()}

for name, model in models.items():
    scores = evaluate_model(model, X_train, X_test, y_train, y_test)
    results[name] = dict(zip(
        ('Accuracy', 'Precision', 'Recall', 'F1 Score', 'Execution Time'),
        scores,
    ))

# Show the accumulated results, one row per model
results_df = pd.DataFrame(results).T
results_df