In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [3]:
# Cargar el dataset de tarjetas de crédito
data = pd.read_csv('/content/creditcard.csv')

# Explorar los primeros datos
print(data.head())


   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [4]:
# Tomar una muestra del dataset (10,000 filas)
data_sampled = data.sample(n=10000, random_state=42)

# Revisar el balance de clases
print(data_sampled['Class'].value_counts())


Class
0    9984
1      16
Name: count, dtype: int64


In [5]:
# Separar transacciones fraudulentas y no fraudulentas
fraud = data_sampled[data_sampled['Class'] == 1]
non_fraud = data_sampled[data_sampled['Class'] == 0].sample(n=len(fraud), random_state=42)

# Combinar de nuevo las transacciones en un dataset balanceado
balanced_data = pd.concat([fraud, non_fraud])

# Revisar el nuevo balance de clases
print(balanced_data['Class'].value_counts())


Class
1    16
0    16
Name: count, dtype: int64


In [6]:
# Separar las características y la etiqueta
X = balanced_data.drop('Class', axis=1)  # Eliminar la columna 'Class'
y = balanced_data['Class']  # La columna 'Class' es nuestra etiqueta

# Dividir en conjunto de entrenamiento (70%) y prueba (30%)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [7]:
# Escalar los datos
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [8]:
# Probar con diferentes kernels
kernels = ['linear', 'rbf', 'poly']
for kernel in kernels:
    print(f"Entrenando SVM con kernel: {kernel}")

    # Ajustar hiperparámetros para rbf
    if kernel == 'rbf':
        model = SVC(kernel=kernel, random_state=42, gamma=0.1, C=1)
    else:
        model = SVC(kernel=kernel, random_state=42)

    # Entrenar el modelo
    model.fit(X_train_scaled, y_train)

    # Hacer predicciones
    y_pred = model.predict(X_test_scaled)

    # Calcular las métricas (con zero_division para manejar clases no predichas)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, zero_division=1)
    recall = recall_score(y_test, y_pred, zero_division=1)
    f1 = f1_score(y_test, y_pred, zero_division=1)

    # Mostrar los resultados
    print(f"Kernel: {kernel}")
    print(f"Exactitud: {accuracy}")
    print(f"Precisión: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}\n")


Entrenando SVM con kernel: linear
Kernel: linear
Exactitud: 0.7
Precisión: 0.75
Recall: 0.6
F1-Score: 0.6666666666666665

Entrenando SVM con kernel: rbf
Kernel: rbf
Exactitud: 0.7
Precisión: 0.6666666666666666
Recall: 0.8
F1-Score: 0.7272727272727272

Entrenando SVM con kernel: poly
Kernel: poly
Exactitud: 0.6
Precisión: 1.0
Recall: 0.2
F1-Score: 0.33333333333333337

