In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np
from utils import print_score

In [2]:
data = pd.read_csv("creditcard.csv")
data.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [10]:
X = data.drop('Class', axis=1)
y = data['Class']

X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.20, random_state=42)

In [11]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.fit_transform(X_val)
X_test_scaled = scaler.transform(X_test)

In [12]:
model = LogisticRegression(solver='liblinear', random_state=42)
model.fit(X_train_scaled, y_train)

print_score(model, X_train, y_train, X_val, y_val, train=True)
print_score(model, X_train, y_train, X_val, y_val, train=False)



Train Result:
Accuracy Score: 99.79%
_______________________________________________
CLASSIFICATION REPORT:
                       0      1  accuracy      macro avg   weighted avg
precision       0.998189    0.0  0.997888       0.499095       0.996382
recall          0.999698    0.0  0.997888       0.499849       0.997888
f1-score        0.998943    0.0  0.997888       0.499471       0.997134
support    181946.000000  330.0  0.997888  182276.000000  182276.000000
_______________________________________________
Confusion Matrix: 
 [[181891     55]
 [   330      0]]





Validation Result:
Accuracy Score: 99.82%
_______________________________________________
CLASSIFICATION REPORT:
                      0          1  accuracy     macro avg  weighted avg
precision      0.998617   0.045455  0.998157      0.522036      0.997278
recall         0.999539   0.015625  0.998157      0.507582      0.998157
f1-score       0.999077   0.023256  0.998157      0.511167      0.997707
support    45505.000000  64.000000  0.998157  45569.000000  45569.000000
_______________________________________________
Confusion Matrix: 
 [[45484    21]
 [   63     1]]



In [17]:
from imblearn.under_sampling import RandomUnderSampler
print("Distribución original:", np.bincount(y_train))

undersampler = RandomUnderSampler(sampling_strategy=1, random_state=42)  # 1:1 ratio
X_under, y_under = undersampler.fit_resample(X_train_scaled, y_train)

print("Distribución después de undersampling:", np.bincount(y_under))

model_under = LogisticRegression(solver='liblinear', random_state=42)
model_under.fit(X_under, y_under)

print_score(model_under, X_under, y_under, X_val, y_val, train=True)
print_score(model_under, X_train, y_train, X_val, y_val, train=False)

Distribución original: [181946    330]
Distribución después de undersampling: [330 330]
Train Result:
Accuracy Score: 95.00%
_______________________________________________
CLASSIFICATION REPORT:
                    0           1  accuracy   macro avg  weighted avg
precision    0.920680    0.983713      0.95    0.952197      0.952197
recall       0.984848    0.915152      0.95    0.950000      0.950000
f1-score     0.951684    0.948195      0.95    0.949939      0.949939
support    330.000000  330.000000      0.95  660.000000    660.000000
_______________________________________________
Confusion Matrix: 
 [[325   5]
 [ 28 302]]

Validation Result:
Accuracy Score: 99.84%
_______________________________________________
CLASSIFICATION REPORT:
                      0     1  accuracy     macro avg  weighted avg
precision      0.998595   0.0  0.998376      0.499298      0.997193
recall         0.999780   0.0  0.998376      0.499890      0.998376
f1-score       0.999187   0.0  0.998376      



In [16]:
from sklearn.svm import SVC

model = SVC(kernel='rbf', random_state=42)
model.fit(X_train_scaled, y_train)

print_score(model, X_train_scaled, y_train, X_val, y_val, train=True)
print_score(model, X_train, y_train, X_val, y_val, train=False)

Train Result:
Accuracy Score: 99.96%
_______________________________________________
CLASSIFICATION REPORT:
                       0           1  accuracy      macro avg   weighted avg
precision       0.999670    0.978261  0.999638       0.988966       0.999632
recall          0.999967    0.818182  0.999638       0.909074       0.999638
f1-score        0.999819    0.891089  0.999638       0.945454       0.999622
support    181946.000000  330.000000  0.999638  182276.000000  182276.000000
_______________________________________________
Confusion Matrix: 
 [[181940      6]
 [    60    270]]





Validation Result:
Accuracy Score: 99.86%
_______________________________________________
CLASSIFICATION REPORT:
                      0     1  accuracy     macro avg  weighted avg
precision      0.998596   0.0  0.998596      0.499298      0.997193
recall         1.000000   0.0  0.998596      0.500000      0.998596
f1-score       0.999297   0.0  0.998596      0.499649      0.997894
support    45505.000000  64.0  0.998596  45569.000000  45569.000000
_______________________________________________
Confusion Matrix: 
 [[45505     0]
 [   64     0]]



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
