# EXPLORATION - NOT MAIN PORTFOLIO
# DATE: 8/2/26

# FINDINGS TO BE ADDED TO MAIN:

In [7]:
import pandas as pd

In [8]:
# Read the raw transactions file into a DataFrame and preview the first rows.
data = pd.read_csv('creditcard.csv')

data.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


we are still using the raw dataset

### Setup

In [12]:
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import recall_score, classification_report, precision_score, f1_score
from sklearn.model_selection import train_test_split
import numpy as np

We use a random forest (RF) as a quick test to determine which features help the final model's performance and which do not.

In [11]:
# Separate the target column from the predictors.
y = data['Class']
X = data.drop(columns='Class')

# Hold out 20% of the rows for evaluation, stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
    stratify=y,
)

Don't forget `stratify`, so that the train and test class distributions match the original — this is especially important for fraud detection, where the class imbalance is extreme.

### Baseline Performance

In [13]:
# Baseline: standardise only the Time and Amount columns; all other columns
# are passed to the model unchanged.

scaler = StandardScaler()
X_train_scaled = X_train.copy()
X_test_scaled = X_test.copy()
# Fit the scaler on the training split only and reuse those statistics for the
# test split. Re-fitting on the test split would scale the two splits with
# different means/variances and leak test-set statistics into the evaluation.
X_train_scaled[['Time', 'Amount']] = scaler.fit_transform(X_train[['Time', 'Amount']])
X_test_scaled[['Time', 'Amount']] = scaler.transform(X_test[['Time', 'Amount']])


# This model object is reused (re-fitted) by the experiment cells below.
model = RandomForestClassifier(n_estimators=100, random_state=1, class_weight='balanced', n_jobs=1)

model.fit(X_train_scaled, y_train)
y_pred = model.predict(X_test_scaled)

print('Baseline Performance')
print(classification_report(y_test, y_pred))

# Keep the headline metrics so the experiment cells can compare against them.
baseline_metrics = {
    'precision' : precision_score(y_test, y_pred),
    'recall' : recall_score(y_test, y_pred),
    'f1' : f1_score(y_test, y_pred)
}

Baseline Performance
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.92      0.85      0.88        98

    accuracy                           1.00     56962
   macro avg       0.96      0.92      0.94     56962
weighted avg       1.00      1.00      1.00     56962



### Experiment 1: Amount - Scaling vs Log Transform

Test whether log-transforming the amount is useful, or whether scaling alone is enough.

In [14]:
# Experiment 1: apply log1p to Amount before standardising Time and Amount.
X_train_v1 = X_train.copy()
X_test_v1 = X_test.copy()

X_train_v1['Amount'] = np.log1p(X_train['Amount'])
X_test_v1['Amount'] = np.log1p(X_test['Amount'])

scaler1 = StandardScaler()
# Fit on the training split only; the test split must be transformed with the
# training statistics so both splits share one scale and nothing leaks from test.
X_train_v1[['Time', 'Amount']] = scaler1.fit_transform(X_train_v1[['Time', 'Amount']])
X_test_v1[['Time', 'Amount']] = scaler1.transform(X_test_v1[['Time', 'Amount']])

# Re-fit the shared `model` object on this feature set.
model.fit(X_train_v1, y_train)
y_pred_v1 = model.predict(X_test_v1)

print('With Log Transformed Amount')
print(classification_report(y_test, y_pred_v1))

log_metrics = {
    'precision' : precision_score(y_test, y_pred_v1),
    'recall' : recall_score(y_test, y_pred_v1),
    'f1' : f1_score(y_test, y_pred_v1)
}

# Compare recall against the baseline cell's `baseline_metrics`.
print("\nCOMPARISON:")
print(f"Baseline Recall: {baseline_metrics['recall']:.4f}")
print(f"Log Amount Recall: {log_metrics['recall']:.4f}")
print(f"Improvement: {log_metrics['recall'] - baseline_metrics['recall']:.4f}")

With Log Transformed Amount
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.92      0.85      0.88        98

    accuracy                           1.00     56962
   macro avg       0.96      0.92      0.94     56962
weighted avg       1.00      1.00      1.00     56962


COMPARISON:
Baseline Recall: 0.8469
Log Amount Recall: 0.8469
Improvement: 0.0000


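Log-transforming the amount produces the same result as just scaling it. This is not surprising: a random forest splits on thresholds, so a monotonic transform such as `log1p` leaves the ordering of the values — and therefore the splits it can learn — largely unchanged. Because of this, we are only going to scale the data without transforming it.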

### Experiment 2: Time-Based Features

Test whether engineering hour-based features helps performance.

In [17]:
#add hour features
X_train_v2 = X_train.copy()
X_test_v2 = X_test.copy()

X_train_v2['hour'] = (X_train_v2['Time']/3600) % 24
X_test_v2['hour'] = (X_test_v2['Time']/3600) % 24

X_train_v2['is_high_risk_hour'] = ((X_train_v2['hour']>=2)&(X_train_v2['hour']>=4)).astype(int)
X_test_v2['is_high_risk_hour'] = ((X_test_v2['hour']>=2)&(X_test_v2['hour']>=4)).astype(int)

#we just scale the data based on exp 1
scaler2 = StandardScaler()
X_train_v2[['Time', 'Amount','hour']] = scaler2.fit_transform(X_train_v2[['Time', 'Amount','hour']])
X_test_v2[['Time', 'Amount','hour']] = scaler2.fit_transform(X_test_v2[['Time', 'Amount','hour']])

model.fit(X_train_v2, y_train)
y_pred_v2 = model.predict(X_test_v2)

print('With Time Features')
print(classification_report(y_test, y_pred_v2))

time_metrics = {
    'precision' : precision_score(y_test, y_pred_v2),
    'recall' : recall_score(y_test, y_pred_v2),
    'f1' : f1_score(y_test, y_pred_v2)
}

print("\nCOMPARISON:")
print(f"Baseline Recall: {baseline_metrics['recall']:.4f}")
print(f"Log Amount Recall: {time_metrics['recall']:.4f}")
print(f"Improvement: {time_metrics['recall'] - baseline_metrics['recall']:.4f}")

With Time Features
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.92      0.85      0.88        98

    accuracy                           1.00     56962
   macro avg       0.96      0.92      0.94     56962
weighted avg       1.00      1.00      1.00     56962


COMPARISON:
Baseline Recall: 0.8469
Log Amount Recall: 0.8469
Improvement: 0.0000


### Experiment 3: Amount-Based Features

Test a micro-transaction flag and amount categories (the cell below only adds the micro-transaction flag).

In [18]:
#add amount features
X_train_v3 = X_train.copy()
X_test_v3 = X_test.copy()

X_train_v3['is_micro'] = (X_train_v3['Amount'] < 10).astype(int)
X_test_v3['is_micro'] = (X_test_v3['Amount'] < 10).astype(int)

X_train_v3[['Time', 'Amount']] = scaler.fit_transform(X_train_v3[['Time', 'Amount']])
X_test_v3[['Time', 'Amount']] = scaler.fit_transform(X_test_v3[['Time', 'Amount']])

model.fit(X_train_v3, y_train)
y_pred_v3 = model.predict(X_test_v3)

print('With Amount Features')
print(classification_report(y_test, y_pred_v3))

amount_metrics = {
    'precision' : precision_score(y_test, y_pred_v3),
    'recall' : recall_score(y_test, y_pred_v3),
    'f1' : f1_score(y_test, y_pred_v3)
}

print("\nCOMPARISON:")
print(f"Baseline Recall: {baseline_metrics['recall']:.4f}")
print(f"Log Amount Recall: {amount_metrics['recall']:.4f}")
print(f"Improvement: {amount_metrics['recall'] - baseline_metrics['recall']:.4f}")

With Amount Features
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     56864
           1       0.93      0.85      0.89        98

    accuracy                           1.00     56962
   macro avg       0.97      0.92      0.94     56962
weighted avg       1.00      1.00      1.00     56962


COMPARISON:
Baseline Recall: 0.8469
Log Amount Recall: 0.8469
Improvement: 0.0000
