In [36]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

# Load the dataset
data = pd.read_csv("creditcard_fraud_eur.csv")

# Fill NaN values with mean (you can choose a different strategy)
data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)

print(data["Class"].unique())

# Separate features and target variable
X = data.drop("Class", axis=1)  # Replace "target_column_name" with your actual target column name
y = data["Class"]
y.fillna(1, inplace=True)

print(data["Class"].unique())

# Perform label encoding for categorical variables (if any)
label_encoder = LabelEncoder()
categorical_cols = X.columns  # Replace with your actual categorical column names
for col in categorical_cols:
    X[col] = label_encoder.fit_transform(X[col])

# Split the data into training and testing sets
for i in range(1, 10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)

    # Apply SMOTE oversampling to balance the classesss
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Apply Robust Scaling to the features
    scaler = RobustScaler()
    X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)

    # Create and train the Decision Tree Classifier
    classifier = RandomForestClassifier(random_state=(i+10))
    classifier.fit(X_train_resampled_scaled, y_train_resampled)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test_scaled)

    # Calculate evaluation metrics
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)

    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("Confusion Matrix:")
    print(confusion)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True

[0 1]
[0 1]
F1 Score: 0.8484848484848485
Precision Score: 0.8974358974358975
Recall Score: 0.8045977011494253
Confusion Matrix:
[[56867     8]
 [   17    70]]
F1 Score: 0.8484848484848484
Precision Score: 0.8641975308641975
Recall Score: 0.8333333333333334
Confusion Matrix:
[[56867    11]
 [   14    70]]
F1 Score: 0.8421052631578948
Precision Score: 0.8888888888888888
Recall Score: 0.8
Confusion Matrix:
[[56852    10]
 [   20    80]]
F1 Score: 0.8787878787878788
Precision Score: 0.8969072164948454
Recall Score: 0.8613861386138614
Confusion Matrix:
[[56851    10]
 [   14    87]]
F1 Score: 0.883248730964467
Precision Score: 0.9354838709677419
Recall Score: 0.8365384615384616
Confusion Matrix:
[[56852     6]
 [   17    87]]
F1 Score: 0.8571428571428571
Precision Score: 0.8571428571428571
Recall Score: 0.8571428571428571
Confusion Matrix:
[[56850    14]
 [   14    84]]
F1 Score: 0.8542713567839195
Precision Score: 0.8585858585858586
Recall Score: 0.85
Confusion Matrix:
[[56848    14]
 [   