In [32]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from imblearn.over_sampling import SMOTE
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

# Load the dataset
data = pd.read_csv("creditcard_fraud_eur.csv")

# Fill NaN values with mean (you can choose a different strategy)
data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)

print(data["Class"].unique())

# Separate features and target variable
X = data.drop("Class", axis=1)  # Replace "target_column_name" with your actual target column name
y = data["Class"]
y.fillna(1, inplace=True)

print(data["Class"].unique())

# Perform label encoding for categorical variables (if any)
label_encoder = LabelEncoder()
categorical_cols = X.columns  # Replace with your actual categorical column names
for col in categorical_cols:
    X[col] = label_encoder.fit_transform(X[col])

# Split the data into training and testing sets
for i in range(1, 10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)

    # Apply SMOTE oversampling to balance the classes
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Apply Robust Scaling to the features
    scaler = RobustScaler()
    X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)

    # Create and train the Decision Tree Classifier
    classifier = DecisionTreeClassifier(random_state=(i+10))
    classifier.fit(X_train_resampled_scaled, y_train_resampled)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test_scaled)

    # Calculate evaluation metrics
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)

    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("Confusion Matrix:")
    print(confusion)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True

[0 1]
[0 1]
F1 Score: 0.4421052631578947
Precision Score: 0.3181818181818182
Recall Score: 0.7241379310344828
Confusion Matrix:
[[56740   135]
 [   24    63]]
F1 Score: 0.5116279069767441
Precision Score: 0.3793103448275862
Recall Score: 0.7857142857142857
Confusion Matrix:
[[56770   108]
 [   18    66]]
F1 Score: 0.5179856115107915
Precision Score: 0.4044943820224719
Recall Score: 0.72
Confusion Matrix:
[[56756   106]
 [   28    72]]
F1 Score: 0.5206349206349207
Precision Score: 0.38317757009345793
Recall Score: 0.8118811881188119
Confusion Matrix:
[[56729   132]
 [   19    82]]
F1 Score: 0.5633802816901408
Precision Score: 0.4444444444444444
Recall Score: 0.7692307692307693
Confusion Matrix:
[[56758   100]
 [   24    80]]
F1 Score: 0.5853658536585367
Precision Score: 0.4444444444444444
Recall Score: 0.8571428571428571
Confusion Matrix:
[[56759   105]
 [   14    84]]
F1 Score: 0.5080385852090032
Precision Score: 0.3744075829383886
Recall Score: 0.79
Confusion Matrix:
[[56730   132]
 [