In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

# Load the dataset
data = pd.read_csv("creditcard_fraud_eur.csv")

# Fill NaN values with mean (you can choose a different strategy)
data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)

print(data["Class"].unique())

# Separate features and target variable
X = data.drop("Class", axis=1)  # Replace "target_column_name" with your actual target column name
y = data["Class"]
y.fillna(1, inplace=True)

print(data["Class"].unique())

# Perform label encoding for categorical variables (if any)
label_encoder = LabelEncoder()
categorical_cols = X.columns  # Replace with your actual categorical column names
for col in categorical_cols:
    X[col] = label_encoder.fit_transform(X[col])

# Split the data into training and testing sets
for i in range(1, 10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)

    # Apply SMOTE oversampling to balance the classes
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Apply Robust Scaling to the features
    scaler = RobustScaler()
    X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)
    #Intitialise XGBClassifier
    classifier = GradientBoostingClassifier(n_estimators=100, random_state=i)
    # Create and train the Decision Tree Classifier
    classifier.fit(X_train_resampled_scaled, y_train_resampled)

    # Make predictions on the test set
    y_pred = classifier.predict(X_test_scaled)

    # Calculate evaluation metrics
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)

    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("Confusion Matrix:")
    print(confusion)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True

[0 1]
[0 1]
F1 Score: 0.18726591760299624
Precision Score: 0.10504201680672269
Recall Score: 0.8620689655172413
Confusion Matrix:
[[56236   639]
 [   12    75]]
F1 Score: 0.1798561151079137
Precision Score: 0.1
Recall Score: 0.8928571428571429
Confusion Matrix:
[[56203   675]
 [    9    75]]
F1 Score: 0.22276621787025705
Precision Score: 0.12691771269177127
Recall Score: 0.91
Confusion Matrix:
[[56236   626]
 [    9    91]]
F1 Score: 0.21017699115044247
Precision Score: 0.11830635118306351
Recall Score: 0.9405940594059405
Confusion Matrix:
[[56153   708]
 [    6    95]]
F1 Score: 0.2135476463834673
Precision Score: 0.121251629726206
Recall Score: 0.8942307692307693
Confusion Matrix:
[[56184   674]
 [   11    93]]
F1 Score: 0.17821782178217824
Precision Score: 0.09868421052631579
Recall Score: 0.9183673469387755
Confusion Matrix:
[[56042   822]
 [    8    90]]
F1 Score: 0.21822541966426856
Precision Score: 0.12397820163487738
Recall Score: 0.91
Confusion Matrix:
[[56219   643]
 [    9  