In [29]:
pip install catboost

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2


In [30]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, RobustScaler
from imblearn.over_sampling import SMOTE
from catboost import CatBoostClassifier
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

# Load the dataset
data = pd.read_csv("creditcard_fraud_eur.csv")

# Fill NaN values with mean (you can choose a different strategy)
data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)

print(data["Class"].unique())

# Separate features and target variable
X = data.drop("Class", axis=1)  # Replace "target_column_name" with your actual target column name
y = data["Class"]
y.fillna(1, inplace=True)

print(data["Class"].unique())

# Perform label encoding for categorical variables (if any)
label_encoder = LabelEncoder()
categorical_cols = X.columns  # Replace with your actual categorical column names
for col in categorical_cols:
    X[col] = label_encoder.fit_transform(X[col])

# Split the data into training and testing sets
for i in range(1, 10):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=i)

    # Apply SMOTE oversampling to balance the classes
    smote = SMOTE(random_state=42)
    X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

    # Apply Robust Scaling to the features
    scaler = RobustScaler()
    X_train_resampled_scaled = scaler.fit_transform(X_train_resampled)
    X_test_scaled = scaler.transform(X_test)

    # Create and train the Decision Tree Classifier
    model = CatBoostClassifier(iterations=1000, depth=6, learning_rate=0.1, loss_function='Logloss', verbose=False)
    model.fit(X_train_resampled_scaled, y_train_resampled)

    # Make predictions on the test set
    y_pred = model.predict(X_test_scaled)

    # Calculate evaluation metrics
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    confusion = confusion_matrix(y_test, y_pred)

    print("F1 Score:", f1)
    print("Precision Score:", precision)
    print("Recall Score:", recall)
    print("Confusion Matrix:")
    print(confusion)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, data.columns!='Class'].fillna(data.mean(), inplace=True

[ 0.  1. nan]
[0. 1.]
F1 Score: 0.8170731707317074
Precision Score: 0.788235294117647
Recall Score: 0.8481012658227848
Confusion Matrix:
[[39533    18]
 [   12    67]]
F1 Score: 0.7513227513227515
Precision Score: 0.6826923076923077
Recall Score: 0.8352941176470589
Confusion Matrix:
[[39512    33]
 [   14    71]]
F1 Score: 0.7484662576687117
Precision Score: 0.6630434782608695
Recall Score: 0.8591549295774648
Confusion Matrix:
[[39528    31]
 [   10    61]]
F1 Score: 0.7807486631016043
Precision Score: 0.7448979591836735
Recall Score: 0.8202247191011236
Confusion Matrix:
[[39516    25]
 [   16    73]]
F1 Score: 0.8222222222222222
Precision Score: 0.7956989247311828
Recall Score: 0.8505747126436781
Confusion Matrix:
[[39524    19]
 [   13    74]]
F1 Score: 0.7243243243243243
Precision Score: 0.5982142857142857
Recall Score: 0.9178082191780822
Confusion Matrix:
[[39512    45]
 [    6    67]]
F1 Score: 0.7904191616766467
Precision Score: 0.7415730337078652
Recall Score: 0.8461538461538461