In [38]:
import pandas as pd
data = pd.read_csv(r"credit card.csv")
data.head()
data.dropna(inplace=True)

In [39]:
data["type"] = data["type"].map({"CASH_OUT": 1, "PAYMENT": 2, 
                                 "CASH_IN": 3, "TRANSFER": 4,
                                 "DEBIT": 5})
data["isFraud"] = data["isFraud"].map({0: "No Fraud", 1: "Fraud"})
data.head()

Unnamed: 0,step,type,amount,nameOrig,oldbalanceOrg,newbalanceOrig,nameDest,oldbalanceDest,newbalanceDest,isFraud,isFlaggedFraud
0,1,2,9839.64,C1231006815,170136.0,160296.36,M1979787155,0.0,0.0,No Fraud,0
1,1,2,1864.28,C1666544295,21249.0,19384.72,M2044282225,0.0,0.0,No Fraud,0
2,1,4,181.0,C1305486145,181.0,0.0,C553264065,0.0,0.0,Fraud,0
3,1,1,181.0,C840083671,181.0,0.0,C38997010,21182.0,0.0,Fraud,0
4,1,2,11668.14,C2048537720,41554.0,29885.86,M1230701703,0.0,0.0,No Fraud,0


In [40]:
# splitting the data
from sklearn.model_selection import train_test_split
x = data[["type", "amount", "oldbalanceOrg", "newbalanceOrig"]]
y = data[["isFraud"]]

In [41]:
# training a machine learning model
from sklearn.tree import DecisionTreeClassifier
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.30, random_state=42)
model = DecisionTreeClassifier()
model.fit(x_train, y_train)

In [42]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Calculate accuracy on test set
y_pred = model.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", acc)

Accuracy: 0.9724284199363733


In [43]:
from sklearn.metrics import confusion_matrix

# Compute confusion matrix
cm = confusion_matrix(y_test, y_pred, labels=["No Fraud", "Fraud"])
print("Confusion Matrix:\n", cm)

Confusion Matrix:
 [[594  15]
 [ 11 323]]


In [44]:
from sklearn.metrics import classification_report

# Generate classification report
report = classification_report(y_test, y_pred, target_names=["No Fraud", "Fraud"])
print(report)

              precision    recall  f1-score   support

    No Fraud       0.96      0.97      0.96       334
       Fraud       0.98      0.98      0.98       609

    accuracy                           0.97       943
   macro avg       0.97      0.97      0.97       943
weighted avg       0.97      0.97      0.97       943



In [45]:
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

Precision: 0.9725
Recall:    0.9724
F1 Score:  0.9725


In [46]:
import joblib

# Save the model to a file
joblib.dump(model, 'decision_tree_model.pkl')

['decision_tree_model.pkl']