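# **Requirement: download `creditcard.csv` from the [Kaggle Credit Card Fraud Detection dataset](https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud) and place it next to this notebook**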

# **Import necessary libraries**

In [21]:
import numpy as np
import pandas as pd
import plotly.figure_factory as ff
from sklearn.ensemble import IsolationForest
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# **Load and Explore the Dataset**

In [22]:
# Load the credit card transactions; 'creditcard.csv' is read from the current
# working directory (replace the path to point at your own copy of the dataset).
data = pd.read_csv("creditcard.csv")

# Explore the dataset: first rows, then column dtypes / non-null counts.
print(data.head())
# DataFrame.info() prints its summary itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
data.info()

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

# **Data Preprocessing**

In [23]:
# No feature scaling is performed in this notebook: every column other than
# 'Class' is passed to the model as-is.

# Separate the feature matrix (X) from the label column (y).
y = data['Class']
X = data.drop(columns=['Class'])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# **Create and Train the Isolation Forest Model**

In [24]:
# Fill missing values with per-column means. The imputer is fitted on the
# training split only and then reused on the test split, so no statistics
# from the held-out data leak into preprocessing.
imputer = SimpleImputer(strategy='mean')
X_train_imputed = imputer.fit_transform(X_train)
X_test_imputed = imputer.transform(X_test)

# Initialize the Isolation Forest and fit it on the imputed training data.
# `contamination` is the assumed proportion of anomalies; it determines the
# score threshold that predict() later uses to flag a sample as an outlier.
model = IsolationForest(contamination=0.01, random_state=42)
model.fit(X_train_imputed)

# **Make Predictions and Evaluate the Model**

In [25]:
# Score the *imputed* test array. The model was fitted on the imputed NumPy
# array, so predicting on the raw DataFrame would bypass the imputer and
# trigger the "X has feature names" warning.
# IsolationForest.predict returns -1 for anomalies and 1 for normal points.
raw_pred = model.predict(X_test_imputed)

# Convert to the 0/1 encoding used by `Class`: 1 for anomalies, 0 for normal
# data. A single np.where avoids the order-dependent in-place relabelling.
y_pred = np.where(raw_pred == -1, 1, 0)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))


X has feature names, but IsolationForest was fitted without feature names



Confusion Matrix:
[[34900   306]
 [   23    51]]

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.99      1.00     35206
         1.0       0.14      0.69      0.24        74

    accuracy                           0.99     35280
   macro avg       0.57      0.84      0.62     35280
weighted avg       1.00      0.99      0.99     35280



# **Plot the Confusion Matrix**

In [26]:
# Build the matrix from the actual test-set predictions. It is stored under
# its own name (`cm`) so the imported sklearn `confusion_matrix` function is
# not shadowed and the evaluation cell stays re-runnable.
cm = confusion_matrix(y_test, y_pred)

# Define class labels (row/column order follows the sorted label values: 0, 1)
class_labels = ['Normal', 'Fraud']

# Create an annotated heatmap using Plotly
heatmap = ff.create_annotated_heatmap(
    z=cm,
    x=class_labels,
    y=class_labels,
    colorscale='Blues',
)

# Update the layout for better readability. Heatmap rows are drawn bottom-up
# by default, so the y-axis is reversed to put the first row ("Normal") on top,
# matching the layout of the printed confusion matrix.
heatmap.update_layout(
    title="Confusion Matrix",
    xaxis_title="Predicted",
    yaxis_title="Actual",
    yaxis_autorange="reversed",
)

# Show the heatmap
heatmap.show()