# Import the necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data

In [None]:
# Read the transactions CSV from the working directory into `df`,
# then preview its first five rows (the notebook renders the last expression).
df = pd.read_csv(filepath_or_buffer='creditcard.csv')
df.head(n=5)

# Total null values in the dataset

In [None]:
# Count missing cells over the whole frame: per-column totals first,
# then the grand total across columns.
print(df.isna().sum().sum())
# Drop every row that contains at least one missing value.
# This is a no-op when the count printed above is 0.
df.dropna(axis=0, how='any', inplace=True)

In [None]:
# Print a concise summary of the frame (columns, non-null counts, dtypes).
df.info()

In [None]:
# Summary statistics with one row per column (transposed), first five columns only.
df.describe().transpose().head()

# Calculate the number of genuine and fraud transactions

In [None]:
# Partition the rows by label.
# Assumes 'Class' encodes fraud as 1 and genuine as 0 -- confirm against the data source.
genuine_transactions = df.loc[df['Class'] == 0]
fraud_transactions = df.loc[df['Class'] == 1]

# Row counts of each partition.
num_genuine = genuine_transactions.shape[0]
num_fraud = fraud_transactions.shape[0]

# Share of fraud rows among all rows, expressed as a percentage.
fraud_percentage = num_fraud / df.shape[0] * 100
print(f"Number of genuine transactions: {num_genuine}")
print(f"Number of fraud transactions: {num_fraud}")
print(f"Percentage of fraud transactions: {fraud_percentage}%")

# Correlation map


In [None]:
# Keep only the int/float columns and compute their pairwise correlation matrix.
numData = df.select_dtypes(include=[int, float])
corrMat = numData.corr()

# Draw the matrix as a heatmap on a wide figure.
plt.figure(figsize=(20, 6))
sns.heatmap(data=corrMat, cmap='Blues')
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize 'Amount' (zero mean, unit variance) into a new column.
# The double brackets keep the input two-dimensional, as the scaler requires.
# NOTE(review): the scaler is fit on the full frame, i.e. before any train/test
# split -- confirm this is acceptable for the intended evaluation.
scaler = StandardScaler()
scaler.fit(df[['Amount']])
df['NormalizedAmount'] = scaler.transform(df[['Amount']])

# Split the dataset into training and testing sets

In [None]:
from sklearn.model_selection import train_test_split

# Features: every column except the label and the raw 'Amount'. The raw amount
# is already represented by the standardized 'NormalizedAmount' column, so
# keeping both would feed the model the same information twice.
X = df.drop(columns=['Class', 'Amount'])
y = df['Class']

# Hold out 30% of the rows for testing. Stratifying on the label keeps the
# fraud/genuine ratio the same in both partitions; without it, a rare positive
# class can end up unevenly represented in the test set. The fixed seed makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

# Performing Random Forest


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree random forest on the training partition.
# random_state pins the bootstrap sampling and feature selection so that
# repeated runs give the same model (the split above is already seeded);
# n_jobs=-1 trains the trees on all available cores without changing results.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
    n_jobs=-1,
)
rf_model.fit(X_train, y_train)

# Hard class predictions for the held-out rows.
rf_pred = rf_model.predict(X_test)
print("Random Forest Predictions:", rf_pred)

In [None]:
from sklearn.metrics import accuracy_score

# Test-set accuracy as a percentage. The predictions in `rf_pred` are reused
# rather than calling rf_model.score(), which would run the whole forest over
# X_test a second time to produce the same number.
# NOTE(review): when one class is rare, accuracy is dominated by the majority
# class; rely on the per-class metrics for judging fraud detection.
random_forest_score = accuracy_score(y_test, rf_pred) * 100

print("Random Forest Score: ", random_forest_score)

# Check the performance metrics

In [None]:
from sklearn.metrics import classification_report

# Text report of per-class metrics for the test-set predictions.
print(
    "Random Forest Performance Metrics:\n",
    classification_report(y_true=y_test, y_pred=rf_pred),
)

# ROC Curve

In [None]:
from sklearn.metrics import auc, roc_curve

# Take the second probability column as the score for the ROC curve
# (presumably the fraud class, label 1 -- verify via rf_model.classes_).
rf_probs = rf_model.predict_proba(X_test)[:, 1]

# False/true positive rates over all thresholds (thresholds themselves unused).
rf_fpr, rf_tpr, _ = roc_curve(y_true=y_test, y_score=rf_probs)

# Area under the ROC curve.
rf_auc = auc(x=rf_fpr, y=rf_tpr)

In [None]:
# Model ROC curve, labelled with its AUC.
plt.plot(rf_fpr, rf_tpr, label=f"Random Forest (AUC = {rf_auc:.2f})")
# Dashed black diagonal as the chance-level reference line.
plt.plot([0, 1], [0, 1], color='k', linestyle='--')

plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.legend()
plt.show()

# Precision Recall Curve

In [None]:
from sklearn.metrics import precision_recall_curve

# Precision and recall at every score threshold (thresholds themselves unused).
rf_precision, rf_recall, _ = precision_recall_curve(y_test, rf_probs)

# Plot precision against recall on the current axes.
ax = plt.gca()
ax.plot(rf_recall, rf_precision, label="Random Forest")
ax.set_title("Precision-Recall Curve")
ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.legend()
plt.show()