Credit Card Fraud Detection Using Random Forest

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.metrics import confusion_matrix
from imblearn.over_sampling import SMOTE
import joblib

In [None]:

# Location of the credit card transactions CSV.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
DATA_PATH = r"C:\Users\hp\OneDrive\Desktop\AI Assignment\Credit card Fraud detection\creditcard.csv"

# Read the whole file into a DataFrame
data = pd.read_csv(DATA_PATH)

In [None]:
# Handle missing values (if any) by carrying the previous row's value forward.
# This runs BEFORE the integer cast below, because astype(int) raises on NaN.
# DataFrame.ffill is used instead of fillna(method='ffill'), which recent
# pandas versions deprecate. A NaN in the very first row has no predecessor
# to copy from and is left untouched.
data.ffill(inplace=True)

# Ensure the target column 'Class' is of integer type
data['Class'] = data['Class'].astype(int)

In [None]:


# Split the data into features and target
X = data.drop('Class', axis=1)
y = data['Class']

# Split the data into training and testing sets; stratify=y keeps the class
# proportions the same in both parts.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Scale numerical features.
# The scaler is fitted on the training rows only and then applied to the test
# rows. Fitting it on the full dataset before splitting would let test-set
# statistics (mean / std) leak into training and make the evaluation
# optimistic. Note that `data` and `X` themselves stay unscaled; only
# X_train / X_test hold standardized values.
scaler = StandardScaler()
numerical_features = X.columns
# Wrap the returned arrays back into DataFrames so column names and row
# indices are preserved for the later steps.
X_train = pd.DataFrame(
    scaler.fit_transform(X_train),
    columns=numerical_features,
    index=X_train.index,
)
X_test = pd.DataFrame(
    scaler.transform(X_test),
    columns=numerical_features,
    index=X_test.index,
)


In [None]:
# Set up the Random Forest classifier: 100 trees, fixed seed
model = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)

# Balance the classes of the training split with SMOTE (same fixed seed);
# the test split is not resampled.
smote = SMOTE(random_state=42)
resampled = smote.fit_resample(X_train, y_train)
X_resampled, y_resampled = resampled

In [None]:
# Fit the forest on the resampled training data
model.fit(X_resampled, y_resampled)

# Score the test set.
# Probabilities: keep only column 1 of predict_proba's output, which is what
# the ROC AUC computation consumes later.
test_proba = model.predict_proba(X_test)
y_pred_proba = test_proba[:, 1]

# Hard class labels for the threshold-based metrics
y_pred = model.predict(X_test)

In [None]:
# Label-based metrics: each scorer is called as scorer(y_test, y_pred) and the
# results are unpacked in the same order as the scorers are listed.
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# ROC AUC is computed from the predicted probabilities, not the hard labels
roc_auc = roc_auc_score(y_test, y_pred_proba)

In [None]:
# Report every metric on its own line, rounded to two decimals
metric_report = (
    ('Accuracy', accuracy),
    ('Precision', precision),
    ('Recall', recall),
    ('F1 Score', f1),
    ('ROC AUC Score', roc_auc),
)
for metric_name, metric_value in metric_report:
    print(f'{metric_name}: {metric_value:.2f}')

# Calculate confusion matrix (true labels vs. predicted labels on the test set).
# confusion_matrix is already imported with the other sklearn.metrics names at
# the top of the file, so no second import is needed here.
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:\n', cm)

# Save the model for future use
joblib.dump(model, 'credit_card_fraud_model_rf.pkl')

# Save the fitted scaler as well: the model was trained on standardized
# features, so new data must pass through this same scaler before predict().
joblib.dump(scaler, 'credit_card_fraud_scaler.pkl')