<a href="https://colab.research.google.com/github/chandana-nukala18/sixth-repo/blob/main/Fraud_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib

Load dataset

In [None]:
# Location of the credit-card transactions CSV. This is an absolute path
# (presumably the Colab upload directory -- adjust when running elsewhere).
DATA_PATH = "/content/creditcard.csv"

# Read the whole file into a DataFrame.
df = pd.read_csv(DATA_PATH)

Exploratory data analysis

In [None]:
# Show the first rows of the frame under a heading (heading and table are
# separated by a newline).
print("First 5 rows of data:", df.head(), sep="\n")

# Count how many rows carry each value of the 'Class' column.
print("\nClass distribution:", df['Class'].value_counts(), sep="\n")

First 5 rows of data:
   Time        V1        V2        V3  ...       V27       V28  Amount  Class
0   0.0 -1.359807 -0.072781  2.536347  ...  0.133558 -0.021053  149.62    0.0
1   0.0  1.191857  0.266151  0.166480  ... -0.008983  0.014724    2.69    0.0
2   1.0 -1.358354 -1.340163  1.773209  ... -0.055353 -0.059752  378.66    0.0
3   1.0 -0.966272 -0.185226  1.792993  ...  0.062723  0.061458  123.50    0.0
4   2.0 -1.158233  0.877737  1.548718  ...  0.219422  0.215153   69.99    0.0

[5 rows x 31 columns]

Class distribution:
Class
0.0    187892
1.0       368
Name: count, dtype: int64


Data Preprocessing

In [None]:
# Keep only the rows that have a label. The result gets its own name so the
# raw `df` displayed in the earlier cells is not silently replaced and this
# cell produces the same result every time it is re-run.
df_labeled = df.dropna(subset=['Class'])

# Features: every column except the label.
X = df_labeled.drop(columns='Class')

# The class counts printed earlier show float labels (0.0 / 1.0) --
# presumably because the column contains missing values. Once those rows are
# removed the labels can be stored as plain integers (0 / 1).
y = df_labeled['Class'].astype(int)

Scaling

In [None]:
# Standardise every feature column: learn the per-column statistics from X,
# then apply them to X. Note that the statistics are computed over all rows
# of X, i.e. before any train/test split has been made.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

Train and Test split

In [None]:
# Split the *raw* features first (70% train / 30% test). `stratify=y` keeps
# the class ratio the same in both parts and `random_state` makes the
# partition reproducible.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training rows only, so that no statistics from the
# held-out test rows leak into preprocessing, then apply that same fitted
# transform to both parts. `scaler` is (re)bound here so the object saved and
# used for single-transaction predictions later is the training-only one.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

Train model

In [None]:
# Hyperparameters for the forest: 100 trees, fixed seed for reproducibility.
forest_params = {"n_estimators": 100, "random_state": 42}

# Build the classifier and fit it on the training split. The fit call is the
# last expression of the cell, so the fitted estimator is displayed.
model = RandomForestClassifier(**forest_params)
model.fit(X_train, y_train)

Evaluation

In [None]:
# Predict labels for the held-out rows.
y_pred = model.predict(X_test)

# Confusion matrix of true labels vs. predicted labels.
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", cm, sep="\n")

# Per-class precision / recall / F1 summary.
report = classification_report(y_test, y_pred)
print("\nClassification Report:", report, sep="\n")

Confusion Matrix:
[[56365     3]
 [   29    81]]

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00     56368
         1.0       0.96      0.74      0.84       110

    accuracy                           1.00     56478
   macro avg       0.98      0.87      0.92     56478
weighted avg       1.00      1.00      1.00     56478



In [None]:
# Output file names for the fitted classifier and the fitted scaler.
MODEL_FILE = 'fraud_detection_model.pkl'
SCALER_FILE = 'scaler.pkl'

# Persist both objects; they must be loaded together, because the model was
# trained on features transformed by this scaler.
joblib.dump(model, MODEL_FILE)
joblib.dump(scaler, SCALER_FILE)

Simulation

In [None]:
def predict_transaction(input_data, clf=None, feature_scaler=None):
    """Classify a single transaction as "Fraudulent" or "Legitimate".

    Parameters
    ----------
    input_data : 1-D sequence of numbers
        The feature values of one transaction, in the same column order the
        scaler was fitted with.
    clf : fitted classifier, optional
        Object with a ``predict`` method. Defaults to the notebook-level
        ``model``; pass one explicitly to use e.g. a model reloaded from disk.
    feature_scaler : fitted scaler, optional
        Object with a ``transform`` method. Defaults to the notebook-level
        ``scaler``.

    Returns
    -------
    str
        "Fraudulent" if the predicted label equals 1, otherwise "Legitimate".

    Raises
    ------
    ValueError
        If ``input_data`` is not one-dimensional, or its length differs from
        the number of features the scaler reports via ``n_features_in_``.
    """
    # Fall back to the notebook globals only when nothing was passed in.
    clf = model if clf is None else clf
    feature_scaler = scaler if feature_scaler is None else feature_scaler

    features = np.asarray(input_data, dtype=float)
    if features.ndim != 1:
        raise ValueError(
            f"input_data must be one-dimensional, got shape {features.shape}"
        )

    expected = getattr(feature_scaler, "n_features_in_", None)
    if expected is not None and features.shape[0] != expected:
        raise ValueError(
            f"input_data has {features.shape[0]} values, expected {expected}"
        )

    # One row, many columns.
    row = features.reshape(1, -1)

    # A scaler fitted on a DataFrame remembers the column names and warns
    # when it is later given unnamed data; rebuild a one-row frame with the
    # same names so the input matches what it was fitted on.
    names = getattr(feature_scaler, "feature_names_in_", None)
    if names is not None:
        row = pd.DataFrame(row, columns=list(names))

    input_scaled = feature_scaler.transform(row)
    prediction = clf.predict(input_scaled)
    return "Fraudulent" if prediction[0] == 1 else "Legitimate"

In [None]:
# Take the second row of the (unscaled) feature frame as a NumPy array and
# run it through the single-transaction predictor.
sample = X.iloc[1].to_numpy()
sample_label = predict_transaction(sample)
print("sample transaction prediction:", sample_label)