Name:- Amarpreet kaur lotte


Email:- amarpreetkaurlotte@gmail.com


Codsoft_List_Task5:- CREDIT CARD FRAUD DETECTION

In [1]:
import pprint

import pandas as pd
from imblearn.under_sampling import RandomUnderSampler
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix,
    accuracy_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


In [2]:
# Load dataset: read the transactions CSV from the working directory
data = pd.read_csv('creditcard.csv')
# Display a heading followed by the first few rows (one print call, newline-separated)
print("Dataset preview:", data.head(), sep="\n")


Dataset preview:
   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26    

In [3]:
# Step 1:- Preprocessing: drop 'Time' and separate the features from the target
y = data['Class']
X = data.drop(['Time', 'Class'], axis=1)

# Standardize the 'Amount' feature of the full feature matrix
scaler = StandardScaler()
X['Amount'] = scaler.fit_transform(X[['Amount']])

# Address class imbalance using undersampling: keep every Class == 1 row and
# draw a seeded random sample of Class == 0 rows of the same size
fraudulent = data.loc[data['Class'].eq(1)]
genuine = data.loc[data['Class'].eq(0)].sample(n=fraudulent.shape[0], random_state=42)
balanced_data = pd.concat([fraudulent, genuine], axis=0)

y_balanced = balanced_data['Class']
X_balanced = balanced_data.drop(['Time', 'Class'], axis=1)
# fit_transform re-fits `scaler`, so from here on it holds the statistics of the
# balanced subset rather than those of the full dataset.
# NOTE(review): this fit sees every balanced row, i.e. it happens before any
# train/test split of X_balanced -- confirm that is intended.
X_balanced['Amount'] = scaler.fit_transform(X_balanced[['Amount']])


In [4]:
# Step 2:- Train-test split (stratified so both splits keep the 50/50 class mix)
X_train_balanced, X_test_balanced, y_train_balanced, y_test_balanced = train_test_split(
    X_balanced, y_balanced, test_size=0.2, random_state=42, stratify=y_balanced
)

# Avoid train/test leakage: 'Amount' in X_balanced was standardized with a mean
# and standard deviation computed over ALL balanced rows, test rows included.
# Re-derive the column from the raw amounts (balanced_data is never scaled),
# fitting the scaler on the training rows only and reusing those statistics
# for the test rows. Rows are matched through the index labels that
# train_test_split preserves.
scaler = StandardScaler()
X_train_balanced = X_train_balanced.assign(
    Amount=scaler.fit_transform(
        balanced_data.loc[X_train_balanced.index, ['Amount']]
    ).ravel()
)
X_test_balanced = X_test_balanced.assign(
    Amount=scaler.transform(
        balanced_data.loc[X_test_balanced.index, ['Amount']]
    ).ravel()
)

In [5]:
# Step 3:- Fit both classifiers on the balanced training split
# Random Forest: build, fit (fit returns the estimator itself), then predict the test split
rf_model = RandomForestClassifier(random_state=42).fit(X_train_balanced, y_train_balanced)
y_pred_rf = rf_model.predict(X_test_balanced)

# Logistic Regression: max_iter is raised to 1000 for the solver
lr_model = LogisticRegression(max_iter=1000, random_state=42).fit(
    X_train_balanced, y_train_balanced
)
y_pred_lr = lr_model.predict(X_test_balanced)


In [7]:
# Step 4:- Score both models on the balanced test split
# Random Forest: the label-based metrics all share the (y_true, y_pred) call shape
rf_precision, rf_recall, rf_f1, rf_accuracy = (
    scorer(y_test_balanced, y_pred_rf)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)
# ROC-AUC is scored on the second predict_proba column, not on the hard labels
rf_auc = roc_auc_score(y_test_balanced, rf_model.predict_proba(X_test_balanced)[:, 1])

# Logistic Regression: same metrics, same order
lr_precision, lr_recall, lr_f1, lr_accuracy = (
    scorer(y_test_balanced, y_pred_lr)
    for scorer in (precision_score, recall_score, f1_score, accuracy_score)
)
lr_auc = roc_auc_score(y_test_balanced, lr_model.predict_proba(X_test_balanced)[:, 1])


In [8]:
# Step 5:- Display results
# Collect the metrics computed in the evaluation step into one nested mapping:
# model name -> metric name -> score.
evaluation_results = {
    "Random Forest": {
        "Precision": rf_precision,
        "Recall": rf_recall,
        "F1-Score": rf_f1,
        "ROC-AUC": rf_auc,
        "Accuracy": rf_accuracy,
    },
    "Logistic Regression": {
        "Precision": lr_precision,
        "Recall": lr_recall,
        "F1-Score": lr_f1,
        "ROC-AUC": lr_auc,
        "Accuracy": lr_accuracy,
    },
}

# pprint is imported with the other modules in the import cell at the top of
# the notebook; it prints the nested dict one entry per line.
pprint.pprint(evaluation_results)


{'Logistic Regression': {'Accuracy': 0.949238578680203,
                         'F1-Score': 0.9489795918367347,
                         'Precision': 0.9489795918367347,
                         'ROC-AUC': 0.981962481962482,
                         'Recall': 0.9489795918367347},
 'Random Forest': {'Accuracy': 0.949238578680203,
                   'F1-Score': 0.9484536082474228,
                   'Precision': 0.9583333333333334,
                   'ROC-AUC': 0.9880952380952381,
                   'Recall': 0.9387755102040817}}


In [9]:
# Function to predict and compare results
def predict_and_compare(index=None):
    """Compare both models' predictions with the true label of one test row.

    Args:
        index: Zero-based position of the row in ``X_test_balanced``. When
            ``None`` (the default) the valid range is printed and the position
            is read interactively with ``input``; pass a value to keep the
            notebook reproducible under Restart & Run All.

    Returns:
        On success, a dict with the keys ``"Actual Class"``,
        ``"Random Forest Prediction"`` and ``"Logistic Regression Prediction"``.
        If the index cannot be parsed as an integer or is out of range, an
        error message string is returned instead.
    """
    last_position = len(X_test_balanced) - 1

    # Only the parsing step is guarded. Wrapping the model calls as well would
    # report any ValueError raised by predict() as an invalid index.
    try:
        if index is None:
            # Inform user about valid index range
            print(f"Enter an index from the test dataset (valid range: 0 to {last_position})")
            index = int(input("Enter the index: "))
        else:
            index = int(index)
    except (TypeError, ValueError):
        return "Error: Please enter a valid integer index."

    # Ensure the index is valid
    if not 0 <= index <= last_position:
        return f"Error: Index must be between 0 and {last_position}."

    # Double brackets keep the row as a one-row DataFrame, so predict() gets 2-D input
    input_features = X_test_balanced.iloc[[index]]
    actual_class = y_test_balanced.iloc[index]

    # Predictions from both models; predict() returns one label per row, take the only one
    rf_prediction = rf_model.predict(input_features)[0]
    lr_prediction = lr_model.predict(input_features)[0]

    return {
        "Actual Class": actual_class,
        "Random Forest Prediction": rf_prediction,
        "Logistic Regression Prediction": lr_prediction,
    }

# Call the function to get predictions and comparison.
# With no argument the function prompts for a row position via input(); the
# value printed is either the comparison dict or an error message string.
result = predict_and_compare()
print(result)


Enter an index from the test dataset (valid range: 0 to 196)
{'Actual Class': 1, 'Random Forest Prediction': 1, 'Logistic Regression Prediction': 1}
