# In [30]:
# Before running this program, download creditcard.csv from
# https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud and place it in the
# working directory.
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import plotly.graph_objs as go
from plotly.subplots import make_subplots

def load_dataset(file_path='creditcard.csv'):
    """Load a credit-card transactions CSV into a DataFrame.

    The default file is the Kaggle credit-card fraud dataset, which must be
    downloaded beforehand from
    https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud

    Args:
        file_path: Path of the CSV file to read. Defaults to
            'creditcard.csv' (a relative path), so any other CSV can be
            supplied without editing this function.

    Returns:
        The DataFrame produced by ``pd.read_csv``, or ``None`` when the file
        does not exist or reading it fails. A message is printed in both
        failure cases.
    """
    try:
        return pd.read_csv(file_path)
    except FileNotFoundError:
        print("Error: File not found.")
        return None
    except Exception as e:
        # Deliberate best-effort: report any other read/parse failure and let
        # the caller decide what to do with the None result.
        print("An error occurred:", e)
        return None

def balance_dataset(credit_card_data, random_state=None):
    """Return a class-balanced subset of the data by random undersampling.

    Rows are split on the ``Class`` column (0 = legitimate, 1 = fraud). The
    larger group is randomly sampled down to the size of the smaller one and
    the two groups are concatenated, legitimate rows first.

    Args:
        credit_card_data: DataFrame containing a ``Class`` column.
        random_state: Optional seed forwarded to ``DataFrame.sample`` so the
            selection is reproducible. ``None`` (the default) keeps the
            sampling unseeded.

    Returns:
        A DataFrame with an equal number of ``Class == 0`` and
        ``Class == 1`` rows; original index labels are preserved.
    """
    labels = credit_card_data["Class"]
    legit = credit_card_data[labels == 0]
    fraud = credit_card_data[labels == 1]

    if len(fraud) <= len(legit):
        # Usual case: fraud is the minority, so shrink the legitimate rows.
        legit = legit.sample(n=len(fraud), random_state=random_state)
    else:
        # Fraud outnumbers legitimate rows; sampling legit with n > len(legit)
        # would raise, so shrink the fraud rows instead.
        fraud = fraud.sample(n=len(legit), random_state=random_state)

    return pd.concat([legit, fraud], axis=0)

def train_logistic_regression(X_train, Y_train):
    """Fit a logistic-regression classifier on the training split.

    Args:
        X_train: Training features.
        Y_train: Training labels.

    Returns:
        The fitted ``LogisticRegression`` instance, built with its default
        settings.
    """
    # fit() hands back the estimator itself, so construction and fitting chain.
    return LogisticRegression().fit(X_train, Y_train)

def evaluate_model(model, X_train, Y_train, X_test, Y_test):
    """Score a fitted model on the training and the test split.

    Args:
        model: Object exposing ``score(X, y)``.
        X_train: Training features.
        Y_train: Training labels.
        X_test: Test features.
        Y_test: Test labels.

    Returns:
        A ``(train_accuracy, test_accuracy)`` tuple holding whatever
        ``model.score`` reports for each split, training split first.
    """
    splits = ((X_train, Y_train), (X_test, Y_test))
    # The generator is consumed in order, so the training split is scored first.
    train_score, test_score = (
        model.score(features, labels) for features, labels in splits
    )
    return train_score, test_score

def plot_interactive_accuracy(train_accuracy, test_accuracy):
    """Show an interactive Plotly chart comparing train and test accuracy.

    Draws one bar per accuracy value plus a single marker for the gap
    (training minus test), then displays the figure.

    Args:
        train_accuracy: Accuracy measured on the training split.
        test_accuracy: Accuracy measured on the test split.
    """
    gap = train_accuracy - test_accuracy

    # (label, value, colour) for each bar; the label doubles as the x
    # category and the legend entry.
    bar_specs = (
        ("Training Accuracy", train_accuracy, 'royalblue'),
        ("Test Accuracy", test_accuracy, 'crimson'),
    )
    traces = [
        go.Bar(x=[label], y=[value], name=label, marker_color=colour)
        for label, value, colour in bar_specs
    ]
    traces.append(
        go.Scatter(
            x=["Difference"],
            y=[gap],
            mode='lines+markers',
            name="Difference",
            marker_color='darkorange',
        )
    )

    fig = go.Figure(data=traces)
    fig.update_layout(
        title="Model Accuracy",
        xaxis_title="Accuracy Type",
        yaxis_title="Accuracy",
        showlegend=True,
        legend=dict(x=0.7, y=0.9),
    )
    fig.show()

def main():
    """Run the fraud-detection pipeline end to end.

    Loads the dataset, balances the classes, performs a stratified 80/20
    train/test split, fits a logistic-regression model, prints both
    accuracies and shows the accuracy chart. Returns early, without
    training, when the dataset could not be loaded.
    """
    print("Welcome to Fraud Detection System!")

    raw_frame = load_dataset()
    if raw_frame is None:
        # load_dataset has already printed the reason; nothing to train on.
        return

    undersampled = balance_dataset(raw_frame)

    # Every column except the label is a feature.
    features = undersampled.drop(columns='Class')
    target = undersampled['Class']

    # Stratify on the label so both splits keep the class ratio;
    # the fixed seed makes the split itself repeatable.
    split = train_test_split(
        features, target, test_size=0.2, stratify=target, random_state=2
    )
    X_train, X_test, Y_train, Y_test = split

    classifier = train_logistic_regression(X_train, Y_train)
    train_accuracy, test_accuracy = evaluate_model(
        classifier, X_train, Y_train, X_test, Y_test
    )

    print(f"Training Accuracy: {train_accuracy:.2f}")
    print(f"Test Accuracy: {test_accuracy:.2f}")

    plot_interactive_accuracy(train_accuracy, test_accuracy)

# Run the pipeline only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()


# Output:
# Welcome to Fraud Detection System!
# Training Accuracy: 0.93
# Test Accuracy: 0.90


# In [None]:
 import aspose.zip as zp

   with zp.Archive() as archive:
	archive.create_entry("entry_name", "creditcard.csv")
	archive.save('single_file_into_archive.zip')