In [None]:
# Import pandas for data manipulation and analysis
import pandas as pd

# Import matplotlib's pyplot for creating plots
import matplotlib.pyplot as plt

# Import functions and classes from scikit-learn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, roc_curve

In [None]:
# Location of the credit card CSV, relative to the notebook's working directory.
# A forward slash is used because it is accepted on Windows, Linux and macOS,
# whereas a bare backslash in a normal string literal is an (invalid) escape
# sequence and only resolves as a path separator on Windows.
file_path = 'Dataset/creditcard.csv'  # Replace with the actual path to your dataset

# Load the credit card dataset using pandas
credit_card_df = pd.read_csv(file_path)

# Display the first few rows of the dataset
credit_card_df.head()

Data Pre-Processing

In [None]:
# Tally the null entries of every column in credit_card_df
missing_values = credit_card_df.isna().sum(axis=0)

# Show the per-column totals
print(missing_values)

In [None]:
# Column labels of credit_card_df, kept in a variable for the loop below
credit_card_df_columns = credit_card_df.columns

# Report how many distinct values each column holds
for column_name in credit_card_df_columns:
    distinct_values = credit_card_df[column_name].unique()
    unique_values_count = len(distinct_values)
    print(f"Column '{column_name}' has {unique_values_count} unique values.")

In [None]:
# Preview the leading ten rows of credit_card_df
credit_card_df.head(n=10)

In [None]:
# Compute descriptive statistics with DataFrame.describe() and keep the
# resulting table in summary_statistics so it can be reused after this cell.
summary_statistics = credit_card_df.describe()

# Print the table as plain text (print() bypasses the notebook's rich HTML rendering)
print(summary_statistics)

In [None]:
# Converting Time Column
#
# 'Time' is read as a number of seconds and replaced by an HH:MM:SS string.
# Only the time-of-day part is kept: the date component produced by
# pd.to_datetime is discarded by the format string, so values of 24 hours or
# more wrap around.

# Convert only while 'Time' is still numeric. This makes the cell safe to
# re-run: a second execution is a no-op instead of failing on the strings
# produced by the first one.
if pd.api.types.is_numeric_dtype(credit_card_df["Time"]):
    time_of_day = (
        pd.to_datetime(credit_card_df["Time"], unit="s")
        .dt.strftime("%H:%M:%S")
    )
    # Dropping the old column and assigning the new one places 'Time' last,
    # and builds a new frame rather than mutating the loaded one in place.
    credit_card_df = credit_card_df.drop(columns=["Time"]).assign(Time=time_of_day)

# Display the modified DataFrame
credit_card_df.head()

In [None]:
# Print DataFrame.info() for credit_card_df to inspect its columns at this
# point in the notebook (after the 'Time' column has been reformatted above).
credit_card_df.info()

In [None]:
# Report the share of fraud and non-fraud transactions

# Count each label of 'Class' once and reuse the counts for both percentages
class_counts = credit_card_df['Class'].value_counts()
total_transactions = len(credit_card_df)

# Share of rows labelled 1, as a percentage rounded to two decimals
fraud_percentage = round(class_counts[1] / total_transactions * 100, 2)
print("Fraud! :", fraud_percentage, "%")

# Share of rows labelled 0, as a percentage rounded to two decimals
non_fraud_percentage = round(class_counts[0] / total_transactions * 100, 2)
print("No Fraud! :", non_fraud_percentage, "%")

In [None]:
# Build the target (y) and feature (x) sets, then split them for training/testing

# Target: the "Class" column
y = credit_card_df["Class"]

# Features: every column except "Class" and "Time"
x = credit_card_df.drop(["Class", "Time"], axis=1)

# Hold out 30% of the rows for testing; random_state fixes the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=2,
)

In [None]:
# Build and fit the logistic regression classifier

# max_iter is raised to 10000; fit() returns the estimator itself, so the
# construction and the training on the training split are chained
logistic_regression = LogisticRegression(max_iter=10000).fit(x_train, y_train)

# Show the fitted estimator as the cell output
logistic_regression

In [None]:
# Generate and plot the ROC curve

# predict_proba returns one column per class; column 1 is taken as the
# probability of the positive (fraudulent) class. Slicing in the same
# statement keeps y_predict a 1-D array throughout.
y_predict = logistic_regression.predict_proba(x_test)[:, 1]

# False/true positive rates at every decision threshold.
# The thresholds get a real name: `_` is IPython's "last output" variable.
fpr, tpr, thresholds = roc_curve(y_test, y_predict)

# Plot on an explicit Axes so the figure is labelled and can stand alone
fig, ax = plt.subplots()
ax.plot(fpr, tpr, label="Logistic regression")
# Diagonal reference line: the curve a random classifier would produce
ax.plot([0, 1], [0, 1], linestyle="--", color="grey", label="Chance")
ax.set(
    xlabel="False positive rate",
    ylabel="True positive rate",
    title="ROC curve",
)
ax.legend(loc="lower right")
plt.show()

In [None]:
# Compute the area under the ROC curve

# Score the predicted fraud probabilities against the true test labels
roc_auc = roc_auc_score(y_true=y_test, y_score=y_predict)

# Report the score
print("ROC AUC Score:", roc_auc)