In [33]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix, classification_report
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from sklearn.exceptions import UndefinedMetricWarning

In [34]:
# Importing necessary libraries
import pandas as pd

# Read the single input file, Base.csv (path is relative to the notebook's
# working directory), into the DataFrame used by every later cell.
data = pd.read_csv(filepath_or_buffer='Base.csv')

In [None]:
# Feature frame: every column except the label column.
X = data.drop('fraud_bool', axis=1)
# Target vector: the label column flattened to a 1-D array.
y = data.loc[:, 'fraud_bool'].values.ravel()

# One-hot encode the categorical columns; drop_first=True drops the first
# level of each encoded column so the dummy columns are not perfectly collinear.
X_encoded = pd.get_dummies(X, drop_first=True)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
# NOTE(review): the split is not stratified -- if the target is imbalanced,
# consider passing stratify=y. TODO confirm class balance.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    test_size=0.3,
    random_state=42,
)


In [None]:
# Logistic Regression with Lasso (L1 penalty)
# - `multi_class` is intentionally not passed: the parameter is deprecated in
#   recent scikit-learn releases, and leaving it at its default fits a binary
#   target as a regular binary logistic regression.
#   (assumes fraud_bool is a two-class target -- TODO confirm)
# - `random_state` is fixed because the 'saga' solver shuffles the data, so the
#   fitted coefficients would otherwise differ from run to run.
# NOTE(review): 'saga' converges fastest when features share a similar scale;
# consider standardising the inputs if max_iter is reached.
model = LogisticRegression(
    penalty='l1',
    solver='saga',
    max_iter=1000,
    random_state=42,
)

# Fit model to encoded training data
model.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = model.predict(X_test)

In [None]:
# Headline scores on the held-out set.
# average='weighted' averages the per-class precision/recall/F1 using each
# class's support as its weight; the fourth return value is unused here.
precision, recall, fscore, _ = precision_recall_fscore_support(
    y_test,
    y_pred,
    average='weighted',
)
accuracy = accuracy_score(y_test, y_pred)

# One line per metric, four decimal places each.
print(
    f'Accuracy: {accuracy:.4f}',
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1-Score: {fscore:.4f}',
    sep='\n',
)

In [None]:
# Per-class precision / recall / F1 / support table, printed under a heading.
print(
    "\nClassification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

In [None]:
# Confusion Matrix Visualization
# Fix the label order once and use it for both the matrix and the tick labels,
# so rows/columns always line up with their ticks -- even if a class present in
# y never shows up in y_test or y_pred.
class_labels = np.unique(y)
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # cells hold integer counts
    cmap='Blues',
    xticklabels=class_labels,
    yticklabels=class_labels,
)
# Rows of the matrix are true labels, columns are predicted labels.
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Coefficients summary
# The model was fitted on the one-hot encoded frame, so its coefficients line
# up with the encoded columns, not with the raw columns of X.
feature_names = X_encoded.columns

# scikit-learn stores one coefficient row per class for multiclass problems but
# only a single row (for model.classes_[1]) when the target is binary; pick the
# row labels that match the actual shape of coef_.
if model.coef_.shape[0] == len(model.classes_):
    row_labels = model.classes_
else:
    row_labels = model.classes_[1:]

coefficients_df = pd.DataFrame(model.coef_, columns=feature_names, index=row_labels)
print("\nModel Coefficients:")
# Transposed so each feature is a row, which is easier to scan.
print(coefficients_df.T)