In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix, matthews_corrcoef, roc_auc_score, roc_curve
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [None]:
# Read the dataset from disk (path is relative to the notebook's working directory)
file_path = "data.csv"
data = pd.read_csv(file_path)

# Convention used throughout the notebook: the final column holds the target,
# every column before it is a feature.
X, Y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split_parts = train_test_split(X, Y, test_size=0.2, random_state=0)
X_train, X_test, Y_train, Y_test = split_parts

In [None]:
# Build the network layer by layer: dropout on the inputs, a funnel of ReLU
# layers (128 -> 64 -> 32 -> 16 -> 8) and a single sigmoid output unit.
model = Sequential()
model.add(Dropout(0.5, input_shape=(X.shape[1],)))
# Each pair is (units, dropout rate applied right after the layer, or None)
for hidden_units, dropout_rate in ((128, 0.2), (64, 0.3), (32, None), (16, None), (8, None)):
    model.add(Dense(hidden_units, kernel_initializer='he_uniform', activation='relu'))
    if dropout_rate is not None:
        model.add(Dropout(dropout_rate))
model.add(Dense(1, kernel_initializer='he_uniform', activation='sigmoid'))

# Binary cross-entropy loss optimised with Adam, tracking accuracy
optimizer = Adam(learning_rate=0.01)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Train for 250 epochs; batch_size equals the training-set size, so every
# epoch is a single full-batch update. The test split is passed as the
# validation data, which is where the 'val_*' history entries come from.
history = model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=250,
    batch_size=len(X_train),
    verbose=1,
)

# Final loss/accuracy on the held-out split
test_loss, test_accuracy = model.evaluate(X_test, Y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


In [None]:
# Model outputs for the test split, then hard labels using a 0.5 cut-off
y_pred = model.predict(X_test)
y_pred_classes = np.where(y_pred > 0.5, 1, 0)

# Confusion matrix of true labels against thresholded predictions
conf_matrix = confusion_matrix(Y_test, y_pred_classes)
print("Confusion Matrix:")
print(conf_matrix)

# Flattening the 2x2 matrix row by row yields TN, FP, FN, TP in that order
TN, FP, FN, TP = conf_matrix.ravel()
for caption, count in (
    ("True Negatives: ", TN),
    ("False Positives: ", FP),
    ("False Negatives: ", FN),
    ("True Positives: ", TP),
):
    print(caption, count)

In [None]:
# Accuracy, Precision, Recall, F1 Score, MCC, and ROC AUC.
# TN/FP/FN/TP, y_pred and y_pred_classes come from the confusion-matrix cell.
accuracy = accuracy_score(Y_test, y_pred_classes)

# Guard every ratio against an empty denominator (e.g. the model never predicts
# the positive class). Without the guard the numpy integers divide to NaN with
# a RuntimeWarning and the NaN propagates into the F1 score. Falling back to
# 0.0 mirrors scikit-learn's default zero-division behaviour.
predicted_positives = TP + FP
actual_positives = TP + FN
precision = TP / predicted_positives if predicted_positives > 0 else 0.0
recall = TP / actual_positives if actual_positives > 0 else 0.0
f1_score = (
    2 * (precision * recall) / (precision + recall)
    if (precision + recall) > 0
    else 0.0
)

mcc = matthews_corrcoef(Y_test, y_pred_classes)
# ROC AUC is computed from the raw scores (not the thresholded labels);
# ravel() turns the (n, 1) model output into the 1-D vector the metric expects.
roc_auc = roc_auc_score(Y_test, y_pred.ravel())

print("Accuracy: {:.2f}%".format(accuracy * 100))
print("Precision: {:.2f}".format(precision))
print("Recall: {:.2f}".format(recall))
print("F1 Score: {:.2f}".format(f1_score))
print("Matthews Correlation Coefficient: {:.2f}".format(mcc))
print("ROC AUC Score: {:.2f}".format(roc_auc))

In [None]:
# Plot per-epoch accuracy on the training data and on the validation data
# recorded by model.fit (the loss curves are drawn in a separate cell).
plt.figure(figsize=(10, 6))

plt.plot(history.history['accuracy'], label='Train Accuracy', linewidth=3)
# 'val_accuracy' is measured on the data passed as validation_data to fit(),
# which in this notebook is the test split - hence the legend label.
plt.plot(history.history['val_accuracy'], label='Test Accuracy', linewidth=3)
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.title('Model Accuracy')

# Render explicitly, like the other plotting cells, so the cell output is the
# figure rather than the repr of the last matplotlib call.
plt.show()


In [None]:
import os

# Destination for the exported figure. `save_path` was previously used without
# ever being assigned (NameError at savefig) while `home_dir` was computed and
# never used; build the path from the user's home directory.
home_dir = os.path.expanduser('~')
save_path = os.path.join(home_dir, 'model_loss.png')

# Per-epoch loss on the training data and on the validation (test) data
plt.figure(figsize=(8, 4))
plt.plot(history.history['loss'], label='Train Loss', linewidth=3)
plt.plot(history.history['val_loss'], label='Test Loss', linewidth=3)
plt.xlabel('Epochs', fontsize=14)
# The model is compiled with binary cross-entropy, which is not a percentage,
# so the axis is labelled as plain loss.
plt.ylabel('Loss', fontsize=14)
plt.legend(fontsize=12)
plt.grid(True, linestyle='--', linewidth=0.5)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Save before show(): showing can release the figure, leaving nothing to save
plt.savefig(save_path, format='png', dpi=300, bbox_inches='tight')

plt.show()


In [None]:
# Heatmap view of the confusion matrix computed earlier (rows: actual class,
# columns: predicted class), annotated with integer counts.
class_names = ['Negative', 'Positive']
fig, ax = plt.subplots(figsize=(8, 6))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
# ROC curve built from the raw model scores; thresholds are not needed here
fpr, tpr, _ = roc_curve(Y_test, y_pred)

fig, ax = plt.subplots()
# Model curve, annotated with the AUC computed in the metrics cell
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = {:.2f})'.format(roc_auc))
# Diagonal reference line from (0, 0) to (1, 1)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc="lower right")
plt.show()

In [None]:
# Feature importance from a Random Forest baseline.
# `rf` was referenced here without being created anywhere in the notebook, so
# the cell raised NameError on a fresh kernel. Fit it on the same training
# split the neural network uses; the seed matches the one used for the split.
rf = RandomForestClassifier(random_state=0)
rf.fit(X_train, Y_train)

importance = rf.feature_importances_
features = data.columns[:-1]  # every column except the last (target) one
# argsort is ascending, so the tail holds the (at most) 50 most important
# features, with the most important one last.
top_features_idx = np.argsort(importance)[-50:]
top_features = features[top_features_idx]
top_importance = importance[top_features_idx]

plt.figure(figsize=(12, 8))
# One viridis colour per bar
colors = plt.cm.viridis(np.linspace(0, 1, len(top_features_idx)))
# Plain matplotlib bars replace sns.barplot(..., palette=..., ci=None), whose
# `ci` argument and hue-less `palette` are deprecated in recent seaborn.
# Bars are placed at integer positions, so the most important feature ends up
# at the top of the chart.
positions = np.arange(len(top_features_idx))
plt.barh(positions, top_importance, color=colors)
plt.yticks(positions, top_features)

plt.title('Feature Importance')
plt.xlabel('Importance')
plt.ylabel('Features')
plt.xticks(rotation=45)

# Annotate each bar with its importance value just past the bar's end
# (the previous label text was an empty string, so nothing was drawn).
for i, value in enumerate(top_importance):
    plt.text(value + 0.0005, i, f'{value:.4f}', va='center')

# Add grid to the plot
plt.grid(axis='both', linestyle='--')

plt.tight_layout()
plt.show()


In [None]:
import lime
import lime.lime_tabular

# Explain one test instance (row 10 of X_test) with LIME.
# NOTE(review): `rf` is not created in this cell; it must already be a fitted
# classifier exposing predict_proba when the cell runs.
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    feature_names=list(data.columns[:-1]),
    mode="classification",
)
# Ask for a weight for every feature (number of columns in the design matrix)
exp = explainer.explain_instance(X_test[10], rf.predict_proba, num_features=X_test.shape[1])

# Generate the LIME explanation figure and work on its axes directly
fig = exp.as_pyplot_figure()
ax = fig.gca()
ax.set_title('LIME Explanation')

# (feature description, weight) pairs in the order LIME reports them
explanation_pairs = exp.as_list()
feature_names = [pair[0] for pair in explanation_pairs]
feature_importances = [pair[1] for pair in explanation_pairs]

# as_pyplot_figure() reverses the as_list() order before drawing, so the bars
# (bottom to top) correspond to the names in REVERSED order. Zipping with the
# unreversed list put every label on the wrong bar.
bars = ax.patches
for bar, label in zip(bars, reversed(feature_names)):
    ax.text(
        bar.get_width() / 2,
        bar.get_y() + bar.get_height() / 2,
        label,
        ha='center',
        va='center',
        color='white',
    )

# Add x-axis and y-axis labels
ax.set_xlabel('Importance')
ax.set_ylabel('Features')

# Add grid
ax.grid(True, linestyle='--', which='both', axis='both')

# The names are drawn inside the bars, so drop the duplicate y tick labels
ax.set_yticklabels([])

plt.show()


In [None]:
# Horizontal boxplots, one per column of the original `data` frame
fig, ax = plt.subplots(figsize=(12, 8))
sns.boxplot(data=data, orient='h', ax=ax)

# Centered title plus padded axis labels (set once, with their final options)
ax.set_title('Feature Distribution', fontsize=16, loc='center')
ax.set_xlabel('Feature Values', fontsize=14, labelpad=10)
ax.set_ylabel('Features', fontsize=14, labelpad=10)

# Larger tick labels for readability
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

# Light vertical guide lines along the value axis
ax.grid(axis='x', linestyle='--', alpha=0.7)

fig.tight_layout()
plt.show()


In [None]:
# Re-draw the existing LIME explanation `exp` as a bar chart under a new title
local_fig = exp.as_pyplot_figure()
local_fig.gca().set_title('Local Contributions')
plt.show()


In [None]:
# Display the LIME explanation object `exp` through its show_in_notebook()
# method. `exp` is not created in this cell; it must already exist in the
# kernel from the earlier LIME cell.
exp.show_in_notebook()


In [None]:
# Error analysis: where does the thresholded prediction disagree with the label?
# The raw model output `y_pred` is an (n, 1) array of probabilities while
# `Y_test` is an (n,) label vector. Comparing them directly broadcasts to an
# (n, n) matrix and compares probabilities with class labels, so nearly every
# entry counted as an "error". Flatten both and threshold at 0.5 (the same
# cut-off used for the confusion matrix) before comparing.
pred_prob = np.asarray(y_pred).ravel()
pred_class = (pred_prob > 0.5).astype(int)
true_labels = np.asarray(Y_test).ravel()
errors = np.where(pred_class != true_labels)[0]  # test-set indices of misclassified rows

plt.figure(figsize=(8, 5))

# Predicted probability for each misclassified instance
plt.scatter(range(len(errors)), pred_prob[errors], marker='x', color='red', label='Predicted', s=100)

# True label for the same instances
plt.scatter(range(len(errors)), true_labels[errors], marker='o', color='blue', label='True', s=100)

# Title and axis labels
plt.title('Error Analysis', fontsize=16)
plt.xlabel('Instance', fontsize=14)
plt.ylabel('Prediction', fontsize=14)

# Adding grid
plt.grid(True, linestyle='--', alpha=0.7)

# Legend and adjustments
plt.legend(fontsize=12)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)

plt.tight_layout()
plt.show()
