In [None]:
# Code to create Bayesian Neural Network (BNN) on example dataset
# Ashok K Sharma

In [None]:
# Import Libraries
import numpy as np
import csv as csv
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [None]:
def sort_map(input_data):
    """Tally how often each distinct value occurs in ``input_data``.

    Returns a list of ``(value, count)`` tuples ordered by value.
    """
    values, occurrences = np.unique(input_data, return_counts=True)
    # Pair every distinct value with its number of occurrences
    tally = {value: occurrence for value, occurrence in zip(values, occurrences)}
    # Hand the pairs back ordered by value
    return sorted(tally.items())

# Load the raw CSV files afresh (the NN / BNN workflows converted their copies of the
# data into tensor objects, so start again from disk here)
train_data = pd.read_csv("Example_data/train.csv")
test_data = pd.read_csv("Example_data/test.csv")

# Predictor columns used by the model
features = ['ClogP', 'BSEP', 'Glu', 'Glu_Gal', 'THLE', 'HepG2', 'Fsp3', 'log10cmax']
X_train_df = train_data[features]
X_test_df = test_data[features]

# Standardise the features (mean=0, variance=1). The scaler is fitted on the training
# split only and then applied unchanged to both splits.
scaler = StandardScaler()
scaler.fit(X_train_df)
X_train, X_test = (
    pd.DataFrame(scaler.transform(frame), columns=frame.columns)
    for frame in (X_train_df, X_test_df)
)

# Target variable: integer labels shifted down by one
# (presumably turning 1-based severity codes into 0-based class indices - confirm against the data)
target = ['dili_sev']
Y_train, Y_test = (
    split[target].values.astype(int) - 1
    for split in (train_data, test_data)
)

# Class counts per split, as (label, count) pairs ordered by label
sorted_train_output = sort_map(Y_train)
sorted_test_output = sort_map(Y_test)

print(sorted_train_output)
print(sorted_test_output)

In [None]:
# Get importance Scores
import pandas as pd
from sklearn.feature_selection import mutual_info_classif

# Estimate the mutual information between every standardised feature and the class label.
# The estimator adds a small random noise to continuous features, so random_state is fixed
# to keep the scores (and therefore the ranking below) reproducible between runs.
mi_scores = mutual_info_classif(X_train, Y_train.ravel(), random_state=42)

# Collect the scores in a table, most informative feature first
mi_scores_df = pd.DataFrame({'Feature': X_train.columns, 'Mutual Information': mi_scores})
mi_scores_df = mi_scores_df.sort_values(by='Mutual Information', ascending=False)

# Print the ranked features
print(mi_scores_df)

# Bayesian Neural Network

In [None]:
#--  Bayesian Neural Network: number of features, hidden neurons and classes
#---- Architecture settings used to size the weight matrices below
n0 = len(features) # Number of Features (derived from the selected feature list so W0 always matches the input width)
n1 = 15 # Number of Neurons in the Hidden Layer
K = 3 # Number of Classes

import pymc3 as pm
import numpy as np
import theano.tensor as tt

# Define the BNN model using a feed-forward function
def feedforward(inp, W0, b0, W1, b1):
    """Forward pass of a network with one ReLU hidden layer.

    ``inp`` is projected with ``W0`` and shifted by ``b0``, passed through a ReLU,
    then projected with ``W1`` and shifted by ``b1``. The result is returned as-is
    (no output activation is applied here).
    """
    hidden_pre_activation = tt.dot(inp, W0) + b0
    hidden = tt.nnet.relu(hidden_pre_activation)
    return tt.dot(hidden, W1) + b1

# Build the PyMC3 model, sample the posterior from the TRAINING data only, then attach
# the test-set nodes so that posterior-predictive draws are available for both splits.
with pm.Model() as model:
    # Shared scale hyper-priors (normal truncated at 0): one for all weights, one for all biases
    sig_w = pm.TruncatedNormal("sig_w", mu=0, sigma=1, lower=0)
    sig_b = pm.TruncatedNormal("sig_b", mu=0, sigma=1, lower=0)

    # Input -> hidden layer parameters (weights are sampled flat, then reshaped to n0 x n1)
    W0_flat = pm.Normal("W0_flat", mu=0, sigma=sig_w, shape=(n0 * n1))
    W0 = pm.Deterministic("W0", W0_flat.reshape((n0, n1)))

    b0 = pm.Normal("b0", mu=0, sigma=sig_b, shape=n1)

    # Hidden -> output layer parameters (reshaped to n1 x K)
    W1_flat = pm.Normal("W1_flat", mu=0, sigma=sig_w, shape=(n1 * K))
    W1 = pm.Deterministic("W1", W1_flat.reshape((n1, K)))

    b1 = pm.Normal("b1", mu=0, sigma=sig_b, shape=K)

    # Class scores for the training compounds. The DataFrame is passed as a plain array
    # so the tensor graph does not depend on pandas objects.
    preds = feedforward(X_train.values, W0, b0, W1, b1)
    # Row-wise softmax over the whole score matrix. The notebook never defines or imports
    # a bare `softmax`, so the Theano implementation is used explicitly.
    p_train = tt.nnet.softmax(preds)

    # Training likelihood: one categorical node per compound, named "Y_train_<i>"
    # (later cells look the posterior-predictive draws up under these names)
    for i in range(len(Y_train)):
        pm.Categorical("Y_train_{}".format(i), p=p_train[i], observed=Y_train[i])

    # Perform sampling
    n_steps = 20_000
    target_accept_rate = 0.65
    #target_accept_rate = 0.9 #(Try if Required - Higher Target Acceptance Rates)
    # random_seed makes the chains reproducible between runs
    trace = pm.sample(draws=n_steps, target_accept=target_accept_rate, random_seed=42)

    # Test-set nodes are added only AFTER sampling. Registering them as observed
    # variables before pm.sample would put the test labels into the likelihood and leak
    # them into the posterior. Added here, they cannot influence `trace` but are still
    # picked up by pm.sample_posterior_predictive under the names "Y_test_<i>".
    preds_test = feedforward(X_test.values, W0, b0, W1, b1)
    p_test = tt.nnet.softmax(preds_test)
    for i in range(len(Y_test)):
        pm.Categorical("Y_test_{}".format(i), p=p_test[i], observed=Y_test[i])

In [None]:
# Get Posterior Predictions: 1000 posterior-predictive draws from the fitted model.
# Later cells read them per compound under the keys "Y_train_<i>" / "Y_test_<i>".
# A fixed random_seed keeps the draws, and the majority votes derived from them, reproducible.
posterior_pred = pm.sample_posterior_predictive(trace, samples=1000, model=model, random_seed=42)

In [None]:
# Save Model and Predictions - On Time Task
import pickle

#***************** Save Trace and Model
# Save the trace
#with open("Final_models/SemenovaData_trace_BNN_updated.pkl", "wb") as file:
#    pickle.dump(trace, file)
# Save the model
#with open("Final_models/SemenovaData_model_BNN_updated.pkl", "wb") as file:
#    pickle.dump(model, file)

#import numpy as np
#np.save('Final_models/posterior_prediction_BNN_updated.npy', posterior_pred)

In [None]:
# Load Model and Predictions - On Time Task

# Load the trace
#with open("Final_models/SemenovaData_trace_BNN_updated.pkl", "rb") as file:
#    trace = pickle.load(file)

# Load the model
#with open("Final_models/SemenovaData_model_BNN_updated.pkl", "rb") as file:
#    model = pickle.load(file)

# Load the Posterior Predictions
#posterior_pred = np.load('Final_models/posterior_prediction_BNN_updated.npy', allow_pickle=True)

### Get Performance on the Training SET

In [None]:
import numpy as np

# Collect the posterior-predictive draws of every training compound.
# Each compound has its own "Y_train_<i>" entry, so iterate over the actual number of
# training rows instead of a hard-coded count.
class_labels = [posterior_pred[f'Y_train_{i}'] for i in range(len(Y_train))]

# One column per compound, one row per draw. column_stack accepts both (draws,) and
# (draws, 1) inputs, whereas hstack would flatten 1-D inputs into a single long vector.
train_predictions_array = np.column_stack(class_labels)

# Print the shape of the predictions array - on Train Dataset
print(train_predictions_array.shape)

train_predictions_array

In [None]:
# posterior prediction for each category
def probs(y_pred_df, ind, n_classes=3):
    """Empirical class probabilities for one column of sampled labels.

    Parameters
    ----------
    y_pred_df : 2-D array-like of sampled class labels (indexed as ``[:, ind]``).
    ind : index of the column to summarise.
    n_classes : number of classes; labels ``0 .. n_classes - 1`` are counted.
        Defaults to 3, the value that was previously hard-coded.

    Returns
    -------
    list of float
        Fraction of entries in the column equal to each class label, in label order.
    """
    y_pred_ind = np.asarray(y_pred_df)[:, ind]
    # The mean of a boolean mask is the fraction of entries that match the label
    return [float(np.mean(y_pred_ind == k)) for k in range(n_classes)]

# Posterior class probabilities for the first three training compounds (columns 0, 1 and 2)
for compound_idx in (0, 1, 2):
    print(probs(train_predictions_array, compound_idx))

In [None]:
# Summarize predictions on the training data set: majority vote per compound
import pandas as pd
import numpy as np
from scipy.stats import mode

# Rows are posterior-predictive draws, columns are training compounds
train_predictions_array_df = pd.DataFrame(train_predictions_array)
train_pred_df = pd.DataFrame(train_predictions_array_df)

# Most frequent label in every column. scipy.stats.mode returns the per-column modes as
# a (1, n) array on older SciPy and as an (n,) array on SciPy >= 1.11; ravel() yields one
# label per compound in both cases, whereas indexing the result with [0] would reduce the
# newer output to a single scalar.
train_majority_labels = np.ravel(mode(train_predictions_array, axis=0)[0])

# Print the majority labels
print(train_majority_labels)

# Sanity check: label frequencies among the draws of the first compound
train_predictions_array_df[0].value_counts()

In [None]:
# Side-by-side table of the true training labels and the majority-vote predictions
train_actual_pred = np.column_stack([Y_train, train_majority_labels])
train_actual_pred_df = pd.DataFrame(
    train_actual_pred,
    columns=['Train_actual_label', 'Train_pred_label'],
)
train_actual_pred_df
#train_actual_pred_df.to_csv('ML_results/BNN_Updated_trainData_Perform.csv', index=False)

In [None]:
#----- Save Different Performance Matrices - On Train DataSet
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, precision_score, accuracy_score, f1_score, matthews_corrcoef, confusion_matrix

# Macro-averaged precision and F1, plus accuracy and MCC, for the majority-vote labels
precision = precision_score(Y_train, train_majority_labels, average='macro')
accuracy = accuracy_score(Y_train, train_majority_labels)
# Keep the score under its own name: assigning it to `f1_score` would replace the
# imported sklearn function with a float and break any later call to it.
f1 = f1_score(Y_train, train_majority_labels, average='macro')
mcc = matthews_corrcoef(Y_train, train_majority_labels)

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(Y_train, train_majority_labels)

# One-vs-rest sensitivity and specificity for each class
sensitivity = {}
specificity = {}

for i in range(cm.shape[0]):
    tp = cm[i, i]
    fn = cm[i, :].sum() - tp   # class i compounds predicted as another class
    fp = cm[:, i].sum() - tp   # other compounds predicted as class i
    tn = cm.sum() - (tp + fn + fp)

    sensitivity[i] = tp / (tp + fn)
    specificity[i] = tn / (tn + fp)

# Overall values are the unweighted mean over the classes
overall_sensitivity = sum(sensitivity.values()) / len(sensitivity)
overall_specificity = sum(specificity.values()) / len(specificity)

# Print the performance metrics to the console
print("**************** MODEL PERFORMANCE: Actual BNN Train Dataset ****************")
print("Confusion Matrix:")
print(cm)
print("Sensitivity:")
for key, value in sensitivity.items():
    print("Class {}: {:.2f}".format(key, value))
print("Specificity:")
for key, value in specificity.items():
    print("Class {}: {:.2f}".format(key, value))
print("Overall Sensitivity: {:.2f}".format(overall_sensitivity))
print("Overall Specificity: {:.2f}".format(overall_specificity))

print("Precision: {:.2f}".format(precision))
print("Accuracy: {:.2f}".format(accuracy))
print("F1-score: {:.2f}".format(f1))
print("MCC: {:.2f}".format(mcc))

In [None]:
# Get Probabilites Data Frame
import pandas as pd

# Posterior class probabilities for every training compound (one row per compound).
# The rows are gathered in a list and turned into a DataFrame in one step:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every iteration.
# The number of compounds is read from the prediction array instead of being hard-coded.
n_train_compounds = train_predictions_array.shape[1]
train_probabilities_df = pd.DataFrame(
    [probs(train_predictions_array, i) for i in range(n_train_compounds)],
    columns=['Class 0', 'Class 1', 'Class 2'],
)

# Print the probabilities DataFrame
print(train_probabilities_df)

In [None]:
# Assemble the per-compound results table for the training set and write it to disk.
#   train_actual_pred_df   -> actual and predicted labels
#   train_probabilities_df -> probability of each of the three classes
#   train_data[['Drug']]   -> drug names from the training dataset
#   X_train                -> scaled values of all feature variables
train_data_drug_names = train_data[['Drug']]

# Place the four tables next to each other, column-wise
train_result_parts = [train_actual_pred_df, train_probabilities_df, train_data_drug_names, X_train]
BNN_trainData_concatenated_df = pd.concat(train_result_parts, axis=1)
print (BNN_trainData_concatenated_df)

# Write the combined table as CSV without the index column
BNN_trainData_concatenated_df.to_csv('ML_results/BNN_trainData_Predictions.csv', index=False)

### Prediction probability figures on the Train DataSet for each Molecule

In [None]:
###### -- Prediction probability figures on the Train DataSet for each molecule

#-- For every training compound: a bar plot of the three class probabilities next to a
#-- dot plot of that compound's (scaled) feature values, saved as one PNG per compound.
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# To start from the saved table instead of the in-memory frame:
#BNN_trainData_concatenated_df = pd.read_csv("ML_results/BNN_trainData_Predictions.csv")

# Reorder the columns: name, class probabilities, true label, then the features
df = BNN_trainData_concatenated_df.loc[:, ['Drug','Class 0','Class 1','Class 2','Train_actual_label','ClogP','BSEP','Glu','Glu_Gal','THLE','HepG2','Fsp3','log10cmax']]

# Folder that receives the PNGs; create it up front so savefig cannot fail on a missing directory
output_dir = 'ML_results/TrainData_Predictions_BNN'
os.makedirs(output_dir, exist_ok=True)

# Set the width of the bars
bar_width = 0.2

# Set the positions of the bars on the x-axis (one bar per class-probability column)
bar_positions = np.arange(len(df.columns[1:4]))

# One colour per class bar
color_map = ['#008b00', '#b8860b', '#b22222']

#--- Feature columns shown in the dot plot and the common x-axis range across all of them
range_cols = df.columns[5:]
x_min = df[range_cols].values.min()
x_max = df[range_cols].values.max()

# Loop over each compound and save the plot as an image
for i, compound in enumerate(df['Drug']):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    # Bar plot of the class probabilities (cast to float: a row slice of a mixed-type frame is object-typed)
    prediction_probs = df.iloc[i, 1:4].astype(float)
    ax1.bar(bar_positions, prediction_probs, width=bar_width, color=color_map)

    # Customize the plot
    ax1.set_xticks(bar_positions)
    ax1.set_xticklabels(df.columns[1:4])
    ax1.set_xlabel('')
    ax1.set_ylabel('Prediction Probability')
    ax1.set_title(f'Compound name- {compound} - (True Label: {df["Train_actual_label"].iloc[i]})')

    # Dot plot: for each feature, a dashed line spanning its observed range and a dot at
    # this compound's value
    for j, col in enumerate(range_cols):
        ax2.hlines(y=j, xmin=df[col].min(), xmax=df[col].max(), colors='gray', linestyles='dashed', linewidth=1)
        ax2.plot([df[col].iloc[i]], [j], 'o', color='blue')

    # Customize the plot
    ax2.set_yticks(range(len(range_cols)))
    ax2.set_yticklabels(range_cols)
    ax2.set_xlabel('Values')
    ax2.set_ylabel('Variables')
    ax2.set_title(f'Compound name {compound}')
    ax2.set_xlim(x_min, x_max)

    # Save the plot as an image and release the figure so memory does not grow with the loop
    fig.subplots_adjust(wspace=0.5)
    fig.savefig(f'{output_dir}/{compound}_plot.png')
    plt.close(fig)

In [None]:
#---- Export the Animation Out of the Images
import os
from PIL import Image
from matplotlib import pyplot as plt

# Directory containing the images. This is the folder the training-set plotting cell
# saves its PNGs to; a '~' prefix is not expanded by os.listdir and must not be used here.
image_dir = 'ML_results/TrainData_Predictions_BNN/'

# PNG file names in the directory, sorted by name
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))

# Load every image into memory; copy() detaches the frame from the file so the handle
# can be closed immediately
frames = []
for image_file in image_files:
    image_path = os.path.join(image_dir, image_file)
    with Image.open(image_path) as image:
        frames.append(image.copy())

if not frames:
    raise FileNotFoundError(f'No PNG frames found in {image_dir}')

# Create the animation (play with `duration`, in milliseconds per frame, to change the speed).
# The first real frame is used as the base image: starting from a blank Image.new(...)
# would show an empty frame and drop the first compound from the GIF.
animation = frames[0]
animation.save('ML_results/BNN_train_Preds_Animation.gif', format='GIF', append_images=frames[1:], save_all=True, duration=10, loop=0)

## Summarize Predictions on the Test DataSet

In [None]:
import numpy as np

# Collect the posterior-predictive draws of every test compound.
# Each compound has its own "Y_test_<i>" entry, so iterate over the actual number of
# test rows instead of a hard-coded count.
class_labels = [posterior_pred[f'Y_test_{i}'] for i in range(len(Y_test))]

# One column per compound, one row per draw. column_stack accepts both (draws,) and
# (draws, 1) inputs, whereas hstack would flatten 1-D inputs into a single long vector.
test_predictions_array = np.column_stack(class_labels)

# Print the shape of the predictions array on Test Dataset
print(test_predictions_array.shape)

test_predictions_array

In [None]:
# posterior prediction for each category
def probs(y_pred_df, ind, n_classes=3):
    """Empirical class probabilities for one column of sampled labels.

    This helper is also defined in the training-set section; it is defined again here
    so the test-set section can be run on its own.

    Parameters
    ----------
    y_pred_df : 2-D array-like of sampled class labels (indexed as ``[:, ind]``).
    ind : index of the column to summarise.
    n_classes : number of classes; labels ``0 .. n_classes - 1`` are counted.
        Defaults to 3, the value that was previously hard-coded.

    Returns
    -------
    list of float
        Fraction of entries in the column equal to each class label, in label order.
    """
    y_pred_ind = np.asarray(y_pred_df)[:, ind]
    # The mean of a boolean mask is the fraction of entries that match the label
    return [float(np.mean(y_pred_ind == k)) for k in range(n_classes)]

# Posterior class probabilities for the first three test compounds (columns 0, 1 and 2)
for compound_idx in (0, 1, 2):
    print(probs(test_predictions_array, compound_idx))

In [None]:
# Summarize predictions on the test data set: majority vote per compound
import pandas as pd
import numpy as np
from scipy.stats import mode

# Rows are posterior-predictive draws, columns are test compounds
test_predictions_array_df = pd.DataFrame(test_predictions_array)
test_pred_df = pd.DataFrame(test_predictions_array_df)

# Most frequent label in every column. scipy.stats.mode returns the per-column modes as
# a (1, n) array on older SciPy and as an (n,) array on SciPy >= 1.11; ravel() yields one
# label per compound in both cases, whereas indexing the result with [0] would reduce the
# newer output to a single scalar.
test_majority_labels = np.ravel(mode(test_predictions_array, axis=0)[0])

# Print the majority labels
print(test_majority_labels)

# Sanity check: label frequencies among the draws of the first compound
test_predictions_array_df[0].value_counts()

In [None]:
# Side-by-side table of the true test labels and the majority-vote predictions
test_actual_pred = np.column_stack([Y_test, test_majority_labels])
test_actual_pred_df = pd.DataFrame(
    test_actual_pred,
    columns=['Y_test', 'Y_pred'],
)
test_actual_pred_df
#test_actual_pred_df.to_csv('ML_results/BNN_Updated_testData_Perform.csv', index=False)

In [None]:
#----- Save Different Performance Matrices - On Test DataSet
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, precision_score, accuracy_score, f1_score, matthews_corrcoef, confusion_matrix

# Macro-averaged precision and F1, plus accuracy and MCC, for the majority-vote labels
precision = precision_score(Y_test, test_majority_labels, average='macro')
accuracy = accuracy_score(Y_test, test_majority_labels)
# Keep the score under its own name: assigning it to `f1_score` would replace the
# imported sklearn function with a float and break any later call to it.
f1 = f1_score(Y_test, test_majority_labels, average='macro')
mcc = matthews_corrcoef(Y_test, test_majority_labels)

# Confusion matrix (rows: true class, columns: predicted class)
cm = confusion_matrix(Y_test, test_majority_labels)

# One-vs-rest sensitivity and specificity for each class
sensitivity = {}
specificity = {}

for i in range(cm.shape[0]):
    tp = cm[i, i]
    fn = cm[i, :].sum() - tp   # class i compounds predicted as another class
    fp = cm[:, i].sum() - tp   # other compounds predicted as class i
    tn = cm.sum() - (tp + fn + fp)

    sensitivity[i] = tp / (tp + fn)
    specificity[i] = tn / (tn + fp)

# Overall values are the unweighted mean over the classes
overall_sensitivity = sum(sensitivity.values()) / len(sensitivity)
overall_specificity = sum(specificity.values()) / len(specificity)

# Print the performance metrics to the console.
# The banner names the Test dataset: these numbers are computed from Y_test, and the
# previous "Train Dataset" label made the two reports indistinguishable.
print("**************** MODEL PERFORMANCE: Actual BNN Test Dataset ****************")
print("Confusion Matrix:")
print(cm)
print("Sensitivity:")
for key, value in sensitivity.items():
    print("Class {}: {:.2f}".format(key, value))
print("Specificity:")
for key, value in specificity.items():
    print("Class {}: {:.2f}".format(key, value))
print("Overall Sensitivity: {:.2f}".format(overall_sensitivity))
print("Overall Specificity: {:.2f}".format(overall_specificity))

print("Precision: {:.2f}".format(precision))
print("Accuracy: {:.2f}".format(accuracy))
print("F1-score: {:.2f}".format(f1))
print("MCC: {:.2f}".format(mcc))

In [None]:
# Get Probabilites Data Frame
import pandas as pd

# Posterior class probabilities for every test compound (one row per compound).
# The rows are gathered in a list and turned into a DataFrame in one step:
# DataFrame.append was removed in pandas 2.0 and re-copied the frame on every iteration.
# The number of compounds is read from the prediction array instead of being hard-coded.
n_test_compounds = test_predictions_array.shape[1]
test_probabilities_df = pd.DataFrame(
    [probs(test_predictions_array, i) for i in range(n_test_compounds)],
    columns=['Class 0', 'Class 1', 'Class 2'],
)

# Print the probabilities DataFrame
print(test_probabilities_df)

In [None]:
# Assemble the per-compound results table for the test set and write it to disk.
#   test_actual_pred_df   -> actual and predicted labels
#   test_probabilities_df -> probability of each of the three classes
#   test_data[['Drug']]   -> drug names from the test dataset
#   X_test                -> scaled values of all feature variables
test_data_drug_names = test_data[['Drug']]

# Place the four tables next to each other, column-wise
test_result_parts = [test_actual_pred_df, test_probabilities_df, test_data_drug_names, X_test]
BNN_testData_concatenated_df = pd.concat(test_result_parts, axis=1)
print (BNN_testData_concatenated_df)

# Write the combined table as CSV without the index column
BNN_testData_concatenated_df.to_csv('ML_results/BNN_testData_Predictions.csv', index=False)

### Prediction probability figures on the Test DataSet for each Molecule

In [None]:
###### -- Prediction probability figures on the Test DataSet for each molecule

#-- For every test compound: a bar plot of the three class probabilities next to a
#-- dot plot of that compound's (scaled) feature values, saved as one PNG per compound.
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# To start from the saved table instead of the in-memory frame:
#BNN_testData_concatenated_df = pd.read_csv("ML_results/BNN_testData_Predictions.csv")

# Reorder the columns: name, class probabilities, true label, then the features.
# The frame built by the test-results cell is BNN_testData_concatenated_df; the name
# `BNN_concatenated_df` is not defined anywhere in this notebook.
df = BNN_testData_concatenated_df.loc[:, ['Drug','Class 0','Class 1','Class 2','Y_test','ClogP','BSEP','Glu','Glu_Gal','THLE','HepG2','Fsp3','log10cmax']]

# Folder that receives the PNGs; create it up front so savefig cannot fail on a missing directory
output_dir = 'ML_results/Predictions_BNN/test_data'
os.makedirs(output_dir, exist_ok=True)

# Set the width of the bars
bar_width = 0.2

# Set the positions of the bars on the x-axis (one bar per class-probability column)
bar_positions = np.arange(len(df.columns[1:4]))

# One colour per class bar
color_map = ['#008b00', '#b8860b', '#b22222']

#--- Feature columns shown in the dot plot and the common x-axis range across all of them
range_cols = df.columns[5:]
x_min = df[range_cols].values.min()
x_max = df[range_cols].values.max()

# Loop over each compound and save the plot as an image
for i, compound in enumerate(df['Drug']):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

    # Bar plot of the class probabilities (cast to float: a row slice of a mixed-type frame is object-typed)
    prediction_probs = df.iloc[i, 1:4].astype(float)
    ax1.bar(bar_positions, prediction_probs, width=bar_width, color=color_map)

    # Customize the plot
    ax1.set_xticks(bar_positions)
    ax1.set_xticklabels(df.columns[1:4])
    ax1.set_xlabel('')
    ax1.set_ylabel('Prediction Probability')
    ax1.set_title(f'Compound name- {compound} - (True Label: {df["Y_test"].iloc[i]})')

    # Dot plot: for each feature, a dashed line spanning its observed range and a dot at
    # this compound's value
    for j, col in enumerate(range_cols):
        ax2.hlines(y=j, xmin=df[col].min(), xmax=df[col].max(), colors='gray', linestyles='dashed', linewidth=1)
        ax2.plot([df[col].iloc[i]], [j], 'o', color='blue')

    # Customize the plot
    ax2.set_yticks(range(len(range_cols)))
    ax2.set_yticklabels(range_cols)
    ax2.set_xlabel('Values')
    ax2.set_ylabel('Variables')
    ax2.set_title(f'Compound name {compound}')
    ax2.set_xlim(x_min, x_max)

    # Save the plot as an image and release the figure so memory does not grow with the loop
    fig.subplots_adjust(wspace=0.5)
    fig.savefig(f'{output_dir}/{compound}_plot.png')
    plt.close(fig)

In [None]:
#---- Export the Animation Out of the Images
import os
from PIL import Image
from matplotlib import pyplot as plt

# Directory containing the images. This is the folder the test-set plotting cell saves
# its PNGs to; a '~' prefix is not expanded by os.listdir and must not be used here.
image_dir = 'ML_results/Predictions_BNN/test_data/'

# PNG file names in the directory, sorted by name
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.png'))

# Load every image into memory; copy() detaches the frame from the file so the handle
# can be closed immediately
frames = []
for image_file in image_files:
    image_path = os.path.join(image_dir, image_file)
    with Image.open(image_path) as image:
        frames.append(image.copy())

if not frames:
    raise FileNotFoundError(f'No PNG frames found in {image_dir}')

# Create the animation (play with `duration`, in milliseconds per frame, to change the speed).
# The first real frame is used as the base image: starting from a blank Image.new(...)
# would show an empty frame and drop the first compound from the GIF.
animation = frames[0]
animation.save('ML_results/BNN_test_Preds_Animation.gif', format='GIF', append_images=frames[1:], save_all=True, duration=10, loop=0)

In [None]:
import dill

# Bundle the prediction artefacts of both splits under their variable names.
# posterior_pred, trace and model are not part of the saved bundle.
workspace = dict(
    train_predictions_array=train_predictions_array,
    test_predictions_array=test_predictions_array,
    train_actual_pred_df=train_actual_pred_df,
    test_actual_pred_df=test_actual_pred_df,
    train_majority_labels=train_majority_labels,
    test_majority_labels=test_majority_labels,
    train_probabilities_df=train_probabilities_df,
    test_probabilities_df=test_probabilities_df,
    BNN_trainData_concatenated_df=BNN_trainData_concatenated_df,
    BNN_testData_concatenated_df=BNN_testData_concatenated_df,
    # Add other variables here
)

# Serialise the bundle to disk with dill
with open('Final_models/BNN_updated_Workspace.pkl', 'wb') as workspace_file:
    dill.dump(workspace, workspace_file)