# In [None]:
#Importing Important Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow import keras
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.cluster import KMeans
from sklearn import metrics

# Load the simulated dataset and discard the 'Unnamed: 0' column
# (presumably a saved row index -- confirm against the CSV).
data = pd.read_csv("Simulated_AD_data.csv").drop(columns=['Unnamed: 0'])

# Features are the columns at positions 2..23 of the remaining frame;
# the label is the 'Drug' column.
feature_columns = data.columns[2:24]
Y = data['Drug']

# Standardise the features; X becomes the array returned by the scaler.
sc = StandardScaler()
X = sc.fit_transform(data[feature_columns])

#Building the model for AutoEncoder
# The layer widths are derived from the scaled data instead of repeating a
# hard-coded 22, so the model stays consistent with whatever the column
# selection actually produced.
n_features = X.shape[1]
latent_dim = 2

# Each half is a single Dense layer with no activation argument.
# An explicit keras.Input is used instead of the `input_shape=` layer argument,
# which newer Keras releases warn about.
encoder = keras.models.Sequential([
    keras.Input(shape=(n_features,)),
    keras.layers.Dense(latent_dim),
])
decoder = keras.models.Sequential([
    keras.Input(shape=(latent_dim,)),
    keras.layers.Dense(n_features),
])

# Chain encoder and decoder; the model is trained to reproduce its input.
autoencoder = keras.models.Sequential([encoder, decoder])
autoencoder.compile(loss='mse', optimizer='adam')

#Fitting the data to the model
# EarlyStopping watches 'val_loss' by default. No validation data is passed to
# fit(), so that metric never exists and the callback could never trigger.
# Monitoring the training loss makes the patience setting effective.
early_stopping = keras.callbacks.EarlyStopping(monitor='loss', patience=10)
history = autoencoder.fit(X, X, epochs=100, callbacks=[early_stopping])

#Obtaining 2-dimensional matrix for the data
# The encoder shares its layer with the trained autoencoder, so this is the
# learned low-dimensional representation of every row of X.
lower_dim = encoder.predict(X)

# Cluster the encoded points and keep the coordinates of the cluster centroids
# (the distance between them is computed further down).
k = 2
kmeans_model = KMeans(n_clusters=k)
kmeans_model.fit(lower_dim)
# Take a copy so `centers` is independent of the fitted estimator's attribute.
centers = np.array(kmeans_model.cluster_centers_)
def euclidean_distance(point1, point2):
    """Return the Euclidean (straight-line) distance between two points.

    Both arguments may be any array-like of matching shape; the result is
    the square root of the summed squared coordinate differences.
    """
    offset = np.asarray(point1) - np.asarray(point2)
    return np.sqrt(np.square(offset).sum())
# Distance between the two cluster centroids, shown as a label on the plot.
distance = euclidean_distance(centers[0], centers[1])

# Scatter plot of the encoded data, one colour per treatment arm.
plotdata = pd.DataFrame({'E0': lower_dim[:, 0], 'E1': lower_dim[:, 1], 'Drug': Y})
ax = None
for drug_name, drug_colour in (('Placebo', 'green'), ('Solanezumab', 'red')):
    # The first call creates the axes; the second draws onto the same axes.
    ax = plotdata[plotdata.Drug == drug_name].plot(
        kind="scatter", x='E0', y='E1', color=drug_colour, label=drug_name, ax=ax
    )

# Mark both centroids, join them with a line and print the distance
# slightly above the midpoint of that line.
(x0, y0), (x1, y1) = centers[0], centers[1]
plt.scatter(centers[:, 0], centers[:, 1], color=['blue'], s=100)
plt.plot([x0, x1], [y0, y1], color='black')
plt.text((x0 + x1) / 2, ((y0 + y1) / 2) + 0.25, f'{distance:.2f}', color='black')

fig = plt.gcf()
fig.set_size_inches(12, 8)
plt.show()

#Evaluating the effects of different optimizers on the model
optimizers = ['SGD','RMSprop','adam','ftrl','lion']
# Iterate over the list itself: the previous range(0, 6) indexed one element
# past the end and raised IndexError before the figure was ever shown.
for optimizer_name in optimizers:
    # Train a freshly initialised copy of the architecture for every optimizer.
    # Re-compiling the already trained model would let each optimizer continue
    # from the weights left by the previous one, which makes the curves
    # incomparable. (Each copy gets its own random initialisation.)
    candidate = keras.models.clone_model(autoencoder)
    candidate.compile(loss='mse', optimizer=optimizer_name)
    # Monitor the training loss: the default 'val_loss' is never available
    # because no validation data is supplied.
    history = candidate.fit(
        X, X, epochs=100,
        callbacks=[keras.callbacks.EarlyStopping(monitor='loss', patience=10)],
    )
    plt.plot(history.history['loss'], label=optimizer_name)

# Figure decoration only needs to happen once, after all curves are drawn.
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Effects of optimizers on the model")
plt.legend()
plt.show()

#Evaluating the effects of different losses on the model
losses = ['mean_squared_error', 'mean_absolute_error', 'mean_squared_logarithmic_error', 'cosine_similarity', 'huber']
for loss_name in losses:
    # Start every run from a freshly initialised copy of the architecture so
    # that one loss function does not inherit the weights trained under the
    # previous one. (Each copy gets its own random initialisation.)
    candidate = keras.models.clone_model(autoencoder)
    candidate.compile(loss=loss_name, optimizer='adam')
    # Monitor the training loss: the default 'val_loss' is never available
    # because no validation data is supplied.
    history = candidate.fit(
        X, X, epochs=100,
        callbacks=[keras.callbacks.EarlyStopping(monitor='loss', patience=10)],
    )
    # NOTE(review): the plotted values are each loss function's own output,
    # so the curves are not on a common scale -- compare shapes, not levels.
    plt.plot(history.history['loss'], label=loss_name)

# Figure decoration only needs to happen once, after all curves are drawn.
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Effects of losses on the model")
plt.legend()
plt.show()