In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw connectivity table from the Excel workbook into `df`.
# NOTE(review): this is an absolute, machine-specific path; it must be adjusted
# before the notebook can run on another computer.
raw_connectivity_path = r"D:\Gordon Lab\codex SEZ TPN project\Paper preparation\paper_final raw data\connectivity\ACh clustering\ACH clustering_final.xlsx"
df = pd.read_excel(raw_connectivity_path)
# Dump the loaded table so its contents can be checked by eye.
print(df)

In [None]:
# Clean the table: index rows by 'Name', remove the annotation columns,
# replace missing entries with 0, then transpose so that the former columns
# become the rows that are clustered downstream.
columns_to_drop = ['GRN type', 'Side', 'clusterlabel', 'GRN']
df = df.set_index('Name')
df = df.drop(columns=columns_to_drop)
# Blank cells are treated as zero (presumably "no connection" — confirm against the sheet).
df_a = df.fillna(0)
df_final = df_a.T
# Row labels of the transposed table, kept as a plain Python list.
y = df_final.index.to_list()

In [None]:
# Pull the cleaned, transposed table out as a plain NumPy array for preprocessing.
data_x = df_final.to_numpy()

In [None]:
# Convert the data array to a sparse matrix, L2-normalize every row, and
# reduce it to `n_components` dimensions with truncated SVD.
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import normalize

n_components = 10

# axis=1 scales each row (one sample) to unit L2 norm.
sparse_matrix = normalize(csr_matrix(data_x), norm='l2', axis=1)

# Fixed random_state keeps the randomized SVD solver reproducible.
svd = TruncatedSVD(n_components=n_components, random_state=42)
reduced_data = svd.fit_transform(sparse_matrix)

# Fraction of total variance retained by the kept components.
total_explained_variance = svd.explained_variance_ratio_.sum()
print(total_explained_variance)

In [None]:
# Hierarchical clustering of the SVD-reduced rows: average linkage on
# correlation distance, drawn as a dendrogram labelled with the row names.
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
from scipy.cluster.hierarchy import dendrogram, linkage

Z = linkage(reduced_data, metric='correlation', method='average')

plt.figure(figsize=(20, 5))
# The returned dict is kept because later cells read its 'leaves' order.
dendrogram_result = dendrogram(
    Z,
    labels=df_final.index,
    leaf_rotation=90,
    leaf_font_size=10,
)
#plt.savefig('dendrogram_ACH interneuron clustering.pdf',dpi=300)

In [None]:
# Put the rows of the cleaned table into dendrogram leaf order for plotting.
# 'leaves' holds positional row indices, hence the use of .iloc.
dendrogram_order = dendrogram_result['leaves']
data_reordered = df_final.iloc[dendrogram_order].fillna(0)
#data_reordered.to_excel('Ordered ACh clustering.xlsx')

In [None]:
# Silhouette analysis to help choose the number of clusters.
# For each candidate cluster count, fit the same agglomerative model used for
# the final clustering (average linkage, correlation distance) and record the
# mean silhouette score of the resulting partition.
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score

cluster_range = range(2, 10)
silhouette_scores = []

for n_clusters in cluster_range:
    clustering = AgglomerativeClustering(n_clusters=n_clusters, linkage='average', metric='correlation')
    # Use a loop-specific name so that re-running this cell never overwrites
    # the `cluster_labels` produced by the final clustering cell.
    candidate_labels = clustering.fit_predict(reduced_data)

    # Score with the same distance the clustering was built on; the default
    # (euclidean) would evaluate the partition in a different geometry.
    silhouette_avg = silhouette_score(reduced_data, candidate_labels, metric='correlation')
    silhouette_scores.append(silhouette_avg)

plt.plot(cluster_range, silhouette_scores, marker='o')
plt.xlabel("Number of Clusters")
plt.ylabel("Silhouette Score")
plt.title("Silhouette Score vs. Number of Clusters")
plt.grid(True)
#plt.savefig('Silhouette score ACH interneuron clustering.pdf',dpi=300)

In [None]:
# Fit the final agglomerative clustering (average linkage, correlation
# distance) and keep one cluster label per row of `reduced_data`.
# NOTE(review): nothing in this cell writes the labels to an Excel sheet;
# add an explicit export here if that file is needed.
from sklearn.cluster import AgglomerativeClustering

n_final_clusters = 6
cluster = AgglomerativeClustering(n_clusters=n_final_clusters, linkage='average', metric='correlation')
cluster_labels = cluster.fit(reduced_data).labels_

In [None]:
# Wrap the reduced data in a DataFrame indexed by the original row labels,
# then reorder its rows to dendrogram leaf order (used for the distance matrix).
df_plot = pd.DataFrame(reduced_data)
index_values = df_final.index.to_list()
df_plot = df_plot.assign(Index=index_values).set_index('Index')
# 'leaves' are positional indices into the rows that were clustered.
dendrogram_order = dendrogram_result['leaves']
reduceddata_reordered = df_plot.iloc[dendrogram_order]

In [None]:
# Heatmap of pairwise correlation distances between rows of the reduced data,
# with rows/columns in dendrogram leaf order.
import seaborn as sns
from sklearn.metrics.pairwise import pairwise_distances

# Despite its name, `corr_matrix` holds correlation *distances*, not correlations.
corr_matrix = pairwise_distances(reduceddata_reordered, metric='correlation')
heatmap_options = dict(cmap='Reds', square=True)
sns.heatmap(corr_matrix, **heatmap_options)
#plt.savefig('ACh clustering correlation distance matrix.pdf',dpi=300)

In [None]:
# Embed the reduced data in 2-D with UMAP and draw a scatter plot coloured by
# the final cluster labels.
import matplotlib.pyplot as plt
import seaborn as sns
import umap
import umap.plot

# Fixed random_state keeps the embedding reproducible between runs.
reducer = umap.UMAP(n_neighbors=30, min_dist=0.5, metric='correlation', random_state=42)
umapd = reducer.fit_transform(reduced_data)

plt.figure(figsize=(8, 6))
# x, y and hue are passed as explicit vectors, so no `data=` argument is
# given: a bare ndarray there is redundant and is rejected by newer seaborn.
sns.scatterplot(x=umapd[:, 0], y=umapd[:, 1], hue=cluster_labels, palette='rainbow', s=50)
plt.legend(frameon=False)
plt.xlabel('UMAP_1')
plt.ylabel('UMAP_2')
#plt.savefig("clustering result_ACh interneurons_without labels.pdf", format="pdf", dpi=300)