In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import silhouette_score
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
import matplotlib.pyplot as plt
import seaborn as sns

In [13]:
# Global figure styling: every plot produced below uses this font family.
plt.rcParams.update({'font.family': 'Times New Roman'})

In [14]:
# Load the compound table. Later cells read its 'NAME' and 'Toxicity_Score'
# columns, so both must be present in the workbook.
INPUT_PATH = "Toxicity_Data.xlsx"
df = pd.read_excel(INPUT_PATH)

In [15]:
# Double brackets keep the scores as a 2-D (n_samples, 1) array, which is the
# layout the scaler and the linkage routine expect.
toxicity_values = df[['Toxicity_Score']].to_numpy()

# Min-max rescale the single feature; fitting and transforming are done as two
# explicit steps on the same data.
scaler = MinMaxScaler().fit(toxicity_values)
toxicity_scaled = scaler.transform(toxicity_values)

In [16]:
# Agglomerative clustering of the scaled scores with Ward's criterion.
# Z is the linkage matrix consumed by dendrogram() and fcluster() below.
Z = linkage(toxicity_scaled, 'ward')

In [17]:
# Draw the full merge tree with one leaf per compound and write it to disk.
# The figure is closed afterwards, so nothing is rendered inline.
fig, ax = plt.subplots(figsize=(10, 6))
dendrogram(Z, labels=df['NAME'].values, leaf_rotation=90, ax=ax)
ax.set_title("Hierarchical Clustering Dendrogram of Compounds")
ax.set_xlabel("Compound")
ax.set_ylabel("Distance")
fig.tight_layout()
fig.savefig("dendrogram.png", dpi=300)
plt.close(fig)

In [18]:
# Cut the tree into flat clusters. With criterion='maxclust' the threshold `t`
# is an upper bound on the number of clusters, not a distance.
num_clusters = 3
cluster_labels = fcluster(Z, t=num_clusters, criterion='maxclust')

# Raw cluster ids are arbitrary; they are re-ranked by toxicity in the next cell.
df['Initial_Cluster'] = cluster_labels

In [19]:
# Mean toxicity per raw cluster, ordered from the most to the least toxic.
cluster_mean = (
    df.groupby('Initial_Cluster')['Toxicity_Score']
    .mean()
    .sort_values(ascending=False)
)

# Re-number clusters by that ordering: rank 1 is the cluster with the highest
# mean score, rank 2 the next, and so on.
risk_map = dict(zip(cluster_mean.index, range(1, len(cluster_mean) + 1)))
df['Risk_Level'] = df['Initial_Cluster'].map(risk_map)

In [20]:
# Silhouette coefficient of the final grouping, computed on the scaled scores
# with the re-ranked risk levels as cluster labels. The value is reused in the
# scatter-plot title below.
sil_score = silhouette_score(toxicity_scaled, df['Risk_Level'])
# Fixed the misspelled label ("silhoustte") in the printed message.
print(f"silhouette coefficient: {sil_score:.3f}")

silhoustte coefficient: 0.602


In [21]:
# Scatter of every compound's toxicity score, coloured by risk level, saved to
# disk and then closed (nothing is rendered inline).
#
# Risk_Level is a rank where 1 is the cluster with the HIGHEST mean toxicity.
# The levels are translated to names before plotting so that the legend text
# is derived from the data rather than relabelled by position. Relabelling via
# plt.legend(labels=["Low", "Medium", "High"]) inverted the meaning (level 1
# was shown as "Low") and depended on the order of artists on the axes.
risk_level_names = {1: 'High', 2: 'Medium', 3: 'Low'}
risk_order = ['High', 'Medium', 'Low']

fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    x=range(len(df)),
    y=df['Toxicity_Score'],
    hue=df['Risk_Level'].map(risk_level_names),
    # Same order as levels 1, 2, 3, so each level keeps the 'Set1' colour it
    # had when the numeric rank was used as hue.
    hue_order=risk_order,
    palette='Set1',
    ax=ax,
)
ax.set_title(f"Toxicity Clustering Scatter Plot (Silhouette Score = {sil_score:.3f})")
ax.set_xlabel("Sample Index")
ax.set_ylabel("Toxicity Score")

# Only retitle the legend seaborn already built; its handles and labels stay
# tied to the hue mapping.
legend = ax.get_legend()
if legend is not None:
    legend.set_title("Risk Level")

fig.tight_layout()
fig.savefig("cluster_scatter.png", dpi=300)
plt.close(fig)

In [22]:
# Build the export table: compound name, raw score, and the risk rank
# translated to a label (rank 1 = highest mean toxicity = 'High').
output_df = (
    df[['NAME', 'Toxicity_Score']]
    .rename(columns={'NAME': 'Compound_Name'})
    .assign(Risk_Level=df['Risk_Level'].map({1: 'High', 2: 'Medium', 3: 'Low'}))
)

# Write the classification without the DataFrame index column.
output_df.to_excel("Toxicity_Risk_Classification.xlsx", index=False)