In [None]:
import markov_clustering as mc
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns

from networkx.algorithms import community as nx_comm

In [None]:
# Load the square matrix stored in the CSV; its first column supplies the row labels.
# Judging by the variable name and the `1 - dist` conversion below, the file holds
# distances rather than similarities (NOTE(review): confirm against the data source).
dist = pd.read_csv("file.csv", index_col=0)

# Convert to similarities (1 - distance) and show per-column summary statistics.
simil = dist.rsub(1)
simil.describe()

In [None]:
# Largest per-column median of the similarity matrix (the "50%" row of describe()).
column_medians = simil.describe().loc["50%"]
max(column_medians)

In [None]:
# Similarities at or below this cut-off are treated as "no edge".
# NOTE(review): 0.59 presumably comes from the largest column median computed in the
# previous cell -- confirm and keep the two in sync if the input data changes.
SIMILARITY_THRESHOLD = 0.59

# Shift by the cut-off and clamp negatives to zero so weak similarities vanish.
simil_sparse = (simil - SIMILARITY_THRESHOLD).clip(lower=0.)

# Take an explicit, writable copy before editing in place: np.asarray(DataFrame) may
# return a view of the frame's data, which is read-only when pandas Copy-on-Write is
# active and would make np.fill_diagonal raise. The copy also leaves `simil_sparse`
# itself untouched.
simil_sparse_array = simil_sparse.to_numpy(copy=True)

# Remove self-similarity so the graph built from this array has no self-loops.
np.fill_diagonal(simil_sparse_array, 0)
print(simil_sparse_array)

In [None]:
# Row labels of the thresholded similarity frame; position i names graph node i.
node_names = simil_sparse.index.tolist()

# Weighted graph whose edges are the non-zero entries of the thresholded array.
network = nx.from_numpy_array(simil_sparse_array)

# Adjacency matrix of that graph in SciPy sparse form
# (dense alternative: nx.to_numpy_array(network)).
matrix = nx.to_scipy_sparse_array(network)


In [None]:


# Inflation values to scan: 1.5, 1.6, ..., 2.6.
# np.arange with a fractional step has a numerically unstable length (whether the end
# point is included depends on rounding) and yields values such as 1.6000000000000001.
# An explicit linspace grid, rounded to one decimal, makes the scanned values exact
# and deterministic.
inflation_values = np.round(np.linspace(1.5, 2.6, 12), 1).tolist()

# Run MCL for every inflation value and report the modularity of the clustering.
# NOTE(review): this sweep uses expansion=4 while the final clustering cell uses
# expansion=2 -- the modularities printed here do not describe that final setting;
# confirm which expansion value is intended.
# nx_comm.modularity requires `clusters` to be a partition of the graph's nodes and
# raises if MCL returns overlapping clusters.
for inflation in inflation_values:
    result = mc.run_mcl(matrix, inflation=inflation, expansion=4)
    clusters = mc.get_clusters(result)
    modularity = nx_comm.modularity(network, clusters)
    print(f"Inflation: {inflation}, Modularity: {modularity}")

In [None]:
# Final Markov Clustering run with the chosen parameters.
result = mc.run_mcl(matrix, inflation=2.6, expansion=2)
clusters = mc.get_clusters(result)

# Quality of the clustering on the similarity graph.
modularity = nx_comm.modularity(network, clusters)
print(f"Modularity: {modularity}")

# Map every node name to the id of its cluster. Clusters hold integer node indices,
# which are translated through `node_names`. If a node appears in more than one
# cluster, the last cluster listed wins.
node_cluster_dict = {
    node_names[node_idx]: cluster_id
    for cluster_id, cluster in enumerate(clusters)
    for node_idx in cluster
}

# Node positions for drawing. The force-directed layout starts from random positions,
# so a fixed seed is required for the figure to be reproducible between runs.
# Other layouts that can be swapped in: nx.spring_layout(network),
# nx.fruchterman_reingold_layout(network, k=2), nx.kamada_kawai_layout(network).
LAYOUT_SEED = 42
pos = nx.fruchterman_reingold_layout(network, seed=LAYOUT_SEED)

# Draw the graph with nodes coloured by cluster at the positions computed above.
mc.draw_graph(matrix, clusters, pos=pos, node_size=15, with_labels=False, edge_color="silver")

# List every node together with its cluster id.
for node_name, cluster_id in node_cluster_dict.items():
    print(f"Node: {node_name}, Cluster: {cluster_id}")

plt.show()



In [None]:

# Hierarchically clustered heatmap of the graph's adjacency matrix.
# clustermap expects dense 2-D data, so a SciPy sparse adjacency matrix is densified
# first (a plain ndarray, e.g. from nx.to_numpy_array, is passed through unchanged).
adjacency_dense = matrix.toarray() if hasattr(matrix, "toarray") else np.asarray(matrix)

# clustermap is a figure-level function: it creates its own figure, so the size is
# passed via `figsize` instead of opening a separate (and then empty) plt.figure().
cluster_grid = sns.clustermap(
    adjacency_dense,
    cmap="coolwarm",
    figsize=(10, 8),
    xticklabels=node_names,
    yticklabels=node_names,
)

# Label the heatmap axes explicitly; plt.xlabel/plt.ylabel/plt.title act on whichever
# axes happens to be current, which is not guaranteed to be the heatmap.
cluster_grid.ax_heatmap.set_xlabel("Nodes")
cluster_grid.ax_heatmap.set_ylabel("Nodes")
cluster_grid.ax_heatmap.figure.suptitle("Matrix Heatmap")

plt.show()


In [None]:
# One row per node with the id of the cluster it was assigned to.
cluster_df = pd.DataFrame.from_dict(node_cluster_dict, orient='index', columns=['Cluster'])

# Build the cluster-sorted matrix in this cell so it runs on a fresh kernel; the
# frame was previously only defined by a later cell, which made this cell fail with a
# NameError under Restart & Run All.
# NOTE(review): the distance matrix `dist` is assumed to be the intended source here
# -- confirm.
df_with_clusters = pd.merge(dist, cluster_df, left_index=True, right_index=True, how="outer")
df_with_clusters = df_with_clusters.sort_values(by="Cluster")
# Put the columns in the same order as the rows; this also drops the helper
# "Cluster" column, leaving only the matrix values.
df_with_clusters = df_with_clusters.reindex(list(df_with_clusters.index), axis=1)

# Create a figure for the heatmap
plt.figure(figsize=(200, 200))

# Plot the matrix with rows and columns grouped by cluster
sns.heatmap(df_with_clusters)

plt.show()

In [None]:
######### for dist
# Heatmap of the distance matrix with rows/columns grouped by MCL cluster and a
# colour strip on the left showing each row's cluster.

# One row per node with the id of the cluster it was assigned to.
cluster_df = pd.DataFrame.from_dict(node_cluster_dict, orient='index', columns=["Cluster"])

# Attach the cluster id to every matrix row and sort the rows by cluster.
df_with_clusters = pd.merge(dist, cluster_df, left_index=True, right_index=True, how="outer")
df_with_clusters = df_with_clusters.sort_values(by="Cluster")

# Cluster id of every row in the order the rows will be plotted. This must be
# captured before the helper column is dropped below.
row_clusters = df_with_clusters["Cluster"]

# Put the columns in the same order as the rows; this also drops "Cluster".
df_with_clusters = df_with_clusters.reindex(list(df_with_clusters.index), axis=1)

# One colour per cluster id. "Set1" has a limited number of distinct colours, so
# colours repeat when there are many clusters.
cluster_ids = cluster_df["Cluster"].unique()
cluster_palette = sns.color_palette("Set1", n_colors=len(cluster_ids))
cluster_colors = dict(zip(cluster_ids, cluster_palette))

# Colours in plotted row order. Taking them from `cluster_df` instead would follow
# the dictionary's insertion order, which need not match the sorted rows.
row_colors = row_clusters.map(cluster_colors)

# Create the figure and axes for the heatmap
fig, ax = plt.subplots(figsize=(200, 200))  # Adjust the size as needed

# Plot the heatmap and make room for the colour strip next to the tick labels
sns.heatmap(df_with_clusters, ax=ax)
ax.tick_params(axis='y', which='major', pad=20, length=0)

# Draw one coloured rectangle per row just left of the axes
# (x in axes coordinates, y in data coordinates).
for row_pos, cluster_id in enumerate(row_clusters):
    strip_color = cluster_colors.get(cluster_id)
    if strip_color is None:
        # Row without a cluster assignment (NaN after the outer merge): no strip.
        continue
    ax.add_patch(plt.Rectangle(xy=(-0.05, row_pos), width=0.05, height=1, color=strip_color, lw=0,
                               transform=ax.get_yaxis_transform(), clip_on=False))

plt.tight_layout()
plt.savefig('heatmap_dist.png', format='png')
plt.show()


In [None]:
########### for result
# Heatmap of the MCL result matrix with rows/columns grouped by cluster and a colour
# strip on the left showing each row's cluster.

# run_mcl may hand back a SciPy sparse matrix, which pd.DataFrame cannot turn into a
# labelled 2-D frame; densify first (a plain ndarray is passed through unchanged).
result_dense = result.toarray() if hasattr(result, "toarray") else np.asarray(result)
df = pd.DataFrame(result_dense, index=node_names, columns=node_names)

# One row per node with the id of the cluster it was assigned to.
cluster_df = pd.DataFrame.from_dict(node_cluster_dict, orient='index', columns=["Cluster"])

# Attach the cluster id to every matrix row and sort the rows by cluster.
df_with_clusters = pd.merge(df, cluster_df, left_index=True, right_index=True, how="outer")
df_with_clusters = df_with_clusters.sort_values(by="Cluster")

# Cluster id of every row in the order the rows will be plotted. This must be
# captured before the helper column is dropped below.
row_clusters = df_with_clusters["Cluster"]

# Put the columns in the same order as the rows; this also drops "Cluster".
df_with_clusters = df_with_clusters.reindex(list(df_with_clusters.index), axis=1)

# One colour per cluster id. "Set1" has a limited number of distinct colours, so
# colours repeat when there are many clusters.
cluster_ids = cluster_df["Cluster"].unique()
cluster_palette = sns.color_palette("Set1", n_colors=len(cluster_ids))
cluster_colors = dict(zip(cluster_ids, cluster_palette))

# Colours in plotted row order. Taking them from `cluster_df` instead would follow
# the dictionary's insertion order, which need not match the sorted rows.
row_colors = row_clusters.map(cluster_colors)

# Create the figure and axes for the heatmap
fig, ax = plt.subplots(figsize=(200, 200))  # Adjust the size as needed

# Plot the heatmap and make room for the colour strip next to the tick labels
sns.heatmap(df_with_clusters, ax=ax)
ax.tick_params(axis='y', which='major', pad=20, length=0)

# Draw one coloured rectangle per row just left of the axes
# (x in axes coordinates, y in data coordinates).
for row_pos, cluster_id in enumerate(row_clusters):
    strip_color = cluster_colors.get(cluster_id)
    if strip_color is None:
        # Row without a cluster assignment (NaN after the outer merge): no strip.
        continue
    ax.add_patch(plt.Rectangle(xy=(-0.05, row_pos), width=0.05, height=1, color=strip_color, lw=0,
                               transform=ax.get_yaxis_transform(), clip_on=False))

plt.tight_layout()
plt.savefig('heatmap_result.png', format='png')
plt.show()



In [None]:
########### for simil
# Heatmap of the thresholded similarity matrix with rows/columns grouped by MCL
# cluster and a colour strip on the left showing each row's cluster.

# Labelled frame around the thresholded similarity array.
df2 = pd.DataFrame(simil_sparse_array, index=node_names, columns=node_names)

# One row per node with the id of the cluster it was assigned to.
cluster_df = pd.DataFrame.from_dict(node_cluster_dict, orient='index', columns=["Cluster"])

# Attach the cluster id to every matrix row and sort the rows by cluster.
df_with_clusters = pd.merge(df2, cluster_df, left_index=True, right_index=True, how="outer")
df_with_clusters = df_with_clusters.sort_values(by="Cluster")

# Cluster id of every row in the order the rows will be plotted. This must be
# captured before the helper column is dropped below.
row_clusters = df_with_clusters["Cluster"]

# Put the columns in the same order as the rows; this also drops "Cluster".
df_with_clusters = df_with_clusters.reindex(list(df_with_clusters.index), axis=1)

# One colour per cluster id. "Set1" has a limited number of distinct colours, so
# colours repeat when there are many clusters.
cluster_ids = cluster_df["Cluster"].unique()
cluster_palette = sns.color_palette("Set1", n_colors=len(cluster_ids))
cluster_colors = dict(zip(cluster_ids, cluster_palette))

# Colours in plotted row order. Taking them from `cluster_df` instead would follow
# the dictionary's insertion order, which need not match the sorted rows.
row_colors = row_clusters.map(cluster_colors)

# Create the figure and axes for the heatmap
fig, ax = plt.subplots(figsize=(200, 200))  # Adjust the size as needed

# Plot the heatmap and make room for the colour strip next to the tick labels
sns.heatmap(df_with_clusters, ax=ax)
ax.tick_params(axis='y', which='major', pad=20, length=0)

# Draw one coloured rectangle per row just left of the axes
# (x in axes coordinates, y in data coordinates).
for row_pos, cluster_id in enumerate(row_clusters):
    strip_color = cluster_colors.get(cluster_id)
    if strip_color is None:
        # Row without a cluster assignment (NaN after the outer merge): no strip.
        continue
    ax.add_patch(plt.Rectangle(xy=(-0.05, row_pos), width=0.05, height=1, color=strip_color, lw=0,
                               transform=ax.get_yaxis_transform(), clip_on=False))

plt.tight_layout()
plt.savefig('heatmap_simil_sparse_array.png', format='png')
plt.show()

