In [None]:
import pandas as pd 
import numpy as np
import networkx as nx
import sys

from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.cluster import OPTICS
from sklearn.cluster import AgglomerativeClustering
from sklearn.cluster import SpectralClustering

import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import rcParams, cycler
from sklearn.metrics import silhouette_samples, silhouette_score

In [2]:
# Positional (half-open) column range of the sample table kept in df_csv_data.
DATA_COL_START = 2
DATA_COL_STOP = 2971

# Raw sample table, and the subset of its columns selected by position.
df_csv = pd.read_csv("mp_samples.csv")
df_csv_data = df_csv.iloc[:, DATA_COL_START:DATA_COL_STOP]

# Graph table with one edge per row.
df_graph = pd.read_csv("graph.csv")

In [None]:
def edge_list_from_graph(df_graph):
    """Extract a labelled edge list from the whole graph.

    The ``sid`` values are converted to ``str`` and prefixed with ``'p'``; the
    ``did`` values are converted to ``str`` and prefixed with ``'mp'``. The
    prefixes keep source and destination identifiers distinct as strings even
    when the raw IDs coincide.

    Args:
        df_graph (dataframe): dataframe containing the whole graph with all the
            edges in rows. Must provide the columns ``sid``, ``did`` and
            ``values``; any other columns are ignored.

    Returns:
        dataframe: a new dataframe with exactly the columns ``sid``, ``did``
            and ``values``. ``df_graph`` itself is left unmodified.
    """
    # Work on an explicit copy: assigning into the bare result of a column
    # selection makes pandas emit SettingWithCopyWarning, because it cannot
    # tell whether the caller meant to modify df_graph.
    df_edge_list = df_graph[['sid', 'did', 'values']].copy()
    df_edge_list['sid'] = 'p' + df_edge_list['sid'].astype(str)
    df_edge_list['did'] = 'mp' + df_edge_list['did'].astype(str)

    return df_edge_list

In [None]:
df_edge_list = edge_list_from_graph(df_graph)

# Keep only the edges whose weight is non-zero.
# The column must be selected with brackets: `df_edge_list.values` is the
# DataFrame attribute that returns the whole frame as an array, not the
# 'values' column, so comparing it to 0 does not test the edge weights alone.
df_edge_list_0_dropped = df_edge_list.loc[df_edge_list['values'] != 0]

# NOTE(review): the graph is later built from `df_edge_list`, not from this
# filtered frame -- confirm which one is intended.
df_edge_list_0_dropped

In [None]:
def adj_mat_from_edge_list(df_edge_list):
    """Build a weighted multigraph and its dense adjacency matrix from an edge list.

    Args:
        df_edge_list (dataframe): dataframe containing all edges between nodes and
            corresponding edge weights. Columns are read by position: the first
            two are the end nodes of an edge, the third is its weight. Any
            further columns are ignored.

    Returns:
        tuple: ``(G, adj_mat)`` where ``G`` is a ``networkx.MultiGraph`` with one
            edge per row of ``df_edge_list`` and ``adj_mat`` is the adjacency
            matrix of ``G`` as a dense NumPy array. Per the networkx
            documentation, rows/columns follow the order of ``G.nodes`` and the
            weights of parallel edges are summed.
    """
    G = nx.MultiGraph()

    # A MultiGraph keeps repeated (source, target) pairs as parallel edges
    # instead of overwriting the earlier weight.
    for source, target, weight, *_ in df_edge_list.values.tolist():
        G.add_edge(source, target, weight=weight)

    # `.toarray()` works on both SciPy sparse matrices and sparse arrays; the
    # `.A` shorthand is deprecated and not available on sparse arrays in newer
    # SciPy releases.
    adj_mat = nx.adjacency_matrix(G).toarray()

    return G, adj_mat

In [None]:
# Build the multigraph and its adjacency matrix from the edge list.
# NOTE(review): this passes the unfiltered `df_edge_list`; the zero-weight-dropped
# frame `df_edge_list_0_dropped` is not used here -- confirm that is intended.
G, adj_mat = adj_mat_from_edge_list(df_edge_list)
print(adj_mat)

In [None]:
# Degree of every node, in the order G.degree() yields the (node, degree) pairs.
degrees = [degree for _, degree in G.degree()]

In [None]:
# Number of degree entries collected from G.degree().
len(degrees)

In [None]:
# Dimensions of the adjacency matrix.
adj_mat.shape

In [None]:
# One row per graph node, in the iteration order of G.nodes.
df_cluster = pd.DataFrame({'nodes': list(G.nodes)})
df_cluster

In [None]:
# Print NumPy arrays in full (no summarisation) from this point on.
np.set_printoptions(threshold=np.inf)

# Spectral clustering on the adjacency matrix, used directly as the affinity.
# A fixed random_state makes the cluster assignment reproducible across kernel
# restarts; without it the K-Means label assignment step is randomly initialised.
# Assumes the rows of df_cluster are in the same node order as the rows of
# adj_mat -- TODO confirm if either is rebuilt separately.
df_cluster['cluster'] = SpectralClustering(
    n_clusters=4,
    affinity='precomputed',
    random_state=0,
).fit_predict(adj_mat)
df_cluster


In [None]:
def _node_label(node):
    """Return the group label for one node name.

    Names containing 'm' are labelled 'mp'. Any other name is split on 'p' and
    the integer that follows selects 'p_c' (below 18), 'p_cd' (below 29) or
    'p_uc' (everything else).
    """
    if 'm' in node:
        return 'mp'

    p = int(node.split("p")[1])
    if p < 18:
        return 'p_c'
    if p < 29:
        return 'p_cd'
    return 'p_uc'


# Label every node in one pass.
df_cluster['label'] = df_cluster['nodes'].map(_node_label)

df_cluster
        
        

In [None]:
# Rows of df_cluster split by the three 'p_*' labels.
cluster_p_c, cluster_p_cd, cluster_p_uc = (
    df_cluster[df_cluster['label'] == tag] for tag in ('p_c', 'p_cd', 'p_uc')
)

In [None]:
# NOTE(review): the labelling cell replaces every non-'mp' label with
# 'p_c' / 'p_cd' / 'p_uc', so `cluster_p` is expected to be empty -- confirm
# whether it is still needed.
cluster_p = df_cluster.loc[df_cluster['label'].eq('p')]
cluster_mp = df_cluster.loc[df_cluster['label'].eq('mp')]

In [None]:
# Rows of df_cluster split by assigned cluster id (0..3).
cluster_0, cluster_1, cluster_2, cluster_3 = (
    df_cluster[df_cluster['cluster'] == cluster_id] for cluster_id in range(4)
)

In [None]:
# For each cluster: the labels that occur in it and its (rows, columns) shape.
for members in (cluster_0, cluster_1, cluster_2, cluster_3):
    print(members.label.unique(), members.shape)

In [None]:
# Split every cluster into its four label groups, in the order
# 'p_c', 'p_cd', 'p_uc', 'mp'.
cluster_0_c, cluster_0_cd, cluster_0_uc, cluster_0_mp = (
    cluster_0[cluster_0['label'] == tag] for tag in ('p_c', 'p_cd', 'p_uc', 'mp')
)

cluster_1_c, cluster_1_cd, cluster_1_uc, cluster_1_mp = (
    cluster_1[cluster_1['label'] == tag] for tag in ('p_c', 'p_cd', 'p_uc', 'mp')
)

cluster_2_c, cluster_2_cd, cluster_2_uc, cluster_2_mp = (
    cluster_2[cluster_2['label'] == tag] for tag in ('p_c', 'p_cd', 'p_uc', 'mp')
)

cluster_3_c, cluster_3_cd, cluster_3_uc, cluster_3_mp = (
    cluster_3[cluster_3['label'] == tag] for tag in ('p_c', 'p_cd', 'p_uc', 'mp')
)

In [None]:
# Label and shape of each label group inside cluster 0.
for group in (cluster_0_c, cluster_0_cd, cluster_0_uc, cluster_0_mp):
    print(group.label.unique(), group.shape)

In [None]:
# Label and shape of each label group inside cluster 1.
for group in (cluster_1_c, cluster_1_cd, cluster_1_uc, cluster_1_mp):
    print(group.label.unique(), group.shape)

In [None]:
# Label and shape of each label group inside cluster 2.
for group in (cluster_2_c, cluster_2_cd, cluster_2_uc, cluster_2_mp):
    print(group.label.unique(), group.shape)

In [None]:
# Label and shape of each label group inside cluster 3.
for group in (cluster_3_c, cluster_3_cd, cluster_3_uc, cluster_3_mp):
    print(group.label.unique(), group.shape)

In [None]:
# Large canvas so the node labels stay readable (applies to later figures too).
plt.rcParams["figure.figsize"] = (20,20)

# spring_layout starts from random positions; a fixed seed keeps the drawing
# identical between runs.
pos = nx.spring_layout(G, seed=0)

# Only labels and edges are drawn -- node markers are intentionally not added here.
nx.draw_networkx_labels(G, pos)
nx.draw_networkx_edges(G, pos)
plt.show()