<a href="https://colab.research.google.com/github/drewwint/psy535_dev_neuro_2024/blob/main/DevNeuro_walktrap_groups.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code to assign groups using walktrap

In [23]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [28]:
import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/dat_only.csv', header = None)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,0,0,0,1,1,0,1,0,0,0,1,0,0,1,1,1,0,1
1,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,1,0
2,0,0,0,0,0,0,1,0,1,1,0,0,1,1,0,1,1,0
3,1,0,0,0,1,1,0,1,0,0,1,0,0,0,1,0,0,1
4,1,0,0,1,0,0,0,1,1,0,1,1,0,0,1,0,0,0


In [153]:
df.shape


(18, 18)

In [162]:
import numpy as np
from scipy.sparse.csgraph import laplacian
from scipy.sparse.linalg import eigsh
from sklearn.cluster import KMeans

def walktrap_community_detection(adjacency_matrix, num_clusters):
    # Convert the adjacency matrix to a Laplacian matrix
    laplacian_matrix = laplacian(adjacency_matrix, normed=False)

    # Compute the eigenvalues and eigenvectors of the Laplacian matrix
    eigenvalues, eigenvectors = eigsh(laplacian_matrix, k=num_clusters, which='SM')

    # Use the eigenvectors to embed the graph into a low-dimensional space
    embedding = eigenvectors[:, 1:]  # Skip the first eigenvector (constant vector)

    # Apply K-means clustering to the embedded space
    kmeans = KMeans(n_clusters=num_clusters, random_state=0)
    labels = kmeans.fit_predict(embedding)

    # Ensure an even number of participants in each community label
    for label in set(labels):
        # Get indices of participants in the current community
        indices = np.where(labels == label)[0]
        if len(indices) % 2 != 0:
            # Find the participant closest to the centroid and move it to another community
            centroid = np.mean(embedding[indices], axis=0)
            distances = np.linalg.norm(embedding[indices] - centroid, axis=1)
            closest_index = indices[np.argmin(distances)]
            # Find another community with fewer participants
            other_labels = set(labels) - {label}
            min_size = min(len(np.where(labels == l)[0]) for l in other_labels)
            # Move the participant to the community with the minimum size
            for other_label in other_labels:
                if len(np.where(labels == other_label)[0]) == min_size:
                    labels[closest_index] = other_label
                    break

    return labels

# Example usage:
# Create an adjacency matrix (example)
adjacency_matrix = np.array([[0, 1, 1, 0],
                              [1, 0, 1, 1],
                              [1, 1, 0, 1],
                              [0, 1, 1, 0]])

# Run Walktrap community detection
import warnings
warnings.filterwarnings("ignore")
num_clusters = 2  # Number of clusters
community_labels = walktrap_community_detection(adjacency_matrix.astype("d"), num_clusters)
print("Community labels:", community_labels)


Community labels: [0 1 0 1]


In [163]:
import warnings
warnings.filterwarnings("ignore")
community_labels = walktrap_community_detection(np.array(df).astype("d"), 10)
print("Community labels:", community_labels)
print("number of labels", len(community_labels))
print("unique labels", np.unique(community_labels))
print("number of unique labels", len(np.unique(community_labels)))
print("counts", np.unique(community_labels, return_counts = True)[1])

Community labels: [1 9 8 1 6 5 3 2 0 8 4 5 6 4 9 2 0 3]
number of labels 18
unique labels [0 1 2 3 4 5 6 8 9]
number of unique labels 9
counts [2 2 2 2 2 2 2 2 2]


Recoding

In [164]:
community_labels[community_labels == 8] = 7
community_labels[community_labels == 9] = 8
print("Community labels:", community_labels)
print("number of labels", len(community_labels))
print("unique labels", np.unique(community_labels))
print("number of unique labels", len(np.unique(community_labels)))
print("counts", np.unique(community_labels, return_counts = True)[1])

Community labels: [1 8 7 1 6 5 3 2 0 7 4 5 6 4 8 2 0 3]
number of labels 18
unique labels [0 1 2 3 4 5 6 7 8]
number of unique labels 9
counts [2 2 2 2 2 2 2 2 2]


In [176]:
# importing random module
import random
# input list
inputList = list(range(1,37))
# removing repeating elements from the list using the set() function
resultSet=set(inputList)
# converting the set into a list(now the list has only unique elements)
uniqueList =list(resultSet)
# printing 4 random numbers from the list which is non-repeating
# print("4 non-repeating random numbers from the list are:")
group_weeks = []
a = []
for i in range(0,10):
  print(i)
  uniqueList = np.setdiff1d(uniqueList,a)
  if len(uniqueList) > 0:
    a = []
    a.append(random.sample(sorted(uniqueList), 4))
    print(a[0])
    group_weeks.append(a[0])
  else:
    print("done")

0
[25, 6, 31, 8]
1
[24, 11, 26, 4]
2
[34, 16, 30, 2]
3
[14, 9, 18, 36]
4
[29, 32, 1, 7]
5
[3, 10, 19, 23]
6
[13, 17, 20, 15]
7
[35, 33, 5, 27]
8
[12, 21, 22, 28]
9
done


In [177]:
readings = pd.DataFrame({"read1":np.zeros(len(community_labels)),"read2":np.zeros(len(community_labels)),"read3":np.zeros(len(community_labels)),"read4":np.zeros(len(community_labels))})
for i in range(0,np.unique(community_labels).max()+1):
  a = np.where(community_labels == i)[0][0]
  b = np.where(community_labels == i)[0][1]
  for j in range(readings.shape[1]):
    readings.iloc[a,j] = group_weeks[community_labels[i]][j]
    readings.iloc[b,j] = group_weeks[community_labels[i]][j]


In [None]:
readings

In [167]:
names = pd.read_csv('/content/drive/MyDrive/names.csv')

In [178]:
grp = pd.DataFrame({"names":names.columns[1:], "group": community_labels})
grp_assign = pd.concat([grp,readings], axis=1)
grp_assign.to_csv('/content/drive/MyDrive/grp_assign.csv')

In [179]:
grp_assign

Unnamed: 0,names,group,read1,read2,read3,read4
0,"Ahamadeen, Naheeda",1,12.0,21.0,22.0,28.0
1,"Bogdan, Kristina",8,25.0,6.0,31.0,8.0
2,"Burns, Marcia",7,34.0,16.0,30.0,2.0
3,"Carson, Ian",1,12.0,21.0,22.0,28.0
4,"Crawford, Christopher",6,14.0,9.0,18.0,36.0
5,"Fisher-Fox, Lindsey",5,3.0,10.0,19.0,23.0
6,"Huser, William",3,24.0,11.0,26.0,4.0
7,"Lipuma, Timothy",2,35.0,33.0,5.0,27.0
8,"Martin, Catherine",0,24.0,11.0,26.0,4.0
9,"Nguyen, Thanh",7,34.0,16.0,30.0,2.0
