In [1]:
import pandas as pd
import networkx as nx
import numpy as np
import os

from scipy.spatial.distance import pdist
from scipy import linalg

import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.metrics import adjusted_rand_score
from sklearn.cluster import SpectralClustering

In [4]:
data = pd.read_csv("all_rows.csv", index_col=0)
data

Unnamed: 0,time,Fc5.,Fc3.,Fc1.,Fcz.,Fc2.,Fc4.,Fc6.,C5..,C3..,...,Po3.,Poz.,Po4.,Po8.,O1..,Oz..,O2..,Iz..,Class,Index
0,0.00000,-16.0,-29.0,2.0,22.0,-12.0,-23.0,-46.0,-36.0,-26.0,...,-52.0,-35.0,-22.0,-33.0,-53.0,-21.0,-11.0,15.0,T0,S001R01
1,0.00625,-56.0,-54.0,-27.0,-4.0,-31.0,-36.0,-56.0,-75.0,-55.0,...,-29.0,-18.0,-3.0,-9.0,-53.0,-12.0,1.0,21.0,T0,S001R01
2,0.01250,-55.0,-55.0,-29.0,-5.0,-29.0,-34.0,-52.0,-53.0,-42.0,...,-12.0,-6.0,4.0,-7.0,-45.0,2.0,18.0,35.0,T0,S001R01
3,0.01875,-50.0,-44.0,-13.0,13.0,-16.0,-25.0,-45.0,-44.0,-21.0,...,4.0,14.0,20.0,7.0,-29.0,16.0,35.0,47.0,T0,S001R01
4,0.02500,-36.0,-28.0,13.0,42.0,9.0,-10.0,-46.0,-34.0,-12.0,...,6.0,20.0,24.0,6.0,-13.0,29.0,40.0,50.0,T0,S001R01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19915,124.46875,100.0,100.0,99.0,103.0,98.0,95.0,82.0,121.0,110.0,...,92.0,94.0,47.0,24.0,89.0,77.0,-6.0,37.0,T2,S001R14
19916,124.47500,117.0,128.0,130.0,124.0,119.0,122.0,106.0,133.0,131.0,...,87.0,96.0,68.0,63.0,78.0,74.0,7.0,43.0,T2,S001R14
19917,124.48125,122.0,142.0,147.0,141.0,136.0,144.0,135.0,124.0,130.0,...,86.0,106.0,107.0,110.0,71.0,82.0,44.0,56.0,T2,S001R14
19918,124.48750,116.0,135.0,142.0,142.0,129.0,124.0,90.0,120.0,132.0,...,88.0,99.0,104.0,96.0,72.0,81.0,53.0,44.0,T2,S001R14


In [5]:
data_df = data.drop(["time","Index","Class"], axis=1)
data_df

Unnamed: 0,Fc5.,Fc3.,Fc1.,Fcz.,Fc2.,Fc4.,Fc6.,C5..,C3..,C1..,...,P8..,Po7.,Po3.,Poz.,Po4.,Po8.,O1..,Oz..,O2..,Iz..
0,-16.0,-29.0,2.0,22.0,-12.0,-23.0,-46.0,-36.0,-26.0,-18.0,...,-30.0,-56.0,-52.0,-35.0,-22.0,-33.0,-53.0,-21.0,-11.0,15.0
1,-56.0,-54.0,-27.0,-4.0,-31.0,-36.0,-56.0,-75.0,-55.0,-43.0,...,-20.0,-35.0,-29.0,-18.0,-3.0,-9.0,-53.0,-12.0,1.0,21.0
2,-55.0,-55.0,-29.0,-5.0,-29.0,-34.0,-52.0,-53.0,-42.0,-35.0,...,-20.0,-23.0,-12.0,-6.0,4.0,-7.0,-45.0,2.0,18.0,35.0
3,-50.0,-44.0,-13.0,13.0,-16.0,-25.0,-45.0,-44.0,-21.0,-11.0,...,-16.0,-12.0,4.0,14.0,20.0,7.0,-29.0,16.0,35.0,47.0
4,-36.0,-28.0,13.0,42.0,9.0,-10.0,-46.0,-34.0,-12.0,8.0,...,-9.0,-7.0,6.0,20.0,24.0,6.0,-13.0,29.0,40.0,50.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19915,100.0,100.0,99.0,103.0,98.0,95.0,82.0,121.0,110.0,115.0,...,40.0,74.0,92.0,94.0,47.0,24.0,89.0,77.0,-6.0,37.0
19916,117.0,128.0,130.0,124.0,119.0,122.0,106.0,133.0,131.0,134.0,...,64.0,68.0,87.0,96.0,68.0,63.0,78.0,74.0,7.0,43.0
19917,122.0,142.0,147.0,141.0,136.0,144.0,135.0,124.0,130.0,138.0,...,85.0,65.0,86.0,106.0,107.0,110.0,71.0,82.0,44.0,56.0
19918,116.0,135.0,142.0,142.0,129.0,124.0,90.0,120.0,132.0,138.0,...,64.0,71.0,88.0,99.0,104.0,96.0,72.0,81.0,53.0,44.0


In [6]:
data["Class"], class_mappings = pd.factorize(data["Class"])
labels = data["Class"].tolist()
labels

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [7]:
# Pairwise distances
dimension = data_df.shape[0]
dist_ = pdist(data_df)
dist_mat = np.zeros([dimension, dimension])
dist = iter(dist_)
for i in range(dimension):
    for j in range(i+1, dimension):  
        d = next(dist)
        dist_mat[i,j] = d
        dist_mat[j,i] = d

MemoryError: Unable to allocate 249. GiB for an array with shape (33360349056,) and data type float64