In [None]:
# Para visualización en notebook
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (8,6)
mpl.rcParams['font.size'] = 16
import seaborn as sns
import math

# Números y Datos
import numpy as np
import pandas as pd

#version Paola 2.3
import networkx as nx

from glob import glob
from pathlib import Path

In [None]:
def parseStageAndSubject(filename):
    """
    Split a data-file path into its sleep stage and subject identifier.

    The file name (without directory and extension) is expected to look like
    ``<stage>_<subject>``; only the first underscore separates the two parts.
    Path handling is delegated to ``pathlib`` so the result does not depend on
    the path separator of the operating system or on underscores appearing in
    directory names.

    Parameters
    ----------
    filename : str
        Path to a CSV file, e.g. ``'DataSujetos/W_suj1.csv'``.

    Returns
    -------
    tuple of (str, str)
        ``(stage, subject)``.

    Raises
    ------
    ValueError
        If the file name contains no underscore.
    """
    stem = Path(filename).stem
    stage, separator, subject = stem.partition('_')
    if not separator:
        raise ValueError("Unexpected file name (no '_' between stage and subject): " + str(filename))
    return stage, subject


filenames = glob('DataSujetos/*.csv')

print("Nro de archivos = " + str(len(filenames)))

# Collect the distinct subjects and sleep stages encoded in the file names.
subjects = set()
sleepStages = set()
for filename in filenames:
    stage, subject = parseStageAndSubject(filename)
    subjects.add(subject)
    sleepStages.add(stage)
print("Nro de sujetos = " + str(len(subjects)))
print("Nro de estadios = " + str(len(sleepStages)))

# Fixed display order of the stages, used by the plotting functions.
sleepStagesOrdered = ("W", "N1", "N2", "N3")

print("sleepStages = " + str(sleepStages))
print("sleepStagesOrdered = " + str(sleepStagesOrdered))

# Lower bound established in the paper
min_density = 0.025
#max_density = 0.16982759
max_density = 0.03534483
step_density = 0.01
count_density = math.ceil((max_density - min_density)/step_density)

# Density grid from min_density to max_density (both included).
# NOTE: np.linspace takes a number of points, not a step, so the actual spacing is
# (max_density - min_density) / (count_density - 1), which only approximates step_density.
densities = np.linspace(min_density,max_density,count_density)
print("densidades:" + str(len(densities)))

In [None]:
# Reads a CSV file holding the input matrix and returns one graph per requested edge density
def subjectmatrix2graphslist(filename, densities):
    """
    Read a matrix from a CSV file and build one thresholded graph per density.

    For every density ``d`` the threshold is the value found at position
    ``int(d * max_links)`` of the lower-triangle entries sorted in descending
    order; every entry greater than or equal to that threshold becomes an edge.

    Parameters
    ----------
    filename : str
        Path to a header-less CSV file containing the matrix.
        Assumed to be square and symmetric with ones on the diagonal -- TODO confirm.
    densities : iterable of float
        Target edge densities, each within [0, 1].

    Returns
    -------
    list
        One graph (built with ``nx.from_pandas_adjacency``) per density, in order.

    Raises
    ------
    ValueError
        If a density lies outside [0, 1].
    """
    print(filename)
    # header=None: the file has no header row. (A negative header value is rejected by
    # current pandas releases.)
    correlation_matrix = pd.read_csv(filename, header=None)
    n = correlation_matrix.shape[0]

    # Subtract the identity so that a unit diagonal becomes zero; otherwise the diagonal
    # would survive the thresholding below and produce self loops.
    correlation_matrix -= np.diag(np.ones(n))
    max_links = (n*n-n)//2

    # Indices of the strict lower triangle of the n*n matrix
    tril_idx = np.tril_indices(n,-1)
    sorted_correlations = sorted(correlation_matrix.values[tril_idx].flatten(),reverse=True)

    Gs = []
    for d in densities:
        if not 0 <= d <= 1:
            raise ValueError("density must be within [0, 1], got " + str(d))
        # Clamp so that d == 1.0 selects the weakest link instead of indexing past the end.
        idx = min(int(d*max_links), max_links - 1)
        threshold = sorted_correlations[idx]
        adjacency_matrix = (correlation_matrix>=threshold)*1
        Gs.append(nx.from_pandas_adjacency(adjacency_matrix))

    return Gs

def graphslistBySleepStage(sleepStage, densities):
    """
    Build the graphs of every subject file belonging to one sleep stage.

    Looks for files matching ``DataSujetos/<sleepStage>_*.csv`` and returns a dict
    mapping each file stem (name without directory and extension) to the list of
    graphs produced by ``subjectmatrix2graphslist`` for ``densities``.
    """
    pattern = 'DataSujetos/%s_*.csv' % sleepStage
    return {
        Path(csv_path).stem: subjectmatrix2graphslist(csv_path, densities)
        for csv_path in glob(pattern)
    }

def buildFileName(stage, subject):
    """Return the string ``<stage>_<subject>`` for the given stage and subject."""
    return "_".join((str(stage), str(subject)))
    
# Display the density grid as the output of this cell.
densities


In [None]:
# Map each sleep stage found in the data files to the graphs of every subject:
# {stage: {file stem: [graph per density]}}
graphsBySleepStage = {
    sleepStage: graphslistBySleepStage(sleepStage, densities)
    for sleepStage in sleepStages
}

In [None]:
# Helpers to compute and plot graph measures per sleep stage
# Compute a measure
def calculateMeasure(graphs, calculateMeasureByGraph):
    """
    Evaluate a measure on every graph of every subject.

    ``graphs`` maps a subject name to its list of graphs; ``calculateMeasureByGraph``
    is called once per graph. Returns a 2-D array with one row per subject (in the
    dict's iteration order) and one column per graph.
    """
    rows = [
        [calculateMeasureByGraph(graph) for graph in subject_graphs]
        for subject_graphs in graphs.values()
    ]
    return np.vstack(rows)


# Plot one panel per sleep stage
def plotGraphMeasureBySleepStage(xlabel,ylabel, graphsBySleepStage, densities, calculateMeasureByGraph):
    """
    Draw a 2x2 grid of panels, one per stage in ``sleepStagesOrdered``, showing the
    measure of every subject as a function of density.

    ``graphsBySleepStage[stage]`` is handed to ``calculateMeasure`` together with
    ``calculateMeasureByGraph``; the transposed result is plotted against
    ``densities``. Only the two bottom panels get an x label.
    """
    plt.figure(figsize=(10,10))

    for panel, stage in enumerate(sleepStagesOrdered, start=1):
        per_subject = calculateMeasure(graphsBySleepStage[stage], calculateMeasureByGraph)
        plt.subplot(2, 2, panel)
        plt.plot(densities, per_subject.T)
        # Panels 3 and 4 form the bottom row of the grid
        if panel > 2:
            plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(stage)
        
def plotMeanGraphMeasureBySleepStage(xlabel, ylabel, graphsBySleepStage, densities,
                                     calculateMeasureByGraph, showStd = True):
    """
    Draw a 2x2 grid of panels, one per stage in ``sleepStagesOrdered``, showing the
    mean of a measure across subjects as a function of density.

    The (mean, std) pair comes from ``calculateMeanMeasure``. When ``showStd`` is
    true a band of mean +/- std is shaded behind the mean curve. Only the two bottom
    panels get an x label.
    """
    plt.figure(figsize=(10,10))

    for panel, stage in enumerate(sleepStagesOrdered, start=1):
        mean, std = calculateMeanMeasure(graphsBySleepStage[stage], calculateMeasureByGraph)
        plt.subplot(2, 2, panel)
        if showStd:
            plt.fill_between(densities, mean - std, mean + std, alpha = 0.5)
        plt.plot(densities, mean)
        # Panels 3 and 4 form the bottom row of the grid
        if panel > 2:
            plt.xlabel(xlabel)
        plt.ylabel(ylabel)
        plt.title(stage)
        
def calculateMeanMeasure(graphs, calculateMeasureByGraph):
    """
    Mean and standard deviation of a measure across subjects, per graph position.

    Parameters
    ----------
    graphs : dict
        Maps a subject name to its list of graphs (one per density).
    calculateMeasureByGraph : callable
        Called once per graph; returns the scalar measure of that graph.

    Returns
    -------
    tuple of (ndarray, ndarray)
        ``(mean, std)`` taken over subjects (axis 0), one entry per graph position.
    """
    # Exactly one row per subject. Previously the row was appended once per graph,
    # so every subject was stacked repeatedly.
    values = [
        [calculateMeasureByGraph(G) for G in Gs]
        for Gs in graphs.values()
    ]
    values = np.vstack(values)
    return (values.mean(0), values.std(0))

In [None]:
from community import community_louvain

In [None]:
def calculateModularityCoefficient(graph):
    """
    Return the modularity of the partition that ``community_louvain.best_partition``
    finds for ``graph``.
    """
    partition = community_louvain.best_partition(graph)
    quality = community_louvain.modularity(partition, graph)
    return quality

def calculateNumberOfModules(graph):
    """
    Return how many distinct community labels appear in the partition that
    ``community_louvain.best_partition`` finds for ``graph``.
    """
    partition = community_louvain.best_partition(graph)
    distinct_labels = set(partition.values())
    return len(distinct_labels)

In [None]:
#plotMeanGraphMeasureBySleepStage('Edges density', 'Modularity (Q)',
#                             graphsBySleepStage, densities, calculateModularityCoefficient)

In [None]:
# Se grafica el número de comunidades (Nc) en función de las densidades
#plotMeanGraphMeasureBySleepStage('Edges density', 'Number of communities (Nc)',
#                             graphsBySleepStage, densities, calculateNumberOfModules)

In [None]:
def partition_set_to_dict(m):
    """
    Convert a partition given as a sequence of node collections into a dict that maps
    each node to the index of the collection containing it.

    If a node appears in several collections, the last one wins.
    """
    return {
        node: community_index
        for community_index, community in enumerate(m)
        for node in community
    }

def best_partition(graph):
    """
    Return the Girvan-Newman partition of ``graph`` with the highest modularity.

    Every partition yielded by ``nx.community.girvan_newman`` is scored with
    ``community_louvain.modularity``; the first partition reaching the maximum
    score is returned, in the form produced by networkx.
    """
    # Girvan-Newman keeps removing edges (using edge betweenness) and yields
    # one partition per split level; all of them are materialised here.
    candidates = list(nx.community.girvan_newman(graph))
    # Score every candidate partition by its modularity
    scores = [
        community_louvain.modularity(partition_set_to_dict(candidate), graph)
        for candidate in candidates
    ]
    # Pick the candidate with the highest modularity
    return candidates[np.argmax(scores)]

def girvan_newman():
    """
    Run ``best_partition`` on every graph stored in the global ``graphsBySleepStage``.

    Returns a nested dict ``{stage: {subject: [(modularity, number_of_modules), ...]}}``
    with one tuple per graph of the subject, in order. Progress is printed for every
    stage, subject and density.
    """
    stages = {}
    for stage, graphs_by_subject in graphsBySleepStage.items():
        print("stage = " + stage)
        per_subject = {}
        for subject, subject_graphs in graphs_by_subject.items():
            print("subject = " + subject)
            rows = []
            for position, graph in enumerate(subject_graphs):
                print("density = " + str(position) + ": " + str(densities[position]))
                communities = list(best_partition(graph))
                quality = community_louvain.modularity(partition_set_to_dict(communities), graph)
                rows.append((quality, len(communities)))
            per_subject[subject] = rows
        stages[stage] = per_subject
    return stages
        
# Compute the Girvan-Newman results once for every stage, subject and density;
# the accessor functions defined further down read from this nested dict.
girvanNewmanModules = girvan_newman()

In [None]:
def plotMeanTwoAlgoriths(xlabel, ylabel, densities,
                         calculateMeasure1, calculateMeasure2, showStd = True):
    """
    Draw a 2x2 grid of panels, one per stage in ``sleepStagesOrdered``, comparing the
    mean of two measures as a function of density.

    ``calculateMeasure1`` is evaluated per graph through ``calculateMeanMeasure`` and
    plotted as 'Louvain'; ``calculateMeasure2`` is evaluated through
    ``calculateMeanGirvanNewman`` and plotted as 'Girvan Newman'. When ``showStd`` is
    true a band of mean +/- std is shaded for each curve. Only the two bottom panels
    get an x label.
    """
    plt.figure(figsize=(10,10))

    for panel, stage in enumerate(sleepStagesOrdered, start=1):
        print("sleepStage = " + stage)
        louvain_mean, louvain_std = calculateMeanMeasure(graphsBySleepStage[stage], calculateMeasure1)
        newman_mean, newman_std = calculateMeanGirvanNewman(stage, calculateMeasure2)
        plt.subplot(2, 2, panel)
        if showStd:
            plt.fill_between(densities, louvain_mean - louvain_std, louvain_mean + louvain_std, alpha = 0.5)
            plt.fill_between(densities, newman_mean - newman_std, newman_mean + newman_std, alpha = 0.5)
        plt.plot(densities, louvain_mean, label = 'Louvain')
        plt.plot(densities, newman_mean, label = 'Girvan Newman')
        # Panels 3 and 4 form the bottom row of the grid
        if panel > 2:
            plt.xlabel(xlabel)
        plt.legend()
        plt.ylabel(ylabel)
        plt.title(stage)
        
def calculateMeanGirvanNewman(sleepStage, calculateMeasureByGraph):
    """
    Mean and standard deviation across subjects of a precomputed measure, per density.

    Parameters
    ----------
    sleepStage : str
        Key into the global ``graphsBySleepStage``.
    calculateMeasureByGraph : callable
        Called as ``calculateMeasureByGraph(sleepStage, subject_name, density_index)``
        once per graph of every subject; returns the scalar measure.

    Returns
    -------
    tuple of (ndarray, ndarray)
        ``(mean, std)`` taken over subjects (axis 0), one entry per density index.
    """
    # Exactly one row per subject. Previously the row was appended once per graph,
    # so every subject was stacked repeatedly.
    values = [
        [calculateMeasureByGraph(sleepStage, name, i) for i in range(len(Gs))]
        for name, Gs in graphsBySleepStage[sleepStage].items()
    ]
    values = np.vstack(values)
    return (values.mean(0), values.std(0))

In [None]:
def calculateGirvanNewmanModularityCoefficient(sleepStage, subjectName, densityNumber):
    """
    Look up the modularity stored in the global ``girvanNewmanModules`` for the given
    stage, subject and density index (first element of the stored entry).
    """
    per_density = girvanNewmanModules[sleepStage][subjectName]
    modularity_position = 0
    return per_density[densityNumber][modularity_position]

def calculateGirvanNewmanNumberOfModules(sleepStage, subjectName, densityNumber):
    """
    Look up the number of modules stored in the global ``girvanNewmanModules`` for the
    given stage, subject and density index (second element of the stored entry).
    """
    per_density = girvanNewmanModules[sleepStage][subjectName]
    module_count_position = 1
    return per_density[densityNumber][module_count_position]

In [None]:
# Compare the mean modularity obtained with Louvain and with Girvan-Newman
# for every sleep stage; the standard-deviation bands are switched off.
plotMeanTwoAlgoriths('Edges density', 'Modularity (Q)',
                    densities, calculateModularityCoefficient, calculateGirvanNewmanModularityCoefficient,
                    showStd = False)

In [None]:
# Compare the mean number of communities obtained with Louvain and with Girvan-Newman
# for every sleep stage; the standard-deviation bands are switched off.
plotMeanTwoAlgoriths('Edges density', 'Number of communities (Nc)',
                    densities, calculateNumberOfModules, calculateGirvanNewmanNumberOfModules,
                    showStd = False)

In [None]:
# Display the raw Girvan-Newman results:
# {stage: {subject: [(modularity, number_of_modules), ...]}}, one tuple per density.
girvanNewmanModules