<a href="https://colab.research.google.com/github/cbaldassari/gmm_init/blob/main/gmm_mtf_5_PJMfile.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#import

In [None]:
# Mount Google Drive at /content/drive; the data file read and the CSV written
# by the experiment cell below both live under /content/drive/MyDrive.
from google.colab import drive
# NOTE(review): force_remount=True presumably re-mounts even when the drive is
# already mounted from an earlier run — confirm against the Colab docs.
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [None]:
%%capture
!pip install community
!pip install python-louvain
!pip install tsia
!pip install networkx
!pip install easydev
!pip install colormap

In [None]:
%%capture
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys

from matplotlib import gridspec
from numba import njit, prange
from pyts.image import MarkovTransitionField

import tsia.plot
import tsia.markov
import tsia.network_graph

import community
from community import community_louvain
import networkx as nx

from matplotlib.colors import to_hex

from sklearn.preprocessing import MinMaxScaler
from sklearn.mixture import GaussianMixture
import numpy as np
from scipy.stats import kurtosis, skew

import csv
from colormap import rgb2hex

#funcs


In [None]:
# Useful constants definition
# Name of the matplotlib colormap used as the default `colormap` argument of
# get_modularity_encoding2 and get_network_graph_map2.
COLORMAP = 'jet'

def get_network_graph2(mtf):
    """Build a weighted networkx graph from a Markov transition field.

    Parameters
    ----------
    mtf : numpy.ndarray
        2-D array interpreted as an adjacency matrix (it is indexed as
        ``mtf[u, v]``); expected to be square.

    Returns
    -------
    networkx.Graph
        Graph with one node per row of ``mtf``. Every edge ``(u, v)``
        reported by ``graph.edges()`` carries ``weight = mtf[u, v]``.
    """
    # `nx.from_numpy_matrix` was removed in networkx 3.0; `from_numpy_array`
    # is its replacement and has been available since networkx 2.0.
    graph = nx.from_numpy_array(mtf)

    # Explicitly pin each edge weight to the transition probability read in
    # the orientation in which networkx reports the edge.
    for u, v in graph.edges():
        graph[u][v]['weight'] = mtf[u, v]

    return graph
    
def compute_network_graph_statistics2(partitions, graph=None, mtf=None):
    """Return the modularity and community count for a given partition.

    Parameters
    ----------
    partitions : dict
        Mapping node -> community id, e.g. as returned by
        ``community_louvain.best_partition``.
    graph : networkx.Graph, optional
        Graph the partition refers to.
    mtf : numpy.ndarray, optional
        Markov transition field; only used to build the graph when ``graph``
        is not given.

    Returns
    -------
    dict
        ``{'Modularity': float, 'Partitions': int}``.

    Raises
    ------
    ValueError
        If neither ``graph`` nor ``mtf`` is provided.
    """
    if graph is None:
        if mtf is None:
            raise ValueError("either `graph` or `mtf` must be provided")
        # The original called the undefined name `get_network_graph`, which
        # raised NameError; the builder defined in this file is the `2` variant.
        graph = get_network_graph2(mtf)

    # The partition is supplied by the caller rather than recomputed here.
    nb_partitions = len(set(partitions.values()))
    modularity = community_louvain.modularity(partitions, graph)

    # Other graph statistics (diameter, density, average degree, average path
    # length, average clustering coefficient) are disabled in this variant;
    # only the two values below are reported.
    statistics = {
        'Modularity': modularity,
        'Partitions': nb_partitions
    }

    return statistics
    
def get_modularity_encoding2(graph, colormap=COLORMAP, reversed_cmap=False):
    """Compute a Louvain-community-based visual encoding for a graph.

    Parameters
    ----------
    graph : networkx.Graph
        Graph whose edges carry a ``'weight'`` attribute (used for the
        weighted clustering coefficient).
    colormap : str
        Name of the matplotlib colormap used for the edge colours.
    reversed_cmap : bool
        When true, the reversed colormap is used.

    Returns
    -------
    encoding : dict
        ``'node_size'`` (list, one value per node derived from the clustering
        coefficient), ``'edge_color'`` (list of hex strings, one per edge) and
        ``'node_color'`` (list of community ids, one per node).
    partitions : dict
        Mapping node -> community id from ``community_louvain.best_partition``
        (computed with ``random_state=1234``).
    """
    # `plt.cm.get_cmap` has been removed from recent matplotlib releases;
    # `plt.get_cmap` is the stable accessor.
    cmap = plt.get_cmap(colormap)
    if reversed_cmap:
        cmap = cmap.reversed()

    # Get the node partitions and number of partitions found with the Louvain
    # algorithm, as implemented in the `community` package:
    partitions = community_louvain.best_partition(graph, random_state=1234)
    nb_partitions = len(set(partitions.values()))

    # With a single community `nb_partitions - 1` is 0 and the normalisation
    # below would raise ZeroDivisionError; clamp the divisor to 1 so that the
    # only community maps to the first colour of the colormap.
    span = max(nb_partitions - 1, 1)

    # Compute node colors and edges colors for the modularity encoding:
    edge_colors = [to_hex(cmap(partitions.get(v) / span)) for _, v in graph.edges()]
    node_colors = [partitions.get(node) for node in graph.nodes()]

    # Node size: clustering coefficient shifted to start at 0, then scaled.
    clustering = np.asarray(list(nx.clustering(graph, weight='weight').values()))
    node_size = list((clustering - np.min(clustering)) * 2000 + 10)

    # Store the encoding to return in a dictionary:
    encoding = {
        'node_size': node_size,
        'edge_color': edge_colors,
        'node_color': node_colors
    }
    return encoding, partitions
    
def get_network_graph_map2(timeseries, encoding, colormap=COLORMAP, reversed_cmap=False):
    """Split a time series into equal-width slices coloured by community.

    Parameters
    ----------
    timeseries : pandas.DataFrame
        Data to slice; accessed through ``.shape[0]`` and
        ``.iloc[start:end, :]``.
    encoding : dict
        Encoding holding a ``'node_color'`` sequence with one community id
        per graph node (as built by ``get_modularity_encoding2``).
    colormap : str
        Name of the matplotlib colormap used to colour the slices.
    reversed_cmap : bool
        When true, the reversed colormap is used.

    Returns
    -------
    network_graph_map : list of dict
        One ``{'color': rgba, 'slice': DataFrame}`` entry per node, in order.
    node_colors : sequence
        The ``encoding['node_color']`` sequence, returned unchanged.
    """
    # Get encoding definitions:
    node_colors = encoding['node_color']
    image_size = len(node_colors)

    # Normalise community ids to [0, 1]. When every node has id 0 the maximum
    # is 0 and the division would produce NaN colours, so fall back to zeros.
    labels = np.asarray(node_colors, dtype=float)
    max_label = np.max(labels)
    if max_label > 0:
        partition_color = labels / max_label
    else:
        partition_color = np.zeros_like(labels)

    # Define the color map (`plt.cm.get_cmap` has been removed from recent
    # matplotlib releases; `plt.get_cmap` is the stable accessor):
    cmap = plt.get_cmap(colormap)
    if reversed_cmap:
        cmap = cmap.reversed()

    # Associate each subset of the signal with the color of the network
    # graph partition it belongs to. Slice bounds are truncated to integers,
    # so the slices are contiguous and non-overlapping.
    sequences_width = timeseries.shape[0] / image_size
    network_graph_map = []
    for i in range(image_size):
        start = int(i * sequences_width)
        end = int((i + 1) * sequences_width)
        network_graph_map.append({
            'color': cmap(partition_color[i]),
            'slice': timeseries.iloc[start:end, :]
        })

    return network_graph_map, node_colors

def inversemapAna(ng_map2,colors2):
    """Flatten a network-graph map into one (color, value) row per sample.

    Parameters
    ----------
    ng_map2 : list of dict
        Entries with a ``'slice'`` DataFrame, as produced by
        ``get_network_graph_map2``.
    colors2 : sequence
        Label to attach to every value of the slice at the same position.

    Returns
    -------
    pandas.DataFrame
        Columns ``'color'`` and ``'value'``; each slice is flattened in
        row-major order and slices are concatenated in input order.
    """
    # Kept as a printed warning (not an exception) to match the original
    # best-effort behaviour on a length mismatch.
    if len(ng_map2) != len(colors2):
        print("ERROR")

    # Collect plain lists and build the frame once: `DataFrame.append` was
    # removed in pandas 2.0 and appending row by row is quadratic.
    labels = []
    values = []
    for i, entry in enumerate(ng_map2):
        flat = entry["slice"].values.reshape(-1)
        labels.extend([colors2[i]] * len(flat))
        values.extend(flat.tolist())

    return pd.DataFrame({"color": labels, "value": values}, columns=["color", "value"])

#text

In [None]:
# Grid search over MTF settings (number of bins, image size). For each setting:
#   1. build the Markov transition field of the series and its network graph,
#   2. split the series into Louvain communities,
#   3. use the per-community mean / precision / weight to initialise a
#      1-D Gaussian mixture fitted on the min-max-scaled series,
#   4. sample the fitted mixture `trial` times and accept the setting when the
#      2nd-4th moments of the samples match those of the original series.
# Accepted settings are appended to `note` and written to CSV.
hubnames = ["pjm"]
tag_data = pd.read_csv('/content/drive/MyDrive/Mari/plotpaper2/GMMdata.csv')
strategy = 'quantile'
note = pd.DataFrame(columns=["hub", "bins", "imsize", "bic", "aic"])
trial = 1000
# A moment matches when |sample mean - original| < sample std / fraction.
fraction = 2

gridlist = tuple((b, ts) for b in range(2, 100, 2) for ts in range(5, 300, 5))

# Accepted rows are accumulated here and `note` is rebuilt from them:
# `DataFrame.append` was removed in pandas 2.0. The column order matches what
# appending the result dictionaries to the initial `note` frame produced.
note_columns = ["hub", "bins", "imsize", "bic", "aic", "nb_partitions", "nk_debug",
                "n_components", "weights", "means", "covariances"]
note_rows = []

for hub in hubnames:
    tag_df = tag_data[hub].to_frame()
    X = tag_data[hub].values.reshape(1, -1)

    # Everything below depends only on the hub, not on the grid point.
    scaler = MinMaxScaler()
    XX = scaler.fit_transform(tag_data[hub].values.reshape(-1, 1))
    size = len(XX)
    itemorig = {"1": tag_df[hub].mean(),
                "2": tag_df[hub].std(),
                "3": skew(tag_df[hub]),
                "4": kurtosis(tag_df[hub]) + 3
                }

    for bins, imsize in gridlist:
        mtf = MarkovTransitionField(image_size=imsize, n_bins=bins,
                                    strategy=strategy, overlapping=False)
        tag_mtf = mtf.fit_transform(X)

        graph = get_network_graph2(tag_mtf[0])

        encoding2, partitions = get_modularity_encoding2(graph)
        ng_map2, colors2 = get_network_graph_map2(tag_df, encoding2)

        statistics = compute_network_graph_statistics2(partitions, graph)
        nb_partitions = statistics["Partitions"]
        modularity = statistics["Modularity"]
        lin_map = inversemapAna(ng_map2, colors2)
        colors = lin_map.groupby(['color']).size().index

        means = []
        precisions = []
        nk = []

        for c in colors:
            a = lin_map.loc[lin_map["color"] == c, "value"].to_numpy(dtype=float)
            means.append(np.mean(a))
            # `precisions_init` is the inverse of the covariance, i.e. 1/variance
            # for one feature; the original passed 1/std.
            precisions.append(1 / np.var(a))
            nk.append(len(a) / len(lin_map))

        precisions = np.array(precisions).reshape(-1, 1, 1)
        means = np.array(means).reshape(-1, 1)

        # NOTE(review): the initial means/precisions above are computed on the
        # raw series values, while the mixture is fitted on the min-max-scaled
        # data `XX`. Unless GMMdata.csv is already scaled to [0, 1] the
        # initialisation is on a different scale than the data — confirm.
        gmm = GaussianMixture(n_components=len(nk),
                              weights_init=nk,
                              means_init=means,
                              precisions_init=precisions,
                              covariance_type='full').fit(XX)

        bic = gmm.bic(XX)
        aic = gmm.aic(XX)

        # Moments of `trial` synthetic series drawn from the fitted mixture,
        # mapped back to the original scale. Rows are collected in a list and
        # turned into a frame once (much cheaper than per-cell assignment).
        moments = []
        for cnt in range(trial):
            data_new = gmm.sample(size)
            sample = data_new[0].reshape(-1, 1)
            samplezero = scaler.inverse_transform(sample)
            moments.append({"second": samplezero.std(),
                            "third": skew(samplezero)[0],
                            "fourth": kurtosis(samplezero)[0] + 3})
        res = pd.DataFrame(moments, columns=["idx", "second", "third", "fourth"])

        summary = {"avg2": res["second"].mean(), "std2": res["second"].std(),
                   "avg3": res["third"].mean(), "std3": res["third"].std(),
                   "avg4": res["fourth"].mean(), "std4": res["fourth"].std(),
                   "aic": aic, "bic": bic, "comp": nb_partitions}
        # Single-row summary frame; "idxs" and "weights" stay empty (NaN).
        grid = pd.DataFrame([summary],
                            columns=["idxs", "avg2", "std2", "avg3", "std3",
                                     "avg4", "std4", "aic", "bic", "weights", "comp"])

        # Accept only if all three moments of the samples are close enough to
        # those of the original series.
        accepted = all(
            abs(summary["avg" + k] - itemorig[k]) < summary["std" + k] / fraction
            for k in ("2", "3", "4")
        )

        # `out` holds the accepted summary row (or is empty); it is kept as a
        # DataFrame in case other notebook cells inspect it.
        out = pd.DataFrame()
        if accepted:
            dictionary = {"hub": hub, "frac": fraction,
                          "bic": bic,
                          "aic": aic,
                          "weights": np.nan}
            out = pd.DataFrame([dictionary])

        if accepted:
            print("###")
            print(hub,"(",bins,imsize,")")
            dct = {'hub': hub, 'bins': bins, 'imsize': imsize, "nb_partitions": nb_partitions, "nk_debug": nk,
                   "n_components": gmm.n_components, "bic": bic, "aic": aic,
                   "weights": gmm.weights_, "means": gmm.means_, "covariances": gmm.covariances_}

            note_rows.append(dct)
            note = pd.DataFrame(note_rows, columns=note_columns)
            # Rewritten after every accepted setting so partial results
            # survive an interrupted run.
            note.to_csv("/content/drive/MyDrive/Mari/plotpaper2/20210930_15_MTFloop_PJM.csv")

###
pjm ( 2 40 )
###
pjm ( 2 45 )
###
pjm ( 2 50 )
###
pjm ( 2 55 )
###
pjm ( 2 60 )
###
pjm ( 2 65 )
###
pjm ( 2 70 )
###
pjm ( 2 75 )
###
pjm ( 2 80 )
###
pjm ( 2 85 )
###
pjm ( 2 90 )
###
pjm ( 2 95 )
###
pjm ( 2 100 )
###
pjm ( 2 105 )
###
pjm ( 2 110 )
###
pjm ( 2 115 )
###
pjm ( 2 120 )
###
pjm ( 2 125 )
###
pjm ( 2 130 )
###
pjm ( 2 135 )
###
pjm ( 2 140 )
###
pjm ( 2 145 )
###
pjm ( 2 150 )
###
pjm ( 2 155 )
###
pjm ( 2 160 )
###
pjm ( 2 165 )
###
pjm ( 2 170 )
###
pjm ( 2 175 )
###
pjm ( 2 180 )
###
pjm ( 2 185 )
###
pjm ( 2 190 )
###
pjm ( 2 195 )
###
pjm ( 2 200 )
###
pjm ( 2 205 )
###
pjm ( 2 210 )
###
pjm ( 2 215 )
###
pjm ( 2 220 )
###
pjm ( 2 225 )
###
pjm ( 2 230 )
###
pjm ( 2 235 )
###
pjm ( 2 240 )
###
pjm ( 2 245 )
###
pjm ( 2 250 )
###
pjm ( 2 255 )
###
pjm ( 2 260 )
###
pjm ( 2 265 )
###
pjm ( 2 270 )
###
pjm ( 2 275 )
###
pjm ( 2 280 )
###
pjm ( 2 285 )
###
pjm ( 2 290 )
###
pjm ( 2 295 )
###
pjm ( 4 40 )
###
pjm ( 4 45 )
###
pjm ( 4 50 )
###
pjm ( 4 70 )
###
pjm 