<a href="https://colab.research.google.com/github/cbaldassari/time_series_network/blob/main/time_series_network.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Imports

In [None]:
# Mount Google Drive in the Colab runtime; the later cells read their input
# pickles from, and write their CSV results to, paths under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
%%capture
# Install the third-party packages imported by the next cell.
# NOTE(review): the `community` module used in this notebook
# (`from community import community_louvain`) is presumably the one shipped by
# `python-louvain`; confirm that the separate `community` PyPI package is needed.
!pip install pyts
!pip install community
!pip install python-louvain
!pip install tsia
!pip install networkx
!pip install easydev
!pip install colormap
!pip install tomaster
!pip install karateclub
!pip install ts2vg

In [None]:
%%capture
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys
import seaborn as sns

from matplotlib import gridspec
from numba import njit, prange
from pyts.image import MarkovTransitionField

import tsia.plot
import tsia.markov
import tsia.network_graph

import community
from community import community_louvain
import networkx as nx

from matplotlib.colors import to_hex

from sklearn.preprocessing import MinMaxScaler
from sklearn.mixture import GaussianMixture
import numpy as np
from scipy.stats import kurtosis, skew

import csv
from colormap import rgb2hex
from tomaster import tomato

from karateclub import ASNE
from karateclub import GraphWave
from karateclub import NEU
from karateclub import Diff2Vec

import seaborn as sns
from pickle import FALSE
import scipy
import scipy.sparse as sparse

from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler

from ts2vg import NaturalVG
from ts2vg import HorizontalVG

## Helper functions


In [None]:
# Default matplotlib colormap name, used as the default `colormap` argument of
# get_modularity_encoding2() and get_network_graph_map2().
COLORMAP = 'jet'

def get_network_graph2(mtf):
    """Build a weighted networkx graph from a square adjacency-like matrix.

    Parameters
    ----------
    mtf : 2-D array
        Matrix whose entry ``mtf[u, v]`` is used as the weight of edge
        ``(u, v)`` (a Markov transition field in the original use case).

    Returns
    -------
    networkx.Graph
        Graph whose edges carry a ``'weight'`` attribute taken from ``mtf``.
    """
    # `from_numpy_matrix` was removed in networkx 3.0; `from_numpy_array` is
    # its replacement. Fall back to the old name only on very old releases.
    build = getattr(nx, "from_numpy_array", None) or nx.from_numpy_matrix
    graph = build(mtf)

    # Associate each edge explicitly with the matrix entry read in the
    # (u, v) order in which networkx reports the edge.
    for u, v in graph.edges():
        graph[u][v]['weight'] = mtf[u, v]

    return graph
    
def compute_network_graph_statistics2(partitions, graph=None, mtf=None):
    """Compute summary statistics of a network graph and a given partition.

    Parameters
    ----------
    partitions : dict
        Mapping node -> community id (e.g. the output of
        ``community_louvain.best_partition``).
    graph : networkx.Graph, optional
        Graph to analyse. When omitted, it is built from ``mtf``.
    mtf : 2-D array, optional
        Matrix used to build the graph when ``graph`` is not given.

    Returns
    -------
    dict
        Diameter, average (weighted) degree, density, average path length,
        average clustering coefficient, modularity and number of partitions.

    Raises
    ------
    ValueError
        If neither ``graph`` nor ``mtf`` is provided.
    """
    if graph is None:
        if mtf is None:
            raise ValueError("either `graph` or `mtf` must be provided")
        # The original code called `get_network_graph`, which is not defined
        # in this notebook; the local builder is `get_network_graph2`.
        graph = get_network_graph2(mtf)

    nb_partitions = len(set(partitions.values()))
    modularity = community_louvain.modularity(partitions, graph)

    diameter = nx.diameter(graph)
    clustering = list(nx.clustering(graph, weight='weight').values())
    avg_clustering_coeff = np.array(clustering).mean()
    density = nx.density(graph)
    avg_path_length = nx.average_shortest_path_length(graph, weight='weight', method='dijkstra')

    # `average_degree_connectivity` returns a dict keyed by degree; the
    # statistic reported here is the plain mean of its values.
    average_degree = np.mean(list(nx.average_degree_connectivity(graph).values()))
    avg_weighted_degree = np.mean(
        list(nx.average_degree_connectivity(graph, weight='weight').values())
    )

    statistics = {
        'Diameter': diameter,
        'Average degree': average_degree,
        'Average weighted degree': avg_weighted_degree,
        'Density': density,
        'Average path length': avg_path_length,
        'Average clustering coefficient': avg_clustering_coeff,
        'Modularity': modularity,
        'Partitions': nb_partitions
    }

    return statistics
    
def get_modularity_encoding2(graph, colormap=COLORMAP, reversed_cmap=False):
    """Compute a Louvain-partition based visual encoding of a graph.

    Parameters
    ----------
    graph : networkx.Graph
        Weighted graph to encode.
    colormap : str or matplotlib colormap
        Colormap used to colour the edges by partition.
    reversed_cmap : bool
        When true, the reversed colormap is used.

    Returns
    -------
    (dict, dict)
        ``encoding`` with keys ``'node_size'``, ``'edge_color'`` and
        ``'node_color'``, and the ``partitions`` mapping node -> community id.
    """
    # `plt.get_cmap` is used instead of `plt.cm.get_cmap`, which was removed
    # from recent matplotlib releases.
    colormap = plt.get_cmap(colormap)
    if reversed_cmap:
        colormap = colormap.reversed()

    # Node partitions found with the Louvain algorithm, as implemented in the
    # `community` package (fixed seed for reproducibility).
    partitions = community_louvain.best_partition(graph, random_state=1234)
    nb_partitions = len(set(partitions.values()))

    # With a single partition `nb_partitions - 1` is 0; clamp the divisor so
    # the colour lookup does not raise ZeroDivisionError.
    color_span = max(nb_partitions - 1, 1)

    # Edge colour follows the partition of the edge's second endpoint.
    edge_colors = [to_hex(colormap(partitions.get(v) / color_span)) for u, v in graph.edges()]
    node_colors = [partitions.get(node) for node in graph.nodes()]

    # Node size grows with the (weighted) clustering coefficient, shifted so
    # that the smallest node has size 10.
    clustering = np.array(list(nx.clustering(graph, weight='weight').values()))
    node_size = list((clustering - np.min(clustering)) * 2000 + 10)

    encoding = {
        'node_size': node_size,
        'edge_color': edge_colors,
        'node_color': node_colors
    }
    return encoding, partitions

def foo(w,m,v):
  """Return [mean, std, skewness, kurtosis] of a 1-D Gaussian mixture.

  w, m and v are the component weights, means and variances (indexed
  0..len(w)-1). The per-component terms below are the 2nd, 3rd and 4th raw
  moments of a normal distribution; their weighted sums give the raw moments
  of the mixture, from which the standardised moments are derived. The
  kurtosis returned is the plain (non-excess) one.
  """
  positions=range(len(w))

  # Per-component raw moments of order 2, 3 and 4.
  second=[v[i]+m[i]**2 for i in positions]
  third=[pow(m[i],3)+3*m[i]*v[i] for i in positions]
  fourth=[pow(m[i],4)+6*m[i]**2*v[i]+3*v[i]**2 for i in positions]

  # Raw moments of the mixture (weighted sums over the components).
  raw1=np.dot(w,m)
  raw2=np.dot(w,second)
  raw3=np.dot(w,third)
  raw4=np.dot(w,fourth)

  mu=raw1
  sig=np.sqrt(np.subtract(raw2, mu**2))
  # Central moments expressed through raw moments, then standardised.
  sk=(raw3-3*raw2*raw1+2*pow(raw1,3))/pow(sig,3)
  kur=(raw4-4*raw3*raw1+6*raw2*raw1**2-3*pow(raw1,4))/pow(sig,4)
  return [mu, sig, sk, kur]

def get_network_graph_map2(timeseries, encoding, colormap=COLORMAP, reversed_cmap=False):
    """Split a time series into per-node slices coloured by graph partition.

    Parameters
    ----------
    timeseries : pandas.DataFrame
        Series to split; it is cut along its rows into ``len(node_color)``
        consecutive slices of (approximately) equal length.
    encoding : dict
        Encoding holding a ``'node_color'`` sequence with one partition id
        per node.
    colormap : str or matplotlib colormap
        Colormap used to turn partition ids into colours.
    reversed_cmap : bool
        When true, the reversed colormap is used.

    Returns
    -------
    (list of dict, sequence)
        One ``{'color': rgba, 'slice': DataFrame}`` entry per node, and the
        ``node_color`` sequence taken from ``encoding``.
    """
    node_colors = encoding['node_color']
    image_size = len(node_colors)

    # Normalise the partition ids to [0, 1]. When the largest id is 0 (a
    # single partition) the division would be 0/0 and yield NaN colours, so
    # map every node to 0 instead.
    peak = np.max(node_colors)
    if peak == 0:
        partition_color = np.zeros(image_size)
    else:
        partition_color = np.asarray(node_colors) / peak

    # `plt.get_cmap` is used instead of `plt.cm.get_cmap`, which was removed
    # from recent matplotlib releases.
    colormap = plt.get_cmap(colormap)
    if reversed_cmap:
        colormap = colormap.reversed()

    # Each node owns a window of `sequences_width` rows (possibly fractional,
    # hence the int() truncation of both bounds).
    sequences_width = timeseries.shape[0] / image_size

    network_graph_map = []
    for i in range(image_size):
        start = int(i * sequences_width)
        end = int((i + 1) * sequences_width)
        network_graph_map.append({
            'color': colormap(partition_color[i]),
            'slice': timeseries.iloc[start:end, :]
        })

    return network_graph_map, node_colors


def inversemapAna(ng_map2,colors2):
  """Flatten a network-graph map into per-observation (color, increment) rows.

  Parameters
  ----------
  ng_map2 : list of dict
      Entries with a ``"slice"`` DataFrame (as built by
      ``get_network_graph_map2``).
  colors2 : sequence
      One colour/partition label per entry of ``ng_map2``.

  Returns
  -------
  pandas.DataFrame
      Columns ``"color"`` and ``"value"``, where ``"value"`` is the first
      difference of the concatenated slice values. The first observation has
      no predecessor and is dropped, so the index starts at 1. An empty input
      yields an empty frame with the same two columns.
  """
  # Collect everything in plain lists and build the frame once: appending
  # row by row is quadratic and `DataFrame.append` no longer exists in
  # pandas >= 2.0.
  color_col=[]
  value_col=[]
  for i, entry in enumerate(ng_map2):
      flat=entry["slice"].values.reshape(-1)
      color_col.extend([colors2[i]]*len(flat))
      value_col.extend(flat)

  df=pd.DataFrame({"color": color_col, "value": value_col}, columns=["color","value"])
  if df.empty:
      return df

  # Replace each value by its increment with respect to the previous one.
  df["value"]=df["value"].diff()
  return df.iloc[1:].copy()

In [None]:
import numpy as np
import networkx as nx
from sklearn.preprocessing import normalize
from karateclub.estimator import Estimator

class NEU2(Estimator):
    """Neighbourhood-smoothed refinement of an ASNE attributed embedding.

    An ASNE model is fitted on the graph and node features; its embedding is
    row-normalised and then updated ``T`` times by adding ``L1`` times the
    one-hop and ``L2`` times the two-hop neighbourhood average of itself.

    Args:
        L1 (float): Weight of the first-order (one-hop) term. Default is 0.5.
        L2 (float): Weight of the second-order (two-hop) term. Default is 0.25.
        T (int): Number of update iterations. Default is 1.
        seed (int): Random seed value. Default is 42.
    """

    def __init__(self, L1: float = 0.5, L2: float = 0.25, T: int = 1, seed: int = 42):
        self.iterations = T
        self.L1 = L1
        self.L2 = L2
        self.seed = seed

    def _normalize_embedding(self, original_embedding):
        """Scale every row of the embedding to unit Euclidean norm.

        Rows whose norm is zero are left unchanged instead of being divided
        by zero, which would fill them with NaN and propagate through the
        neighbourhood updates.
        """
        norms = np.linalg.norm(original_embedding, axis=1)
        safe_norms = np.where(norms == 0, 1.0, norms)
        normalized_embedding = (original_embedding.T / safe_norms).T
        return normalized_embedding

    def _update_embedding(self, graph, original_embedding):
        """Apply the iterative one-hop / two-hop smoothing to the embedding."""
        embedding = self._normalize_embedding(original_embedding)
        # assumes the nodes are labelled 0..n-1 (the nodelist fixes that row order)
        adjacency = nx.adjacency_matrix(graph, nodelist=range(graph.number_of_nodes()))
        # Row-stochastic adjacency: each row sums to 1 (L1 normalisation).
        normalized_adjacency = normalize(adjacency, norm="l1", axis=1)
        for _ in range(self.iterations):
            embedding = (
                embedding
                + self.L1 * (normalized_adjacency @ embedding)
                + self.L2 * (normalized_adjacency @ (normalized_adjacency @ embedding))
            )
        return embedding

    def fit(self, graph: nx.classes.graph.Graph, features):
        """Fit ASNE on ``graph``/``features`` and store the refined embedding.

        Args:
            graph: The graph to be embedded.
            features: Node feature matrix, passed unchanged to ``ASNE.fit``.
        """
        # `_set_seed` and `_check_graph` are not defined in this class;
        # presumably inherited from karateclub's Estimator.
        self._set_seed()
        graph = self._check_graph(graph)

        self.model = ASNE()
        self.model.fit(graph, features)
        original_embedding = self.model.get_embedding()
        self._embedding = self._update_embedding(graph, original_embedding)

    def get_embedding(self) -> np.ndarray:
        """Return the refined embedding computed by the last call to ``fit``."""
        return self._embedding

# Embedding senza attributi (embeddings without node attributes)


##Diff2Vec

In [None]:
# Experiment: embed the nodes of each hub's visibility graph with Diff2Vec,
# cluster the embedding with tomato() for several values of k, and use the
# clusters to initialise a Gaussian mixture fitted on the hub's returns.
hubnames=["pjm","sp15","paloverde","nepool"]
strategies=["natural","horizontal"]
# NOTE(review): presumably `tag_data` holds the (cleaned) log-price series and
# `r` the return series, one column per hub — confirm against the pickles.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk")
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")
# Values of the `k` argument passed to tomato(): 2..100.
klist=range(2,101,1)
# Also used as the name of the results sub-folder.
modelName="Diff2Vec"
model=Diff2Vec()
clus=pd.DataFrame()
returns=pd.DataFrame()

for hub in hubnames:
        for strategy in strategies:
          # Build the graph of the hub's series with the selected ts2vg builder.
          if (strategy=="natural"):
            g = NaturalVG(directed=None,weighted=None).build(tag_data[hub])
          elif (strategy=="horizontal"):
            g=HorizontalVG(directed=None,weighted=None).build(tag_data[hub])

          # Embed the graph nodes once per (hub, strategy) pair.
          graph= g.as_networkx()
          model.fit(graph)
          embedding=model.get_embedding()
          # True until the first row of this (hub, strategy) pair is written,
          # so that the CSV header is emitted only once per run of the loop.
          flag=True

          for k in klist:            
              # NOTE(review): the embedding does not depend on k, yet it is
              # re-standardised on every iteration; this could be hoisted.
              scaler = StandardScaler()
              embedding=scaler.fit_transform(embedding)

              clusters = tomato(points=embedding, k=k)
              num_clusters=len(set(clusters))
              clus=pd.DataFrame(clusters,columns=["cluster"])
              # Drop the first node's label; presumably because the returns
              # have one observation fewer than the prices — TODO confirm.
              clus.drop(clus.index[0], inplace=True)
              # assumes the cluster labels are the integers 0..num_clusters-1 — TODO confirm
              colors=list(range(0,num_clusters))
              returns=r[hub]
              # Pair the remaining cluster labels with the hub's returns, keyed on clus.index.
              returnCluster=pd.merge(clus, returns, on=clus.index, how="inner")

              # Keep only clusterings with at most 10 clusters and no cluster of size 1.
              if (num_clusters<=10 and (1 not in returnCluster.cluster.value_counts().values) ):
                tag_df=returns#tag_data[hub]
                tag_df=pd.DataFrame(tag_df)

                # Per-cluster statistics used to initialise the mixture: mean,
                # precision (1 / variance) and relative size of each cluster.
                means=[]
                precisions=[]
                nk=[]
                
                for x in colors:        
                  a=returnCluster[hub][returnCluster.cluster==x]
                  means.append(np.mean(a))
                  precisions.append(1/pow(np.std(a),2))
                  # TODO: double-check this weight (original note: "controllare")
                  nk.append(len(a)/(len(returnCluster)))
                  
                # Reshape to the array shapes passed as precisions_init / means_init below.
                precisions=np.array(precisions).reshape(-1,1,1)
                means=np.array(means).reshape(-1,1)

                # One-row results table for this (hub, strategy, k).
                grid=pd.DataFrame(columns=["idxs","hub","aic","bic","comp","weights","means","covariances"])
                # NOTE(review): the result of set_index is discarded, so `grid` is unchanged.
                grid.set_index("idxs")     
                
                # Sample moments of the returns: mean, std, skewness, kurtosis.
                # scipy's kurtosis() defaults to the excess definition, hence the +3.
                itemorig={"1":tag_df[hub].mean(),
                      "2":tag_df[hub].std(),
                      "3":skew(tag_df[hub]),
                      "4":kurtosis(tag_df[hub])+3
                }

                XY = tag_df[hub].values.reshape(-1, 1)#tag_data
                
                # Fit a 1-D Gaussian mixture on all returns, started from the cluster statistics.
                gmm = GaussianMixture(n_components=len(nk), weights_init=nk, means_init=means, precisions_init=precisions, covariance_type='full').fit(XY)
          
                # [mean, std, skewness, kurtosis] implied by the fitted mixture.
                nosim=foo(gmm.weights_.reshape(-1),gmm.means_.reshape(-1),gmm.covariances_.reshape(-1))

                # NOTE(review): 'comp' is assigned twice; the second value (gmm.n_components) wins.
                grid.at[0,'comp']=num_clusters
                grid.at[0,'hub']=hub
                grid.at[0,'comp']=gmm.n_components

                grid.at[0,'bins']=""
                grid.at[0,'k']=k
                #grid.at[0,'netstat']=str("")

                # Information criteria and fitted parameters of the mixture.
                grid.at[0,'bic']=gmm.bic(XY) 
                grid.at[0,'aic']=gmm.aic(XY)
                grid.at[0,'weights']=gmm.weights_.reshape(-1)
                grid.at[0,'means']=gmm.means_.reshape(-1)
                grid.at[0,'covariances']=gmm.covariances_.reshape(-1)

                # Moments of the observed returns.
                grid.at[0,'orig_M1']=itemorig["1"]
                
                grid.at[0,'orig_M2']=itemorig["2"]
                grid.at[0,'orig_M3']=itemorig["3"]
                grid.at[0,'orig_M4']=itemorig["4"]

                # Moments of the fitted mixture.
                grid.at[0,'GMM_M1']=nosim[0]
                
                grid.at[0,'GMM_M2']=nosim[1]
                grid.at[0,'GMM_M3']=nosim[2]
                grid.at[0,'GMM_M4']=nosim[3]

                # Absolute differences between observed and mixture moments.
                grid.at[0,'absdiff_M1']=abs(itemorig["1"]-nosim[0])
                
                grid.at[0,'absdiff_M2']=abs(itemorig["2"]-nosim[1])
                grid.at[0,'absdiff_M3']=abs(itemorig["3"]-nosim[2])
                grid.at[0,'absdiff_M4']=abs(itemorig["4"]-nosim[3])

                # Differences relative to the observed moments, in percent.
                grid.at[0,'rel%diff_M1']=100*abs((itemorig["1"]-nosim[0])/(itemorig["1"]))
                
                grid.at[0,'rel%diff_M2']=100*abs((itemorig["2"]-nosim[1])/(itemorig["2"]))
                grid.at[0,'rel%diff_M3']=100*abs((itemorig["3"]-nosim[2])/(itemorig["3"]))
                grid.at[0,'rel%diff_M4']=100*abs((itemorig["4"]-nosim[3])/(itemorig["4"]))
                
                # Append the row to the per-(strategy, hub) CSV, with the header
                # only on the first write (an earlier version wrote to *_VIS.csv).
                # NOTE(review): mode='a' means that re-running the cell appends to
                # an existing file, including a second header row.
                if (flag):
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS2.csv", header=True, mode='a')
                  flag=False
                else:      
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS2.csv", header=False, mode='a')

                # Progress trace: the k values that produced a result row.
                print("#",k)
                print("####\n\n")


##Diff2Vec Rendimenti


In [None]:
# Experiment: same pipeline as the Diff2Vec cell, but the graphs are built
# from the return series `r[hub]` instead of `tag_data[hub]`.
hubnames=["pjm","sp15","paloverde","nepool"]
strategies=["natural","horizontal"]
# NOTE(review): presumably `tag_data` holds the (cleaned) log-price series and
# `r` the return series, one column per hub — confirm against the pickles.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk")
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")
# Values of the `k` argument passed to tomato(): 2..100.
klist=range(2,101,1)
# Also used as the name of the results sub-folder.
modelName="Diff2Vec"
model=Diff2Vec()
clus=pd.DataFrame()
returns=pd.DataFrame()
# NOTE(review): `col` is not used anywhere in the visible code of this cell.
col=[]


for hub in hubnames:
        for strategy in strategies:
          # Build the graph of the hub's return series with the selected ts2vg builder.
          if (strategy=="natural"):
            g = NaturalVG(directed=None,weighted=None).build(r[hub])
          elif (strategy=="horizontal"):
            g=HorizontalVG(directed=None,weighted=None).build(r[hub])


          # Embed the graph nodes once per (hub, strategy) pair.
          graph= g.as_networkx()
          model.fit(graph)
          embedding=model.get_embedding()
          # True until the first row of this (hub, strategy) pair is written,
          # so that the CSV header is emitted only once per run of the loop.
          flag=True

          for k in klist:            
              # NOTE(review): the embedding does not depend on k, yet it is
              # re-standardised on every iteration; this could be hoisted.
              scaler = StandardScaler()
              embedding=scaler.fit_transform(embedding)

              clusters = tomato(points=embedding, k=k)
              num_clusters=len(set(clusters))
              clus=pd.DataFrame(clusters,columns=["cluster"])
              #clus.drop(clus.index[0], inplace=True)
              # assumes the cluster labels are the integers 0..num_clusters-1 — TODO confirm
              colors=list(range(0,num_clusters))
              returns=r[hub]

              # Re-key the labels with a 1-based index (row position + 1);
              # presumably to match the index of `r` — TODO confirm.
              col2=[]
              for index, row in clus.iterrows():
                col2.append(index+1)
              clus["idxs"]=col2
             
              # NOTE(review): the result of reset_index is discarded, so this line has no effect.
              clus.reset_index(drop=True)
              clus.set_index("idxs", inplace=True)

              # Pair the cluster labels with the returns frame, keyed on clus.index.
              returnCluster=pd.merge(clus, r, on=clus.index, how="inner")#returns

              # Keep only clusterings with at most 10 clusters and no cluster of size 1.
              if (num_clusters<=10 and (1 not in returnCluster.cluster.value_counts().values) ):
                tag_df=r[hub]#returns #tag_data[hub]
                tag_df=pd.DataFrame(tag_df)

                # Per-cluster statistics used to initialise the mixture: mean,
                # precision (1 / variance) and relative size of each cluster.
                means=[]
                precisions=[]
                nk=[]
                
                for x in colors:        
                  a=returnCluster[hub][returnCluster.cluster==x]
                  means.append(np.mean(a))
                  precisions.append(1/pow(np.std(a),2))
                  # TODO: double-check this weight (original note: "controllare")
                  nk.append(len(a)/(len(returnCluster)))
                  
                # Reshape to the array shapes passed as precisions_init / means_init below.
                precisions=np.array(precisions).reshape(-1,1,1)
                means=np.array(means).reshape(-1,1)

                # One-row results table for this (hub, strategy, k).
                grid=pd.DataFrame(columns=["idxs","hub","aic","bic","comp","weights","means","covariances"])
                # NOTE(review): the result of set_index is discarded, so `grid` is unchanged.
                grid.set_index("idxs")     
                
                # Sample moments of the returns: mean, std, skewness, kurtosis.
                # scipy's kurtosis() defaults to the excess definition, hence the +3.
                itemorig={"1":tag_df[hub].mean(),
                      "2":tag_df[hub].std(),
                      "3":skew(tag_df[hub]),
                      "4":kurtosis(tag_df[hub])+3
                }

                XY = tag_df[hub].values.reshape(-1, 1)#tag_data
                
                # Fit a 1-D Gaussian mixture on all returns, started from the cluster statistics.
                gmm = GaussianMixture(n_components=len(nk), weights_init=nk, means_init=means, precisions_init=precisions, covariance_type='full').fit(XY)
          
                # [mean, std, skewness, kurtosis] implied by the fitted mixture.
                nosim=foo(gmm.weights_.reshape(-1),gmm.means_.reshape(-1),gmm.covariances_.reshape(-1))

                # NOTE(review): 'comp' is assigned twice; the second value (gmm.n_components) wins.
                grid.at[0,'comp']=num_clusters
                grid.at[0,'hub']=hub
                grid.at[0,'comp']=gmm.n_components

                grid.at[0,'bins']=""
                grid.at[0,'k']=k
                #grid.at[0,'netstat']=str("")

                # Information criteria and fitted parameters of the mixture.
                grid.at[0,'bic']=gmm.bic(XY) 
                grid.at[0,'aic']=gmm.aic(XY)
                grid.at[0,'weights']=gmm.weights_.reshape(-1)
                grid.at[0,'means']=gmm.means_.reshape(-1)
                grid.at[0,'covariances']=gmm.covariances_.reshape(-1)

                # Moments of the observed returns.
                grid.at[0,'orig_M1']=itemorig["1"]
                
                grid.at[0,'orig_M2']=itemorig["2"]
                grid.at[0,'orig_M3']=itemorig["3"]
                grid.at[0,'orig_M4']=itemorig["4"]

                # Moments of the fitted mixture.
                grid.at[0,'GMM_M1']=nosim[0]
                
                grid.at[0,'GMM_M2']=nosim[1]
                grid.at[0,'GMM_M3']=nosim[2]
                grid.at[0,'GMM_M4']=nosim[3]

                # Absolute differences between observed and mixture moments.
                grid.at[0,'absdiff_M1']=abs(itemorig["1"]-nosim[0])
                
                grid.at[0,'absdiff_M2']=abs(itemorig["2"]-nosim[1])
                grid.at[0,'absdiff_M3']=abs(itemorig["3"]-nosim[2])
                grid.at[0,'absdiff_M4']=abs(itemorig["4"]-nosim[3])

                # Differences relative to the observed moments, in percent.
                grid.at[0,'rel%diff_M1']=100*abs((itemorig["1"]-nosim[0])/(itemorig["1"]))
                
                grid.at[0,'rel%diff_M2']=100*abs((itemorig["2"]-nosim[1])/(itemorig["2"]))
                grid.at[0,'rel%diff_M3']=100*abs((itemorig["3"]-nosim[2])/(itemorig["3"]))
                grid.at[0,'rel%diff_M4']=100*abs((itemorig["4"]-nosim[3])/(itemorig["4"]))
                
                # Append the row to the per-(strategy, hub) CSV, with the header
                # only on the first write (an earlier version wrote to *_VIS.csv).
                # NOTE(review): mode='a' means that re-running the cell appends to
                # an existing file, including a second header row.
                if (flag):
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS2.csv", header=True, mode='a')
                  flag=False
                else:      
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS2.csv", header=False, mode='a')

                # Progress trace: the k values that produced a result row.
                print("#",k)
                print("####\n\n")


# 3
####


# 4
####


# 5
####


# 6
####


# 7
####


# 9
####


# 10
####


# 11
####


# 12
####


# 13
####


# 14
####


# 15
####


# 16
####


# 17
####


# 18
####


# 19
####


# 20
####


# 21
####


# 22
####


# 23
####


# 24
####


# 25
####


# 26
####


# 27
####


# 28
####


# 29
####


# 30
####


# 31
####


# 32
####


# 33
####


# 34
####


# 35
####


# 36
####


# 37
####


# 38
####


# 39
####


# 40
####


# 41
####


# 42
####


# 43
####


# 44
####


# 45
####


# 46
####


# 47
####


# 48
####


# 49
####


# 50
####


# 51
####


# 52
####


# 53
####


# 54
####


# 55
####


# 56
####


# 57
####


# 58
####


# 59
####


# 60
####


# 61
####


# 62
####


# 63
####


# 64
####


# 65
####


# 66
####


# 67
####


# 68
####


# 69
####


# 70
####


# 71
####


# 72
####


# 73
####


# 74
####


# 75
####


# 76
####


# 77
####


# 78
####


# 79
####


# 80
####


# 81
####


# 82
####


# 83
####


# 84
####


# 85
####


# 86
####


# 87
####


##GraphWave

In [None]:
# Experiment: embed the nodes of each hub's visibility graph with GraphWave,
# cluster the embedding with tomato() for several values of k, and use the
# clusters to initialise a Gaussian mixture fitted on the hub's returns.
hubnames=["pjm","sp15","paloverde","nepool"]
strategies=["natural","horizontal"]
# NOTE(review): presumably `tag_data` holds the (cleaned) log-price series and
# `r` the return series, one column per hub — confirm against the pickles.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk")
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")
# Values of the `k` argument passed to tomato(): 2..100.
klist=range(2,101,1)
# Also used as the name of the results sub-folder.
modelName="GraphWave"
model=GraphWave()
clus=pd.DataFrame()
returns=pd.DataFrame()

for hub in hubnames:
        for strategy in strategies:
          # Build the graph of the hub's series with the selected ts2vg builder.
          if (strategy=="natural"):
            g = NaturalVG(directed=None,weighted=None).build(tag_data[hub])
          elif (strategy=="horizontal"):
            g=HorizontalVG(directed=None,weighted=None).build(tag_data[hub])

          # Embed the graph nodes once per (hub, strategy) pair.
          graph= g.as_networkx()
          model.fit(graph)
          embedding=model.get_embedding()
          # True until the first row of this (hub, strategy) pair is written,
          # so that the CSV header is emitted only once per run of the loop.
          flag=True

          for k in klist:            
              # NOTE(review): the embedding does not depend on k, yet it is
              # re-standardised on every iteration; this could be hoisted.
              scaler = StandardScaler()
              embedding=scaler.fit_transform(embedding)

              clusters = tomato(points=embedding, k=k)
              num_clusters=len(set(clusters))
              clus=pd.DataFrame(clusters,columns=["cluster"])
              # Drop the first node's label; presumably because the returns
              # have one observation fewer than the prices — TODO confirm.
              clus.drop(clus.index[0], inplace=True)
              # assumes the cluster labels are the integers 0..num_clusters-1 — TODO confirm
              colors=list(range(0,num_clusters))
              returns=r[hub]
              # Pair the remaining cluster labels with the hub's returns, keyed on clus.index.
              returnCluster=pd.merge(clus, returns, on=clus.index, how="inner")

              # Keep only clusterings with at most 10 clusters and no cluster of size 1.
              if (num_clusters<=10 and (1 not in returnCluster.cluster.value_counts().values) ):
                tag_df=returns#tag_data[hub]
                tag_df=pd.DataFrame(tag_df)

                # Per-cluster statistics used to initialise the mixture: mean,
                # precision (1 / variance) and relative size of each cluster.
                means=[]
                precisions=[]
                nk=[]
                
                for x in colors:        
                  a=returnCluster[hub][returnCluster.cluster==x]
                  means.append(np.mean(a))
                  precisions.append(1/pow(np.std(a),2))
                  # TODO: double-check this weight (original note: "controllare")
                  nk.append(len(a)/(len(returnCluster)))
                  
                # Reshape to the array shapes passed as precisions_init / means_init below.
                precisions=np.array(precisions).reshape(-1,1,1)
                means=np.array(means).reshape(-1,1)

                # One-row results table for this (hub, strategy, k).
                grid=pd.DataFrame(columns=["idxs","hub","aic","bic","comp","weights","means","covariances"])
                # NOTE(review): the result of set_index is discarded, so `grid` is unchanged.
                grid.set_index("idxs")     
                
                # Sample moments of the returns: mean, std, skewness, kurtosis.
                # scipy's kurtosis() defaults to the excess definition, hence the +3.
                itemorig={"1":tag_df[hub].mean(),
                      "2":tag_df[hub].std(),
                      "3":skew(tag_df[hub]),
                      "4":kurtosis(tag_df[hub])+3
                }

                XY = tag_df[hub].values.reshape(-1, 1)#tag_data
                
                # Fit a 1-D Gaussian mixture on all returns, started from the cluster statistics.
                gmm = GaussianMixture(n_components=len(nk), weights_init=nk, means_init=means, precisions_init=precisions, covariance_type='full').fit(XY)
          
                # [mean, std, skewness, kurtosis] implied by the fitted mixture.
                nosim=foo(gmm.weights_.reshape(-1),gmm.means_.reshape(-1),gmm.covariances_.reshape(-1))

                # NOTE(review): 'comp' is assigned twice; the second value (gmm.n_components) wins.
                grid.at[0,'comp']=num_clusters
                grid.at[0,'hub']=hub
                grid.at[0,'comp']=gmm.n_components

                grid.at[0,'bins']=""
                grid.at[0,'k']=k
                #grid.at[0,'netstat']=str("")

                # Information criteria and fitted parameters of the mixture.
                grid.at[0,'bic']=gmm.bic(XY) 
                grid.at[0,'aic']=gmm.aic(XY)
                grid.at[0,'weights']=gmm.weights_.reshape(-1)
                grid.at[0,'means']=gmm.means_.reshape(-1)
                grid.at[0,'covariances']=gmm.covariances_.reshape(-1)

                # Moments of the observed returns.
                grid.at[0,'orig_M1']=itemorig["1"]
                
                grid.at[0,'orig_M2']=itemorig["2"]
                grid.at[0,'orig_M3']=itemorig["3"]
                grid.at[0,'orig_M4']=itemorig["4"]

                # Moments of the fitted mixture.
                grid.at[0,'GMM_M1']=nosim[0]
                
                grid.at[0,'GMM_M2']=nosim[1]
                grid.at[0,'GMM_M3']=nosim[2]
                grid.at[0,'GMM_M4']=nosim[3]

                # Absolute differences between observed and mixture moments.
                grid.at[0,'absdiff_M1']=abs(itemorig["1"]-nosim[0])
                
                grid.at[0,'absdiff_M2']=abs(itemorig["2"]-nosim[1])
                grid.at[0,'absdiff_M3']=abs(itemorig["3"]-nosim[2])
                grid.at[0,'absdiff_M4']=abs(itemorig["4"]-nosim[3])

                # Differences relative to the observed moments, in percent.
                grid.at[0,'rel%diff_M1']=100*abs((itemorig["1"]-nosim[0])/(itemorig["1"]))
                
                grid.at[0,'rel%diff_M2']=100*abs((itemorig["2"]-nosim[1])/(itemorig["2"]))
                grid.at[0,'rel%diff_M3']=100*abs((itemorig["3"]-nosim[2])/(itemorig["3"]))
                grid.at[0,'rel%diff_M4']=100*abs((itemorig["4"]-nosim[3])/(itemorig["4"]))
                
                # Append the row to the per-(strategy, hub) CSV, with the header
                # only on the first write.
                # NOTE(review): mode='a' means that re-running the cell appends to
                # an existing file, including a second header row.
                if (flag):
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS.csv", header=True, mode='a')
                  flag=False
                else:      
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS.csv", header=False, mode='a')

                # Progress trace: the k values that produced a result row.
                print("#",k)
                print("####\n\n")


##NEU_Diff2vec

In [None]:
# Experiment: embed the nodes of each hub's visibility graph with NEU wrapped
# around a Diff2Vec model, cluster the embedding with tomato() for several
# values of k, and use the clusters to initialise a Gaussian mixture fitted on
# the hub's returns.
hubnames=["pjm","sp15","paloverde","nepool"]
strategies=["natural","horizontal"]
# NOTE(review): presumably `tag_data` holds the (cleaned) log-price series and
# `r` the return series, one column per hub — confirm against the pickles.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk")
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")
# Values of the `k` argument passed to tomato(): 2..100.
klist=range(2,101,1)
# Also used as the name of the results sub-folder.
modelName="NEU_Diff2Vec"
model=NEU()
clus=pd.DataFrame()
returns=pd.DataFrame()

for hub in hubnames:
        for strategy in strategies:
          # Build the graph of the hub's series with the selected ts2vg builder.
          if (strategy=="natural"):
            g = NaturalVG(directed=None,weighted=None).build(tag_data[hub])
          elif (strategy=="horizontal"):
            g=HorizontalVG(directed=None,weighted=None).build(tag_data[hub])

          # Embed the graph nodes once per (hub, strategy) pair; a fresh
          # Diff2Vec instance is handed to NEU.fit as the base model.
          graph= g.as_networkx()
          model.fit(graph, Diff2Vec())
          embedding=model.get_embedding()
          # True until the first row of this (hub, strategy) pair is written,
          # so that the CSV header is emitted only once per run of the loop.
          flag=True

          for k in klist:            
              # NOTE(review): the embedding does not depend on k, yet it is
              # re-standardised on every iteration; this could be hoisted.
              scaler = StandardScaler()
              embedding=scaler.fit_transform(embedding)

              clusters = tomato(points=embedding, k=k)
              num_clusters=len(set(clusters))
              clus=pd.DataFrame(clusters,columns=["cluster"])
              # Drop the first node's label; presumably because the returns
              # have one observation fewer than the prices — TODO confirm.
              clus.drop(clus.index[0], inplace=True)
              # assumes the cluster labels are the integers 0..num_clusters-1 — TODO confirm
              colors=list(range(0,num_clusters))
              returns=r[hub]
              # Pair the remaining cluster labels with the hub's returns, keyed on clus.index.
              returnCluster=pd.merge(clus, returns, on=clus.index, how="inner")

              # Keep only clusterings with at most 10 clusters and no cluster of size 1.
              if (num_clusters<=10 and (1 not in returnCluster.cluster.value_counts().values) ):
                tag_df=returns#tag_data[hub]
                tag_df=pd.DataFrame(tag_df)

                # Per-cluster statistics used to initialise the mixture: mean,
                # precision (1 / variance) and relative size of each cluster.
                means=[]
                precisions=[]
                nk=[]
                
                for x in colors:        
                  a=returnCluster[hub][returnCluster.cluster==x]
                  means.append(np.mean(a))
                  precisions.append(1/pow(np.std(a),2))
                  # TODO: double-check this weight (original note: "controllare")
                  nk.append(len(a)/(len(returnCluster)))
                  
                # Reshape to the array shapes passed as precisions_init / means_init below.
                precisions=np.array(precisions).reshape(-1,1,1)
                means=np.array(means).reshape(-1,1)

                # One-row results table for this (hub, strategy, k).
                grid=pd.DataFrame(columns=["idxs","hub","aic","bic","comp","weights","means","covariances"])
                # NOTE(review): the result of set_index is discarded, so `grid` is unchanged.
                grid.set_index("idxs")     
                
                # Sample moments of the returns: mean, std, skewness, kurtosis.
                # scipy's kurtosis() defaults to the excess definition, hence the +3.
                itemorig={"1":tag_df[hub].mean(),
                      "2":tag_df[hub].std(),
                      "3":skew(tag_df[hub]),
                      "4":kurtosis(tag_df[hub])+3
                }

                XY = tag_df[hub].values.reshape(-1, 1)#tag_data
                
                # Fit a 1-D Gaussian mixture on all returns, started from the cluster statistics.
                gmm = GaussianMixture(n_components=len(nk), weights_init=nk, means_init=means, precisions_init=precisions, covariance_type='full').fit(XY)
          
                # [mean, std, skewness, kurtosis] implied by the fitted mixture.
                nosim=foo(gmm.weights_.reshape(-1),gmm.means_.reshape(-1),gmm.covariances_.reshape(-1))

                # NOTE(review): 'comp' is assigned twice; the second value (gmm.n_components) wins.
                grid.at[0,'comp']=num_clusters
                grid.at[0,'hub']=hub
                grid.at[0,'comp']=gmm.n_components

                grid.at[0,'bins']=""
                grid.at[0,'k']=k
                #grid.at[0,'netstat']=str("")

                # Information criteria and fitted parameters of the mixture.
                grid.at[0,'bic']=gmm.bic(XY) 
                grid.at[0,'aic']=gmm.aic(XY)
                grid.at[0,'weights']=gmm.weights_.reshape(-1)
                grid.at[0,'means']=gmm.means_.reshape(-1)
                grid.at[0,'covariances']=gmm.covariances_.reshape(-1)

                # Moments of the observed returns.
                grid.at[0,'orig_M1']=itemorig["1"]
                
                grid.at[0,'orig_M2']=itemorig["2"]
                grid.at[0,'orig_M3']=itemorig["3"]
                grid.at[0,'orig_M4']=itemorig["4"]

                # Moments of the fitted mixture.
                grid.at[0,'GMM_M1']=nosim[0]
                
                grid.at[0,'GMM_M2']=nosim[1]
                grid.at[0,'GMM_M3']=nosim[2]
                grid.at[0,'GMM_M4']=nosim[3]

                # Absolute differences between observed and mixture moments.
                grid.at[0,'absdiff_M1']=abs(itemorig["1"]-nosim[0])
                
                grid.at[0,'absdiff_M2']=abs(itemorig["2"]-nosim[1])
                grid.at[0,'absdiff_M3']=abs(itemorig["3"]-nosim[2])
                grid.at[0,'absdiff_M4']=abs(itemorig["4"]-nosim[3])

                # Differences relative to the observed moments, in percent.
                grid.at[0,'rel%diff_M1']=100*abs((itemorig["1"]-nosim[0])/(itemorig["1"]))
                
                grid.at[0,'rel%diff_M2']=100*abs((itemorig["2"]-nosim[1])/(itemorig["2"]))
                grid.at[0,'rel%diff_M3']=100*abs((itemorig["3"]-nosim[2])/(itemorig["3"]))
                grid.at[0,'rel%diff_M4']=100*abs((itemorig["4"]-nosim[3])/(itemorig["4"]))
                
                # Append the row to the per-(strategy, hub) CSV, with the header
                # only on the first write.
                # NOTE(review): mode='a' means that re-running the cell appends to
                # an existing file, including a second header row.
                if (flag):
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS.csv", header=True, mode='a')
                  flag=False
                else:      
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS.csv", header=False, mode='a')

                # Progress trace: the k values that produced a result row.
                print("#",k)
                print("####\n\n")


##NEU_GraphWave

In [None]:
# Experiment: embed the nodes of each hub's visibility graph with NEU wrapped
# around a GraphWave model, cluster the embedding with tomato() for several
# values of k, and use the clusters to initialise a Gaussian mixture fitted on
# the hub's returns.
hubnames=["pjm","sp15","paloverde","nepool"]
strategies=["natural","horizontal"]
# NOTE(review): presumably `tag_data` holds the (cleaned) log-price series and
# `r` the return series, one column per hub — confirm against the pickles.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk")
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")
# Values of the `k` argument passed to tomato(): 2..100.
klist=range(2,101,1)
# Also used as the name of the results sub-folder.
modelName="NEU_GraphWave"
model=NEU()
clus=pd.DataFrame()
returns=pd.DataFrame()

for hub in hubnames:
        for strategy in strategies:
          # Build the graph of the hub's series with the selected ts2vg builder.
          if (strategy=="natural"):
            g = NaturalVG(directed=None,weighted=None).build(tag_data[hub])
          elif (strategy=="horizontal"):
            g=HorizontalVG(directed=None,weighted=None).build(tag_data[hub])

          # Embed the graph nodes once per (hub, strategy) pair; a fresh
          # GraphWave instance is handed to NEU.fit as the base model.
          graph= g.as_networkx()
          model.fit(graph, GraphWave())
          embedding=model.get_embedding()
          # True until the first row of this (hub, strategy) pair is written,
          # so that the CSV header is emitted only once per run of the loop.
          flag=True

          for k in klist:            
              # NOTE(review): the embedding does not depend on k, yet it is
              # re-standardised on every iteration; this could be hoisted.
              scaler = StandardScaler()
              embedding=scaler.fit_transform(embedding)

              clusters = tomato(points=embedding, k=k)
              num_clusters=len(set(clusters))
              clus=pd.DataFrame(clusters,columns=["cluster"])
              # Drop the first node's label; presumably because the returns
              # have one observation fewer than the prices — TODO confirm.
              clus.drop(clus.index[0], inplace=True)
              # assumes the cluster labels are the integers 0..num_clusters-1 — TODO confirm
              colors=list(range(0,num_clusters))
              returns=r[hub]
              # Pair the remaining cluster labels with the hub's returns, keyed on clus.index.
              returnCluster=pd.merge(clus, returns, on=clus.index, how="inner")

              # Keep only clusterings with at most 10 clusters and no cluster of size 1.
              if (num_clusters<=10 and (1 not in returnCluster.cluster.value_counts().values) ):
                tag_df=returns#tag_data[hub]
                tag_df=pd.DataFrame(tag_df)

                # Per-cluster statistics used to initialise the mixture: mean,
                # precision (1 / variance) and relative size of each cluster.
                means=[]
                precisions=[]
                nk=[]
                
                for x in colors:        
                  a=returnCluster[hub][returnCluster.cluster==x]
                  means.append(np.mean(a))
                  precisions.append(1/pow(np.std(a),2))
                  # TODO: double-check this weight (original note: "controllare")
                  nk.append(len(a)/(len(returnCluster)))
                  
                # Reshape to the array shapes passed as precisions_init / means_init below.
                precisions=np.array(precisions).reshape(-1,1,1)
                means=np.array(means).reshape(-1,1)

                # One-row results table for this (hub, strategy, k).
                grid=pd.DataFrame(columns=["idxs","hub","aic","bic","comp","weights","means","covariances"])
                # NOTE(review): the result of set_index is discarded, so `grid` is unchanged.
                grid.set_index("idxs")     
                
                # Sample moments of the returns: mean, std, skewness, kurtosis.
                # scipy's kurtosis() defaults to the excess definition, hence the +3.
                itemorig={"1":tag_df[hub].mean(),
                      "2":tag_df[hub].std(),
                      "3":skew(tag_df[hub]),
                      "4":kurtosis(tag_df[hub])+3
                }

                XY = tag_df[hub].values.reshape(-1, 1)#tag_data
                
                # Fit a 1-D Gaussian mixture on all returns, started from the cluster statistics.
                gmm = GaussianMixture(n_components=len(nk), weights_init=nk, means_init=means, precisions_init=precisions, covariance_type='full').fit(XY)
          
                # [mean, std, skewness, kurtosis] implied by the fitted mixture.
                nosim=foo(gmm.weights_.reshape(-1),gmm.means_.reshape(-1),gmm.covariances_.reshape(-1))

                # NOTE(review): 'comp' is assigned twice; the second value (gmm.n_components) wins.
                grid.at[0,'comp']=num_clusters
                grid.at[0,'hub']=hub
                grid.at[0,'comp']=gmm.n_components

                grid.at[0,'bins']=""
                grid.at[0,'k']=k
                #grid.at[0,'netstat']=str("")

                # Information criteria and fitted parameters of the mixture.
                grid.at[0,'bic']=gmm.bic(XY) 
                grid.at[0,'aic']=gmm.aic(XY)
                grid.at[0,'weights']=gmm.weights_.reshape(-1)
                grid.at[0,'means']=gmm.means_.reshape(-1)
                grid.at[0,'covariances']=gmm.covariances_.reshape(-1)

                # Moments of the observed returns.
                grid.at[0,'orig_M1']=itemorig["1"]
                
                grid.at[0,'orig_M2']=itemorig["2"]
                grid.at[0,'orig_M3']=itemorig["3"]
                grid.at[0,'orig_M4']=itemorig["4"]

                # Moments of the fitted mixture.
                grid.at[0,'GMM_M1']=nosim[0]
                
                grid.at[0,'GMM_M2']=nosim[1]
                grid.at[0,'GMM_M3']=nosim[2]
                grid.at[0,'GMM_M4']=nosim[3]

                # Absolute differences between observed and mixture moments.
                grid.at[0,'absdiff_M1']=abs(itemorig["1"]-nosim[0])
                
                grid.at[0,'absdiff_M2']=abs(itemorig["2"]-nosim[1])
                grid.at[0,'absdiff_M3']=abs(itemorig["3"]-nosim[2])
                grid.at[0,'absdiff_M4']=abs(itemorig["4"]-nosim[3])

                # Differences relative to the observed moments, in percent.
                grid.at[0,'rel%diff_M1']=100*abs((itemorig["1"]-nosim[0])/(itemorig["1"]))
                
                grid.at[0,'rel%diff_M2']=100*abs((itemorig["2"]-nosim[1])/(itemorig["2"]))
                grid.at[0,'rel%diff_M3']=100*abs((itemorig["3"]-nosim[2])/(itemorig["3"]))
                grid.at[0,'rel%diff_M4']=100*abs((itemorig["4"]-nosim[3])/(itemorig["4"]))
                
                # Append the row to the per-(strategy, hub) CSV, with the header
                # only on the first write.
                # NOTE(review): mode='a' means that re-running the cell appends to
                # an existing file, including a second header row.
                if (flag):
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS.csv", header=True, mode='a')
                  flag=False
                else:      
                  grid.to_csv("/content/drive/MyDrive/Mari/Paper3/Results/"+modelName+"/"+strategy+"_"+hub+"_VIS.csv", header=False, mode='a')

                # Progress trace: the k values that produced a result row.
                print("#",k)
                print("####\n\n")


#Embedding con attributi

##ASNE

In [None]:
# ASNE experiment.
# For every hub and visibility-graph strategy: embed the graph with the
# returns as node attributes, cluster the standardised embedding with
# `tomato` for each k, and, when the clustering is usable, fit a Gaussian
# mixture initialised from the per-cluster return statistics. One CSV row
# per accepted k is appended to <Results>/<modelName>/<strategy>_<hub>_VIS.csv.
hubnames = ["pjm", "sp15", "paloverde", "nepool"]
strategies = ["natural", "horizontal"]
klist = range(2, 101, 1)
modelName = "ASNE"
model = ASNE()

# The input pickles never change inside the loops, so they are read once.
# The first row of the price frame is dropped, exactly as before.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk").iloc[1:]
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")

# Strategy name -> ts2vg builder class.
graph_builders = {"natural": NaturalVG, "horizontal": HorizontalVG}

for hub in hubnames:
    # Everything derived from the returns depends on the hub only.
    # NOTE(review): coo_matrix() of a 1-D array yields a single-row matrix;
    # confirm this is the node-feature layout the embedding model expects.
    features = sparse.coo_matrix(np.array(r[hub].values))

    returns = pd.DataFrame(r[hub])
    returns.index -= 1  # shift the index by one so it lines up with the cluster labels
    tag_df = pd.DataFrame(returns)

    # Empirical moments of the returns; kurtosis() returns the excess
    # kurtosis, hence the +3.
    itemorig = {
        "1": tag_df[hub].mean(),
        "2": tag_df[hub].std(),
        "3": skew(tag_df[hub]),
        "4": kurtosis(tag_df[hub]) + 3,
    }
    XY = tag_df[hub].values.reshape(-1, 1)

    for strategy in strategies:
        g = graph_builders[strategy](directed=None, weighted=None).build(tag_data[hub])
        graph = g.as_networkx()
        model.fit(graph, features)

        # Standardise once per embedding. Re-fitting a StandardScaler on the
        # already standardised array at every k only re-applies the same
        # transform (up to floating-point rounding).
        scaler = StandardScaler()
        embedding = scaler.fit_transform(model.get_embedding())

        out_path = f"/content/drive/MyDrive/Mari/Paper3/Results/{modelName}/{strategy}_{hub}_VIS.csv"
        flag = True  # write the CSV header only with the first row of this file

        for k in klist:
            clusters = tomato(points=embedding, k=k)
            num_clusters = len(set(clusters))

            clus = pd.DataFrame(clusters, columns=["cluster"])
            returnCluster = clus.join(returns)

            # Keep only clusterings with at most 10 clusters and no singleton cluster.
            if num_clusters > 10 or (1 in returnCluster.cluster.value_counts().values):
                continue

            # Per-cluster statistics used to initialise the mixture.
            # np.std is deliberately kept (it is called with ddof=0).
            members = [
                returnCluster[hub][returnCluster.cluster == label]
                for label in range(num_clusters)
            ]
            means = np.array([np.mean(a) for a in members]).reshape(-1, 1)
            precisions = np.array([1 / np.std(a) ** 2 for a in members]).reshape(-1, 1, 1)
            nk = [len(a) / len(returnCluster) for a in members]  # cluster shares (original note: "to be checked")

            gmm = GaussianMixture(
                n_components=len(nk),
                weights_init=nk,
                means_init=means,
                precisions_init=precisions,
                covariance_type='full',
            ).fit(XY)

            # `foo` is defined elsewhere in the notebook; its first four
            # entries are stored below as the mixture's moments M1..M4.
            nosim = foo(gmm.weights_.reshape(-1), gmm.means_.reshape(-1), gmm.covariances_.reshape(-1))

            # Cell values in the order they are written; this order also
            # fixes the order of the columns appended to `grid`.
            row = {
                "comp": num_clusters,  # identical to gmm.n_components
                "hub": hub,
                "bins": "",
                "k": k,
                "bic": gmm.bic(XY),
                "aic": gmm.aic(XY),
                "weights": gmm.weights_.reshape(-1),
                "means": gmm.means_.reshape(-1),
                "covariances": gmm.covariances_.reshape(-1),
            }
            moments = [(m, itemorig[str(m)], nosim[m - 1]) for m in range(1, 5)]
            row.update({f"orig_M{m}": orig for m, orig, _ in moments})
            row.update({f"GMM_M{m}": fitted for m, _, fitted in moments})
            row.update({f"absdiff_M{m}": abs(orig - fitted) for m, orig, fitted in moments})
            row.update({f"rel%diff_M{m}": 100 * abs((orig - fitted) / orig) for m, orig, fitted in moments})

            grid = pd.DataFrame(columns=["idxs", "hub", "aic", "bic", "comp", "weights", "means", "covariances"])
            for column, value in row.items():
                grid.at[0, column] = value

            grid.to_csv(out_path, header=flag, mode='a')
            flag = False

            print("#", k)
            print("####\n\n")


##NEUASNE

In [None]:
# NEU_ASNE experiment.
# For every hub and visibility-graph strategy: embed the graph with the
# returns as node attributes, cluster the standardised embedding with
# `tomato` for each k, and, when the clustering is usable, fit a Gaussian
# mixture initialised from the per-cluster return statistics. One CSV row
# per accepted k is appended to <Results>/<modelName>/<strategy>_<hub>_VIS.csv.
hubnames = ["pjm", "sp15", "paloverde", "nepool"]
strategies = ["natural", "horizontal"]
klist = range(2, 101, 1)
modelName = "NEU_ASNE"
# NOTE(review): NEU2 is not defined in this cell; presumably it is declared
# in an earlier cell of the notebook - confirm before running in isolation.
model = NEU2()

# The input pickles never change inside the loops, so they are read once.
# The first row of the price frame is dropped, exactly as before.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk").iloc[1:]
r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")

# Strategy name -> ts2vg builder class.
graph_builders = {"natural": NaturalVG, "horizontal": HorizontalVG}

for hub in hubnames:
    # Everything derived from the returns depends on the hub only.
    # NOTE(review): coo_matrix() of a 1-D array yields a single-row matrix;
    # confirm this is the node-feature layout the embedding model expects.
    features = sparse.coo_matrix(np.array(r[hub].values))

    returns = pd.DataFrame(r[hub])
    returns.index -= 1  # shift the index by one so it lines up with the cluster labels
    tag_df = pd.DataFrame(returns)

    # Empirical moments of the returns; kurtosis() returns the excess
    # kurtosis, hence the +3.
    itemorig = {
        "1": tag_df[hub].mean(),
        "2": tag_df[hub].std(),
        "3": skew(tag_df[hub]),
        "4": kurtosis(tag_df[hub]) + 3,
    }
    XY = tag_df[hub].values.reshape(-1, 1)

    for strategy in strategies:
        g = graph_builders[strategy](directed=None, weighted=None).build(tag_data[hub])
        graph = g.as_networkx()
        model.fit(graph, features)

        # Standardise once per embedding. Re-fitting a StandardScaler on the
        # already standardised array at every k only re-applies the same
        # transform (up to floating-point rounding).
        scaler = StandardScaler()
        embedding = scaler.fit_transform(model.get_embedding())

        out_path = f"/content/drive/MyDrive/Mari/Paper3/Results/{modelName}/{strategy}_{hub}_VIS.csv"
        flag = True  # write the CSV header only with the first row of this file

        for k in klist:
            clusters = tomato(points=embedding, k=k)
            num_clusters = len(set(clusters))

            clus = pd.DataFrame(clusters, columns=["cluster"])
            returnCluster = clus.join(returns)

            # Keep only clusterings with at most 10 clusters and no singleton cluster.
            if num_clusters > 10 or (1 in returnCluster.cluster.value_counts().values):
                continue

            # Per-cluster statistics used to initialise the mixture.
            # np.std is deliberately kept (it is called with ddof=0).
            members = [
                returnCluster[hub][returnCluster.cluster == label]
                for label in range(num_clusters)
            ]
            means = np.array([np.mean(a) for a in members]).reshape(-1, 1)
            precisions = np.array([1 / np.std(a) ** 2 for a in members]).reshape(-1, 1, 1)
            nk = [len(a) / len(returnCluster) for a in members]  # cluster shares (original note: "to be checked")

            gmm = GaussianMixture(
                n_components=len(nk),
                weights_init=nk,
                means_init=means,
                precisions_init=precisions,
                covariance_type='full',
            ).fit(XY)

            # `foo` is defined elsewhere in the notebook; its first four
            # entries are stored below as the mixture's moments M1..M4.
            nosim = foo(gmm.weights_.reshape(-1), gmm.means_.reshape(-1), gmm.covariances_.reshape(-1))

            # Cell values in the order they are written; this order also
            # fixes the order of the columns appended to `grid`.
            row = {
                "comp": num_clusters,  # identical to gmm.n_components
                "hub": hub,
                "bins": "",
                "k": k,
                "bic": gmm.bic(XY),
                "aic": gmm.aic(XY),
                "weights": gmm.weights_.reshape(-1),
                "means": gmm.means_.reshape(-1),
                "covariances": gmm.covariances_.reshape(-1),
            }
            moments = [(m, itemorig[str(m)], nosim[m - 1]) for m in range(1, 5)]
            row.update({f"orig_M{m}": orig for m, orig, _ in moments})
            row.update({f"GMM_M{m}": fitted for m, _, fitted in moments})
            row.update({f"absdiff_M{m}": abs(orig - fitted) for m, orig, fitted in moments})
            row.update({f"rel%diff_M{m}": 100 * abs((orig - fitted) / orig) for m, orig, fitted in moments})

            grid = pd.DataFrame(columns=["idxs", "hub", "aic", "bic", "comp", "weights", "means", "covariances"])
            for column, value in row.items():
                grid.at[0, column] = value

            grid.to_csv(out_path, header=flag, mode='a')
            flag = False

            print("#", k)
            print("####\n\n")
