#importing

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%%capture
!pip install community
!pip install python-louvain
!pip install tsia
!pip install networkx
!pip install easydev
!pip install colormap
!pip install tomaster
!pip install karateclub

In [None]:
%%capture
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import sys
import seaborn as sns

from matplotlib import gridspec
from numba import njit, prange
from pyts.image import MarkovTransitionField

import tsia.plot
import tsia.markov
import tsia.network_graph

import community
from community import community_louvain
import networkx as nx

from matplotlib.colors import to_hex

from sklearn.preprocessing import MinMaxScaler
from sklearn.mixture import GaussianMixture
import numpy as np
from scipy.stats import kurtosis, skew

import csv
from colormap import rgb2hex
from tomaster import tomato

from karateclub import MUSAE
from karateclub import GraphWave
from karateclub import Role2Vec
from karateclub import Diff2Vec
from karateclub import FeatherNode
from karateclub import AE
from karateclub import NEU
from karateclub import GEMSEC
from karateclub import EdMot
from karateclub import SCD
from karateclub import NodeSketch
from karateclub import GeoScattering
from karateclub import FSCNMF
from karateclub import NetMF 
from karateclub import GLEE
from karateclub import DeepWalk

import seaborn as sns
from pickle import FALSE
import scipy
import scipy.sparse as sparse

#funcs


In [None]:
# Useful constants definition:
# name of the matplotlib colormap used as the default `colormap` argument of
# get_modularity_encoding2 and get_network_graph_map2.
COLORMAP = 'jet'

def get_network_graph2(mtf):
    """Build a weighted networkx graph from a square matrix.

    Parameters
    ----------
    mtf : 2-D numpy array
        Matrix interpreted as an adjacency matrix; the callers in this
        notebook pass a Markov transition field.

    Returns
    -------
    networkx.Graph
        Graph in which every edge ``(u, v)``, in the orientation reported
        by ``graph.edges()``, carries ``weight = mtf[u, v]``.
    """
    # `nx.from_numpy_matrix` was removed in networkx 3.0; `from_numpy_array`
    # is the supported constructor for plain ndarray input.
    graph = nx.from_numpy_array(mtf)

    # Explicitly (re)assign each edge weight from the matrix entry matching
    # the orientation in which networkx reports the edge, so the result does
    # not depend on how the constructor resolves asymmetric matrices.
    for u, v in graph.edges():
        graph[u][v]['weight'] = mtf[u, v]

    return graph
    
def compute_network_graph_statistics2(partitions, graph=None, mtf=None):
    """Compute summary statistics of a network graph for a given partition.

    Parameters
    ----------
    partitions : dict
        Mapping node -> community id. It is passed to
        ``community_louvain.modularity`` and its distinct values are counted.
    graph : networkx.Graph, optional
        Graph to analyse. When omitted, it is built from ``mtf``.
    mtf : 2-D numpy array, optional
        Matrix used to build the graph when ``graph`` is not given.

    Returns
    -------
    dict
        Keys: 'Diameter', 'Average degree', 'Average weighted degree',
        'Density', 'Average path length', 'Average clustering coefficient',
        'Modularity', 'Partitions'.

    Raises
    ------
    ValueError
        If neither ``graph`` nor ``mtf`` is provided.
    """
    if graph is None:
        if mtf is None:
            raise ValueError("Either `graph` or `mtf` must be provided.")
        # The original called an undefined `get_network_graph`; the builder
        # defined in this notebook is `get_network_graph2`.
        graph = get_network_graph2(mtf)

    nb_partitions = len(set(partitions.values()))
    modularity = community_louvain.modularity(partitions, graph)

    diameter = nx.diameter(graph)
    clustering = list(nx.clustering(graph, weight='weight').values())
    avg_clustering_coeff = np.array(clustering).mean()
    density = nx.density(graph)
    avg_path_length = nx.average_shortest_path_length(
        graph, weight='weight', method='dijkstra'
    )

    # Both "average degree" figures are the mean of the values returned by
    # nx.average_degree_connectivity (unweighted, then weighted).
    average_degree = np.mean(
        list(nx.average_degree_connectivity(graph).values())
    )
    avg_weighted_degree = np.mean(
        list(nx.average_degree_connectivity(graph, weight='weight').values())
    )

    statistics = {
        'Diameter': diameter,
        'Average degree': average_degree,
        'Average weighted degree': avg_weighted_degree,
        'Density': density,
        'Average path length': avg_path_length,
        'Average clustering coefficient': avg_clustering_coeff,
        'Modularity': modularity,
        'Partitions': nb_partitions
    }

    return statistics
    
def get_modularity_encoding2(graph, colormap=COLORMAP, reversed_cmap=False):
    """Compute a Louvain-partition-based visual encoding for a graph.

    Parameters
    ----------
    graph : networkx.Graph
        Weighted graph to partition.
    colormap : str
        Name of the matplotlib colormap used for edge colors.
    reversed_cmap : bool
        When true, the reversed colormap is used.

    Returns
    -------
    encoding : dict
        'node_size'  : per-node size derived from the weighted clustering
                       coefficient, shifted by its minimum and scaled
                       (``* 2000 + 10``);
        'edge_color' : per-edge hex color chosen from the partition of the
                       edge's second endpoint;
        'node_color' : per-node partition id.
    partitions : dict
        Node -> partition id as returned by
        ``community_louvain.best_partition`` (``random_state=1234``).
    """
    # `plt.cm.get_cmap` was removed in Matplotlib 3.9; `plt.get_cmap` is
    # available in both old and new releases.
    cmap = plt.get_cmap(colormap)
    if reversed_cmap:
        cmap = cmap.reversed()

    # Get the node partitions found with the Louvain algorithm, as
    # implemented in the `community` package:
    partitions = community_louvain.best_partition(graph, random_state=1234)
    nb_partitions = len(set(partitions.values()))

    # With a single partition `nb_partitions - 1` is 0; clamp the divisor so
    # every edge maps to the first colormap entry instead of raising
    # ZeroDivisionError.
    color_span = max(nb_partitions - 1, 1)

    # Compute node colors and edge colors for the modularity encoding:
    edge_colors = [
        to_hex(cmap(partitions.get(v) / color_span))
        for u, v in graph.edges()
    ]
    node_colors = [partitions.get(node) for node in graph.nodes()]

    clustering = np.asarray(
        list(nx.clustering(graph, weight='weight').values())
    )
    node_size = list((clustering - np.min(clustering)) * 2000 + 10)

    # Store the encoding to return in a dictionary:
    encoding = {
        'node_size': node_size,
        'edge_color': edge_colors,
        'node_color': node_colors
    }
    return encoding, partitions

def foo(w,m,v):
  """Return ``[mean, std, skewness, kurtosis]`` of a weighted mixture.

  ``w``, ``m`` and ``v`` are indexable sequences of equal length holding the
  weight, mean and variance of each component. The per-component raw
  moments use the closed forms for a normal distribution with mean ``m[j]``
  and variance ``v[j]``; they are combined with ``np.dot(w, ...)`` and then
  converted to central, standardised moments. The kurtosis is not
  excess-corrected (a single normal component yields 3).
  """
  indices = range(len(w))

  # Raw moments of order 2, 3 and 4 for each component.
  second = [v[j] + m[j]**2 for j in indices]
  third = [pow(m[j], 3) + 3*m[j]*v[j] for j in indices]
  fourth = [pow(m[j], 4) + 6*m[j]**2*v[j] + 3*v[j]**2 for j in indices]

  # Weighted raw moments of the mixture.
  raw1 = np.dot(w, m)
  raw2 = np.dot(w, second)
  raw3 = np.dot(w, third)
  raw4 = np.dot(w, fourth)

  mu = raw1
  sig = np.sqrt(np.subtract(raw2, mu**2))

  # Standardised third and fourth central moments.
  third_central = raw3 - 3*raw2*raw1 + 2*pow(raw1, 3)
  fourth_central = raw4 - 4*raw3*raw1 + 6*raw2*raw1**2 - 3*pow(raw1, 4)
  sk = third_central / pow(sig, 3)
  kur = fourth_central / pow(sig, 4)
  return [mu, sig, sk, kur]

def get_network_graph_map2(timeseries, encoding, colormap=COLORMAP, reversed_cmap=False):
    """Split a time series into equal-width slices colored by node partition.

    Parameters
    ----------
    timeseries : object exposing ``.shape[0]`` and ``.iloc[start:end, :]``
        The signal to slice (e.g. a pandas DataFrame).
    encoding : dict
        Must contain 'node_color': one partition id per graph node.
    colormap : str
        Name of the matplotlib colormap used to color the slices.
    reversed_cmap : bool
        When true, the reversed colormap is used.

    Returns
    -------
    network_graph_map : list of dict
        One entry per node with keys 'color' (RGBA value from the colormap)
        and 'slice' (the matching rows of ``timeseries``).
    node_colors : list
        The 'node_color' entry of ``encoding``, returned unchanged.
    """
    # Get encoding definitions:
    node_colors = encoding['node_color']
    image_size = len(node_colors)

    # Normalise partition ids to [0, 1]. When every node sits in partition 0
    # the maximum is 0; use zeros rather than dividing 0/0 into NaN colors.
    color_ids = np.asarray(node_colors, dtype=float)
    max_id = color_ids.max()
    if max_id > 0:
        partition_color = color_ids / max_id
    else:
        partition_color = np.zeros_like(color_ids)

    # Define the color map. `plt.cm.get_cmap` was removed in Matplotlib 3.9;
    # `plt.get_cmap` is available in both old and new releases.
    cmap = plt.get_cmap(colormap)
    if reversed_cmap:
        cmap = cmap.reversed()

    # Associate each subset of the signal with the color of the network
    # graph partition it belongs to:
    network_graph_map = []
    sequences_width = timeseries.shape[0] / image_size

    for i, level in enumerate(partition_color):
        start = int(i * sequences_width)
        end = int((i + 1) * sequences_width)
        network_graph_map.append({
            'color': cmap(level),
            'slice': timeseries.iloc[start:end, :]
        })

    return network_graph_map, node_colors


def inversemapAna(ng_map2,colors2):
    """Flatten colored slices into first differences labelled by color.

    Parameters
    ----------
    ng_map2 : list of dict
        Each entry has a 'slice' key whose ``.values`` are flattened in
        order (e.g. a pandas DataFrame).
    colors2 : sequence
        ``colors2[i]`` is the label attached to every value of
        ``ng_map2[i]['slice']``.

    Returns
    -------
    pandas.DataFrame
        Columns 'color' and 'value', where 'value' is the difference between
        each flattened sample and the previous one. The first sample has no
        predecessor and is dropped, so the index starts at 1. An input with
        no samples yields an empty frame with the same two columns.
    """
    labels = []
    samples = []
    for i, entry in enumerate(ng_map2):
        flat = entry["slice"].values.reshape(-1)
        labels.extend([colors2[i]] * len(flat))
        samples.extend(flat)

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # copied the whole frame on every call.
    df = pd.DataFrame({"color": labels, "value": samples})
    if df.empty:
        return df

    df["value"] = df["value"].diff()
    # Drop the first row, whose difference is undefined.
    return df.iloc[1:].copy()

#Generate embeddings

In [None]:
#features=sparse.coo_matrix(np.array(returns[hub].values))
#r = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")

In [None]:
# Sweep configuration: for every hub, model, binning strategy and bin count,
# build the Markov transition field graph of the hub series, fit the model
# on it, and save the node embedding as a text file.
hubnames = ["paloverde", "nepool"]
strategies = ["quantile", "normal"]
binslist = range(2, 102, 2)
models = [Diff2Vec()]
#returns = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/rendimenti.pk")

# Load the input once; it does not change across iterations.
# NOTE: unpickling can execute arbitrary code -- only load trusted files.
tag_data = pd.read_pickle("/content/drive/MyDrive/Mari/Paper3/Hubs_rawdata/logprezzidepurati.pk")
embed_root = "/content/drive/MyDrive/Mari/Paper3/Embed2/"

for hub in hubnames:
    # The series and its (1, n_samples) view depend only on the hub.
    tag_df = tag_data[hub]
    X = tag_df.values.reshape(1, -1)

    for model in models:
        # Name the output folder after the model class so that adding more
        # models to `models` does not overwrite the Diff2Vec results.
        out_dir = os.path.join(embed_root, type(model).__name__, hub)
        os.makedirs(out_dir, exist_ok=True)

        for strategy in strategies:
            for bins in binslist:
                print(bins)

                mtf = MarkovTransitionField(image_size=len(tag_df), n_bins=bins, strategy=strategy, overlapping=False)
                tag_mtf = mtf.fit_transform(X)

                graph = get_network_graph2(tag_mtf[0])
                #features=sparse.coo_matrix(np.array(returns[hub].values))

                model.fit(graph)#, features)
                embedding = model.get_embedding()
                print(embedding.shape)

                np.savetxt(os.path.join(out_dir, f"{strategy}_bins_{bins}.txt"), embedding)

2
(1826, 128)
4
(1826, 128)
6
(1826, 128)
8
(1826, 128)
10
(1826, 128)
12
(1826, 128)
14
(1826, 128)
16
(1826, 128)
18
(1826, 128)
20
(1826, 128)
22
(1826, 128)
24
(1826, 128)
26
(1826, 128)
28
(1826, 128)
30
(1826, 128)
32
(1826, 128)
34
(1826, 128)
36
(1826, 128)
38
(1826, 128)
40
(1826, 128)
42
(1826, 128)
44
(1826, 128)
46
(1826, 128)
48
(1826, 128)
50
(1826, 128)
52
(1826, 128)
54
(1826, 128)
56
(1826, 128)
58
(1826, 128)
60
(1826, 128)
62
(1826, 128)
64
(1826, 128)
66
(1826, 128)
68
(1826, 128)
70
(1826, 128)
72
(1826, 128)
74
(1826, 128)
76
(1826, 128)
78
(1826, 128)
80
(1826, 128)
82
(1826, 128)
84
(1826, 128)
86
(1826, 128)
88
(1826, 128)
90
(1826, 128)
92
(1826, 128)
94
(1826, 128)
96
(1826, 128)
98
(1826, 128)
100
(1826, 128)
2
(1826, 128)
4
(1826, 128)
6
(1826, 128)
8
(1826, 128)
10
(1826, 128)
12
(1826, 128)
14
(1826, 128)
16
(1826, 128)
18
(1826, 128)
20
(1826, 128)
22
(1826, 128)
24
(1826, 128)
26
(1826, 128)
28
(1826, 128)
30
(1826, 128)
32
(1826, 128)
34
(1826, 128)
36