In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import networkx as nx

In [None]:
# Parse the stored table as CSV (the file carries a .df suffix).
df = pd.read_csv(filepath_or_buffer='exported_data/normalized_3.df')

In [None]:
# Preview the first five rows of the loaded frame.
df.head(n=5)

In [None]:
# Divide the X_1 -> X_2 score by three reference columns; each result is
# stored under "<score column>/<reference column>".
ratio_columns = {
    'I(X_1->X_2)/H(X_2)': 'H(X_2)',
    'I(X_1->X_2)/d_1': 'd_1',
    'I(X_1->X_2)/d_2': 'd_2',
}
for ratio_name, reference in ratio_columns.items():
    df[ratio_name] = df['I(X_1->X_2)'] / df[reference]


def _last_two_path_parts(name):
    """Return the final two '/'-separated components of ``name``, joined by '/'."""
    return "/".join(name.split('/')[-2:])


# Shorten both model identifiers to their last two path components.
for model_column in ('model_1', 'model_2'):
    df[model_column] = df[model_column].apply(_last_two_path_parts)




In [None]:
# Collapse duplicated (model_1, model_2) pairs to a single row each.
# `first()` takes, per column, the first non-null value inside the group.
first_per_pair = df.groupby(['model_1', 'model_2']).first()
# Turn the pair keys back into ordinary columns.
df = first_per_pair.reset_index()

In [None]:

# Discard every pair in which this model appears on either side.
excluded_model = "jinaai/jina-embedding-s-en-v1"
keep_rows = (df['model_1'] != excluded_model) & (df['model_2'] != excluded_model)
df = df[keep_rows]

In [None]:
sns.set_style("whitegrid")
metric = 'I(X_1->X_2)/d_2'
from matplotlib import patheffects

cmap = sns.color_palette("coolwarm", as_cmap=True)

# model_1 x model_2 matrix of the selected metric.
# DataFrame.pivot only accepts keyword arguments since pandas 2.0, so the
# index/columns/values roles are spelled out explicitly.
# Pairs that are absent from `df` are filled with (negative) zero.
table = (
    df[["model_1", "model_2", metric]]
    .pivot(index="model_1", columns="model_2", values=metric)
    .fillna(-0.0)
)

display(table)

# Directed graph with one edge row-label -> column-label per non-zero cell,
# carrying the cell value as the 'weight' attribute. Self loops are dropped so
# they do not enter the per-node averages below.
G = nx.from_pandas_adjacency(table, create_using=nx.DiGraph)
G.remove_edges_from(nx.selfloop_edges(G))

# Mean weight of the outgoing / incoming edges of every node.
avg_weight = {n: np.mean([w for _, _, w in G.out_edges(n, data='weight')]) for n in G.nodes()}
avg_income = {n: np.mean([w for _, _, w in G.in_edges(n, data='weight')]) for n in G.nodes()}

# Optional pruning of edges by a weight threshold (currently disabled):
# for edge in list(G.edges(data=True)):
#     if edge[2]['weight'] >= 1/55:
#         G.remove_edge(edge[0], edge[1])

# spring_layout starts from random positions; a fixed seed makes the figure
# reproducible across kernel restarts.
layout = nx.spring_layout(G, k=0.1, iterations=100, seed=0)
fig, ax = plt.subplots(figsize=(30, 30))

def make_border_color(strength, cmap, vmin, vmax):
    """Map ``strength`` to a colour by rescaling it from [vmin, vmax] to [0, 1].

    Parameters
    ----------
    strength : number
        Value to colour.
    cmap : callable
        Called with the rescaled value; its result is returned unchanged.
    vmin, vmax : number
        Values that are mapped to 0 and 1 respectively.

    Returns
    -------
    Whatever ``cmap`` returns for the rescaled value.

    When ``vmin == vmax`` there is no range to rescale over; the function then
    returns ``cmap(0.0)`` (the same convention matplotlib's ``Normalize`` uses)
    instead of dividing by zero.
    """
    span = vmax - vmin
    if span == 0:
        return cmap(0.0)
    return cmap((strength - vmin) / span)
    

# Colour ranges are computed once here instead of once per node/edge.
weight_min, weight_max = min(avg_weight.values()), max(avg_weight.values())
income_min, income_max = min(avg_income.values()), max(avg_income.values())

# Node fill encodes avg_weight (outgoing), node border encodes avg_income
# (incoming).
# NOTE(review): borders are scaled to the avg_income range while the colour
# bar below shows the avg_weight range, so the bar only describes fills and
# edges - confirm this is intended.
border_colors = [
    make_border_color(avg_income[n], cmap, income_min, income_max)
    for n in G.nodes()
]
nx.draw_networkx_nodes(
    G,
    layout,
    node_size=1000,
    node_color=list(avg_weight.values()),
    cmap=cmap,
    vmin=weight_min,
    vmax=weight_max,
    linewidths=4,
    edgecolors=border_colors,
    ax=ax,
)

# Edges are coloured by their own weight on the same scale as the node fills,
# so weights outside [weight_min, weight_max] saturate at the ends.
nx.draw_networkx_edges(
    G,
    layout,
    width=2,
    alpha=0.6,
    edge_color=[w for _, _, w in G.edges(data='weight')],
    edge_cmap=cmap,
    edge_vmin=weight_min,
    edge_vmax=weight_max,
    ax=ax,
    arrowsize=30,
    arrowstyle='-|>',
    connectionstyle='arc3, rad = 0.4',
    min_source_margin=20,
    min_target_margin=20,
)

nx.draw_networkx_labels(G, layout, font_size=14, font_color='black', font_weight='bold', ax=ax)

# Colour bar for the avg_weight scale. set_array is the public replacement
# for assigning the private `_A` attribute.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=weight_min, vmax=weight_max))
sm.set_array([])
fig.colorbar(sm, ax=ax)

# White outline behind every text so labels stay readable on top of edges.
for text in ax.texts:
    text.set_path_effects([patheffects.Stroke(linewidth=4, foreground='white'), patheffects.Normal()])

# Stretch both axis limits by 20 % so nodes and labels near the border are
# not clipped. `axis` is kept as a name for any later cell that refers to it.
axis = ax
axis.set_xlim([1.2 * x for x in axis.get_xlim()])
axis.set_ylim([1.2 * y for y in axis.get_ylim()])
fig.tight_layout()

In [None]:

# Annotated heat map of the metric table, reordered by seaborn's default
# hierarchical clustering; the colour scale is capped at 1.
sns.clustermap(
    table,
    figsize=(20, 20),
    cmap='viridis',
    vmax=1,
    annot=True,
    fmt=".2f",
)


In [None]:
from scipy.cluster.hierarchy import linkage

# A single Ward linkage computed on the rows of `table` is handed to both
# axes, so rows and columns share one dendrogram.
link = linkage(table, "ward")
cluster = sns.clustermap(
    table,
    row_linkage=link,
    col_linkage=link,
    figsize=(20, 20),
    cmap='viridis',
    vmax=1,
    annot=True,
    fmt=".2f",
)

In [None]:
# Community detection

from networkx.algorithms.community import greedy_modularity_communities

# greedy_modularity_communities ignores edge attributes unless `weight` is
# given; pass the 'weight' attribute so the partition reflects the metric
# values stored on the edges rather than mere edge presence.
communities = list(greedy_modularity_communities(G, weight="weight"))


    
    


In [None]:
# find communities






In [None]:
from netgraph import Graph, InteractiveGraph

from networkx.algorithms.community import girvan_newman, modularity_max, louvain_communities

# Continuous colour map shared by nodes, node borders and edges.
cmap = sns.color_palette("coolwarm", as_cmap=True)

# Build the graph first so that community detection runs on exactly the graph
# drawn below, not on whatever `G` an earlier cell left in the kernel.
G = nx.from_pandas_adjacency(table, create_using=nx.DiGraph)
G.remove_edges_from(nx.selfloop_edges(G))

# Louvain is randomised; a fixed seed keeps the partition (and therefore the
# community layout) identical across runs.
communities = list(louvain_communities(G, resolution=1.1, seed=0))
node_to_community = {node: i for i, community in enumerate(communities) for node in community}

# Median weight of the outgoing / incoming edges of every node (the names are
# kept from the mean-based version used for the spring-layout figure).
avg_weight = {n: np.median([w for _, _, w in G.out_edges(n, data='weight')]) for n in G.nodes()}
avg_income = {n: np.median([w for _, _, w in G.in_edges(n, data='weight')]) for n in G.nodes()}

# Alternative: colour nodes by community index instead.
# node_color = {node: cmap(i) for i, community in enumerate(communities) for node in community}

# Fill colour from the outgoing median, border colour from the incoming one.
# NOTE(review): values are passed to the colour map without rescaling, which
# presumes the metric already lies in [0, 1] - confirm.
node_color = {node: cmap(avg_weight[node]) for node in G.nodes()}
node_edge_color = {node: cmap(avg_income[node]) for node in G.nodes()}

node_labels = {node: node for node in G.nodes()}

edge_color = {edge: cmap(G.edges[edge]['weight']) for edge in G.edges()}

def _rescale_edge_weights(graph, low, high):
    """Linearly map the 'weight' of every edge in ``graph`` onto [low, high].

    Returns a dict keyed by edge. The smallest weight maps to ``low`` and the
    largest to ``high``. An empty graph yields an empty dict; if all weights
    are equal there is no range to stretch, so every edge gets the midpoint of
    [low, high] instead of triggering a division by zero.
    """
    weights = {edge: graph.edges[edge]['weight'] for edge in graph.edges()}
    if not weights:
        return {}
    # Extremes are computed once, not once per edge.
    w_min, w_max = min(weights.values()), max(weights.values())
    span = w_max - w_min
    if span == 0:
        midpoint = (low + high) / 2
        return {edge: midpoint for edge in weights}
    return {edge: (w - w_min) / span * (high - low) + low for edge, w in weights.items()}


# normalize edge alpha
min_alpha = 0.1
max_alpha = 0.9
edge_alpha = _rescale_edge_weights(G, min_alpha, max_alpha)

# edge width
min_edge_width = 0.3
max_edge_width = 1
edge_width = _rescale_edge_weights(G, min_edge_width, max_edge_width)



fig, ax = plt.subplots(figsize=(20, 20))

# Community-grouped layout with curved, directed edges.
# NOTE(review): `edge_width` is computed earlier in this cell but is not
# passed to Graph here - confirm whether that is intentional.
graph = Graph(
    G,
    node_layout="community",
    node_layout_kwargs=dict(node_to_community=node_to_community),
    node_color=node_color,
    node_edge_color=node_edge_color,
    node_labels=node_labels,
    node_label_fontdict=dict(fontsize=15, fontweight='bold'),
    edge_layout="curved",
    edge_color=edge_color,
    edge_alpha=edge_alpha,
    arrows=True,
    ax=ax,
)

# Give every text on the current axes a white outline for legibility.
for text in plt.gca().texts:
    outline = patheffects.Stroke(linewidth=4, foreground='white')
    text.set_path_effects([outline, patheffects.Normal()])




