In [None]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import matplotlib.patheffects as patheffects

In [None]:
# Load the cross mutual-information results, swap the roles of side 1 and
# side 2, and keep a single row per (model 1, model 2) pair.

# Columns that identify a model pair.
MODEL_KEYS = ['metadata/Model name 1', 'metadata/Model name 2']

crossmis_raw = pd.read_csv('../../output/cross/crossmis.csv')

# Exchange " 1" <-> " 2" in column names and summary_1 <-> summary_2.
# The mappings are merged left to right, so when a column is matched by more
# than one of them the right-most mapping wins.
swapped_names = (
    {col: col.replace(' 1', ' 2') for col in crossmis_raw.columns if '1' in col}
    | {col: col.replace(' 2', ' 1') for col in crossmis_raw.columns if '2' in col}
    | {"summary_1": "summary_2", "summary_2": "summary_1"}
)
crossmis_swapped = crossmis_raw.rename(columns=swapped_names)

# Preview only: the full frame is not needed to check the renaming.
display(crossmis_swapped.head())

# Keep the first row of every model pair. Rows whose pair is incomplete are
# dropped first (groupby would have ignored them as well). drop_duplicates is
# used instead of groupby(...).nth(0).reset_index() because the latter leaves
# an extra "index" column on pandas >= 2.0, where nth() acts as a filter.
df = (
    crossmis_swapped
    .dropna(subset=MODEL_KEYS)
    .drop_duplicates(subset=MODEL_KEYS, keep='first')
    .reset_index(drop=True)
)



In [None]:
df
# Inspect the frame after keeping only the first row of each model pair.


In [None]:

# Clustered heat map of I(summary_1 -> summary_2) for every model pair.

# 100-step YlGnBu palette whose last (highest) bin is replaced by dark gray.
# NOTE(review): an earlier comment said the value 0 should be gray, but the
# code recolours the top of the scale -- confirm which end was intended.
palette = sns.color_palette("YlGnBu", 100)
palette[-1] = (0.3, 0.3, 0.3, 1.0)

# Model-by-model table; pairs without a measurement are filled with -1.
# pivot() takes keyword arguments only since pandas 2.0.
table = (
    df.dropna()
    .pivot(
        index="metadata/Model name 1",
        columns="metadata/Model name 2",
        values="I(summary_1 -> summary_2)",
    )
    .fillna(-1)
)

display(table)

# Smallest strictly positive value: the colour scale starts here, so the -1
# fillers sit below vmin.
min_val = table[table > 0].min().min()

g = sns.clustermap(table, annot=True, fmt=".2f", linewidths=.5, cmap=palette, vmin=min_val)

# Save the figure.
path = "../../../papers/Mutual-information-for-summarization/img/crossmis.png"
plt.tight_layout()
plt.savefig(path, bbox_inches='tight', dpi=300)


In [None]:
# Directed graph of the models, drawn with a force-directed (spring) layout.

# White seaborn theme with grid lines.
sns.set_style("whitegrid")

cmap = sns.color_palette("coolwarm", as_cmap=True)

LAYOUT_SEED = 42          # fixes the spring layout so the figure is reproducible
EDGE_WIDTH_DIVISOR = 15   # edge line width = weight / 15
EDGE_ALPHA_DIVISOR = 200  # edge opacity = weight / 200, clipped to [0, 1]

# Model-by-model adjacency matrix. Missing pairs are filled with 0, and zero
# entries produce no edge. pivot() takes keyword arguments only since pandas 2.0.
table = (
    df.dropna()
    .pivot(
        index="metadata/Model name 1",
        columns="metadata/Model name 2",
        values="I(summary_1 -> summary_2)",
    )
    .fillna(0.0)
)

G = nx.from_pandas_adjacency(table, create_using=nx.DiGraph)
# Materialise the self-loops before removing them so the graph is not mutated
# while it is being iterated.
G.remove_edges_from(list(nx.selfloop_edges(G)))

# Mean weight of each node's outgoing (avg_weight) and incoming (avg_income) edges.
avg_weight = {n: np.mean([data['weight'] for _, _, data in G.out_edges(n, data=True)]) for n in G.nodes()}
avg_income = {n: np.mean([data['weight'] for _, _, data in G.in_edges(n, data=True)]) for n in G.nodes()}

out_min, out_max = min(avg_weight.values()), max(avg_weight.values())
in_min, in_max = min(avg_income.values()), max(avg_income.values())

layout = nx.spring_layout(G, k=0.9, iterations=100, seed=LAYOUT_SEED)
fig, ax = plt.subplots(figsize=(20, 10))


def make_border_color(strength, cmap, vmin, vmax):
    """Map `strength` linearly from [vmin, vmax] onto `cmap` and return the colour.

    When vmin equals vmax the range is empty, so the middle of the colormap is
    returned instead of dividing by zero.
    """
    if vmax == vmin:
        return cmap(0.5)
    return cmap((strength - vmin) / (vmax - vmin))


# Node fill encodes the mean outgoing weight, node border the mean incoming weight.
nx.draw_networkx_nodes(
    G,
    layout,
    node_size=1000,
    node_color=[avg_weight[n] for n in G.nodes()],
    cmap=cmap,
    vmin=out_min,
    vmax=out_max,
    ax=ax,
    linewidths=4,
    edgecolors=[make_border_color(avg_income[n], cmap, in_min, in_max) for n in G.nodes()],
)

# Edge colour, width and opacity all follow the edge weight. Colours share the
# node colour range so that one colorbar describes both.
edge_weights = [data['weight'] for _, _, data in G.edges(data=True)]
nx.draw_networkx_edges(
    G,
    layout,
    width=[w / EDGE_WIDTH_DIVISOR for w in edge_weights],
    edge_color=edge_weights,
    edge_cmap=cmap,
    edge_vmin=out_min,
    edge_vmax=out_max,
    ax=ax,
    arrowsize=30,
    arrowstyle='-|>',
    # matplotlib rejects alpha values outside [0, 1]
    alpha=[min(max(w / EDGE_ALPHA_DIVISOR, 0.0), 1.0) for w in edge_weights],
    connectionstyle='arc3, rad = 0.4',
    min_source_margin=20,
    min_target_margin=20,
)

nx.draw_networkx_labels(G, layout, font_size=14, font_color='black', font_weight='bold', ax=ax)

# Colorbar for the shared colour scale.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=out_min, vmax=out_max))
sm.set_array([])
fig.colorbar(sm, ax=ax)

# White outline around every label so it stays readable on top of the edges.
for text in ax.texts:
    text.set_path_effects([patheffects.Stroke(linewidth=4, foreground='white'), patheffects.Normal()])

# Scale both axis limits by 1.2 to leave room around the outermost nodes.
ax.set_xlim([1.2 * x for x in ax.get_xlim()])
ax.set_ylim([1.2 * y for y in ax.get_ylim()])
fig.tight_layout()

# Save the figure.
# NOTE(review): the community-layout cell further down writes to this same
# path, so whichever cell runs last decides the content of the file.
path = "../../../papers/Mutual-information-for-summarization/img/crossmisgraph.png"
fig.savefig(path, bbox_inches='tight', dpi=300)






In [None]:
def central_community_layout(G, threshold=58, inner_scale=0.5, outer_scale=1.5):
    """Place the nodes of `G` on two concentric circles centred on the origin.

    Nodes whose mean outgoing edge weight is strictly greater than `threshold`
    go on the inner circle; every other node goes on the outer circle. That
    includes nodes without outgoing edges, which have no defined mean.

    Parameters
    ----------
    G : directed graph whose edges carry a 'weight' attribute.
    threshold : cut-off on the mean outgoing weight.
    inner_scale, outer_scale : scale passed to `nx.circular_layout` for the
        inner and the outer circle respectively.

    Returns
    -------
    dict mapping every node of `G` (in `G.nodes()` order) to its position.
    """
    def mean_out_weight(node):
        weights = [data['weight'] for _, _, data in G.out_edges(node, data=True)]
        return np.mean(weights) if weights else float('nan')

    inner_nodes, outer_nodes = [], []
    for node in G.nodes():
        # NaN compares False, so nodes without outgoing edges fall through to
        # the outer circle instead of being left without a position.
        if mean_out_weight(node) > threshold:
            inner_nodes.append(node)
        else:
            outer_nodes.append(node)

    inner_pos = nx.circular_layout(G.subgraph(inner_nodes), scale=inner_scale, center=(0, 0))
    outer_pos = nx.circular_layout(G.subgraph(outer_nodes), scale=outer_scale, center=(0, 0))

    return {node: inner_pos[node] if node in inner_pos else outer_pos[node] for node in G.nodes()}

In [None]:

# Directed graph of the models, drawn on two concentric circles
# (see central_community_layout).

# White seaborn theme with grid lines.
sns.set_style("whitegrid")

cmap = sns.color_palette("coolwarm", as_cmap=True)

EDGE_WIDTH_DIVISOR = 15   # edge line width = weight / 15
EDGE_ALPHA_DIVISOR = 200  # edge opacity = weight / 200, clipped to [0, 1]

# Model-by-model adjacency matrix. Missing pairs are filled with 0, and zero
# entries produce no edge. pivot() takes keyword arguments only since pandas 2.0.
table = (
    df.dropna()
    .pivot(
        index="metadata/Model name 1",
        columns="metadata/Model name 2",
        values="I(summary_1 -> summary_2)",
    )
    .fillna(0.0)
)

G = nx.from_pandas_adjacency(table, create_using=nx.DiGraph)
# Materialise the self-loops before removing them so the graph is not mutated
# while it is being iterated.
G.remove_edges_from(list(nx.selfloop_edges(G)))

# Mean weight of each node's outgoing (avg_weight) and incoming (avg_income) edges.
avg_weight = {n: np.mean([data['weight'] for _, _, data in G.out_edges(n, data=True)]) for n in G.nodes()}
avg_income = {n: np.mean([data['weight'] for _, _, data in G.in_edges(n, data=True)]) for n in G.nodes()}

out_min, out_max = min(avg_weight.values()), max(avg_weight.values())
in_min, in_max = min(avg_income.values()), max(avg_income.values())

layout = central_community_layout(G, threshold=61)
fig, ax = plt.subplots(figsize=(20, 10))


def make_border_color(strength, cmap, vmin, vmax):
    """Map `strength` linearly from [vmin, vmax] onto `cmap` and return the colour.

    When vmin equals vmax the range is empty, so the middle of the colormap is
    returned instead of dividing by zero.
    """
    if vmax == vmin:
        return cmap(0.5)
    return cmap((strength - vmin) / (vmax - vmin))


# Node fill encodes the mean outgoing weight, node border the mean incoming weight.
nx.draw_networkx_nodes(
    G,
    layout,
    node_size=5000,
    node_color=[avg_weight[n] for n in G.nodes()],
    cmap=cmap,
    vmin=out_min,
    vmax=out_max,
    ax=ax,
    linewidths=8,
    edgecolors=[make_border_color(avg_income[n], cmap, in_min, in_max) for n in G.nodes()],
)

# Edge colour, width and opacity all follow the edge weight. Colours share the
# node colour range so that one colorbar describes both.
edge_colors = [data['weight'] for _, _, data in G.edges(data=True)]
edge_size = [w / EDGE_WIDTH_DIVISOR for w in edge_colors]
# matplotlib rejects alpha values outside [0, 1]
alphas = [min(max(w / EDGE_ALPHA_DIVISOR, 0.0), 1.0) for w in edge_colors]

nx.draw_networkx_edges(
    G,
    layout,
    width=edge_size,
    edge_color=edge_colors,
    edge_cmap=cmap,
    edge_vmin=out_min,
    edge_vmax=out_max,
    ax=ax,
    arrowsize=30,
    arrowstyle='-|>',
    alpha=alphas,
    connectionstyle='arc3, rad = 0.4',
    min_source_margin=20,
    min_target_margin=20,
)

# Display label: drop everything up to and including the first "_" of the node
# name, except for names containing "summarization_text" or
# "summarization_medical", which are kept whole.
labels = {
    n: n if ("summarization_text" in n or "summarization_medical" in n) else n.partition('_')[2]
    for n in G.nodes()
}

nx.draw_networkx_labels(G, layout, font_size=22, font_color='black', font_weight='bold', ax=ax, labels=labels)

# Colorbar for the shared colour scale, with larger tick labels.
sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=out_min, vmax=out_max))
sm.set_array([])
cbar = fig.colorbar(sm, ax=ax)
cbar.ax.tick_params(labelsize=22)

# White outline around every label so it stays readable on top of the edges.
for text in ax.texts:
    text.set_path_effects([patheffects.Stroke(linewidth=4, foreground='white'), patheffects.Normal()])

# Scale both axis limits by 1.2 to leave room around the outermost nodes.
ax.set_xlim([1.2 * x for x in ax.get_xlim()])
ax.set_ylim([1.2 * y for y in ax.get_ylim()])
fig.tight_layout()

# Save the figure.
# NOTE(review): the spring-layout cell above writes to this same path, so
# whichever cell runs last decides the content of the file.
path = "../../../papers/Mutual-information-for-summarization/img/crossmisgraph.png"
fig.savefig(path, bbox_inches='tight', dpi=300)
