In [137]:
import os
import json
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [95]:
def get_number_of_topics_by_epoch(graph):
    """Return one size per epoch derived from the matrices in ``graph["matrix"]``.

    Every matrix contributes its number of rows; the number of columns of the
    last matrix (measured on its first row) is appended as the final entry.

    Assumption to confirm: matrix ``i`` has one row per topic of an epoch and
    one column per topic of the following epoch.

    Args:
        graph: mapping with a ``"matrix"`` key holding a non-empty sequence
            of row-major nested sequences.

    Returns:
        list[int]: row counts of all matrices followed by the column count
        of the last one.
    """
    matrices = graph["matrix"]
    row_counts = list(map(len, matrices))
    # The last epoch never appears as rows, so read it from the columns.
    last_epoch_count = len(matrices[-1][0])
    return row_counts + [last_epoch_count]

def get_threshold(graph, epsilon):
    """Return the ``epsilon`` quantile of the edge weights of ``graph``.

    Args:
        graph: mapping with an ``"edges"`` key; each edge is a mapping whose
            ``"w"`` entry is its weight.
        epsilon: quantile to compute, passed unchanged to
            ``pandas.Series.quantile``.

    Returns:
        The quantile value of the collected ``"w"`` weights.
    """
    edge_weights = [link["w"] for link in graph["edges"]]
    return pd.Series(edge_weights).quantile(epsilon)

def count_type_of_evolutions(edges):
    """Count births, deaths, merges and splits in a 2-D edge matrix.

    Column sums are treated as the number of incoming edges and row sums as
    the number of outgoing edges:

    * birth: columns whose sum is 0
    * death: rows whose sum is 0
    * merge: columns whose sum is greater than 1
    * split: rows whose sum is greater than 1

    Args:
        edges: 2-D array-like supporting ``.sum(axis=...)``; the caller in
            this notebook passes a 0/1 integer NumPy array.

    Returns:
        tuple[int, int, int, int]: ``(birth, death, merge, split)``.
    """
    incoming = edges.sum(axis=0)  # one total per column
    outgoing = edges.sum(axis=1)  # one total per row

    def _count(mask):
        # Number of True entries, as a plain Python int.
        return int(mask.sum())

    return (
        _count(incoming == 0),
        _count(outgoing == 0),
        _count(incoming > 1),
        _count(outgoing > 1),
    )

In [None]:
# Load environment variables through python-dotenv so that GRAPH_PATH is
# available to os.getenv when the graph file is located.
load_dotenv()

In [None]:
# Load the similarity graph (JSON) from the directory named by GRAPH_PATH.
graph_dir = os.getenv("GRAPH_PATH")
if graph_dir is None:
    # Fail early: formatting None into the path would silently produce
    # "Nonegraph_wmd_q100.json" and a confusing FileNotFoundError.
    raise RuntimeError(
        "GRAPH_PATH is not set; define it in the environment or in the .env file"
    )
# os.path.join inserts a separator only when GRAPH_PATH lacks a trailing one,
# so "dir" and "dir/" both resolve to the same file.
graph_path = os.path.join(graph_dir, "graph_wmd_q100.json")
with open(graph_path, "rb") as f:
    graph = json.load(f)

In [132]:
# Build one table of evolution counts per quantile level, then stack them.
topics_by_epoch = get_number_of_topics_by_epoch(graph)
epsilons = [0.9, 0.95, 0.99]
column_order = ["epsilon", "epoch", "ntopics", "birth", "death", "merge", "split"]

frames = []
for epsilon in epsilons:
    # Similarity value at the epsilon quantile of all edge weights.
    threshold = get_threshold(graph, epsilon)
    rows = []
    # Epoch labels start at 2: one label per matrix in graph["matrix"].
    for epoch, raw_matrix in enumerate(graph["matrix"], start=2):
        # Keep only the entries at or above the threshold, as a 0/1 matrix.
        adjacency = (np.array(raw_matrix) >= threshold).astype(int)
        birth, death, merge, split = count_type_of_evolutions(adjacency)
        # NOTE(review): ntopics is the ROW count of the matrix while the epoch
        # label starts at 2; if rows index the earlier epoch of each
        # transition, this count belongs to epoch - 1. Confirm the pairing is
        # intended (topics_by_epoch is computed above but not used here).
        rows.append((epsilon, epoch, len(raw_matrix), birth, death, merge, split))
    frames.append(pd.DataFrame(rows, columns=column_order))

# Each per-epsilon frame keeps its own 0..n-1 index, so labels repeat.
evolution_by_epsilon = pd.concat(frames)

In [135]:
# Show the stacked table: one row per (epsilon, epoch) pair.
evolution_by_epsilon

Unnamed: 0,epsilon,epoch,ntopics,birth,death,merge,split
0,0.9,2,9,3,3,3,1
1,0.9,3,8,3,3,0,2
2,0.9,4,10,4,3,1,1
3,0.9,5,11,1,4,2,2
4,0.9,6,8,2,1,4,2
0,0.95,2,9,5,6,0,0
1,0.95,3,8,6,4,0,0
2,0.95,4,10,7,6,0,0
3,0.95,5,11,3,7,0,1
4,0.95,6,8,4,3,1,0


In [204]:
# One frame per quantile level; each is drawn with its own dash pattern.
df_90 = evolution_by_epsilon[evolution_by_epsilon.epsilon == 0.9]
df_95 = evolution_by_epsilon[evolution_by_epsilon.epsilon == 0.95]
df_99 = evolution_by_epsilon[evolution_by_epsilon.epsilon == 0.99]
fig = make_subplots(rows = 2, cols = 2)

# (frame, legend label, dash pattern) for each quantile level.
series_styles = [
    (df_90, r"$\zeta = 0.9$", "solid"),
    (df_95, r"$\zeta = 0.95$", "dash"),
    (df_99, r"$\zeta = 0.99$", "dot"),
]
# (column, subplot row, subplot col, x-axis caption) for each panel, listed in
# the order the traces are added to the figure.
panels = [
    ("birth", 1, 1, "(a) Nacimiento"),
    ("merge", 1, 2, "(b) Fusión"),
    ("death", 2, 1, "(c) Muerte"),
    ("split", 2, 2, "(d) División"),
]
# Only one panel feeds the legend; otherwise every label would appear 4 times.
legend_panel = "merge"

for column, row, col, _caption in panels:
    in_legend = column == legend_panel
    for frame, label, dash in series_styles:
        fig.add_trace(
            go.Scatter(
                x = frame.epoch,
                # Counts are plotted as a proportion of the ntopics column.
                y = frame[column] / frame.ntopics,
                name = label if in_legend else None,
                showlegend = in_legend,
                line = dict(color='royalblue', width=2, dash=dash),
            ),
            row = row,
            col = col,
        )

fig.update_layout(template="plotly_white")
# update_layout(xaxis=...) would configure only the first subplot's x-axis;
# update_xaxes applies the integer tick spacing to all four panels.
fig.update_xaxes(tickmode = 'linear', dtick = 1)

for _column, row, col, caption in panels:
    fig.update_xaxes(title_text=caption, row=row, col=col)
fig.update_yaxes(title_text="Proporción de tópicos", row=1, col=1)
# Kept as the last expression: it returns the figure, so the cell renders it.
fig.update_yaxes(title_text="Proporción de tópicos", row=2, col=1)

In [205]:
# Export the figure as an EPS file at a path relative to the working directory.
# NOTE(review): static export presumably needs plotly's image-export backend
# installed and EPS support enabled — confirm in the target environment.
fig.write_image("../../tesis/img/ch4/topics_evolution.eps")