# Narrative Maps Eval


In [1]:
import os
import sys

# Expose the two directories above the working directory to the import system
# so that the relative library imports further down resolve.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
grandparent_dir = os.path.dirname(parent_dir)
sys.path.extend([parent_dir, grandparent_dir])

## Imports


In [2]:
import pandas as pd
import numpy as np

import re
from ast import literal_eval
from urllib.parse import urlparse

import networkx as nx
from networkx.drawing.nx_agraph import write_dot, graphviz_layout

import itertools
import pickle
import json
from time import time

import math
from math import log, exp, pi, sqrt, ceil
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial import distance
from tqdm import tqdm

import umap
import hdbscan

from pulp import *

from Library.embedding_extraction import extract_embeddings

from narrative_maps import (
    extract_varsdict,
    compute_temp_distance_table,
    build_graph,
    graph_stories,
)

  from .autonotebook import tqdm as notebook_tqdm


Using device: cpu


In [3]:
# Seed NumPy's global random state. `np.random.seed` is a function and has to
# be called; assigning to it would replace the function with an integer and
# leave the generator unseeded.
np.random.seed(420)

## Linear Program Construction

This function takes a lot of parameters; some of them ended up unused.


In [4]:
def create_LP(query, sim_table, membership_vectors, clust_sim_table, exp_temp_table, ent_table, numclust, relevance_table,
              K, mincover, sigma_t, credibility=[], bias=[], operations=[],
              has_start=True, has_end=False, window_time=None, cluster_list=[], start_nodes=[], end_nodes=[],
              verbose=True, force_cluster=True, previous_varsdict=None):
    """Build the "weakest link" story-chain linear program.

    The program maximises a single variable ``minedge``. For every candidate
    edge ``i -> j`` it adds the constraint
    ``minedge <= 1 - node_next[i_j] + weighted_coherence(i, j)``, so an edge
    that is fully used caps ``minedge`` at its weighted coherence.

    Besides its arguments, this function reads three module-level names:
    ``window_i_j`` / ``window_j_i`` (candidate successors / predecessors of
    each node) and ``start_time`` (reference point for the elapsed-time log
    lines). All three must be populated before the call.

    Parameters actually used:
        query: only ``len(query.index)`` is read (the number of nodes ``n``).
        sim_table, clust_sim_table, ent_table: indexed as ``table[i, j]``.
        relevance_table: indexed as ``relevance_table[i]``.
        K: required sum of all node activations.
        has_start, start_nodes: when ``has_start`` is true and ``start_nodes``
            is empty, node 0 is the start; otherwise activation is split
            evenly over ``start_nodes`` and every node before the earliest
            one is forced to 0.
        has_end, end_nodes: mirror image of the above, using node ``n - 1``
            as the default end and zeroing every node after the latest one.
        verbose: print progress messages.
        previous_varsdict: optional ``{variable_name: value}`` mapping; the
            values of matching ``node_act_*`` variables are used as initial
            values.

    Parameters accepted but not used by this function: ``membership_vectors``,
    ``exp_temp_table``, ``numclust``, ``mincover``, ``sigma_t``,
    ``credibility``, ``bias``, ``operations``, ``window_time``,
    ``cluster_list`` and ``force_cluster``.

    Returns:
        The assembled, unsolved ``LpProblem``.
    """
    def log_step(message):
        # Progress message followed by the seconds elapsed since `start_time`.
        if verbose:
            print(message)
            print("--- %s seconds ---" % (time() - start_time))

    n = len(query.index)

    # Name suffixes for the node variables ("i") and edge variables ("i_j").
    var_i = [str(i) for i in range(n)]
    var_ij = []
    for i in range(n):
        for j in window_i_j[i]:
            if i == j:
                print("ERROR IN WINDOW - BASE")
            var_ij.append(str(i) + "_" + str(j))

    # All variables are continuous in [0, 1].
    minedge = LpVariable("minedge", lowBound=0, upBound=1)
    node_act_vars = LpVariable.dicts("node_act", var_i, lowBound=0, upBound=1)
    node_next_vars = LpVariable.dicts("node_next", var_ij, lowBound=0, upBound=1)

    prob = LpProblem("StoryChainProblem", LpMaximize)
    # The objective has to be added to the problem before any constraint.
    prob += minedge, "WeakestLink"

    # ---- Start / end restrictions ----
    if has_start:
        num_starts = len(start_nodes)
        if verbose:
            print("Start node(s):")
            print(start_nodes)
        if num_starts == 0:  # Default when it has a start but no list is given.
            prob += node_act_vars[str(0)] == 1, 'InitialNode'
        else:
            log_step("Added start node(s)")
            initial_energy = 1.0 / num_starts
            earliest_start = min(start_nodes)
            for node in start_nodes:
                prob += node_act_vars[str(node)] == initial_energy, 'InitialNode' + str(node)
            for node in range(0, earliest_start):
                prob += node_act_vars[str(node)] == 0, 'BeforeStart' + str(node)
    if has_end:
        num_ends = len(end_nodes)
        if verbose:
            print("End node(s):")
            print(end_nodes)
        if num_ends == 0:  # Default when it has an end but no list is given.
            prob += node_act_vars[str(n - 1)] == 1, 'FinalNode'
        else:
            log_step("Added end node(s)")
            final_energy = 1.0 / num_ends
            # Only nodes after the LATEST end node may be switched off. Using
            # the earliest one would force the remaining end nodes to be both
            # `final_energy` and 0, which is infeasible.
            latest_end = max(end_nodes)
            for node in end_nodes:
                prob += node_act_vars[str(node)] == final_energy, 'FinalNode' + str(node)
            for node in range(latest_end + 1, n):
                prob += node_act_vars[str(node)] == 0, 'AfterEnd' + str(node)
    log_step("Chain constraints created.")

    # ---- Expected length: activations sum to K ----
    prob += lpSum([node_act_vars[i] for i in var_i]) == K, 'KNodes'
    log_step("Expected length constraints created.")

    # ---- In-degree: incoming flow versus activation ----
    if has_start:
        log_step("Equality constraints.")
        for j in range(1, n):
            incoming = lpSum([node_next_vars[str(i) + "_" + str(j)] for i in window_j_i[j]])
            if j not in start_nodes:
                prob += incoming == node_act_vars[str(j)], 'InEdgeReq' + str(j)
            else:
                # A start node receives no incoming flow.
                log_step("Generating specific starting node constraints.")
                prob += incoming == 0, 'InEdgeReq' + str(j)
    else:
        log_step("Inequality constraints.")
        for j in range(1, n):
            incoming = lpSum([node_next_vars[str(i) + "_" + str(j)] for i in window_j_i[j]])
            prob += incoming <= node_act_vars[str(j)], 'InEdgeReq' + str(j)
    log_step("In-degree constraints created.")

    # ---- Out-degree: outgoing flow versus activation ----
    if has_end:
        log_step("Equality constraints.")
        for i in range(0, n - 1):
            outgoing = lpSum([node_next_vars[str(i) + "_" + str(j)] for j in window_i_j[i]])
            if i not in end_nodes:
                prob += outgoing == node_act_vars[str(i)], 'OutEdgeReq' + str(i)
            else:
                # An end node emits no outgoing flow.
                log_step("Generating specific ending node constraints.")
                prob += outgoing == 0, 'OutEdgeReq' + str(i)
    else:
        log_step("Inequality constraints.")
        for i in range(0, n - 1):
            outgoing = lpSum([node_next_vars[str(i) + "_" + str(j)] for j in window_i_j[i]])
            prob += outgoing <= node_act_vars[str(i)], 'OutEdgeReq' + str(i)
    log_step("Out-degree constraints created.")

    # ---- Weakest-link constraints, one per candidate edge ----
    coherence_weights = [0.5, 0.5]  # exponents for content and cluster similarity
    for i in range(0, n):
        for j in window_i_j[i]:
            # The entity bonus multiplies the edge strength by at most 2.
            entity_multiplier = min(1 + ent_table[i, j], 2)
            # Geometric mean of the two endpoint relevances.
            relevance_multiplier = (relevance_table[i] * relevance_table[j]) ** 0.5
            coherence = (sim_table[i, j] ** coherence_weights[0]) * \
                (clust_sim_table[i, j] ** coherence_weights[1])
            weighted_coherence = min(coherence * entity_multiplier * relevance_multiplier, 1.0)
            prob += minedge <= 1 - node_next_vars[str(i) + "_" + str(j)] + \
                weighted_coherence, "Objective" + str(i) + "_" + str(j)
    log_step("Objective constraints created.")

    # ---- Optional warm start from a previous solution ----
    if previous_varsdict:
        # A set keeps the membership test below constant-time.
        current_names = {v.name for v in prob.variables() if "node_act" in v.name}
        log_step("Generated list of names.")
        for name, value in previous_varsdict.items():
            if "node_act" in name and name in current_names:
                node_act_vars[name.replace("node_act_", "")].setInitialValue(value)
        log_step("Used previous solution as starting point.")
    else:
        log_step("No previous solution available.")

    return prob

## Building the Graph Data Frame


In [5]:
def build_graph_df_multiple_starts(query, varsdict, prune=None, threshold=0.01, cluster_dict={}, start_nodes=[]):
    """Turn an LP solution into a data frame with one row per retained node.

    Reads the module-level ``window_i_j`` mapping (candidate successors of
    each node), which must cover every index in ``range(len(query))``.

    A node is considered only if its activation ``varsdict["node_act_<i>"]``
    exceeds ``threshold``. Its outgoing edges are the successors ``j`` whose
    edge value ``varsdict["node_next_<i>_<j>"]`` and whose own activation both
    exceed ``threshold``. A considered node gets a row if it keeps at least
    one outgoing edge, or if an earlier row listed it (or it was itself an
    earlier considered node with a row).

    Args:
        query: data frame with ``title``, ``date``, ``publication`` and
            ``url`` columns, and optionally ``bias``; rows are read by
            position.
        varsdict: mapping from LP variable name to solved value.
        prune: if truthy, keep only the ``prune`` strongest outgoing edges of
            each node.
        threshold: minimum value for activations and edge values.
        cluster_dict: ``{label: collection}``; when ``str(i)`` is in a
            collection the title is prefixed with ``"[label] "``.
        start_nodes: accepted for interface compatibility; not used.

    Returns:
        A data frame with columns ``id``, ``adj_list``, ``adj_weights``,
        ``date``, ``publication``, ``title``, ``text``, ``url``, ``bias``
        (only if ``query`` has it) and ``coherence``. ``adj_weights`` are the
        retained edge values normalised to sum to 1; ``text`` is always the
        empty string; ``coherence`` is the node's activation.
    """
    n = len(query)
    has_bias = 'bias' in query.columns
    columns = ['id', 'adj_list', 'adj_weights', 'date', 'publication', 'title', 'text', 'url']
    if has_bias:
        columns.append('bias')
    columns.append('coherence')
    graph_df = pd.DataFrame(columns=columns)

    # Nodes that already have a row or are the target of a retained edge.
    already_in = set()
    for i in range(0, n):
        coherence = varsdict["node_act_" + str(i)]
        if coherence <= threshold:
            continue

        successors = window_i_j[i]
        edge_values = [varsdict["node_next_" + str(i) + "_" + str(j)] for j in successors]
        target_activation = [varsdict["node_act_" + str(j)] for j in successors]

        # Keep edges that are non-zero, above threshold, and lead to an active node.
        kept = [
            (successors[pos], value)
            for pos, value in enumerate(edge_values)
            if round(value, 8) != 0 and value > threshold and target_activation[pos] > threshold
        ]
        idx_list = [j for j, _ in kept]
        nz_prob = [value for _, value in kept]

        if prune and len(idx_list) > prune:
            # Positions of the `prune` largest values, in ascending value order.
            top_prob_idx = sorted(range(len(nz_prob)), key=lambda k: nz_prob[k])[-prune:]
            idx_list = [idx_list[k] for k in top_prob_idx]
            nz_prob = [nz_prob[k] for k in top_prob_idx]

        # Normalise the retained edge values. When nothing was retained the
        # list is empty, so no division by zero takes place.
        sum_nz = sum(nz_prob)
        nz_prob = [value / sum_nz for value in nz_prob]

        if i in already_in or sum_nz > 0:
            record = query.iloc[i]
            title = record['title']
            for key, value in cluster_dict.items():
                if str(i) in value:
                    title = "[" + str(key) + "] " + title

            row = [i, list(idx_list), nz_prob, record['date'], record['publication'],
                   title, '', record['url']]
            if has_bias:
                row.append(record['bias'])
            row.append(coherence)
            # `i` is strictly increasing, so each id is appended at most once.
            graph_df.loc[len(graph_df)] = row

            already_in.add(i)
            already_in.update(idx_list)
    return graph_df

In [6]:
# Module-level state written by solve_LP and read by create_LP and
# build_graph_df_multiple_starts.
start_time = None  # reference point for the "--- N seconds ---" log lines
window_i_j = {}    # i -> list of candidate successor indices j
window_j_i = {}    # j -> list of candidate predecessor indices i


def solve_LP(
    query,
    dataset,
    membership_vectors,
    K=6,
    mincover=0.20,
    sigma_t=30,
    start_nodes=[],
    end_nodes=[],
    verbose=True,
    force_cluster=True,
    use_entities=True,
    use_temporal=True,
    strict_start=False,
):
    """Build, solve and post-process the story-chain LP for ``query``.

    Besides its arguments this function reads the module-level names
    ``temporal_distance_table``, ``sim_table``, ``clust_sim_table`` and
    ``numclust``, which must be defined before the call. It rebinds the global
    ``start_time`` and repopulates the global ``window_i_j`` / ``window_j_i``
    dictionaries.

    Args:
        query: data frame of documents; ``len(query.index)`` is the node count.
        dataset: forwarded to ``get_entity_table`` when ``use_entities`` is set.
        membership_vectors: only ``shape[0]`` is read here; the object is also
            forwarded to ``create_LP``.
        K: required sum of node activations; also sets the pruning width
            ``ceil(sqrt(K))`` and the threshold ``0.1 / K`` for the graph.
        mincover, force_cluster: forwarded to ``create_LP``.
        sigma_t: temporal scale. The temporal window (``3 * sigma_t``) is used
            only when ``sigma_t != 0`` and ``use_temporal`` is true.
        start_nodes, end_nodes: node indices, or ``None``/empty for "none".
        verbose: print progress messages and solver output.
        use_entities: compute the entity table with ``get_entity_table``.
        use_temporal: enable the temporal window.
        strict_start: when start nodes are given, pass the graph through
            ``graph_clean_up``.

    Returns:
        ``[graph_df, (numclust, status_string), sim_table, clust_sim_table,
        ent_table, ent_doc_list]``.
    """
    global start_time
    start_time = time()

    def log_step(message):
        # Progress message followed by the seconds elapsed since `start_time`.
        if verbose:
            print(message)
            print("--- %s seconds ---" % (time() - start_time))

    n = len(query.index)

    temporal_enabled = sigma_t != 0 and use_temporal
    if temporal_enabled:
        exp_temp_table = np.exp(-temporal_distance_table / sigma_t)
    else:
        exp_temp_table = np.ones(temporal_distance_table.shape)
    log_step("Computed temporal distance table.")

    window_time = sigma_t * 3 if temporal_enabled else None  # Days

    # Rebuild the windows in place: other functions hold on to these very
    # dictionaries through the module namespace, so they must not be rebound.
    window_i_j.clear()
    window_j_i.clear()
    if window_time is None:
        # No temporal restriction: every later node is a candidate successor.
        for i in range(0, n):
            window_i_j[i] = list(range(i + 1, n))
            window_j_i[i] = list(range(0, i))
    else:
        for i in range(0, n):
            window_i_j[i] = []
            window_j_i[i] = []
        for i in range(0, n - 1):
            # Count the later nodes that lie within the temporal window.
            window = sum(
                1 for j in range(i + 1, n)
                if temporal_distance_table[i, j] <= window_time
            )
            window = max(min(5, n - i), window)
            # NOTE(review): this yields `window - 1` successors rather than
            # `window`; kept as in the original -- confirm whether intended.
            window_i_j[i] = list(range(i + 1, min(i + window, n)))
            for j in window_i_j[i]:
                window_j_i[j].append(i)
    log_step("Computed temporal windows.")

    ent_table = np.zeros((n, n))  # Entity information defaults to all zeros.
    ent_doc_list = None
    if use_entities:
        # NOTE(review): get_entity_table is neither defined nor imported in
        # the visible part of this notebook -- confirm it is in scope.
        ent_table, ent_doc_list = get_entity_table(query, dataset)
    log_step("Computed entity similarities.")

    # Deprecated relevance table computation: a vector full of 1s.
    relevance_table = [1.0] * membership_vectors.shape[0]

    # Both flags are always bound, including when the node lists are None.
    has_start = start_nodes is not None and len(start_nodes) > 0
    has_end = end_nodes is not None and len(end_nodes) > 0
    if verbose:
        print("Creating LP...")

    # No previous solution is fed to the LP.
    previous_varsdict = None

    prob = create_LP(
        query,
        sim_table,
        membership_vectors,
        clust_sim_table,
        exp_temp_table,
        ent_table,
        numclust,
        relevance_table,
        K=K,
        mincover=mincover,
        sigma_t=sigma_t,
        has_start=has_start,
        has_end=has_end,
        start_nodes=start_nodes,
        end_nodes=end_nodes,
        verbose=verbose,
        force_cluster=force_cluster,
        previous_varsdict=previous_varsdict
    )

    log_step("Solving model...")
    prob.solve(PULP_CBC_CMD(mip=False, warmStart=True, msg=verbose))

    varsdict = extract_varsdict(prob)

    graph_df = build_graph_df_multiple_starts(query, varsdict, prune=ceil(
        sqrt(K)), threshold=0.1/K, cluster_dict={})
    log_step("Graph data frame construction...")

    if strict_start and has_start:
        # NOTE(review): graph_clean_up is neither defined nor imported in the
        # visible part of this notebook -- confirm it is in scope.
        graph_df = graph_clean_up(graph_df, start_nodes)
    log_step("Graph clean up...")

    return [graph_df, (numclust, LpStatus[prob.status]), sim_table, clust_sim_table, ent_table, ent_doc_list]

## MAIN: Calling the Map Construction Method


In [7]:
# Load the AMiner subset and normalise the date column.
data_aminer = pd.read_feather("../../data/AMiner/aminer-subset.feather")
data_aminer["date"] = pd.to_datetime(data_aminer["date"])
data_aminer.reset_index(inplace=True, drop=True)  # The original indices are incorrect, so we reset here.

# Load embeddings for AMiner, one per "title;abstract" string.
aminer_embeds, _, _ = extract_embeddings(
    text=(data_aminer["title"] + ";" + data_aminer["abstract"]).tolist(),
    foldername="../../data/AMiner",
    model_name="gpt4"
)

data_aminer["embed"] = aminer_embeds.tolist()
# Columns read by the graph construction code; left empty for this data set.
data_aminer["publication"] = ""
data_aminer["url"] = ""

# Draw a fixed-seed sample of 1140 rows, order it by date, and keep the
# pre-sampling row label in the "idx" column.
data_aminer = data_aminer.sample(1140, replace=False, random_state=420).sort_values(by="date")
data_aminer.reset_index(names="idx", inplace=True)


# Select sources and targets: 50 draws (with replacement, from NumPy's global
# random state) from each half of the frame, leaving a 50-row margin on either
# side of the midpoint.
aminer_src = np.random.choice(data_aminer.index[:(len(data_aminer) // 2) - 50], 50)
aminer_tgt = np.random.choice(data_aminer.index[(len(data_aminer) // 2) + 50:], 50)

data_aminer

File '../../data/AMiner/embed_data-gpt4.pickle' loaded successfully.


Unnamed: 0,idx,id,title,doi,keywords,abstract,date,embed,publication,url
0,4407,53e9b0abb7602d9703b14d28,An Integrated Query and Mining System for Temp...,10.1007/3-540-44466-1_33,"[temporal association rules, better decision-m...",In real world the knowledge used for aiding de...,2000-01-01,"[-0.03705746680498123, 0.02978450618684292, 0....",,
1,1718,53e9baecb7602d970471ac79,Recognition of local features for camera-based...,10.1109/ICPR.2000.903050,"[hand shape, hand location, global features, c...",A sign language recognition system is required...,2000-01-01,"[0.03322494775056839, 0.005597303621470928, 0....",,
2,5394,53e99a0ab7602d9702257cbb,Chunking with WPDV Models.,10.3115/1117601.1117639,"[different base chunkers, wpdv model, base chu...",In this paper I describe the application of th...,2000-01-01,"[-0.003214028896763921, -0.01248752698302269, ...",,
3,4937,53e9ae3cb7602d9703852344,Fast implementation of multiple oriented filters,10.1109/ICPR.2000.903582,"[optimisation, binary restoration, time comple...",One method to estimate image values in the pre...,2000-01-01,"[-0.023320039734244347, 0.021940156817436218, ...",,
4,2798,53e9bc9db7602d970491c40d,Agent negotiation in trusted third party media...,10.1145/336595.337482,"[uncertainty equivalent, rational agents, trus...",Traditional game theoretic reasoning for agent...,2000-01-01,"[-0.060538649559020996, -0.025153733789920807,...",,
...,...,...,...,...,...,...,...,...,...,...
1135,2006,6361e3d390e50fcafd8ca03e,Fuzzy Cognitive Maps for Interpretable Image-b...,10.1109/FUZZ-IEEE55066.2022.9882767,"[fuzzy cognitive maps, fuzzy sets, interpretab...",Image classification is a fundamental componen...,2022-01-01,"[0.002480722963809967, -0.012560468167066574, ...",,
1136,4983,61bc2f585244ab9dcb0e9e6a,Using deep clustering to improve fMRI dynamic ...,10.1016/j.neuroimage.2022.119288,"[Dynamic functional connectivity, Sliding wind...",•We compared dimensionality reduction methods ...,2022-01-01,"[-0.0012515292037278414, -0.01167546771466732,...",,
1137,1500,61dab91e5244ab9dcb5ab654,Efficient deep-reinforcement learning aware re...,10.1007/s10515-021-00318-6,"[Deep reinforcement learning, Offloading, Sc...","These days, fog computing is an emerging parad...",2022-01-01,"[0.015906620770692825, -0.008914765901863575, ...",,
1138,245,62d16f3c5aee126c0fd7a8f0,Adapting Deep Learning for Content Caching Fra...,10.1109/OJCOMS.2022.3175927,"[Traffic offloading, D2D caching, machine lear...","Recently, we have witnessed an expeditious gro...",2022-01-01,"[0.03156762197613716, -0.031177597120404243, 0...",,


In [8]:
# Project the document embeddings with UMAP.
embedding_matrix = np.array(data_aminer["embed"].tolist())

umap_params = dict(
    n_neighbors=32,
    n_components=48,
    min_dist=0,
    metric="cosine",
    random_state=42,
    n_jobs=1,
    low_memory=True,
)
umap_model = umap.UMAP(**umap_params)
low_dim_mapper = umap_model.fit(embedding_matrix)
low_dim_embeds = low_dim_mapper.embedding_

# Cluster the projected points with HDBSCAN; prediction data is requested so
# that membership vectors can be computed for all points afterwards.
hdbscan_model = hdbscan.HDBSCAN(
    min_cluster_size=8,
    cluster_selection_method="eom",
    prediction_data=True,
)
hdbscan_model = hdbscan_model.fit(low_dim_embeds)

cluster_label_probs = hdbscan.prediction.all_points_membership_vectors(hdbscan_model)

# Each document's topic is the column with the largest membership value.
cluster_labels = cluster_label_probs.argmax(axis=1)
data_aminer["topic"] = cluster_labels

n_topics = len(np.unique(cluster_labels))
print(f"Found {n_topics} Topics")

Found 18 Topics


In [9]:
# ---- Map construction parameters ----

# Map length. Values from 6 to 12 usually produce decent maps, but this depends
# on the data set size and probably on the underlying distribution of similarities.
k_input = 12

# Required average coverage, in percent. 50-80 works for small data sets; bigger
# data sets with many clusters will likely only reach 20%. Tested with values up
# to 500; behaviour beyond that is unknown.
mincover_input = 0

# Temporal distance penalty in DAYS (30 was the default for the Cuban data set).
# Lower values allow more temporally distant connections; take the temporal
# density of the data into account. Setting it to 0 drops it from the computation.
sigma_t = 0
use_temporal = False  # Switch for the temporal penalty; disabled here.

# Keep this off: the reward for events with common entities adds too much
# computational time to be worth it.
use_entities = False

# With strict start enabled, any storyline that does not begin at the
# user-defined start node is discarded. Leaving it off keeps the extra
# storylines that emerge from the LP solution.
strict_start = False

# ---- Angular similarity between embeddings, min-max scaled to [0, 1] ----
embedding_matrix = np.array(data_aminer["embed"].tolist())
similarities = np.clip(cosine_similarity(embedding_matrix), -1, 1)
sim_table = 1 - np.arccos(similarities) / pi

# The scaling range comes from the off-diagonal entries only.
mask = np.ones(sim_table.shape, dtype=bool)
np.fill_diagonal(mask, 0)
off_diagonal_values = sim_table[mask]
max_value = off_diagonal_values.max()
min_value = off_diagonal_values.min()
sim_table = np.clip((sim_table - min_value) / (max_value - min_value), 0, 1)

# ---- Topic similarity from the cluster membership vectors ----
n_docs = cluster_label_probs.shape[0]
numclust = 1
clust_sim = np.zeros((n_docs, n_docs))

if cluster_label_probs.ndim > 1:
    numclust = cluster_label_probs.shape[1]
    # Drop memberships below the uniform level 1/numclust ...
    cluster_label_probs[cluster_label_probs < 1/numclust] = 0
    # ... give rows that lost every membership a uniform distribution ...
    emptied_rows = np.all(cluster_label_probs == 0, axis=1)
    cluster_label_probs[emptied_rows] = np.ones(numclust) / numclust
    # ... and renormalise every row to sum to 1.
    row_sums = cluster_label_probs.sum(axis=1)
    cluster_label_probs = cluster_label_probs / row_sums[:, np.newaxis]

    # Pairwise base-2 Jensen-Shannon distance between membership rows.
    clust_sim = distance.cdist(
        cluster_label_probs,
        cluster_label_probs,
        lambda u, v: distance.jensenshannon(u, v, base=2.0)
    )
else:
    cluster_label_probs = np.ones((n_docs, 1))

# Turn the distance into a similarity.
clust_sim_table = 1 - clust_sim

# ---- Temporal distance ----
temporal_distance_table = compute_temp_distance_table(data_aminer, "./narrative_maps/temp/aminer")

In [10]:
# One list per output column; a row is appended for every successful run.
results_data = {
    "algorithm": [],
    "src": [],
    "tgt": [],
    "exec_time": [],
    "effective_exec_time": [],
    "main_storyline": [],
    "storylines": []
}

# Number of times each (src, tgt) pair is solved.
n_repeats = 4

for src, tgt in tqdm(zip(aminer_src, aminer_tgt), total=len(aminer_src)):
    for _repeat in range(n_repeats):
        # Use a timer name of its own: solve_LP rebinds the module-level
        # `start_time` through `global`, so sharing that name would silently
        # move the start of the measured interval.
        run_start = time()
        graph_df_new, status, _, _, _, _ = solve_LP(
            data_aminer,
            dataset="news_articles",
            membership_vectors=cluster_label_probs,
            K=k_input,
            mincover=mincover_input/100,
            sigma_t=sigma_t,
            start_nodes=[src],
            end_nodes=[tgt],
            verbose=False,
            use_entities=use_entities,
            use_temporal=use_temporal,
            strict_start=strict_start,
        )
        elapsed = time() - run_start

        # Post Processing
        if 'Optimal' not in status[1]:
            print(f"** Warning: Experiment '({src}, {tgt})' not optimal")
            continue

        G = build_graph(graph_df_new)
        storylines = graph_stories(G, start_nodes=[src], end_nodes=[tgt])
        if not storylines:
            # Skip instead of failing on the division and indexing below and
            # losing the results collected so far.
            print(f"** Warning: Experiment '({src}, {tgt})' produced no storylines")
            continue

        results_data["algorithm"].append("narrative_maps")
        results_data["src"].append(src)
        results_data["tgt"].append(tgt)
        results_data["exec_time"].append(elapsed)
        # Wall-clock time divided by the number of storylines found.
        results_data["effective_exec_time"].append(elapsed / len(storylines))
        results_data["main_storyline"].append(storylines[0])
        results_data["storylines"].append(storylines)

100%|██████████| 50/50 [3:10:16<00:00, 228.33s/it]  


In [11]:
# Collect the per-run records into a data frame, one column per key.
results_data_df = pd.DataFrame(data=results_data)

# Display ten randomly chosen rows as examples.
results_data_df.sample(n=10)

Unnamed: 0,algorithm,src,tgt,exec_time,effective_exec_time,main_storyline,storylines
81,narrative_maps,226,729,55.07171,5.507171,"[226, 406, 541, 579, 665, 729]","[[226, 406, 541, 579, 665, 729], [252, 324, 34..."
124,narrative_maps,218,784,52.782767,4.798433,"[218, 235, 556, 592, 605, 660, 676, 732, 784]","[[218, 235, 556, 592, 605, 660, 676, 732, 784]..."
11,narrative_maps,175,653,58.577506,4.184108,"[175, 381, 519, 556, 580, 626, 644, 653]","[[175, 381, 519, 556, 580, 626, 644, 653], [63..."
68,narrative_maps,78,648,57.978055,3.623628,"[78, 299, 502, 648]","[[78, 299, 502, 648], [244, 272], [120, 196, 2..."
85,narrative_maps,329,939,53.571004,5.3571,"[329, 468, 503, 519, 520, 558, 693, 716, 748, ...","[[329, 468, 503, 519, 520, 558, 693, 716, 748,..."
25,narrative_maps,328,1025,52.419744,4.032288,"[328, 427, 504, 677, 827, 971, 999, 1025]","[[328, 427, 504, 677, 827, 971, 999, 1025], [5..."
83,narrative_maps,226,729,55.35198,5.535198,"[226, 406, 541, 579, 665, 729]","[[226, 406, 541, 579, 665, 729], [252, 324, 34..."
148,narrative_maps,81,1119,62.48305,4.806388,"[81, 567, 666, 684, 779, 785, 917, 929, 1045, ...","[[81, 567, 666, 684, 779, 785, 917, 929, 1045,..."
26,narrative_maps,328,1025,52.022058,4.001697,"[328, 427, 504, 677, 827, 971, 999, 1025]","[[328, 427, 504, 677, 827, 971, 999, 1025], [5..."
30,narrative_maps,460,1091,58.294845,3.886323,"[460, 522, 577, 580, 795, 922, 929, 1091]","[[460, 522, 577, 580, 795, 922, 929, 1091], [9..."


In [12]:
# Persist the results. The target directory is created first so that a long
# experiment run is not lost to a missing folder.
results_path = "./narrative_maps/results/aminer.pkl"
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results_data_df.to_pickle(results_path)