In [None]:
import sys
import os

# Absolute path of the parent of the current working directory.
# The path is anchored on the working directory rather than on a source file
# location, so it depends on where the kernel/process was started.
parent_dir = os.path.abspath(os.pardir)

# Add the parent directory to the system path to be able to import modules from 'lib'.
# The membership check keeps sys.path free of duplicates when this cell is re-run.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
from IPython.display import HTML, Markdown as md
import itertools
import math
import matplotlib
import matplotlib.pyplot as plt
import numpy
import numpy as np
import random

import pandas as pd
import pathlib

from lib.memory import DSDM
from lib.utils import cleanup, configs, inference, learning, preprocess, utils 

import torch
import torchhd as thd
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F 

from tqdm import tqdm
# Type checking
from typing import List 

In [None]:
# Pick the compute device: CUDA when torch reports it as available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Seed the random number generators through the project helper.
utils.fix_seed()

In [None]:
# Parametrized cell: `config` is initialized with the dictionary read from the YAML config file.
# If the notebook is run on its own (i.e., without running the run_experiment.sh script and giving it
# a configuration YAML as input), `config` stays an empty dictionary and the default experiment is run.
# Note: the default experiment should also be run if the run_experiment.sh script is run on its own
# (i.e., without a YAML as input).
# NOTE(review): the original note read "with a YAML"; "without" is assumed here -- confirm.
config = {}

In [None]:
# Turn `config` into a configs.Config instance.
#
# Three cases are handled:
#   * `config` is already a configs.Config: this cell has been run before in the
#     same kernel, so it is left untouched (re-running must not wrap a Config
#     inside another Config).
#   * `config` is empty (nothing was provided by run_experiment.sh): the default
#     YAML, experiment-1.yaml, is loaded.
#   * otherwise: `config` is the dictionary read from the input YAML.
if isinstance(config, configs.Config):
    pass
elif not config:
    # Read default YAML; the default YAML is experiment-1.yaml.
    display(md("No YAML configuration file was provided. Runing default configuration: 'experiment-1.yaml' "))
    config = configs.Config.from_file("configs/experiment-1.yaml")
else:
    # Initialize config from input YAML.
    display(md("Runing provided YAML configuration."))
    config = configs.Config(config)

In [None]:
# Render the report heading, then the table of contents.
heading = f"# Experiment {config.experiment_title} results"
display(md(heading))
utils.display_toc()

In [None]:
# Build the Cleanup instance that the following cells use under the name `cleanup`.
# The assignment below rebinds `cleanup` from the imported module to the instance,
# so the module is fetched again under an alias first. Without this, re-running
# the cell would look up `.Cleanup` on the instance instead of on the module.
from lib.utils import cleanup as cleanup_module

cleanup = cleanup_module.Cleanup(config.dim)

In [None]:
# Initialize memory/ies.
def _build_memory(normalize):
    """Create one DSDM memory from the experiment configuration.

    All constructor arguments except `normalize` are read from `config`
    (`config.dim` and the `config.DSDM` section), so every memory built for an
    experiment shares the same settings and differs only in normalization.

    Args:
        normalize: Value forwarded to the `normalize` argument of DSDM.DSDM.

    Returns:
        The newly constructed DSDM.DSDM instance.
    """
    return DSDM.DSDM(
        address_size=config.dim,
        ema_time_period=config.DSDM.ema_time_period,
        learning_rate_update=config.DSDM.learning_rate_update,
        temperature=config.DSDM.temperature,
        normalize=normalize,
    )


# `memories` maps the memory type reported by each memory to the memory itself.
if config.experiment_type == 'comparison':
    # Comparison experiments use two memories: one with normalize=True and one
    # with normalize=False.
    memory_normalized = _build_memory(normalize=True)
    memory_unnormalized = _build_memory(normalize=False)
    memories = {
        memory_normalized.get_memory_type(): memory_normalized,
        memory_unnormalized.get_memory_type(): memory_unnormalized,
    }
else:
    # Any other experiment type uses a single memory whose normalization comes
    # from the configuration.
    memory = _build_memory(normalize=config.DSDM.normalize)
    memories = {
        memory.get_memory_type(): memory,
    }

In [None]:
# Initial training: run the online-learning routine on the data configured under
# `config.initial_training`, with inference switched off (infer=False).
initial_training_cfg = config.initial_training
learning.online_learning_with_inference(
    cleanup=cleanup,
    memories=memories,
    data_path=initial_training_cfg.data_path,
    chunk_sizes=initial_training_cfg.chunk_sizes,
    epochs=initial_training_cfg.epochs,
    infer=False,
)

In [None]:
# Every (sentence, tracked token) pair; this is the index used to pick out the
# similarities of the tokens we keep track of.
index = list(
    itertools.product(
        config.inference.sentences,
        config.inference.tracked_tokens,
    )
)

display(md("## Initial training"))

# Run inference once per memory and collect the resulting tables by memory type.
initial_sims_dfs = {}
#initial_tracked_tokens_sims_dfs = {}
for memory_type, memory in memories.items():
    initial_sims_dfs[memory_type] = inference.infer(config.dim, cleanup, memory, config.inference.sentences)
#     initial_tracked_tokens_sims_dfs[memory_type] = initial_sims_dfs[memory_type].loc[index]

# Anchor for in-notebook links, followed by the section heading and the tables.
display(HTML('<a id="initial-training-extracted-concepts"> </a>'))
display(md("### Extracted concepts"))
utils.column_output(memories=memories, tables=initial_sims_dfs, horizontal_output=False)
#display(HTML(f'<a id="initial-training-tracked-tokens-similarities"> </a>'))
#display(md("### Tracked tokens similarties"))
#utils.column_output(
#    memories=memories,
#    tables=initial_tracked_tokens_sims_dfs,
#    horizontal_output=False
#)

In [None]:
#display(HTML(f'<a id="initial-training-memory-state"> </a>'))
#display(md("### Memory state"))
#for memory_type, memory in memories.items():
#    display(md(f"#### <ins>{memory_type.capitalize()}</ins>"))
#    concepts_df = inference.display_and_get_memory_addresses(memory, cleanup)
#    inference.get_similarity_matrix_of_addresses_mapping_to_same_concepts(concepts_df)

In [None]:
## Online training.
#sims_dfs, tracked_tokens_dfs = learning.online_learning_with_inference(
#    cleanup=cleanup,
#    memories=memories,
#    data_path=config.training.data_path,
#    chunk_sizes=config.training.chunk_sizes,
#    epochs=config.training.epochs,
#    infer=True,
#    inference_sentences=config.inference.sentences,
#    tracked_tokens=config.inference.tracked_tokens,
#)

In [None]:
#display(md("## Training"))
#
#display(HTML(f'<a id="training-extracted-concepts"> </a>'))
#display(md("### Extracted concepts"))
#utils.column_output(
#    memories=memories,
#    tables=sims_dfs,
#    horizontal_output=False
#)
#display(HTML(f'<a id="training-tracked-tokens-similarities"> </a>'))
#display(md("### Tracked tokens similarties"))
#utils.column_output(
#    memories=memories,
#    tables=tracked_tokens_dfs,
#    horizontal_output=False
#)

In [None]:
#display(HTML(f'<a id="training-memory-state"> </a>'))
#display(md("### Memory state"))
#for memory_type, memory in memories.items():
#    display(md(f"#### <ins>{memory_type.capitalize()}</ins>"))
#    concepts_df = inference.display_and_get_memory_addresses(memory, cleanup)
#    inference.get_similarity_matrix_of_addresses_mapping_to_same_concepts(concepts_df)