# Init

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
# Work from the project root so that relative paths used later in the notebook
# (e.g. './experiments', 'data/document type.csv') resolve inside the project.
# NOTE(review): presumably this is also what makes the project-local imports
# below (load_data, trainer, parameters) importable — confirm.
# The location can be overridden per machine through the HORN_KEDAR_ROOT
# environment variable; the original hard-coded path remains the default.
os.chdir(os.environ.get('HORN_KEDAR_ROOT', '/Users/elad/workspace/playground/stuff/horn_kedar'))

from load_data import load_data, create_dataset, get_dataset_tik_ids
from trainer import Trainer, ExperimentResults
from parameters import HyperParams, PosWeightSchedule, NUM_FIELDS

In [None]:
# Load the raw data and build the three datasets returned by create_dataset,
# bound here as the train / validation / test splits.
data = load_data()
dataset_train, dataset_val, dataset_test = create_dataset(data)
# Bundle the three splits into one tuple; run_train uses a name `data_triplet`
# as the default value of its `data_triplet` parameter.
data_triplet = (dataset_train, dataset_val, dataset_test)

In [4]:
# Root directory for experiment artifacts (relative to the working directory).
# Saved experiments are loaded from EXPERIMENTS_FOLDER/<experiment_name>.
EXPERIMENTS_FOLDER = './experiments'

In [5]:
def run_train(
    hyperparams: HyperParams,
    data_triplet = data_triplet,
    try_load: bool = False,
    avoid_save: bool = False,
    verbose: bool = True,
):
    """Train a model for ``hyperparams``, or reload a previously saved run.

    Args:
        hyperparams: Experiment configuration. ``hyperparams.experiment_name``
            selects the sub-folder of ``EXPERIMENTS_FOLDER`` that is checked
            for an existing run.
        data_triplet: ``(train, val, test)`` datasets. The default is the
            notebook-level ``data_triplet`` as it existed when this ``def``
            was executed (Python evaluates defaults once, at definition
            time), so re-run this cell after rebuilding the datasets.
        try_load: If True and the experiment folder exists, load the stored
            results instead of training.
        avoid_save: If True, freshly trained results are not saved.
        verbose: Forwarded to ``Trainer.train``.

    Returns:
        A ``(results, trainer)`` tuple: the ``ExperimentResults`` and the
        ``Trainer`` that produced it (or that was rebuilt from it on load).
    """
    train_set, val_set, test_set = data_triplet
    experiment_dir = os.path.join(EXPERIMENTS_FOLDER, hyperparams.experiment_name)

    # Fast path: reuse a stored experiment when requested and available.
    if try_load and os.path.exists(experiment_dir):
        loaded = ExperimentResults.load(experiment_dir)
        return loaded, Trainer.from_experiment_results(loaded)

    # Otherwise run training, then compute the precision-recall curve on the test split.
    trainer = Trainer(hyperparams)
    loss_history = trainer.train(train_set, val_set, verbose=verbose)
    test_pr_curve = trainer.get_precision_recall_curve(test_set)
    results = ExperimentResults(
        hyperparams=hyperparams,
        model=trainer.best_model,
        losses=loss_history,
        precision_recall_curve=test_pr_curve,
        latest_model=trainer.latest_model,
    )
    if not avoid_save:
        results.save(EXPERIMENTS_FOLDER)
    return results, trainer

# Train

In [None]:
# Experiment VERSION01: 2 RNN layers with hidden size 50, a fixed pos_weight
# of 20, the 'cosine' scheduler and a weight decay of 1e-3.
# NOTE(review): 'f1_pm0' presumably names the metric used to pick the best
# model — confirm against HyperParams / Trainer.
hyperparams = HyperParams(
    experiment_name='VERSION01_bignet_pos_weight_20_weight_decay',
    scheduler='cosine',
    pos_weight=20,
    num_rnn_layers=2,
    hidden_size=50,
    weight_decay=1e-3,
    use_max_metric='f1_pm0',
)

# With run_train's defaults (try_load=False, avoid_save=False) this always
# trains and then saves the results under EXPERIMENTS_FOLDER.
res, trainer = run_train(hyperparams)

# Plot the losses and the precision-recall curve held by the results object
# (run_train computes that curve on the test dataset).
res.plot_losses()
res.plot_precision_recall_curve()

In [None]:
# Experiment VERSION02: same layer count, hidden size, scheduler and weight
# decay as VERSION01, with an explicit learning rate and a pos_weight schedule
# of four 5000-epoch stages (100 -> 20 -> 10 -> 5) whose lengths add up to
# num_epochs=20000.
# NOTE(review): how the scalar pos_weight=10 interacts with pos_weight_schedule
# is not visible here — confirm in Trainer.
hyperparams = HyperParams(
    experiment_name='VERSION02_v1_pos_weight_schedule_num_epochs_20k',
    scheduler='cosine',
    pos_weight=10,
    num_rnn_layers=2,
    hidden_size=50,
    weight_decay=1e-3,
    learning_rate=0.001,
    use_max_metric='f1_pm0',
    pos_weight_schedule=[
        PosWeightSchedule(num_epochs=5000, pos_weight=100),
        PosWeightSchedule(num_epochs=5000, pos_weight=20),
        PosWeightSchedule(num_epochs=5000, pos_weight=10),
        PosWeightSchedule(num_epochs=5000, pos_weight=5),
    ],
    num_epochs=20000,
    # Points at the VERSION01 experiment folder, so that experiment must have
    # been trained and saved first.
    # NOTE(review): presumably the model is initialised from it — confirm.
    start_model_path=os.path.join(EXPERIMENTS_FOLDER, 'VERSION01_bignet_pos_weight_20_weight_decay'),
)

# With run_train's defaults this always trains and then saves the results.
res, trainer = run_train(hyperparams)

# Plot the losses and the precision-recall curve held by the results object.
res.plot_losses()
res.plot_precision_recall_curve()

In [None]:
# Plot the precision-recall curves of the two saved experiments (looked up by
# name under EXPERIMENTS_FOLDER) so they can be compared.
ExperimentResults.plot_multiple_pr_curves(EXPERIMENTS_FOLDER, experiment_names=['VERSION01_bignet_pos_weight_20_weight_decay', 'VERSION02_v1_pos_weight_schedule_num_epochs_20k'])

# Inference

In [None]:
from load_data import create_unlabeled_dataset, load_unlabeled_data

# Load the unlabeled data and build the dataset used for inference together
# with the ids returned alongside it.
unlabeled_data = load_unlabeled_data()
unlabeled_dataset, tik_ids = create_unlabeled_dataset(unlabeled_data)

# Display both lengths.
# NOTE(review): presumably there is one id per dataset item, so the two
# numbers should match — confirm.
len(unlabeled_dataset), len(tik_ids)


In [None]:
# Load a saved experiment, run its trainer over the unlabeled dataset with the
# given `threshold`, and export the resulting DataFrame to a CSV file placed
# directly under EXPERIMENTS_FOLDER.
experiment_name = 'VERSION02_v1_pos_weight_schedule_num_epochs_20k'
threshold = 0.99
res = ExperimentResults.load(os.path.join(EXPERIMENTS_FOLDER, experiment_name))
trainer = Trainer.from_experiment_results(res)
predictions_df = trainer.infer_to_df(unlabeled_dataset, tik_ids, threshold=threshold)
# round() instead of int(): int() truncates, so binary floating-point error
# would mislabel the file for some thresholds (e.g. int(0.29 * 100) == 28).
# For threshold = 0.99 the resulting name is unchanged (..._threshold_99_...).
threshold_percent = round(threshold * 100)
predictions_df.to_csv(os.path.join(EXPERIMENTS_FOLDER, f'{experiment_name}_threshold_{threshold_percent}_predictions.csv'), index=False)


# Analyzing parameters

In [23]:
from matplotlib import pyplot as plt
import torch
import pandas as pd

In [None]:
# Read the document-type table; its 'Document_type' column supplies the names
# used as x-axis tick labels in the plots below.
fields_path = 'data/document type.csv'
fields_df = pd.read_csv(fields_path)
fields_names = fields_df['Document_type'].tolist()
# Reverse every name character by character.
# NOTE(review): presumably a workaround for right-to-left text being rendered
# in reverse by matplotlib — confirm.
fields_names_reversed = [x[::-1] for x in fields_names]
fields_df.head()

In [None]:
# Load the saved experiment and rebuild its trainer, then collect the best
# model's named parameters: `params` is a list of (name, parameter) pairs and
# `p` maps each parameter name to its tensor for the analysis cells below.
model_name = 'VERSION02_v1_pos_weight_schedule_num_epochs_20k'
res = ExperimentResults.load(os.path.join(EXPERIMENTS_FOLDER, model_name))
trainer = Trainer.from_experiment_results(res)
params = list(trainer.best_model.named_parameters())
p = dict(params)

# Print names and shapes
for name, param in trainer.best_model.named_parameters():
    print(name, param.shape)


In [None]:
# Calc the contribution of each element in x_t to the output.
# Only weight matrices are multiplied, so this is a linear estimate: biases and
# activation functions are not part of the computation.
# NOTE(review): the parameter names suggest a 2-layer bidirectional
# torch.nn.RNN followed by a linear layer `fc` — confirm against the model.
# Path: x_t -> layer-0 input weights (forward stacked on reverse)
#           -> layer-1 input weights (forward stacked on reverse) -> fc.
layer_0 = torch.cat([p['rnn.weight_ih_l0'], p['rnn.weight_ih_l0_reverse']], dim=0)
d_out_d_x_t = p['fc.weight'] @ torch.cat([p['rnn.weight_ih_l1'] @ layer_0, p['rnn.weight_ih_l1_reverse'] @ layer_0], dim=0)

# .cpu() keeps the conversion working when the parameters live on a GPU
# (Tensor.numpy() only supports CPU tensors); it is a no-op on CPU.
d_out_d_x_t_np = d_out_d_x_t.detach().cpu().numpy().squeeze()

# Draw on the axes object created here instead of pyplot's implicit current axes.
fig, ax = plt.subplots(figsize=(15, 3))
ax.bar(range(NUM_FIELDS), d_out_d_x_t_np)
ax.set_xticks(range(NUM_FIELDS))
ax.set_xticklabels(fields_names_reversed, rotation=90)
ax.set_title('Contribution of each element in current time step to the output')
ax.set_xlabel('Field index')
ax.set_ylabel('Contribution')
ax.grid(True, axis='x')
plt.show()

In [None]:
# Calc the contribution of each element in x_t-1 to the output.
# Only weight matrices are multiplied, so this is a linear estimate: biases and
# activation functions are not part of the computation, and only paths with a
# single hidden-to-hidden step are included.
# NOTE(review): the parameter names suggest a 2-layer bidirectional
# torch.nn.RNN followed by a linear layer `fc` — confirm against the model.

# Path 1: x_t-1 -> layer 0 (forward stacked on reverse) -> layer-1 forward
# input weights -> one layer-1 forward hidden-to-hidden step.
part1 = p['rnn.weight_hh_l1'] @ p['rnn.weight_ih_l1'] @ torch.cat([p['rnn.weight_ih_l0'], p['rnn.weight_ih_l0_reverse']], dim=0)
# Place it in the first (forward) half; the second (reverse) half is zero.
part1_cat = torch.cat([part1, torch.zeros_like(part1)], dim=0)
# Path 2: x_t-1 -> layer-0 forward input weights -> one layer-0 forward
# hidden-to-hidden step, placed in the first (forward) half of layer 0's output ...
part2 = p['rnn.weight_hh_l0'] @ p['rnn.weight_ih_l0']
part2_cat = torch.cat([part2, torch.zeros_like(part2)], dim=0)
# ... and then passed through both layer-1 input weight matrices.
part2_cat_cat = torch.cat([p['rnn.weight_ih_l1'] @ part2_cat, p['rnn.weight_ih_l1_reverse'] @ part2_cat], dim=0)
d_out_d_x_t_minus_1 = p['fc.weight'] @ (part1_cat + part2_cat_cat)

# .cpu() keeps the conversion working when the parameters live on a GPU
# (Tensor.numpy() only supports CPU tensors); it is a no-op on CPU.
d_out_d_x_t_minus_1_np = d_out_d_x_t_minus_1.detach().cpu().numpy().squeeze()

# Draw on the axes object created here instead of pyplot's implicit current axes.
fig, ax = plt.subplots(figsize=(15, 3))
ax.bar(range(NUM_FIELDS), d_out_d_x_t_minus_1_np)
ax.set_xticks(range(NUM_FIELDS))
ax.set_xticklabels(fields_names_reversed, rotation=90)
ax.set_title('Contribution of each element in previous time step to the output')
ax.set_xlabel('Field index')
ax.set_ylabel('Contribution')
ax.grid(True, axis='x')
plt.show()

In [None]:
# Calc the contribution of each element in x_t+1 to the output.
# Only weight matrices are multiplied, so this is a linear estimate: biases and
# activation functions are not part of the computation, and only paths with a
# single hidden-to-hidden step are included.
# NOTE(review): the parameter names suggest a 2-layer bidirectional
# torch.nn.RNN followed by a linear layer `fc` — confirm against the model.

# Path 1: x_t+1 -> layer 0 (forward stacked on reverse) -> layer-1 reverse
# input weights -> one layer-1 reverse hidden-to-hidden step.
part1 = p['rnn.weight_hh_l1_reverse'] @ p['rnn.weight_ih_l1_reverse'] @ torch.cat([p['rnn.weight_ih_l0'], p['rnn.weight_ih_l0_reverse']], dim=0)
# Place it in the second (reverse) half; the first (forward) half is zero.
part1_cat = torch.cat([torch.zeros_like(part1), part1], dim=0)
# Path 2: x_t+1 -> layer-0 reverse input weights -> one layer-0 reverse
# hidden-to-hidden step, placed in the second (reverse) half of layer 0's output ...
part2 = p['rnn.weight_hh_l0_reverse'] @ p['rnn.weight_ih_l0_reverse']
part2_cat = torch.cat([torch.zeros_like(part2), part2], dim=0)
# ... and then passed through both layer-1 input weight matrices.
part2_cat_cat = torch.cat([p['rnn.weight_ih_l1'] @ part2_cat, p['rnn.weight_ih_l1_reverse'] @ part2_cat], dim=0)
d_out_d_x_t_plus_1 = p['fc.weight'] @ (part1_cat + part2_cat_cat)

# .cpu() keeps the conversion working when the parameters live on a GPU
# (Tensor.numpy() only supports CPU tensors); it is a no-op on CPU.
d_out_d_x_t_plus_1_np = d_out_d_x_t_plus_1.detach().cpu().numpy().squeeze()

# Draw on the axes object created here instead of pyplot's implicit current axes.
fig, ax = plt.subplots(figsize=(15, 3))
ax.bar(range(NUM_FIELDS), d_out_d_x_t_plus_1_np)
ax.set_xticks(range(NUM_FIELDS))
ax.set_xticklabels(fields_names_reversed, rotation=90)
ax.set_title('Contribution of each element in next time step to the output')
ax.set_xlabel('Field index')
ax.set_ylabel('Contribution')
ax.grid(True, axis='x')
plt.show()