# View sample predictions

This notebook loads a trained model from a checkpoint and prints a sample title prediction for a motion from the test set.

In [7]:
import random
import sys
import warnings

import pandas as pd
import yaml
from transformers.models.mt5 import MT5Tokenizer

sys.path.append('..')  # Allow import of project packages
from motion_title_generator.data.motions_data_module import MotionsDataModule
from motion_title_generator.models import t5
from motion_title_generator.lit_models import MT5LitModel
from utils.encode_decode import encode, generate

# Notebook-wide settings: silence all warnings and let pandas display up to
# 500 characters per column.
warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', 500)

# Artifacts of the training run: the checkpoint and the hyperparameter file
# both live under the same Lightning log directory.
BASE_PATH = "../training/logs/lightning_logs/version_1/"
MODEL_PATH = f"{BASE_PATH}checkpoints/epoch=009-val_loss=1.242.ckpt"
HPARAMS_PATH = f"{BASE_PATH}hparams.yaml"

### Load data

In [2]:
# Read the pytorch-lightning experiment args stored in the run's hparams file.
# NOTE(review): yaml.Loader can construct arbitrary Python objects, so only
# point HPARAMS_PATH at files from a trusted source.
with open(HPARAMS_PATH) as hparams_stream:
    lightning_config = yaml.load(hparams_stream, Loader=yaml.Loader)

# Build the data module from the experiment args, then run its preparation
# and setup steps so the test data is available to the cells below.
dataset = MotionsDataModule(lightning_config)
dataset.prepare_data()
dataset.setup()
print(dataset)

INFO:Dowloading files from https://data.riksdagen.se.
100%|██████████| 1231/1231 [00:00<00:00, 41050.66it/s]
INFO:Loading data from /home/erik/proj/swedish_parliament_motion_summarization/data/downloaded/raw_swe_parl_mot.pkl into pandas dataframe.
INFO:Preprocessing data ...
INFO:Filtered 521 rows with missing values.
INFO:Filtered 7445 texts shorter than 150 characters.
INFO:Filtered 17004 texts based on their title.
INFO:Number of rows remaining: 145100
INFO:Preprocessed data saved to /home/erik/proj/swedish_parliament_motion_summarization/data/downloaded/prepped_training_data.feather
INFO:Using 145100 of 145100 examples.
INFO:Data split sizes -- train: 108825, val: 21765, test: 14510


<motion_title_generator.data.motions_data_module.MotionsDataModule object at 0x7f37997e8a60>


### Load model

In [3]:
# Instantiate the MT5 model first; load_from_checkpoint receives it through
# the `model` argument when restoring the Lightning module.
model = t5.MT5(data_config={}, args=lightning_config)

lit_model = MT5LitModel.load_from_checkpoint(checkpoint_path=MODEL_PATH, model=model)
lit_model.eval()  # put the restored module in evaluation mode

# Tokenizer matching the pretrained model name configured on the MT5 model.
tokenizer = MT5Tokenizer.from_pretrained(model.model_name)

Lightning automatically upgraded your loaded checkpoint from v1.6.4 to v2.0.4. To apply the upgrade to your files permanently, run `python -m lightning.pytorch.utilities.upgrade_checkpoint --file ../training/logs/lightning_logs/version_1/checkpoints/epoch=009-val_loss=1.242.ckpt`


### Generate title

In [4]:
def summarize(model, text, tokenizer, text_max_num_tokens, title_max_num_tokens):
    """Generate a title for ``text`` using ``model.model`` on the CPU.

    Args:
        model: Object whose ``model`` attribute is moved to the CPU and handed
            to ``generate``.
        text: Input text to encode.
        tokenizer: Tokenizer passed to both ``encode`` and ``generate``.
        text_max_num_tokens: Token limit forwarded to ``encode``.
        title_max_num_tokens: Token limit forwarded to ``generate``.

    Returns:
        Whatever ``generate`` returns for the encoded text.
    """
    network = model.model
    network.to("cpu")
    encoded_text = encode(text, tokenizer, text_max_num_tokens)
    return generate(network, tokenizer, encoded_text, title_max_num_tokens)

In [8]:
def show_sample_pred(
    tokenizer,
    text_max_num_tokens=512,
    title_max_num_tokens=64,
    preview_chars=500,
):
    """Print a random test motion with its actual and model-predicted title.

    Reads the notebook-level ``dataset`` and ``model`` objects and delegates
    title generation to ``summarize``.

    Args:
        tokenizer: Tokenizer forwarded to ``summarize``.
        text_max_num_tokens: Token limit passed on for the motion text.
        title_max_num_tokens: Token limit passed on for the generated title.
        preview_chars: Number of leading characters of the motion text to print.

    Raises:
        ValueError: If the test split contains no samples.
    """
    num_samples = len(dataset.data_test.data)
    if num_samples == 0:
        raise ValueError("The test split is empty; there is no sample to show.")

    # randrange excludes the upper bound. random.randint(0, num_samples) is
    # inclusive on both ends and could index one past the last sample.
    sample_index = random.randrange(num_samples)
    text = dataset.data_test.data[sample_index]
    true_summary = dataset.data_test.targets[sample_index]

    model_summary = summarize(
        model=model,
        text=text,
        tokenizer=tokenizer,
        text_max_num_tokens=text_max_num_tokens,
        title_max_num_tokens=title_max_num_tokens,
    )

    separator = 50 * "-"
    print("Motion text:")
    print(separator)
    print(text[:preview_chars])
    print(separator)
    print("Actual title:")
    print(true_summary)
    print(separator)
    print("Predicted title:")
    print(model_summary)
    print(separator)

show_sample_pred(lit_model.tokenizer)

Motion text:
--------------------------------------------------
I delgivningskungörelsen den 29 juni 1970 anges i 4 § att de myndigheter som finns upptagna i bilaga 2 till kungörelsen får anlita postverket för särskild postdelgivning. I förteckningen finns övervakningsnämnd icke angiven, och nämnden saknar således möjlighet att använda särskild postdelgivning. Övervakningsnämnds arbete skulle underlättas och effektiviteten avsevärt öka, om nämnden ägde rätt att i ärende som den handlägger delge den dömde medelst särskild postdelgivning. Med hänvisning till d
--------------------------------------------------
Actual title:
om rätt för övervakningsnämnd att anlita postverket för särskild postdelgivning.
--------------------------------------------------
Predicted title:
angående postverket för särskild postdelgivning.
--------------------------------------------------
