In [26]:
# necessary import statements
import utils
from transformers import AutoTokenizer, TFAutoModelForCausalLM
from rouge_score import rouge_scorer, scoring
import matplotlib.pyplot as plt
import numpy as np
from keras.models import Model, load_model
import pandas as pd
import os


In [27]:
# Restore the fine-tuned causal-LM checkpoint from disk and pair it with the
# stock 'gpt2' tokenizer.
tf_model = TFAutoModelForCausalLM.from_pretrained(
    '../trained_models/gpt2-summarization/'
)
tokenizer = AutoTokenizer.from_pretrained(
    'gpt2'
)

# Notebook-wide constants
TLDR = ' TL;DR '  # marker string placed between an article and its headline
MAX_LEN = 512  # NOTE(review): not referenced in the cells visible here; presumably a token/length cap, confirm

All model checkpoint layers were used when initializing TFGPT2LMHeadModel.

All the layers of TFGPT2LMHeadModel were initialized from the model checkpoint at ../trained_models/gpt2-summarization/.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFGPT2LMHeadModel for predictions without further training.


In [28]:
# Per-example headline stores, filled in by the generation loop below.
ah = dict()  # actual (reference) headlines
ph = dict()  # headlines predicted by the model

def summarize_article_tf(model, article):
    """Run the given model on an article prompt and return the decoded text.

    Args:
        model: Object exposing ``generate(**inputs, max_new_tokens=...,
            pad_token_id=...)``; the notebook passes the fine-tuned GPT-2 model.
        article: Prompt text to be tokenized with the notebook-level
            ``tokenizer``.

    Returns:
        The string obtained by decoding the first sequence returned by
        ``model.generate``. At most 32 new tokens are requested.
    """
    encoded = tokenizer(article, return_tensors="np")
    generated = model.generate(
        **encoded,
        max_new_tokens=32,
        # NOTE(review): 50256 is presumably GPT-2's <|endoftext|> id; confirm.
        pad_token_id=50256,
    )
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence)

# Resume support: reload predictions checkpointed by an earlier run of this cell,
# or start with empty lists when no checkpoint file exists yet.
RESULTS_CSV = '../data/results_tf.csv'
if os.path.exists(RESULTS_CSV):
    preds_df = pd.read_csv(RESULTS_CSV)
    preds = {'predicted headline': preds_df['predicted headline'].tolist(), 'actual headline': preds_df['actual headline'].tolist()}
else:
    preds = {'predicted headline': [], 'actual headline': []}

# The number of stored rows is the index of the first test line that still needs
# a prediction (0 on a fresh run). Deriving it from `preds` rather than `preds_df`
# avoids a NameError when there is no checkpoint, and avoids re-processing the
# last saved line on resume.
# NOTE(review): assumes checkpoint row i corresponds to test line i; a checkpoint
# written by the previous off-by-one resume logic may contain duplicated rows.
start = len(preds['predicted headline'])

# Only the first 1000 test lines are evaluated. The file is read up front so the
# handle is not held open during the (slow) generation loop.
with open('../data/test_data.txt', encoding='utf-8') as f:
    lines = f.readlines()[start:1000]

# Generate headlines using the test data.
# Note: `ah` / `ph` only receive the examples processed in this run; `preds`
# holds the checkpointed rows plus the new ones.
for idx, line in enumerate(lines, start=start):
    article, actual_headline = line.strip().split(TLDR)
    article = article + TLDR  # the prompt ends with the marker so the model continues with a headline
    ah[idx] = actual_headline
    # The decoded output is split on the marker and the text after it is kept.
    predicted_headline = summarize_article_tf(tf_model, article).split(TLDR)[1].replace('<|endoftext|>', '.').strip()
    ph[idx] = predicted_headline
    preds['predicted headline'].append(predicted_headline)
    preds['actual headline'].append(actual_headline)
    print(f'{idx}:')
    print(f'\tactual: {actual_headline}')
    print(f'\tpredic: {predicted_headline}')
    # Periodic checkpoint so an interrupted run can be resumed.
    if idx % 50 == 0 and idx != 0:
        preds_df = pd.DataFrame.from_dict(preds, orient='columns')
        preds_df.to_csv(RESULTS_CSV)

# Final save: without it, rows produced after the last multiple-of-50 checkpoint
# would never reach disk.
preds_df = pd.DataFrame.from_dict(preds, orient='columns')
preds_df.to_csv(RESULTS_CSV)