## Model Setup

In [None]:
import torch
from langchain.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Local checkpoint directory shared by the tokenizer and the model.
MODEL_PATH = "./models/Llama2-32K/"

tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

# Load the model with 8-bit weights, automatic device placement and float16
# as the requested torch dtype.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    # trust_remote_code=True,
    load_in_8bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
)

# Greedy decoding is requested explicitly. `temperature=0.0` is not a valid
# sampling temperature in transformers (it must be strictly positive), and
# sampling knobs such as `temperature` / `top_p` only take effect when
# `do_sample=True`, so they are dropped instead of being silently ignored.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.15,
)

# Wrap the Hugging Face pipeline so LangChain chains can use it as an LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:05<00:00,  2.84s/it]


## Summary Evaluation

In [None]:
%%capture
from datasets import load_dataset
# TEXT | SUMMARY | TITLE
billsum = load_dataset("billsum", split="ca_test")  # testi di lunghezza max = 6000 tokens

In [None]:
# Keep only the first 15 examples of the split for the evaluation run.
dataset = billsum.select(range(15))

In [None]:
# Display the selected subset (its features and number of rows).
dataset

Dataset({
    features: ['text', 'summary', 'title'],
    num_rows: 15
})

In [None]:
# Split the text of every dataset row into chunks: `docs` holds one list of
# Document objects per input text.
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=3000, chunk_overlap=200)

docs = [
    [Document(page_content=chunk) for chunk in text_splitter.split_text(row["text"])]
    for row in dataset
]

In [None]:
# Check the maximum length, in tokens, of the chunks obtained from the splitter.
# The result is stored under its own name: assigning to `max` would shadow the
# built-in `max()` for every cell executed afterwards.
max_chunk_tokens = max(
    (
        len(tokenizer.tokenize(chunk.page_content))
        for chunks in docs
        for chunk in chunks
    ),
    default=0,  # no chunks at all -> report 0, as the original loop did
)
max_chunk_tokens

667

In [None]:
from langchain.chains.summarize import load_summarize_chain

# "map_reduce" chain type; switch to "stuff" to pass the whole text to the
# model in one go.
chain = load_summarize_chain(llm=local_llm, chain_type="map_reduce")

In [None]:
%%time
# Eseguo la chain sui chunk ottenuti
llm_summaries = []

for d in docs:
    summary = chain.run(d)
    llm_summaries.append(summary)

CPU times: user 22min 15s, sys: 0 ns, total: 22min 15s
Wall time: 22min 21s


In [None]:
%%time
# SOLO PER LLM CON INPUT WINDOW GRANDI, eseugo la chain sui testi originali
from langchain.docstore.document import Document
llm_summaries = []

for d in dataset:
    text = d["text"]
    doc = [Document(page_content=text)]
    summary = chain.run(doc)
    llm_summaries.append(summary)

 ## Evaluation with ROUGE Score

In [None]:
# Attach the generated summaries as the "new_summ" column.
# Drop a stale "new_summ" column first so the cell can be re-run: adding a
# column whose name already exists raises an error.
if "new_summ" in dataset.column_names:
    dataset = dataset.remove_columns(["new_summ"])
dataset = dataset.add_column("new_summ", llm_summaries)

In [None]:
import evaluate
import numpy as np

rouge = evaluate.load("rouge")

def compute_metrics(dataset):
    """Score the generated summaries against the reference summaries.

    Args:
        dataset: object indexable by column name that provides the generated
            summaries under "new_summ" and the references under "summary",
            both as sequences of strings.

    Returns:
        dict mapping each ROUGE metric name, plus "gen_len" (mean length of
        the generated summaries in tokenizer tokens), to a value rounded to
        4 decimal places.
    """
    predictions = dataset["new_summ"]
    labels = dataset["summary"]

    result = rouge.compute(predictions=predictions, references=labels, use_stemmer=True)

    # The predictions are text, not arrays of token ids: comparing a string
    # with `tokenizer.pad_token_id` yields a single boolean, which made every
    # length count as 1. Measure the length in tokens instead.
    prediction_lens = [len(tokenizer.tokenize(pred)) for pred in predictions]
    # Guard the empty case so the mean of no values does not produce NaN.
    result["gen_len"] = float(np.mean(prediction_lens)) if prediction_lens else 0.0

    return {k: round(v, 4) for k, v in result.items()}

In [None]:
# Compute the ROUGE scores and the generated-length statistic for the dataset.
compute_metrics(dataset)

{'rouge1': 0.3107,
 'rouge2': 0.0944,
 'rougeL': 0.1619,
 'rougeLsum': 0.2406,
 'gen_len': 1.0}