In [None]:
!pip install bitsandbytes

In [None]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension

In [None]:
# Input prompt pickles and output result pickles, grouped per task.
# All paths are relative to the notebook's working directory.
summary_input_path, summary_output_path = (
    'prompts/summary_prompts.pkl',
    'output/summary_medium_ubuntu.pkl',
)
description_input_path, description_output_path = (
    'prompts/description_prompts.pkl',
    'output/description_medium_ubuntu.pkl',
)

In [None]:
# If the import below fails, install or upgrade the client first:
# %pip install --upgrade huggingface_hub

import os

from huggingface_hub import login

# Do not paste the access token into the notebook, where it would be saved and
# shared with the file. Read it from the HF_TOKEN environment variable instead;
# when the variable is unset, token=None makes login() ask for it interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, GenerationConfig
import transformers
from tqdm.notebook import tqdm 
import torch

# Checkpoint identifier shared by the model and tokenizer loads below.
model_name = "meta-llama/Llama-2-13b-chat-hf"

# First CUDA device when one is available, CPU otherwise.
# NOTE(review): no visible cell reads `device` afterwards; the model placement
# below is driven by device_map instead - confirm whether it is still needed.
has_cuda = torch.cuda.is_available()
device = torch.device("cuda:0" if has_cuda else "cpu")

# Load the weights in float16.
# NOTE(review): device_map {"": 0} appears to place the whole model on device
# index 0 regardless of `device` - confirm this is intended on CPU-only hosts.
model_load_options = {
    "torch_dtype": torch.float16,
    "device_map": {"": 0},
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_options)

# Tokenizer that matches the checkpoint loaded above.
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
# NOTE(review): `model` is an instantiated object here, so the torch_dtype and
# device_map hints may have no effect on it - confirm against the installed
# transformers version.
pipeline_options = dict(
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)
pipeline = transformers.pipeline("text-generation", **pipeline_options)

In [None]:
!pip install evaluate

# SUMMARIES

In [None]:
import pickle

# Read the pickled summary prompts from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open prompt files that come from a trusted source.
with open(summary_input_path, mode='rb') as prompt_file:
    summary_samples = pickle.load(prompt_file)

In [None]:
# Spot-check one loaded summary prompt (index 2).
# Assumes summary_samples is an indexable sequence with at least three entries.
print(summary_samples[2])

In [None]:
# Generate one completion per summary prompt and keep only the text that
# follows the "[SUMMARY]:" tag in the model's answer.
summaries = list()
summary_tag = "[SUMMARY]:"

for s in summary_samples:

    # top_k=1 leaves a single candidate token per step, so the sampling is
    # effectively greedy. max_length bounds prompt and generated tokens together.
    sequences = pipeline(
        s,
        do_sample=True,
        top_k=1,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=4096,
    )

    for seq in sequences:
        # The slice assumes generated_text starts with the prompt itself.
        out = seq['generated_text'][len(s):]

        # str.find returns -1 when the tag is absent. Adding len(tag) to that
        # result used to cut off the first characters of the completion, so
        # fall back to the whole completion when no tag was produced.
        tag_pos = out.find(summary_tag)
        idx = tag_pos + len(summary_tag) if tag_pos != -1 else 0

        summaries.append(out[idx:])
    


In [None]:
import os

# open(..., 'wb') does not create missing parent folders, so make sure the
# destination directory exists before writing the generated summaries.
os.makedirs(os.path.dirname(summary_output_path) or ".", exist_ok=True)

# Persist the list of generated summaries as a pickle file.
with open(summary_output_path, 'wb') as f:
    pickle.dump(summaries, f)

# DESCRIPTIONS

In [None]:
import pickle

# Read the pickled description prompts from disk.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only open prompt files that come from a trusted source.
with open(description_input_path, mode='rb') as prompt_file:
    desc_samples = pickle.load(prompt_file)

In [None]:
# Show how many description prompts were loaded.
print(len(desc_samples))

In [None]:
# Spot-check one loaded description prompt (index 1).
# Assumes desc_samples is an indexable sequence with at least two entries.
print(desc_samples[1])

In [None]:
# Generate one completion per description prompt and keep only the text that
# follows the "[DESCRIPTION]:" tag in the model's answer.
description = list()
description_tag = "[DESCRIPTION]:"

for d in desc_samples:

    # top_k=1 leaves a single candidate token per step, so the sampling is
    # effectively greedy. max_length bounds prompt and generated tokens together.
    sequences = pipeline(
        d,
        do_sample=True,
        top_k=1,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
        max_length=4096,
    )

    for seq in sequences:
        # The slice assumes generated_text starts with the prompt itself.
        out = seq['generated_text'][len(d):]

        # str.find returns -1 when the tag is absent. Adding len(tag) to that
        # result used to cut off the first characters of the completion, so
        # fall back to the whole completion when no tag was produced.
        tag_pos = out.find(description_tag)
        idx = tag_pos + len(description_tag) if tag_pos != -1 else 0

        description.append(out[idx:])

In [None]:
import os

# open(..., 'wb') does not create missing parent folders, so make sure the
# destination directory exists before writing the generated descriptions.
os.makedirs(os.path.dirname(description_output_path) or ".", exist_ok=True)

# Persist the list of generated descriptions as a pickle file.
with open(description_output_path, 'wb') as f:
    pickle.dump(description, f)