# Load all required libs 

In [1]:
! pip install -q --upgrade pip
! pip install -q transformers[torch]
! pip install -q -U transformers==4.38.2 datasets==2.18.0 evaluate rouge_score peft

import os
from kaggle_secrets import UserSecretsClient

os.environ["HF_TOKEN"] = UserSecretsClient().get_secret("HF_TOKEN")

try: 
    import wandb
    wandb.init(mode='disabled')
except: 
    ...
    
# Check the datasets lib version
# It have to be the latest v.
import datasets 

print(datasets.__version__)

2.18.0


# Load all models

In [9]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

import torch
from torch.nn import DataParallel

class SumModel():
  def __init__(cfg, model_preset, **generation_parameters)->None:
    # requires transformers and peft installed libs

    cfg.model_preset = model_preset
    cfg.generation_params = generation_parameters

    config = PeftConfig.from_pretrained(cfg.model_preset)

    model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path)
    cfg.tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

    cfg.lora_model = PeftModel.from_pretrained(model, cfg.model_preset)

    cfg.lora_model.print_trainable_parameters()

    cfg.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    cfg.lora_model.model = DataParallel(cfg.lora_model.model)

  def __call__(self, text, **generation_params):
    tokens = self.tokenizer(text, return_tensors = 'pt', truncation=True, padding=True).to(self.device)
    if len(generation_params):
      gen = self.lora_model.generate(**tokens, **generation_params)
    else:
      gen = self.lora_model.generate(**tokens, **self.generation_params)

    return self.tokenizer.batch_decode(gen)

In [3]:
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

pygmalion_config = PeftConfig.from_pretrained("doublecringe123/pygmalion-dialogsum-empathetic_dialogues_llm")

pygmalion_model = AutoModelForCausalLM.from_pretrained(pygmalion_config.base_model_name_or_path)
pygmalion_tokenizer = AutoTokenizer.from_pretrained(pygmalion_config.base_model_name_or_path)

pygmalion_model = PeftModel.from_pretrained(pygmalion_model, "doublecringe123/pygmalion-dialogsum-empathetic_dialogues_llm")

adapter_config.json:   0%|          | 0.00/445 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.49k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/5.44G [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()


tokenizer_config.json:   0%|          | 0.00/717 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/131 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/205k [00:00<?, ?B/s]

In [28]:
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

import torch
from torch.nn import DataParallel

class ChatModel():
    @staticmethod
    def join_dialogue(d):
      s = ""

      for message in d:
        if message['role'] == 'user':
          s += f"You: {message['content']}\n"
        else:
          s += f"[CHARACTER]: {message['content']}\n"

      return s

    def __init__(cfg, tokenizer, model, chat_summarization_fn, **generation_parameters)->None:
        # requires transformers and peft installed libs
        cfg.tokenizer = tokenizer
        
        cfg.summ = chat_summarization_fn
        cfg.generation_params = generation_parameters
        
        cfg.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        cfg.model = model.to(cfg.device)

    def __call__(self, d, messages_split:int = 6, **generation_params):
        print("0/1 Summarization process..")
        s_text = self.join_dialogue(d[:-messages_split])
        s = self.summ(s_text)
        
        text = f"*HISTORY: {s}* {self.join_dialogue(d[-messages_split:])}"
        print(f"1/1 Summarized Dialogue: {text}")
        
        tokens = self.tokenizer(text, return_tensors = 'pt', truncation=True, padding=True).to(self.device)
        if len(generation_params):
            gen = self.model.generate(**tokens, **generation_params)
        else:
            gen = self.model.generate(**tokens, **self.generation_params)

        return self.tokenizer.batch_decode(gen)

In [29]:
sum_model = SumModel(model_preset = 'doublecringe123/bardt-large-cnn-dialoguesum-booksum-lora',
                 max_length = 96,
                 min_length = 26,
                 do_sample = True,
                 temperature = 0.9,
                 num_beams = 8,
                 repetition_penalty= 2.)
summ = lambda t: sum_model(t)[0]

generation_params = {
    'max_new_tokens' : 50, 
    'do_sample' : True, 
    'temperature' : 0.9, 
    'repetition_penalty': 2.
}

chat_model = ChatModel(tokenizer = pygmalion_tokenizer, 
                       model = pygmalion_model, 
                       chat_summarization_fn = summ,  
                       **generation_params)

trainable params: 0 || all params: 408,059,904 || trainable%: 0.0


In [30]:
d = [
    {"content": "I just tried this new coffee blend, and it's amazing!", "role": "user"},
    {"content": "That's great to hear! What did you like most about it?", "role": "bot"},
    {"content": "The flavor was so rich and smooth, unlike anything I've tasted before.", "role": "user"},
    {"content": "Sounds like a high-quality blend. Did it have any specific notes or undertones?", "role": "bot"},
    {"content": "Yes, it had a hint of chocolate and caramel, which made it really indulgent.", "role": "user"},
    {"content": "Interesting! I love coffees with those flavor profiles. Where did you get it from?", "role": "bot"},
    {"content": "I got it from a local roastery that specializes in unique blends.", "role": "user"},
    {"content": "Supporting local businesses and enjoying great coffee, what a win-win!", "role": "bot"},
    {"content": "Definitely! I'm always on the lookout for new coffee experiences.", "role": "user"},
    {"content": "Have you tried any other interesting blends recently?", "role": "bot"},
    {"content": "Not yet, but I'm planning to explore more options soon. Any recommendations?", "role": "user"},
    {"content": "I recently tried a Ethiopian Yirgacheffe that had floral and fruity notes, really refreshing.", "role": "bot"},
    {"content": "That sounds amazing! I'll add it to my list of coffees to try. Thanks for the suggestion!", "role": "user"},
    {"content": "You're welcome! Enjoy your coffee adventures!", "role": "bot"},
    {"content": "I definitely will. Coffee is my passion!", "role": "user"},
    {"content": "It's always great to meet a fellow coffee enthusiast. Cheers to delicious brews!", "role": "bot"},
    {"content": "Cheers!", "role": "user"},
    {"content": "Have a wonderful day!", "role": "bot"},
    {"content": "You too!", "role": "user"}
]

chat_model(d)

0/1 Summarization process..


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


1/1 Summarized Dialogue: *HISTORY: </s><s><s>. You tells [CHARACTER] that You just tried a new coffee blend with a hint of chocolate and caramel from a local roastery. You's always on the lookout for new coffee experiences.</s>* [CHARACTER]: You're welcome! Enjoy your coffee adventures!
You: I definitely will. Coffee is my passion!
[CHARACTER]: It's always great to meet a fellow coffee enthusiast. Cheers to delicious brews!
You: Cheers!
[CHARACTER]: Have a wonderful day!
You: You too!





["*HISTORY: </s><s><s>. You tells [CHARACTER] that You just tried a new coffee blend with a hint of chocolate and caramel from a local roastery. You's always on the lookout for new coffee experiences.</s>* [CHARACTER]: You're welcome! Enjoy your coffee adventures!\nYou: I definitely will. Coffee is my passion!\n[CHARACTER]: It's always great to meet a fellow coffee enthusiast. Cheers to delicious brews!\nYou: Cheers!\n[CHARACTER]: Have a wonderful day!\nYou: You too!\n[CHARACTER]: That must be very fine, what makes him? What was like our work out he's still enjoy it may sound different things about his first drink in them are so -le at 12 weeks...ating gets here that when you make me or good thing to get yourself. *at least we can't forget this time I am not happy.* That sounds nice but even their drinks all of people love as they don't know her food are good deal. The best drinks, where an espresso, thanks you have never been going drinking. 3 such M m n<|endoftext|>"]

# need to more training :D