In [None]:
!pip install accelerate==0.21.0 \
  bitsandbytes==0.40.2 \
  peft==0.5.0 \
  transformers==4.34.0 \
  sentencepiece


In [None]:
from PIL import Image
import transformers
from transformers import BlipProcessor, BlipForConditionalGeneration, T5ForConditionalGeneration, T5Tokenizer
import numpy as np
import os
import re
import pandas as pd
import gc

from tqdm.auto import tqdm
# Register tqdm's pandas integration; the captioning cell later calls
# `progress_apply` on a Series.
tqdm.pandas()

import torch
# Last expression of the cell: displays whether a CUDA device is visible.
# Later cells move the models to "cuda" unconditionally.
torch.cuda.is_available()

In [2]:
# Root of the attached dataset; change this single constant to run elsewhere.
DATA_DIR = '/kaggle/input/messages-dataset/Новая папка'

# Chat data; later cells read its `last_words` and `sticker_name` columns.
df = pd.read_json(os.path.join(DATA_DIR, 'chat_data.json'))
# Directory of sticker images; `image_captioning` joins it with each sticker file name.
path = os.path.join(DATA_DIR, 'stickers')

In [3]:
def clean_text(text):
    """Strip unwanted symbols from ``text`` and normalise its whitespace.

    Every character that is not a word character, whitespace, or one of
    ``. , ! ? ; : " ' -`` is removed. The remainder is split on whitespace
    and re-joined with single spaces, which also trims both ends.
    """
    kept = re.sub(r'[^\w\s.,!?;:"\'-]', '', text)
    words = kept.split()
    return ' '.join(words)

# Normalise the message text and write it back to the same column;
# later cells read the cleaned values.
cleaned_last_words = df['last_words'].map(clean_text)
df['last_words'] = cleaned_last_words

In [4]:
# df['len_words'].hist(bins=40)
# df['len_words'].describe()

In [None]:
# Hub checkpoints: a BLIP image-captioning model and a T5 translation model.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-large"
TRANSLATOR_CHECKPOINT = 'utrobinmv/t5_translate_en_ru_zh_small_1024'

# Captioning processor and model (model moved to the GPU)
img_processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
img_model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)
img_model = img_model.to("cuda")

# Translation model (moved to the GPU) and its tokenizer
text_model = T5ForConditionalGeneration.from_pretrained(TRANSLATOR_CHECKPOINT)
text_model = text_model.to("cuda")
text_tokenizer = T5Tokenizer.from_pretrained(TRANSLATOR_CHECKPOINT)

# Task prefix prepended to every caption before it is passed to the translator
prefix = 'translate to ru: '

In [None]:
def image_captioning(image_path):
    """Caption a sticker image with BLIP and return the T5 translation of that caption.

    Args:
        image_path: File name of the sticker, joined onto the notebook-level
            ``path`` directory.

    Returns:
        str: The first decoded sequence produced by the translation model for
        ``prefix + caption``.

    Relies on the notebook-level ``img_processor``, ``img_model``,
    ``text_tokenizer``, ``text_model``, ``prefix`` and ``path``. Inputs are
    moved to ``"cuda"``, matching where the models were placed.
    """
    # Open inside a context manager so the file handle is closed deterministically;
    # convert() returns an independent, fully loaded RGB image.
    with Image.open(os.path.join(path, image_path)) as image_file:
        raw_image = image_file.convert('RGB')

    # Image -> caption
    pixel_inputs = img_processor(raw_image, return_tensors="pt").to("cuda")
    caption_ids = img_model.generate(**pixel_inputs)
    caption = img_processor.decode(caption_ids[0], skip_special_tokens=True)

    # Caption -> translation
    encoded = text_tokenizer(prefix + caption, return_tensors="pt").to("cuda")
    translated_ids = text_model.generate(**encoded)
    translations = text_tokenizer.batch_decode(translated_ids, skip_special_tokens=True)
    return translations[0]

# Caption and translate every sticker (two model generations per row), with a progress bar.
df['caption'] = df['sticker_name'].progress_apply(image_captioning)
# Preview only the first rows instead of rendering the whole frame.
df.head()

In [27]:
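# Optional cleanup (disabled): drop the captioning/translation objects once the
# captions are computed. If re-enabled, call gc.collect() after the `del`s so the
# released objects can actually be collected.
# gc.collect()
# del img_model
# del img_processor
# del text_model
# del text_tokenizer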

In [12]:
# !pip install -q transformers accelerate

In [None]:
pip install peft

In [None]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Hub id passed to the PEFT config/adapter, tokenizer and generation-config loaders.
MODEL_NAME = "IlyaGusev/saiga_mistral_7b"
# Per-message chat markup; formatted with each message's `role` and `content`.
DEFAULT_MESSAGE_TEMPLATE = "<s>{role}\n{content}</s>"
# Opening of the bot turn, appended after the last message when a prompt is built.
DEFAULT_RESPONSE_TEMPLATE = "<s>bot\n"
# System instruction (Russian). English: "You generate, from a set of dialogues,
# a coherent prompt for subsequent image generation."
DEFAULT_SYSTEM_PROMPT = "Ты генерируешь по набору диалогов связный промпт для последующей генерации картинок"

In [None]:
# Start the chat history with the system instruction, then add one user/bot
# exchange per row: the message text as the user turn, the sticker caption as the bot turn.
messages = [{"role": "system", "content": DEFAULT_SYSTEM_PROMPT}]
for last_words, caption in zip(df['last_words'], df['caption']):
    messages += [
        {"role": "user", "content": last_words},
        {"role": "bot", "content": caption},
    ]

In [None]:
class Conversation:
    """Chat history plus the templates used to render it into a model prompt.

    A new instance starts from a copy of the notebook-level ``messages`` list
    (or of ``initial_messages`` when given), so appending to one conversation
    never changes that shared list or any other conversation.
    """

    def __init__(
        self,
        message_template=DEFAULT_MESSAGE_TEMPLATE,
        response_template=DEFAULT_RESPONSE_TEMPLATE,
        initial_messages=None
    ):
        """Set up the templates and the starting history.

        Args:
            message_template: Format string with ``{role}`` and ``{content}``
                fields, applied to every message.
            response_template: Text appended after the last message when the
                prompt is rendered.
            initial_messages: Optional iterable of ``{"role", "content"}``
                dicts to start from; defaults to the notebook-level
                ``messages`` list.
        """
        self.message_template = message_template
        self.response_template = response_template
        if initial_messages is None:
            initial_messages = messages
        # Copy the list so add_user_message() cannot mutate the shared history.
        # The message dicts themselves are shared but are never modified here.
        self.messages = list(initial_messages)

    def add_user_message(self, message):
        """Append ``message`` to this conversation as a ``user`` turn."""
        self.messages.append({"role": "user", "content": message})

    def get_prompt(self, tokenizer):
        """Render the history followed by the response template.

        Args:
            tokenizer: Accepted for interface compatibility; not used.

        Returns:
            str: Every message formatted with ``message_template`` and
            concatenated, then ``response_template``, with leading and
            trailing whitespace stripped.
        """
        rendered = "".join(
            self.message_template.format(**message) for message in self.messages
        )
        # Use the instance's template so a custom response_template is honoured.
        return (rendered + self.response_template).strip()


def generate(model, tokenizer, prompt, generation_config):
    """Run ``model`` on ``prompt`` and return only the newly generated text.

    Args:
        model: Object exposing ``device`` and ``generate(**inputs, generation_config=...)``.
        tokenizer: Callable that encodes ``prompt`` and offers ``decode``.
        prompt: Fully rendered prompt; it is encoded with
            ``add_special_tokens=False``.
        generation_config: Passed through to ``model.generate``.

    Returns:
        str: The decoded continuation (special tokens skipped), stripped of
        surrounding whitespace.
    """
    encoded = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)

    # Move every encoded field onto the model's device.
    model_inputs = {}
    for field, tensor in encoded.items():
        model_inputs[field] = tensor.to(model.device)

    prompt_length = len(model_inputs["input_ids"][0])
    sequences = model.generate(**model_inputs, generation_config=generation_config)

    # The first returned sequence starts with the prompt tokens; keep only what follows.
    completion_ids = sequences[0][prompt_length:]
    completion = tokenizer.decode(completion_ids, skip_special_tokens=True)
    return completion.strip()


# The PEFT config names the base checkpoint the adapter was trained on.
config = PeftConfig.from_pretrained(MODEL_NAME)

# Base causal LM in half precision; device placement is delegated to device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    torch_dtype=torch.float16,
    device_map="auto",
)

# Wrap the base model with the adapter weights and put it in evaluation mode.
model = PeftModel.from_pretrained(base_model, MODEL_NAME, torch_dtype=torch.float16)
model.eval()

# Tokenizer (non-fast implementation) and generation settings loaded from the same repo id.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
print(generation_config)

In [13]:
# Build the prompt from the prepared chat history and generate once.
# No loop is needed: nothing here varies between iterations, and no notebook-level
# `inputs` collection is defined for one to iterate over.
conversation = Conversation()
prompt = conversation.get_prompt(tokenizer)

output = generate(model, tokenizer, prompt, generation_config)

In [None]:
# Display the generated text together with the caption of the row at position 2.
output, df['caption'].iloc[2]