In [1]:
# Check cuda visible devices
import torch

print(torch.cuda.is_available())

print(torch.cuda.device_count())

True
1


In [2]:
%load_ext autoreload
%autoreload 2
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

model_id = "TheBloke/Mixtral-8x7B-Instruct-v0.1-GPTQ"
revision = "gptq-4bit-32g-actorder_True" #"gptq-3bit-128g-actorder_True"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)

#quantization_config = BitsAndBytesConfig(
#    load_in_4bit=True,
#    #bnb_4bit_compute_dtype=torch.float16,
#    #llm_int8_enable_fp32_cpu_offload=True,
#)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    #quantization_config=quantization_config,
    device_map="auto",
    revision=revision,
    #llm_int8_enable_fp32_cpu_offload=True,
)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


config.json:   0%|          | 0.00/2.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/27.4G [00:00<?, ?B/s]



In [3]:
model = model.eval()

In [4]:
from IPython.display import display, clear_output, HTML
import textwrap

prompt = """<s>[INST] Traducir del español al español rioplatense.

español: Hola hombre, ¿cómo estás?
[/INST]
¿Qué onda, chabón? ¿Cómo va?
</s>
[INST]
Traducir del español al rioplatense

español: Vaya tío, eres un cabrón
[/INST]
Che pedazo de pelotudo, sos un forro
</s>
[INST]
Traducir del español al rioplatense

español: Tu puedes hacerlo!
[/INST]
"""
inputs = tokenizer(prompt, return_tensors="pt")
next_token = None
new_tokens = []

with torch.no_grad():
    while next_token != tokenizer.eos_token_id:
        inputs = {k: v.to(model.device) for k, v in inputs.items()}
        output = model.generate(**inputs, max_new_tokens=1, pad_token_id=tokenizer.eos_token_id)

        next_token = output[0][-1].item()
        inputs = {
            "input_ids": output,
            "attention_mask": torch.ones_like(output),
        }
        new_tokens.append(next_token)
        text = tokenizer.decode(new_tokens, skip_special_tokens=True)

        #text = "\n".join(textwrap.wrap(text, width=100))
        clear_output(wait=True)
        display(HTML(text))


In [5]:
from datasets import load_dataset

sample_path = "../data/sample_100.csv"

test_ds = load_dataset("csv", data_files=sample_path)["train"]

test_ds

Dataset({
    features: ['id', 'title', 'text', 'context_tweet', 'HATEFUL', 'body', 'CALLS', 'WOMEN', 'LGBTI', 'RACISM', 'CLASS', 'POLITICS', 'DISABLED', 'APPEARANCE', 'CRIMINAL', 'num_hatred', 'text_label', 'prompt', 'pred_cot'],
    num_rows: 100
})

In [6]:
from pysentimiento.preprocessing import preprocess_tweet as pysent_preprocess
from rioplatense_hs.preprocessing import preprocess_tweet, text_to_label, labels
from rioplatense_hs.mixtral import get_prompt

tokenizer.model_max_length = 8192
tokenizer.pad_token_id = tokenizer.eos_token_id
#
def tokenize(example):
    texto = preprocess_tweet(example["text"])

    contexto = pysent_preprocess(
        example["context_tweet"],
        preprocess_hashtags=False,
        demoji=False,
        preprocess_handles=False,
    )
    model_input = get_prompt(
        context=contexto, text=texto
    )
    return tokenizer(model_input, truncation=True)


tokenized_ds = test_ds.map(tokenize, batched=False)
# prompt: Sort tokenized_ds by length of input_ids
tokenized_ds = tokenized_ds.map(lambda x: {"len": len(x["input_ids"])}, batched=False)

sorted_tokenized_ds = tokenized_ds.sort("len")

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

In [7]:
# decode first example

inputs = sorted_tokenized_ds[0]["input_ids"]

text = tokenizer.decode(inputs, skip_special_tokens=True)

text = text.replace("[INST]", "\n[INST]")
text = text.replace("[/INST]", "[/INST]\n")

print(text)


[INST] Determinar si el siguiente mensaje contiene discurso de odio.
Entendemos que hay discurso de odio en el tweet si tiene declaraciones de carácter intenso e irracional de rechazo, enemistad y aborrecimiento contra un individuo o contra un grupo, siendo objetivos de estas expresiones por poseer una característica protegida. Las características protegidas que contemplamos son:

- mujer: mujeres o movimiento feminista
- lgbti: contra gays, lesbianas, transexuales y otras identidades de género
- racismo: inmigrantes, xenofobia, o contra pueblos aborígenes
- clase: personas de bajos recursos o motivos de clase
- discapacidad: discapacidad, problemas de salud mental o de adicción al alcohol u otras drogas
- política: ideología política
- aspecto: aspecto, gordofobia o edad
- criminal: presos o delincuentes comunes

Responder una o varias de las características separadas por coma, o bien "nada" si no hay discurso de odio. Pensar paso a paso la respuesta antes de responder. Finalizar la 

In [8]:
# prompt: Build a dataloader with a Causal LLM collator

from torch.utils.data import DataLoader
from transformers import DataCollatorForLanguageModeling
batch_size = 16
collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)

def collate(inputs):
    attention = [ex["attention_mask"] for ex in inputs]
    input_ids = [ex["input_ids"] for ex in inputs]
    ids = [ex["id"] for ex in inputs]
    return ids, tokenizer.pad({"input_ids": input_ids, "attention_mask": attention}, return_tensors="pt")


dataloader = DataLoader(
    sorted_tokenized_ds,
    batch_size=batch_size,
    collate_fn=collate,
    shuffle=False,
    pin_memory=True,
    num_workers=16,
)


In [10]:
from auto_gptq import exllama_set_max_input_length
model = exllama_set_max_input_length(model, max_input_length=28768)

In [11]:
ids, inputs = next(iter(dataloader))

inputs = {k: v.to(model.device) for k, v in inputs.items()}

outs = model.generate(**inputs, max_new_tokens=512)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [13]:
print(tokenizer.decode(outs[0]))

</s></s></s></s></s></s></s></s></s></s></s></s></s><s> [INST] Determinar si el siguiente mensaje contiene discurso de odio.
Entendemos que hay discurso de odio en el tweet si tiene declaraciones de carácter intenso e irracional de rechazo, enemistad y aborrecimiento contra un individuo o contra un grupo, siendo objetivos de estas expresiones por poseer una característica protegida. Las características protegidas que contemplamos son:

- mujer: mujeres o movimiento feminista
- lgbti: contra gays, lesbianas, transexuales y otras identidades de género
- racismo: inmigrantes, xenofobia, o contra pueblos aborígenes
- clase: personas de bajos recursos o motivos de clase
- discapacidad: discapacidad, problemas de salud mental o de adicción al alcohol u otras drogas
- política: ideología política
- aspecto: aspecto, gordofobia o edad
- criminal: presos o delincuentes comunes

Responder una o varias de las características separadas por coma, o bien "nada" si no hay discurso de odio. Pensar pas

: 

In [None]:
from tqdm.auto import tqdm


for ids, inputs in tqdm(dataloader):
    output = model.generate(**inputs, max_new_tokens=150)

    for k, id in enumerate(ids):
        output_text = tokenizer.decode(output[k], skip_special_tokens=True)

        #with open(path+"/{}.txt".format(id), "w") as f:
        #    f.write(output_text)

    #for _ in range(4):
    #    print("="*80)
    #print(f"contexto: {contexto}")
    #print(f"texto: {texto}")
    #print(output_text)
