In [None]:
# Fetch the project repository and make its `nemojte` directory the working
# directory; later cells use paths relative to it (e.g. "../datasets/...").
# NOTE(review): re-running this cell without restarting the runtime will clone
# again relative to the new working directory — confirm this is acceptable.
!git clone https://github.com/bpuvaca/irony-detection-tar2024.git
%cd irony-detection-tar2024/nemojte/

In [None]:
!pip install emoji

In [None]:
import Loader

# Read the iSarcasm CSV into parallel `tweets` / `labels` collections, with
# hashtag removal enabled and class balancing disabled.
# NOTE(review): the file is the *test* split but dataset_type is 'train' —
# confirm this is the intended parsing mode for this file.
tweets, labels = Loader.parse_dataset(
    fp="../datasets/iSarcasm/sarcasm_test.csv",
    remove_hashtags=True,
    balance=False,
    dataset_type='train',
)

In [None]:
# Install unsloth and its companion packages, choosing the package set from
# the CUDA compute capability reported by torch.
# NOTE(review): get_device_capability() presumably fails when no CUDA device
# is available — this cell assumes a GPU runtime.
import torch
# Only `major_version` is used below; `minor_version` is not read in this cell.
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    # This branch additionally installs packaging, ninja, einops and flash-attn.
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
# No-op statement; it has no effect on the installs above.
pass

In [None]:
from unsloth import FastLanguageModel

# --- Loading options -------------------------------------------------------
# Maximum sequence length; any value may be chosen (RoPE scaling is handled
# internally by unsloth).
max_seq_length = 2048
# None lets the dtype be auto-detected; otherwise Float16 suits Tesla T4 / V100
# and Bfloat16 suits Ampere or newer.
dtype = None
# 4-bit quantization lowers memory usage; may be set to False.
load_in_4bit = True

# --- Model + tokenizer -----------------------------------------------------
model, tokenizer = FastLanguageModel.from_pretrained(
    # Checkpoint to load — put the model you used for training here.
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Switch the model to unsloth's native (2x faster) inference mode.
FastLanguageModel.for_inference(model)

In [None]:
# Prompt template with three positional `{}` slots, filled in this order:
#   1. the definition of sarcasm, 2. the tweet text, 3. the response.
# The template begins and ends with a newline.
sarcasm_prompt = "\n".join([
    "",
    "### Instruction:",
    'Analyze the following tweet to determine if it is sarcastic. For this task, we define sarcasm as {}. Respond with a one-word answer: "Yes" if the tweet is sarcastic, or "No" if it is not.',
    "",
    "### Input:",
    "{}",
    "",
    "### Response:",
    "{}",
    "",
])

# Definition of sarcasm substituted into the first slot of the template.
# NOTE(review): the name suggests it is taken from the Cambridge Dictionary — confirm the source.
sarcasm_definition_cambridge = (
    "the use of remarks that clearly mean the opposite of what they say, "
    "made in order to hurt someone's feelings or to criticize something "
    "in a humorous way"
)

In [None]:
def generate_predictions(tweets, definition, max_new_tokens=5, completion_only=False):
  """Run the sarcasm prompt through the model once per tweet and collect the decoded output.

  Each tweet is inserted into ``sarcasm_prompt`` together with ``definition``
  (the response slot is left empty), tokenized, moved to the GPU and passed to
  ``model.generate``. Tweets are processed one at a time, in order.

  Args:
    tweets: Iterable of tweet strings to classify.
    definition: Definition of sarcasm substituted into the prompt.
    max_new_tokens: Maximum number of tokens generated per tweet. Defaults to
      5, the value that was previously hard-coded.
    completion_only: When False (default, original behavior) each returned
      string is the decoding of the whole output sequence, i.e. the echoed
      prompt, any special tokens, and the generated answer. When True, only
      the newly generated tokens are decoded, special tokens are dropped and
      surrounding whitespace is stripped, so the string is just the answer.

  Returns:
    A list with one decoded string per tweet, in input order.
  """
  predictions = []
  for tweet in tweets:
    prompt = sarcasm_prompt.format(definition, tweet, "")
    inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens = max_new_tokens)
    if completion_only:
      # For a decoder-only model the generated sequence starts with the prompt
      # tokens, so skip them to keep only what the model produced.
      prompt_length = inputs["input_ids"].shape[1]
      new_tokens = outputs[:, prompt_length:]
      text = tokenizer.batch_decode(new_tokens, skip_special_tokens = True)[0].strip()
    else:
      text = tokenizer.batch_decode(outputs)[0]
    predictions.append(text)
  return predictions


In [None]:
# Query the model for every tweet using the Cambridge-named definition.
predictions = generate_predictions(tweets=tweets, definition=sarcasm_definition_cambridge)

# Print each example next to its gold label and the raw model output.
for example in zip(tweets, labels, predictions):
  tweet, label, prediction = example
  print(f"Tweet: {tweet}\nLabel: {label}\nPrediction: {prediction}\n")

In [None]:
import csv

def save_to_csv(tweets, labels, predictions, filename="isarcasm_predictions.csv"):
  """Write tweets, labels and predictions side by side to a CSV file.

  The file is (over)written as UTF-8 with a header row
  ``Tweet,Label,Prediction`` followed by one row per example. The three
  inputs are combined with ``zip``, so rows stop at the shortest of them.

  Args:
    tweets: Iterable of tweet texts.
    labels: Iterable of labels, aligned with ``tweets``.
    predictions: Iterable of model outputs, aligned with ``tweets``.
    filename: Destination path of the CSV file.
  """
  header = ['Tweet', 'Label', 'Prediction']
  rows = zip(tweets, labels, predictions)
  # newline='' lets the csv module control line endings itself.
  with open(filename, 'w', newline='', encoding='utf-8') as out_file:
    csv_writer = csv.writer(out_file)
    csv_writer.writerow(header)
    csv_writer.writerows(rows)

# Persist the results to "isarcasm_predictions.csv" in the current working directory.
save_to_csv(tweets, labels, predictions, filename="isarcasm_predictions.csv")