In [None]:
# Fetch the project repository and make its `nemojte/` folder the working
# directory; the relative dataset path used later ("../datasets/...") is
# resolved from there.
# NOTE(review): both commands use relative paths, so this cell is meant to be
# run once from the notebook's starting directory.
!git clone https://github.com/bpuvaca/irony-detection-tar2024.git
%cd irony-detection-tar2024/nemojte/

In [None]:
import Loader

# Read the iSarcasm CSV into two parallel sequences: tweet texts and labels.
# NOTE(review): the file is named `sarcasm_test.csv` but is parsed with
# dataset_type='train' -- confirm this is the intended parsing mode.
tweets, labels = Loader.parse_dataset(
    fp="../datasets/iSarcasm/sarcasm_test.csv",
    remove_hashtags=True,
    balance=False,
    dataset_type='train',
)

In [21]:
import torch


In [24]:

# Report the GPU / CUDA / PyTorch setup this notebook is running on.
# The cell is intentionally NOT wrapped in `%%capture`: capturing would swallow
# every print below and the cell would display nothing.
# `torch.cuda.get_device_capability()` returns the GPU's compute capability as
# (major, minor); the CUDA toolkit version is reported separately below.
major_version, minor_version = torch.cuda.get_device_capability()
print(f"CUDA compute capability: {major_version}.{minor_version}")
print("CUDA version", torch.version.cuda)
print("torch version", torch.__version__)

In [None]:
!pip install unsloth
!pip install transformers
!pip install datasets
!pip install accelerate
!pip install evaluate

In [None]:
from unsloth import FastLanguageModel
# Loader settings.
max_seq_length = 2048  # Any value works; RoPE scaling is supported automatically.
dtype = None           # None = auto-detect. Float16 for Tesla T4 / V100, Bfloat16 for Ampere+.
load_in_4bit = True    # 4-bit quantization reduces memory usage; can be set to False.

# Load the model and its tokenizer in one call.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",  # swap in the model you used for training
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

# Turn on the native (2x faster) inference mode.
FastLanguageModel.for_inference(model)

In [28]:
# Prompt template with three positional `{}` slots, filled in order via
# `str.format`:
#   1. the definition of sarcasm (inside the instruction),
#   2. the tweet to classify (under "### Input:"),
#   3. the response (under "### Response:") -- passed as an empty string at
#      inference time so the model writes the answer itself.
sarcasm_prompt = """
### Instruction:
Analyze the following tweet to determine if it is sarcastic. For this task, we define sarcasm as {}. Respond with a one-word answer: "Yes" if the tweet is sarcastic, or "No" if it is not.

### Input:
{}

### Response:
{}
"""

# Definition text substituted into the first slot of the prompt.
# NOTE(review): the name suggests the wording comes from the Cambridge
# Dictionary -- confirm the source.
sarcasm_definition_cambridge = "the use of remarks that clearly mean the opposite of what they say, made in order to hurt someone's feelings or to criticize something in a humorous way"

In [29]:
def generate_predictions(tweets, definition):
  """Ask the model whether each tweet is sarcastic and collect its answers.

  Each tweet is inserted into `sarcasm_prompt` together with `definition`,
  the prompt is tokenized and moved to the GPU, and the model generates up
  to 5 new tokens. Every tweet and its prediction are also printed.

  Args:
    tweets: Iterable of tweet texts to classify.
    definition: Definition of sarcasm placed in the instruction part of the
      prompt.

  Returns:
    A list with one string per tweet containing only the text generated
    after the prompt, with special tokens removed and surrounding whitespace
    stripped.
  """
  predictions = []
  for tweet in tweets:
    prompt = sarcasm_prompt.format(definition, tweet, "")
    inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens = 5)
    # For a decoder-only model `generate` returns the prompt tokens followed
    # by the continuation. Drop the prompt so the stored prediction is the
    # model's answer and not a copy of the whole instruction + tweet.
    prompt_length = inputs["input_ids"].shape[1]
    new_token_ids = outputs[:, prompt_length:]
    prediction = tokenizer.batch_decode(new_token_ids, skip_special_tokens = True)[0].strip()
    predictions.append(prediction)
    print(f"Tweet: {tweet}\nPrediction: {prediction}\n")
  return predictions


In [None]:
# Run the model over every loaded tweet using the Cambridge definition.
predictions = generate_predictions(tweets, sarcasm_definition_cambridge)

# Show each tweet next to its dataset label and the model's output.
for tweet, label, prediction in zip(tweets, labels, predictions):
  print(f"Tweet: {tweet}\nLabel: {label}\nPrediction: {prediction}\n")

In [None]:
import csv

def save_to_csv(tweets, labels, predictions, filename="isarcasm_predictions.csv"):
  """Write tweets, labels and predictions side by side into a CSV file.

  The file is (over)written in UTF-8 and starts with the header row
  `Tweet,Label,Prediction`. The three inputs are paired up positionally;
  pairing stops at the end of the shortest one.

  Args:
    tweets: Tweet texts.
    labels: Labels aligned with `tweets`.
    predictions: Model outputs aligned with `tweets`.
    filename: Path of the CSV file to create.
  """
  header = ['Tweet', 'Label', 'Prediction']
  with open(filename, 'w', newline='', encoding='utf-8') as out_file:
    csv_writer = csv.writer(out_file)
    csv_writer.writerow(header)
    csv_writer.writerows(zip(tweets, labels, predictions))

# Persist the results using the default output file name
# ("isarcasm_predictions.csv", relative to the current working directory).
save_to_csv(tweets, labels, predictions)