In [2]:
!pip install transformers
!pip install datasets

Collecting datasets
  Downloading datasets-2.19.1-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xxhash, dill, multiprocess, datasets
Successfully installed datasets

# Part 1: Zero-shot and one-shot prompting

In [None]:
#Zero Shot Prompting
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import torch
from sklearn.metrics import accuracy_score

model_name = "SmartGitiCorp/Persian_Llama3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Batched inference needs a padding token on both sides: the tokenizer refuses
# `padding=True` without one, and Llama's sequence-classification head rejects
# batch sizes > 1 when `config.pad_token_id` is unset. Fall back to EOS only
# when the checkpoint does not already define a padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

# First 10% of the matched validation split, shuffled with a fixed seed so the
# evaluation order is reproducible.
dataset = load_dataset("multi_nli", split='validation_matched[:10%]').shuffle(seed=42)

def zero_shot_classification(premise, hypothesis, labels=['true', 'false', 'neither']):
    """Pick the candidate label that the model scores highest for a premise/hypothesis pair.

    One input pair ``(premise, "<hypothesis> <label>.")`` is built per candidate
    label and all pairs are scored in a single batch. Each candidate receives one
    scalar score (see below); the scores are normalised across candidates and
    the label of the best-scoring candidate is returned.

    Args:
        premise: Premise text.
        hypothesis: Hypothesis text.
        labels: Candidate label strings; must be non-empty. The default list is
            only read, never mutated.

    Returns:
        The element of ``labels`` whose candidate pair scored highest.

    Raises:
        ValueError: If ``labels`` is empty.

    Note:
        Relies on the notebook-level ``tokenizer`` and ``model``. Batching the
        candidates requires the tokenizer to have a padding token.
    """
    if not labels:
        raise ValueError("labels must contain at least one candidate label")

    pairs = [(premise, f"{hypothesis} {label}.") for label in labels]

    inputs = tokenizer(pairs, padding=True, truncation=True, return_tensors="pt", max_length=512)
    # Keep the inputs on whatever device the model currently lives on.
    inputs = inputs.to(model.device)
    with torch.no_grad():
        logits = model(**inputs).logits  # one row of class logits per candidate

    # Choose which class logit acts as the "this statement is supported" score.
    # Same convention as the Transformers zero-shot pipeline: the class whose
    # name starts with "entail" if the config names one, otherwise the last class.
    label2id = getattr(model.config, "label2id", None) or {}
    support_column = next(
        (idx for name, idx in label2id.items() if str(name).lower().startswith("entail")),
        -1,
    )

    # Compare candidates against each other (dim=0). The previous code took the
    # argmax over classes of the first candidate only and used that class index
    # to index `labels`, so candidates two and three were never considered.
    candidate_scores = torch.softmax(logits[:, support_column], dim=0)
    best_candidate = int(torch.argmax(candidate_scores))
    return labels[best_candidate]

# multi_nli label ids: 0 = entailment, 1 = neutral, 2 = contradiction.
# Expressed in the prompt vocabulary used by zero_shot_classification.
label_map = {0: 'true', 1: 'neither', 2: 'false'}

correct_labels = []
predicted_labels = []

for sample in dataset:
    gold_label = sample['label']
    if gold_label not in label_map:
        # Defensive: ignore examples whose label id is outside the three classes.
        continue

    premise = sample['premise']
    hypothesis = sample['hypothesis']
    prediction = zero_shot_classification(premise, hypothesis)

    correct_labels.append(label_map[gold_label])
    predicted_labels.append(prediction)

accuracy = accuracy_score(correct_labels, predicted_labels)
print("Accuracy on multi_nli validation_matched subset: {:.2f}%".format(accuracy * 100))

In [None]:
# One-shot Prompting
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from datasets import load_dataset
import torch

model_name = "SmartGitiCorp/Persian_Llama3"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# The tokenizer raises when asked to pad without a padding token, and Llama's
# sequence-classification head needs `config.pad_token_id` to locate the last
# real token of padded inputs. Fall back to EOS only when the checkpoint does
# not already define a padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id

# First 1% of the matched validation split, shuffled with a fixed seed so the
# demonstration and query examples are reproducible.
dataset = load_dataset("multi_nli", split='validation_matched[:1%]').shuffle(seed=42)

def one_shot_classification(base_premise, base_hypothesis, base_label, new_premise, new_hypothesis):
    """Classify a premise/hypothesis pair using a prompt that contains one worked example.

    The prompt has two lines: the demonstration (``base_*``) with its label,
    followed by the query (``new_*``) with the label left open. The prompt is
    fed to the notebook-level sequence-classification ``model`` and the arg-max
    class is translated to a label string.

    Args:
        base_premise: Premise of the demonstration example.
        base_hypothesis: Hypothesis of the demonstration example.
        base_label: Label string shown for the demonstration example.
        new_premise: Premise to classify.
        new_hypothesis: Hypothesis to classify.

    Returns:
        ``"true"``, ``"neither"`` or ``"false"``.

    Raises:
        ValueError: If the model predicts a class index beyond the three
            supported labels.
    """
    prompt = (
        f"Example: '{base_premise}' is to '{base_hypothesis}' as '{base_label}'.\n"
        f"Question: '{new_premise}' is to '{new_hypothesis}' as"
    )

    # A single sequence needs no padding. NOTE(review): truncation to 512 tokens
    # presumably drops the end of the prompt (the question) first -- confirm the
    # tokenizer's truncation side if long examples are expected.
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    # Follow the model's device instead of assuming CUDA: the model is loaded
    # without an explicit device move, so forcing 'cuda' mismatched the two.
    inputs = inputs.to(model.device)
    with torch.no_grad():
        logits = model(**inputs).logits

    predicted_label_index = int(torch.argmax(logits, dim=1)[0])

    # Class ids follow multi_nli: 0 = entailment, 1 = neutral, 2 = contradiction.
    class_names = ("true", "neither", "false")
    if predicted_label_index >= len(class_names):
        raise ValueError(
            f"model predicted class {predicted_label_index}, but only "
            f"{len(class_names)} labels are defined"
        )
    return class_names[predicted_label_index]

# multi_nli label ids: 0 = entailment, 1 = neutral, 2 = contradiction.
label_names = {0: "true", 1: "neither", 2: "false"}

# Demonstration example: use its real gold label, otherwise the prompt may
# teach the model a wrong relationship.
base_sample = dataset[0]
base_premise = base_sample['premise']
base_hypothesis = base_sample['hypothesis']
base_label = label_names[base_sample['label']]

# Query example to classify.
new_sample = dataset[1]
new_premise = new_sample['premise']
new_hypothesis = new_sample['hypothesis']

result = one_shot_classification(base_premise, base_hypothesis, base_label, new_premise, new_hypothesis)
print(f"Given '{new_premise}' is to '{new_hypothesis}', the relationship is: {result}")

# Part 2: Low-rank adapter fine-tuning of the sequence-classification model

In [None]:
import torch
from torch import nn
from transformers import LlamaModel, LlamaConfig, LlamaForSequenceClassification
from transformers import Trainer, TrainingArguments
from datasets import load_dataset

class QLoRaLayer(nn.Module):
    """Residual low-rank adapter: ``x + up(down(x))``.

    The input is projected from ``config.hidden_size`` down to ``rank``
    dimensions and back up again (both projections bias-free), and the result
    is added to the input. Despite the class name, no quantization happens in
    this layer.

    Args:
        config: Any object exposing an integer ``hidden_size`` attribute.
        rank: Width of the bottleneck. Defaults to 16, the value that used to
            be hard-coded.

    Raises:
        ValueError: If ``rank`` is not positive.
    """

    def __init__(self, config, rank=16):
        super().__init__()
        if rank <= 0:
            raise ValueError(f"rank must be a positive integer, got {rank}")
        self.rank = rank
        # Attribute names `query` (down-projection) and `key` (up-projection)
        # are kept so previously saved state dicts still load.
        self.query = nn.Linear(config.hidden_size, self.rank, bias=False)
        self.key = nn.Linear(self.rank, config.hidden_size, bias=False)

    def forward(self, hidden_states):
        """Return ``hidden_states`` plus its low-rank update (same shape as the input)."""
        low_rank = self.query(hidden_states)
        return hidden_states + self.key(low_rank)

class LlamaWithQLoRa(nn.Module):
    """Llama sequence classifier with a stack of low-rank adapters before the head.

    The wrapped ``LlamaForSequenceClassification`` is used in two pieces: its
    backbone (``.model``) produces the final hidden states, the adapter stack
    transforms them, and its classification head (``.score``) turns the adapted
    state of the last attended token into class logits. Running the adapters
    *before* the head is what lets the loss depend on them.

    Args:
        model_name: Checkpoint name or path for ``from_pretrained``.
        num_labels: Number of output classes. Defaults to 3 (the three
            multi_nli classes used by this notebook).
    """

    def __init__(self, model_name, num_labels=3):
        super().__init__()
        self.llama = LlamaForSequenceClassification.from_pretrained(model_name, num_labels=num_labels)
        self.config = self.llama.config
        # One adapter per transformer layer in the config; they are applied
        # sequentially to the backbone's final hidden states.
        self.q_lora_layers = nn.ModuleList(
            [QLoRaLayer(self.config) for _ in range(self.config.num_hidden_layers)]
        )

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Compute class logits and, when ``labels`` is given, the cross-entropy loss.

        Args:
            input_ids: Token ids, indexed as ``(batch, sequence)``.
            attention_mask: Optional mask with 1 for real tokens and 0 for
                padding. Without it the last position of every row is pooled.
            labels: Optional class ids, one per row.

        Returns:
            dict with ``"loss"`` (``None`` when no labels are given) and
            ``"logits"`` (one row of class logits per input row).
        """
        backbone_out = self.llama.model(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = backbone_out.last_hidden_state
        for q_lora_layer in self.q_lora_layers:
            sequence_output = q_lora_layer(sequence_output)

        batch_size, seq_len = input_ids.shape[:2]
        device = sequence_output.device
        if attention_mask is not None:
            # Highest position whose mask is 1; works for left and right padding.
            positions = torch.arange(seq_len, device=device)
            mask = attention_mask.to(device=device, dtype=positions.dtype)
            last_token = (mask * positions).argmax(dim=1)
        else:
            last_token = torch.full((batch_size,), seq_len - 1, dtype=torch.long, device=device)

        # A causal model only sees the whole input at the last real token.
        pooled = sequence_output[torch.arange(batch_size, device=device), last_token]
        logits = self.llama.score(pooled)

        loss = None
        if labels is not None:
            loss = nn.functional.cross_entropy(logits, labels.view(-1).to(logits.device))

        return {"loss": loss, "logits": logits}

from transformers import AutoTokenizer, DataCollatorWithPadding

model_name = 'SmartGitiCorp/Persian_Llama3'

# The Trainer feeds `input_ids` / `attention_mask` / `labels` to the model, so
# the raw premise/hypothesis text has to be tokenized first.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Needed by the padding collator below when the checkpoint defines no pad token.
    tokenizer.pad_token = tokenizer.eos_token


def tokenize_pairs(batch):
    """Tokenize a batch of premise/hypothesis pairs (padding is left to the collator)."""
    return tokenizer(batch['premise'], batch['hypothesis'], truncation=True, max_length=512)


dataset = load_dataset("multi_nli")
train_set = dataset['train'].map(tokenize_pairs, batched=True)
val_set = dataset['validation_matched'].map(tokenize_pairs, batched=True)

model = LlamaWithQLoRa(model_name)

# Freeze the pretrained network; only the adapters are trained ...
for param in model.llama.parameters():
    param.requires_grad = False
# ... plus the classification head, which is presumably newly initialised for
# this task rather than loaded from the checkpoint -- TODO confirm.
for param in model.llama.score.parameters():
    param.requires_grad = True

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    evaluation_strategy="steps",
    # Without an explicit value, eval_steps falls back to logging_steps (10),
    # i.e. a full validation pass every 10 optimizer steps. Tune as needed.
    eval_steps=2000,
    logging_dir='./logs',
    logging_steps=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=val_set,
    # Pads each batch to its longest sequence and renames `label` -> `labels`.
    data_collator=DataCollatorWithPadding(tokenizer)
)

trainer.train()

eval_results = trainer.evaluate()
print(eval_results)

# Part 3: Low-rank adapters with a separate linear classification head


In [None]:
import torch
from torch import nn
from transformers import LlamaModel, LlamaConfig, Trainer, TrainingArguments
from datasets import load_dataset

class QLoRaLayer(nn.Module):
    """Residual low-rank adapter: ``x + up(down(x))``.

    The input is projected from ``config.hidden_size`` down to ``rank``
    dimensions and back up again (both projections bias-free), and the result
    is added to the input. Despite the class name, no quantization happens in
    this layer.

    Args:
        config: Any object exposing an integer ``hidden_size`` attribute.
        rank: Width of the bottleneck. Defaults to 16, the value that used to
            be hard-coded.

    Raises:
        ValueError: If ``rank`` is not positive.
    """

    def __init__(self, config, rank=16):
        super().__init__()
        if rank <= 0:
            raise ValueError(f"rank must be a positive integer, got {rank}")
        self.rank = rank
        # Attribute names `query` (down-projection) and `key` (up-projection)
        # are kept so previously saved state dicts still load.
        self.query = nn.Linear(config.hidden_size, self.rank, bias=False)
        self.key = nn.Linear(self.rank, config.hidden_size, bias=False)

    def forward(self, hidden_states):
        """Return ``hidden_states`` plus its low-rank update (same shape as the input)."""
        low_rank = self.query(hidden_states)
        return hidden_states + self.key(low_rank)

class LlamaWithLinearAndQLoRa(nn.Module):
    """Llama backbone followed by low-rank adapters and a linear classification head.

    The backbone's final hidden states pass through the adapter stack; the
    adapted state of the last attended token is then classified by ``classifier``.

    Args:
        model_name: Checkpoint name or path for ``LlamaModel.from_pretrained``.
        num_labels: Number of output classes of the linear head.
    """

    def __init__(self, model_name, num_labels):
        super().__init__()
        self.llama = LlamaModel.from_pretrained(model_name)
        self.config = self.llama.config
        # Kept on the module: the backbone config's own `num_labels` is not
        # updated by this class and need not match the head built below.
        self.num_labels = num_labels
        self.q_lora_layers = nn.ModuleList(
            [QLoRaLayer(self.config) for _ in range(self.config.num_hidden_layers)]
        )
        self.classifier = nn.Linear(self.config.hidden_size, num_labels)

    def forward(self, input_ids, attention_mask=None, labels=None):
        """Compute class logits and, when ``labels`` is given, the cross-entropy loss.

        Args:
            input_ids: Token ids, indexed as ``(batch, sequence)``.
            attention_mask: Optional mask with 1 for real tokens and 0 for
                padding. Without it the last position of every row is pooled.
            labels: Optional class ids, one per row.

        Returns:
            dict with ``"loss"`` (``None`` when no labels are given) and
            ``"logits"`` (one row of class logits per input row).
        """
        # Only the final hidden state is needed, so the per-layer states are
        # not requested.
        outputs = self.llama(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        for q_lora_layer in self.q_lora_layers:
            sequence_output = q_lora_layer(sequence_output)

        batch_size, seq_len = input_ids.shape[:2]
        device = sequence_output.device
        if attention_mask is not None:
            # Highest position whose mask is 1; works for left and right padding.
            positions = torch.arange(seq_len, device=device)
            mask = attention_mask.to(device=device, dtype=positions.dtype)
            last_token = (mask * positions).argmax(dim=1)
        else:
            last_token = torch.full((batch_size,), seq_len - 1, dtype=torch.long, device=device)

        # With causal attention the first token never sees the rest of the
        # input, so the last real token is pooled instead of position 0.
        pooled = sequence_output[torch.arange(batch_size, device=device), last_token]
        logits = self.classifier(pooled)

        loss = None
        if labels is not None:
            loss_fct = nn.CrossEntropyLoss()
            # Reshape by the head's actual width, not the backbone config's num_labels.
            loss = loss_fct(logits.view(-1, logits.size(-1)), labels.view(-1).to(logits.device))

        return {"loss": loss, "logits": logits}

from transformers import AutoTokenizer, DataCollatorWithPadding

model_name = 'SmartGitiCorp/Persian_Llama3'
num_labels = 3

# The Trainer feeds `input_ids` / `attention_mask` / `labels` to the model, so
# the raw premise/hypothesis text has to be tokenized first.
tokenizer = AutoTokenizer.from_pretrained(model_name)
if tokenizer.pad_token is None:
    # Needed by the padding collator below when the checkpoint defines no pad token.
    tokenizer.pad_token = tokenizer.eos_token


def tokenize_pairs(batch):
    """Tokenize a batch of premise/hypothesis pairs (padding is left to the collator)."""
    return tokenizer(batch['premise'], batch['hypothesis'], truncation=True, max_length=512)


dataset = load_dataset("multi_nli")
train_set = dataset['train'].map(tokenize_pairs, batched=True)
val_set = dataset['validation_matched'].map(tokenize_pairs, batched=True)

model = LlamaWithLinearAndQLoRa(model_name, num_labels)

# Freeze the pretrained backbone; the adapters and the linear classifier live
# outside `model.llama` and stay trainable.
for param in model.llama.parameters():
    param.requires_grad = False

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    evaluation_strategy="steps",
    # Without an explicit value, eval_steps falls back to logging_steps (10),
    # i.e. a full validation pass every 10 optimizer steps. Tune as needed.
    eval_steps=2000,
    logging_dir='./logs',
    logging_steps=10
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_set,
    eval_dataset=val_set,
    # Pads each batch to its longest sequence and renames `label` -> `labels`.
    data_collator=DataCollatorWithPadding(tokenizer)
)

trainer.train()

eval_results = trainer.evaluate()
print(eval_results)