In [1]:
import sys

sys.path.insert(0, '..')
import time
from models.clr import CLR
from llm2vec.models import LlamaBiModel
from transformers import AutoTokenizer
from torch.utils.data import DataLoader, random_split
from transformers import Trainer, TrainingArguments, AutoModelForSequenceClassification
from transformers import get_scheduler
from torch.optim import AdamW   
import torch
from tqdm.auto import tqdm
import torch
import json
from torch.utils.data import Dataset
from pathlib import Path
from transformers import AutoTokenizer
from transformers import AutoModelForCausalLM
from dataloader import BiasDataset, custom_collate_fn, TransformModule
from collections import defaultdict
from ollama import chat
from ollama import ChatResponse
from ollama import Client
client = Client(
  host='http://localhost:7000',
  headers={'x-some-header': 'some-value'}
)


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
class CLRTransformModule:
    def __init__(
            self, 
            tokenizer,
            client,
            max_length=512,
            bias_threshold=0.5,
            negative_samples=5,
            positive_samples=1,
            device=None,
        ):
        """
        Args:
        - model_name (str): Model to use as tokenizer and model
        - max_length (int): Maximum length for padding/truncating sequences
        - bias_threshold (float): Threshold to classify a sentence as biased
        """
        self.tokenizer = tokenizer
        self.client = client
        
        if device is None:
            self.device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
        else:
            self.device = device

        self.max_length = max_length
        self.bias_threshold = bias_threshold
        self.negative_samples = negative_samples
        self.positive_samples = positive_samples


    def generate_positive_negative_samples(self, text):
        """
        Generate a more biased (negative) or less biased (positive) version of the input text using a language model

        Args:
            - text: Text of any arbitrary size

        Return:
            - dict: positive_text and negative_text keys with list of tokens as values.
        """
        # print('text', text)
        positive_list = []
        # or access fields directly from the response object
        for i in range(self.positive_samples):
            response = self.client.chat(model='deepseek-r1:8b', messages=[
                {
                    'role': 'system',
                    'content': """
                        Generate a paraphrase of the following article.
                    """,
                },
                {
                    'role': 'user',
                    'content': text,
                }
            ])
            positive_text = response['message']['content']
            print('positive_text', positive_text)
            # positive_text = positive_text.split("Generation begins here.")[-1]
            # print('positive_text', positive_text)
            positive_list.append(positive_text)

        negative_list = []
        for i in range(self.negative_samples):
            response = self.client.chat(model='deepseek-r1:8b', messages=[
                {
                    'role': 'system',
                    'content': """
                        Generate a biased overview of the user article,
                        giving a biased interpretation its ideas and main points.
                    """,
                },
                {
                    'role': 'user',
                    'content': text,
                }
            ])
            negative_text = response['message']['content']
            print('negative_text', negative_text)
            # positive_text = positive_text.split("Generation begins here.")[-1]
            # print('positive_text', positive_text)
            negative_list.append(negative_text)

        return {
            'positive': positive_list,
            'negative': negative_list
        }

    def hierarchical_representation(self, text):
        """
        Create hierarchical representations for scale invariance (e.g., sentence-level, document-level)
        """
        # return {
        #     "word_level": text.split(),
        #     "sentence_level": text.split(". "),
        #     "document_level": [text]
        # }
        return {
            "paragraphs": text.split('\n'),
        }

    def __call__(self, text):
        """
        Main function to call for applying transformations.

        Returns:
            - transformed_data: dictionary of positive and negative examples.
                transformed_data = {
                        "positive_sample": {
                            'input_ids': encoding['input_ids'].squeeze(0),  # ensure this is 2-dim
                            'attention_mask': encoding['attention_mask'].squeeze(0),  # 2-dim also
                            'labels': label_tensor  # 1-dim tensor (also just 1 value ig)
                        },
                        "negative_sample": {
                            'input_ids': encoding['input_ids'].squeeze(0),  # ensure this is 2-dim
                            'attention_mask': encoding['attention_mask'].squeeze(0),  # 2-dim also
                            'labels': label_tensor  # 1-dim tensor (also just 1 value ig)
                        },
                    # "hierarchical_representation": encoded_hierarchy
                }

        """
        # Create positive/negative samples
        samples = self.generate_positive_negative_samples(text)

        # re-encode them into tokens. TODO: not do this
        encoded_samples = {
            "positive": self.tokenizer(samples["positive"], truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt"),
            "negative": self.tokenizer(samples["negative"], truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt")
        }
        
        # Create hierarchical representation (not now)
        # hierarchy = self.hierarchical_representation(text)
        # encoded_hierarchy = {
        #     "word_level": self.tokenizer(" ".join(hierarchy["word_level"]), truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt"),
        #     "sentence_level": self.tokenizer(" ".join(hierarchy["sentence_level"]), truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt"),
        #     "document_level": self.tokenizer(hierarchy["document_level"][0], truncation=True, padding="max_length", max_length=self.max_length, return_tensors="pt")
        # }
        
        # Temporary return format -> TODO: Decide how we want to implement this when loading data
        per_anchor = {
            "positive_sample": encoded_samples["positive"],
            "negative_sample": encoded_samples["negative"],
            # "hierarchical_representation": encoded_hierarchy
        }

        return per_anchor


class BiasDataset(Dataset):
    def __init__(self, root, tokenizer, transforms=None, max_length=512):
        """
        Args:
            data (list of dicts): Each dict contains {'text': str, 'labels': dict}.
            tokenizer: Pretrained tokenizer from Hugging Face.
            max_length (int): Max token length for padding/truncation.
        """
        self.root = root
        self.tokenizer = tokenizer
        self.transforms = transforms
        self.max_length = max_length

        self.extract_data()

    @staticmethod
    def create_toy_dataset():
        '''
        Simulating some sample text data with bias attributes
        '''
        data = [{
            "source": "BBC Politics",
            "title": "International Development Minister Anneliese Dodds quits over aid cuts",
            "text": "International Development Minister Anneliese Dodds has resigned over the prime minister's cuts to the aid budget. In a letter to Sir Keir Starmer, Dodds said the cuts to international aid, announced earlier this week to fund an increase in defence spending, would \"remove food and healthcare from desperate people - deeply harming the UK's reputation\". She told the PM she had delayed her resignation until after his meeting with President Trump, saying it was \"imperative that you had a united cabinet behind you as you set off for Washington\". The Oxford East MP, who attended cabinet despite not being a cabinet minister, said it was with \"sadness\" that she was resigning. She said that while Sir Keir had been clear he was not \"ideologically opposed\" to international development, the cuts were \"being portrayed as following in President Trump's slipstream of cuts to USAID\". Ahead of his trip to meet the US president, Sir Keir announced aid funding would be reduced from 0.5% of gross national income to 0.3% in 2027 in order to fund an increase in defence spending. In his reply to Dodds's resignation letter, the prime minister thanked the departing minister for her \"hard work, deep commitment and friendship\". He said cutting aid was a \"difficult and painful decision and not one I take lightly\" adding: \"We will do everything we can...to rebuild a capability on development.\" In her resignation letter, Dodds said she welcomed an increase to defence spending at a time when the post-war global order had \"come crashing down\". She added that she understood some of the increase might have to be paid for by cuts to ODA [overseas development assistance]. However, she expressed disappointment that instead of discussing \"our fiscal rules and approach to taxation\", the prime minister had opted to allow the ODA to \"absorb the entire burden\". She said the cuts would \"likely lead to a UK pull-out from numerous African, Caribbean and Western Balkan nations - at a time when Russia has been aggressively increasing its global presence\". \"It will likely lead to withdrawal from regional banks and a reduced commitment to the World Bank; the UK being shut out of numerous multilateral bodies; and a reduced voice for the UK in the G7, G20 and in climate negotiations.\" The spending cuts mean \u00a36bn less will be spent on foreign aid each year. The aid budget is already used to pay for hotels for asylum seekers in the UK, meaning the actual amount spend on aid overseas will be around 0.15% of gross national income. The prime minister's decision to increase defence spending came ahead of his meeting in Washington - the US president has been critical of European countries for not spending enough on defence and instead relying on American military support. He welcomed the UK's commitment to spend more, but Sir Keir has been attacked by international development charities and some of his own MPs for the move. Dodds held off her announcement until the prime minister's return from Washington, in order not to overshadow the crucial visit, and it was clear she did not want to make things difficult for the prime minister. But other MPs have been uneasy about the decision, including Labour MP Sarah Champion, who chairs the international development committee, who said that cutting the aid budget to fund defence spending is a false economy that would \"only make the world less safe\". Labour MP Diane Abbott, who had been critical of the cuts earlier in the week, said it was \"shameful\" that other ministers had not resigned along with Dodds. Dodds's resignation also highlights that decisions the prime minister feels he has to take will be at odds with some of the views of Labour MPs, and those will add to tensions between the leadership and backbenchers. In a post on X, Conservative leader Kemi Badenoch said: \"I disagree with the PM on many things but on reducing the foreign aid budget to fund UK defence? He's absolutely right. \"He may not be able to convince the ministers in his own cabinet, but on this subject, I will back him.\" However one of her backbenchers - and a former international development minister - Andrew Mitchell backed Dodds, accusing Labour of trying \"disgraceful and cynical actions\". \"Shame on them and kudos to a politician of decency and principle,\" he added. Liberal Democrat international development spokesperson Monica Harding said Dodds had \"done the right thing\" describing the government's position as \"unsustainable. She said it was right to increase defence spending but added that \"doing so by cutting the international aid budget is like robbing Peter to pay Paul\". \"Where we withdraw our aid, it's Russia and China who will fill the vacuum.\" Deputy Prime Minister Angela Rayner said she was \"sorry to hear\" of Dodds's resignation. \"It is a really difficult decision that was made but it was absolutely right the PM and cabinet endorse the PM's actions to spend more money on defence,\" she said. Dodds first became a Labour MP in 2017 when she was elected to represent the Oxford East constituency. Under Jeremy Corbyn's leadership of the Labour Party she served as a shadow Treasury minister and was promoted to shadow chancellor when Sir Keir took over. Following Labour's poor performance in the 2021 local elections, she was demoted to the women and equalities brief. Since July 2024, she has served as international development minister. Dodds becomes the fourth minister to leave Starmer's government, following Louise Haigh, Tulip Siddiq and Andrew Gwynne. Some Labour MPs are unhappy about Tory defector Natalie Elphicke's past comments. The BBC chairman helped him secure a loan guarantee weeks before the then-PM recommended him for the role, a paper says. The minister is accused of using demeaning and intimidating language towards a civil servant when he was defence secretary. Eddie Reeves has been the Conservatives' leader on Oxfordshire County Council since May 2021. Labour chair Anneliese Dodds demands answers over \u00a3450,000 donation from ex-Tory treasurer Sir Ehud Sheleg. Copyright 2025 BBC. All rights reserved.\u00a0\u00a0The BBC is not responsible for the content of external sites.\u00a0Read about our approach to external linking. ",
            "url": "https://www.bbc.com/news/articles/cpv44982jlgo",
            "score": 43
        },
        {
            "source": "BBC Politics",
            "title": "Donald Trump: UK-US trade deal could mean tariffs 'not necessary'",
            "text": "A trade deal between the US and UK could happen \"very quickly\", President Donald Trump said at a joint press conference with Sir Keir Starmer. Speaking during the prime minister's visit to the White House, Trump envisaged \"a real trade deal\" which could see the UK avoid the kind of tariffs the president has been threatening on some of the US's other trading partners. The trip had been seen as a key moment in Sir Keir's premiership as he sought to influence Trump's decisions on topics including Ukraine, as well as trade. Sir Keir kicked off his White House visit by presenting Trump with a letter from King Charles inviting him to an \"unprecedented\" second state visit to the UK. Receiving the letter in front of cameras in the Oval Office, Trump said it would be a \"great honour\" and described the King as \"a wonderful man\". Sir Keir said the offer of a second state visit was \"truly historic\". Traditionally US presidents have only been given one state visit. Having confirmed he would be accepting the invite, Trump, along with Sir Keir took questions from reporters for 30 minutes. The US president did most of the talking, setting out his stance on many subjects including the possibility of a Ukraine deal and the UK's potential agreement with Mauritius over the Chagos Islands. On the plane to the US, Sir Keir reiterated his willingness to send British troops to Ukraine as part of a peace deal. However, he argued that, without US security guarantees, Russian President Vladimir Putin could re-invade Ukraine. Asked if he would provide such assurances, Trump said a minerals agreement he plans to sign with Ukraine on Friday could provide a \"backstop\". He said \"nobody will play around\" if US workers were in the country, as part of the deal on minerals. The US president was pressed on whether he stood by his accusation that Ukrainian President Volodymyr Zelensky was a \"dictator\". \"Did I say that? I can't believe I said that,\" he said. He later added he had \"a lot of respect\" for Zelensky, who he will host in Washington DC on Friday. The UK's planned agreement with Mauritius over the Chagos Islands was one potential source of tension between the UK and US leaders. However, Trump appeared to back the UK's approach saying he was \"inclined to go along with it\". The deal would see the UK cede sovereignty of the Indian Ocean archipelago, but maintain control over the island of Diego Garcia, which includes a US-UK military airbase, by leasing it back. After taking questions in the Oval Office, the two leaders took part in talks and then held a formal press conference, during which Trump repeatedly spoke about a possible US-UK trade deal which could be agreed \"very quickly\". Referring to an economic, rather than a trade deal, Sir Keir said the UK and US  would begin work on an agreement centred on the potential of artificial intelligence. \"Instead of over-regulating these new technologies, we're seizing the opportunities they offer,\" he said. He said the UK and US had shaped the \"great technological innovations of the last century\" and now had the chance to do the same in the 21st Century. \"Artificial intelligence could cure cancer. That could be a moon shot for our age, and that's how we'll keep delivering for our people,\" he said. Trump has repeatedly threatened to impose tariffs - import taxes - on many of its allies, including 25% on goods made in the European Union. He also ordered a 25% import tax on all steel and aluminium entering the US - which could hit the UK. Asked if Sir Keir had tried to dissuade the president from ordering tariffs against the UK, Trump said: \"He tried.\" \"He was working hard I tell you that. He earned whatever the hell they pay him over there,\" he said. \"I think there's a very good chance that in the case of these two great, friendly countries, I think we could very well end up with a real trade deal where the tariffs wouldn't be necessary. We'll see.\" In a bid to convince the president against UK tariffs, Sir Keir said the US-UK trade relationship was \"fair, balanced and reciprocal\". Since leaving the European Union, successive British leaders have hoped to get a general free trade deal with the US. In his first term as president, Trump said talks about a \"very substantial\" trade deal with the UK were under way. However, negotiations stalled with disagreements over US agricultural exports and UK taxes on tech companies causing problems. The head of trade policy at the British Chambers of Commerce - a former Labour MP and minister - told BBC Radio 4's Today programme on Friday that businesses will be encouraged by what he called an \"important first step\". \"In trade negotiations, relationships matter,\" says William Bain, adding that seeing the two leaders find common ground on their respective economies and trade is \"helpful\". He added that a deal to keep tariffs low would most benefit automotive and pharmaceutical industries in the UK. Copyright 2025 BBC. All rights reserved.\u00a0\u00a0The BBC is not responsible for the content of external sites.\u00a0Read about our approach to external linking. ",
            "url": "https://www.bbc.com/news/articles/c7988r3q1p2o",
            "score": 50
        }]
        return data

    def extract_data(self):
        '''
        Extracts data from root folder
        '''
        if Path(self.root).exists():
            with open(self.root) as file:
                self.data = json.load(file)
        else:
            print(f'Path {self.root} not found')
            self.data = BiasDataset.create_toy_dataset()

    def __len__(self):
        '''
        Custom len function
        '''
        return len(self.data)

    def __getitem__(self, idx):
        '''
        Custom getitem function. Since we're passing this to our own custom modules, no need to follow exact
        huggingface return dictionary format.

        Return:
            dictionary of dictionaries.
            {
                'anchor': {
                    'input_ids': encoding['input_ids'].squeeze(0),  # ensure this is 2-dim
                    'attention_mask': encoding['attention_mask'].squeeze(0),  # 2-dim also
                    'labels': label_tensor  # 1-dim tensor (also just 1 value ig)
                },

                'positive_samples': [{
                    'input_ids': encoding['input_ids'].squeeze(0),  # ensure this is 2-dim
                    'attention_mask': encoding['attention_mask'].squeeze(0),  # 2-dim also
                    'labels': label_tensor  # 1-dim tensor (also just 1 value ig)
                }, ...]   # see how its a list of dicts. length is however many positive samples requested.
                ,

                'negative_samples': [{
                    'input_ids': encoding['input_ids'].squeeze(0),  # ensure this is 2-dim
                    'attention_mask': encoding['attention_mask'].squeeze(0),  # 2-dim also
                    'labels': label_tensor  # 1-dim tensor (also just 1 value ig)
                }, ...]   # see how its a list of dicts. length is however many negative samples requested.
                ,

                TODO: Add scalewise represnetations here??

        
            }

            This comprises one item of the dataset. Then, if user asks for batch_size 2, put each item into a list.
            Our own custom collate will then settle the rest. (probably return as is)
        '''
        item = self.data[idx]
        text = item['text']
        # temp_paragraphs = text.split('\n')

        # Tokenize anchor
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        )[0]

        return_dict = defaultdict(list)
        return_dict['anchor'] = encoding

        # wacky
        if self.transforms:
            # for p in temp_paragraphs:
            #     transformed_dict = self.transforms(p)
            #     for key in transformed_dict:
            #         return_dict[key].append(transformed_dict[key])  # if an error is thrown here, L bro
            transformed_dict = self.transforms(text)
            for key in transformed_dict:
                return_dict[key].append(transformed_dict[key])  # if an error is thrown here, L bro

        else:
            labels = {'bias_score': item['score']}  # Dictionary of multi-dimensional bias attributes
            label_tensor = torch.tensor(list(labels.values()), dtype=torch.float32)
            return_dict['labels'] = label_tensor
        
        with open(f'/mnt/e/NTU-DLWeek2025/model_scripts/CLRdataset/{idx}.txt', 'w') as f:
            f.write(return_dict)

In [4]:
# data_tokenizer = AutoTokenizer.from_pretrained('/mnt/e/NTU-DLWeek2025/gpt2')
# data_tokenizer.pad_token_id = data_tokenizer.eos_token_id
# data_model = AutoModelForCausalLM.from_pretrained('/mnt/e/NTU-DLWeek2025/gpt2')
# data_model.config.pad_token_id = data_model.config.eos_token_id

# device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# data_model.to(device)

In [5]:
# # Defining dataset split and dataloaders
# transforms = TransformModule(
#     model=data_model,
#     tokenizer=data_tokenizer,
#     max_length=10000,
#     bias_threshold=0.5,
#     negative_samples=2,
#     positive_samples=1,
# )
# dataset = BiasDataset('/mnt/e/NTU-DLWeek2025/model_scripts/datasets/clean_with_scores.json', data_tokenizer, max_length=512, transforms=transforms)
# train_dataloader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=custom_collate_fn)

# for batch in train_dataloader:
#     pass


In [6]:
# model = LlamaBiModel.from_pretrained('/mnt/e/NTU-DLWEEK2025/Llama-encoder-1.0B')
# model.config.pad_token_id = model.config.eos_token_id
# device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# model.to(device)
# print('loaded model')
# clr = CLR(llama_model=model, infonce_reduction='mean')
# print('clr built')
tokenizer = AutoTokenizer.from_pretrained('/mnt/e/NTU-DLWEEK2025/Llama-encoder-1.0B')
tokenizer.pad_token_id = tokenizer.eos_token_id


In [None]:
# Defining dataset split and dataloaders
transforms = CLRTransformModule(
    client=client,
    tokenizer=tokenizer,
    max_length=10000,
    bias_threshold=0.5,
    negative_samples=2,
    positive_samples=1,
)
dataset = BiasDataset('/mnt/e/NTU-DLWeek2025/model_scripts/datasets/clean_with_scores.json', tokenizer, max_length=512, transforms=transforms)
train_dataloader = DataLoader(dataset, batch_size=1, shuffle=True, collate_fn=custom_collate_fn)

for batch in train_dataloader:
    pass


positive_text <think>
Okay, so I need to paraphrase this article about President Trump firing the chairman of the Joint Chiefs and other military officials. Let me read through it carefully first.

The president fired General Charles Q. Brown Jr., which is significant because he's the second African American to hold that position. Then, Trump nominated Dan Caine to replace him, even though Caine isn't as experienced or has held such a high rank before. This shake-up is causing a lot of uncertainty in the military and among politicians.

The article mentions that this is part of a broader purge targeting senior officers, especially those from certain backgrounds like Black or women. It also talks about other agencies like the FBI and CIA preparing for personnel changes, aligning with Trump's stance on reducing what he sees as "woke" officials.

Pentagon Secretary Pete Hegseth is involved in replacing high-ranking officers, and there's a draft list of who might be fired, including Brown 

In [None]:
def custom_collate_fn(batch):
    """
    Custom collate function to handle batching of data with multi-dimensional labels.
    """
    print('batch entering collate_fn', batch)
    # Extract input_ids and attention_mask from the batch
    input_ids = torch.stack([item['input_ids'] for item in batch])
    attention_mask = torch.stack([item['attention_mask'] for item in batch])
    
    # Handle labels (assumes each label is a tensor of the same size)
    labels = torch.stack([item['labels'] for item in batch])
    
    # Return a batch containing padded input_ids, attention_mask, and labels
    return {
        'input_ids': input_ids,
        'attention_mask': attention_mask,
        'labels': labels
    }

In [None]:
# Defining training args
training_args = TrainingArguments(
    output_dir='./model_scripts/pretrain_results',
    num_train_epochs=3,
    per_device_train_batch_size=1,
    save_steps=500,
    save_total_limit=2,
    logging_dir='./model_scripts/pretrain_results/logs',
    logging_steps=100,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    eval_steps=None,
    remove_unused_columns=False,
)

torch.cuda.empty_cache()

# Defining trainer
trainer = Trainer(
    model=clr,
    args=training_args,
    train_dataset=dataset,
    tokenizer=tokenizer,
    data_collator=custom_collate_fn,
)

trainer.train()


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:151643 for open-end generation.


time taken 232.0120759010315
positive_text before yoink “We must chart [a new] path in a world that is rapidly changing,” President Cyril Ramaphosa told the South African people this month in his State of the Nation address. Without mentioning U.S. President Donald Trump or Elon Musk directly, Ramaphosa gave a forceful denunciation of the remarkable swing in U.S. policy toward South Africa since Trump returned to the White House on Jan. 20. Describing the world South Africa must now navigate as characterized by rising nationalism and protectionism, Ramaphosa declared that Pretoria “will not be bullied.”



Ramaphosa’s address came in response to a whiplash-inducing turn from the era of Trump’s predecessor, former President Joe Biden, which has seen the machinery of the U.S. government deployed to advance a misinformation agenda linked to South African white nationalist movements against Pretoria. As part of that turn, last week the U.S. Embassy pledged immediate action on a petition fr

TypeError: expected Tensor as element 0 in argument 0, but got list

In [None]:
train_dataloader = DataLoader(tokenized_train, shuffle=True, batch_size=1)
# model = AutoModelForSequenceClassification.from_pretrained("Llama-encoder-1.0B", num_labels=5)
model = LlamaBiModel.from_pretrained("Llama-encoder-1.0B")
# model.config.pad_token_id = model.config.eos_token_id
optimizer = AdamW(model.parameters(), lr=5e-5)
num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

lora_config = LoraConfig(
        task_type=TaskType.SEQ_CLS,
        r=8,
        lora_alpha=32,
        lora_dropout=0.1,
        # target_modules=["query", "value"]
    )
# print(model)
model = get_peft_model(model, lora_config)
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
model.print_trainable_parameters()
progress_bar = tqdm(range(num_training_steps))

# model.train()
# model.model.gradient_checkpointing = True

for epoch in range(num_epochs):
    for batch in train_dataloader:
        preprocess_batch = {}
        for k, v in batch.items():
            # print(k)
            # if k == 'labels':
            #     pass
            # elif isinstance(v, torch.Tensor):
            #     preprocess_batch[k] = v.to(device)
            # elif isinstance(v, list):
            #     preprocess_batch[k] = torch.stack(v, dim=1).to(device)

            if isinstance(v, torch.Tensor):
                preprocess_batch[k] = v.to(device)
            elif isinstance(v, list):
                preprocess_batch[k] = torch.stack(v, dim=1).to(device)

            print(preprocess_batch)
        # print(preprocess_batch)
        # with torch.no_grad():
        outputs = model(**preprocess_batch)

        # last_hidden_states = outputs.last_hidden_state
        # print(last_hidden_states, last_hidden_states.shape)
        # print(outputs.last_hidden_state.shape)
        loss = outputs.loss
        loss.backward()

        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at Llama-encoder-1.0B and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


trainable params: 1,136,640 || all params: 1,035,659,264 || trainable%: 0.1098


  0%|          | 2/3000 [06:45<168:57:14, 202.88s/it]


{'labels': tensor([4])}
{'labels': tensor([4]), 'input_ids': tensor([[    1,  3118,   310,   278,  1900,  8230,  1078,  1921,  2363,   306,
           505,  3926,  1063,   304,   322,   306,   626,   263,  8230,  1078,
         13524,   577,   306,   505,  1063,   304,   263,  2846,   975,   278,
          2440, 29889,   450,  8693,  4359,   756,   263,  1302,  1537,  4459,
           304,   372,   448,   366,  4459,  5476,   510,   287,   278,   937,
           931,   366,  6686,   964,   278,  2058, 29889,   450, 13925,   338,
         14154,  1532, 16370,   322,  1073,   825,   896,   526,  2599, 29892,
           920,   304,  1959,   596,   883,   856,  3204,   304,   278, 11015,
          9493, 29889,   512, 21499,   727,  2833,   304,   367,   343, 14895,
          7600,  1432,  2908,   448,  6060,   393,   338,  1363,  1784,  1016,
         29915, 29873,  1073,  1048,  8230,  1078,   322,   920,  7795,  5611,
           372,   338, 29973,   306,  1016, 29915, 29873,  1073, 29889



{'labels': tensor([4])}
{'labels': tensor([4]), 'input_ids': tensor([[    1,  1128,   508,   278,  1900,   360,  2960,   262,  3872,  8842,
           306,  3926,  1063,   304,   367,   297, 29715, 29973, 29871,   306,
           505,  2360,  1539,   263,  5121,  4926, 13925, 29889, 29871,  2216,
           763,   278,  1383,   514, 29914,  7858,   297, 12321,  4006, 29892,
          5625, 29876,   366,  2305,   297, 29715,   526,  7575, 29889, 29871,
         15992,   590, 29871,  7612, 26935,   304,   639, 20309,   313, 11884,
          1754,  1854,   372,   471,   577, 11410, 12115,   315,  1745,   267,
         29892,   263,  3761,   372,   471,   577,  1781, 29889, 29871,   306,
          3512,  1250,   363,   901,   313,   974,  3236, 29892,   306, 29915,
         29885,  1401,  1150,   287,   472,  3064, 29897,   322,   263,  8455,
          3614,   975,   746,   372,  2355, 19587, 29892,   541,  1584,   750,
           931,   304, 13563,   304,   592, 29889, 29871, 23350,  1090



{'labels': tensor([2])}
{'labels': tensor([2]), 'input_ids': tensor([[    1, 10791, 29901,  2428,  8444,  4480,   261, 29892,  1781,   282,
           449,   457,   313,  4187,   474,  2355,   941,  2649,   366, 29901,
           474,  1016, 29915, 29873,  1073,   825,   515,   282,   449,   457,
         29889,   541,   474,  3282, 29915, 29873, 24817,  1009, 29879,  1283,
           278,  1591, 29892,  9343,  1073, 29973,   467,  9360, 29991,   322,
          7575, 26552,   289,  1338,   314,   293,   373,   278,  4497,   328,
         29889,   320, 29876, 29905, 29876,  3200, 29901,  1922, 29892, 26072,
           414,   871,  2041,  1532,  2309, 29973,   393,  1838, 29915, 29873,
          2289,   664,   363,   592, 29889,   363,   263,  1532,  2309,  6866,
           914,   372,   471,  3117,   714, 11235,   313, 29873, 28470,   263,
          2586,   763,   727,   471,   373,   291, 22300,  6837,   297,   278,
         27654, 29892,   541,   321, 29882,   511,   541,  2289, 29892

KeyboardInterrupt: 