In [None]:
#!pip install -qqq --upgrade bitsandbytes transformers peft accelerate datasets trl flash_attn
#!pip install huggingface_hub


In [None]:
#!pip install wandb -qqq

In [None]:
#import wandb
#wandb.login()

In [16]:
import torch
from datasets import load_dataset, concatenate_datasets

from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline
)
from trl import SFTTrainer

In [8]:
# Hub identifiers used by the rest of the notebook:
#   - model_name:      base checkpoint the adapter is applied to.
#   - hf_adapter_repo: repository that stores the PEFT adapter.
model_name = "albert/albert-xxlarge-v2"
hf_adapter_repo = "dhanishetty/albert-xxlarge-v2-Adapters_2"

# Echo both identifiers as the cell output for a quick sanity check.
(model_name, hf_adapter_repo)

('albert/albert-xxlarge-v2', 'dhanishetty/albert-xxlarge-v2-Adapters_2')

In [10]:
# Load the base checkpoint with a 3-class sequence-classification head, attach
# the PEFT adapter stored in `hf_adapter_repo`, and fold the adapter weights
# into the base model so the result is a plain transformers model.
peft_model_id = hf_adapter_repo
tr_model_id = model_name

# `trust_remote_code` is deliberately left at its default (False): ALBERT is
# implemented inside transformers, so no repository-supplied Python has to run.
# `torch_dtype="auto"` lets transformers pick the dtype from the checkpoint.
model = AutoModelForSequenceClassification.from_pretrained(
    tr_model_id,
    torch_dtype="auto",
    num_labels=3,
)

# transformers warns here that the classifier head is newly initialised.
# NOTE(review): presumably the adapter repo carries the trained head - confirm.
model = PeftModel.from_pretrained(model, peft_model_id)

# merge_and_unload() merges the adapter weights into the base model and
# returns it without the PEFT wrapper.
model = model.merge_and_unload()

Some weights of AlbertForSequenceClassification were not initialized from the model checkpoint at albert/albert-xxlarge-v2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
# Load the tokenizer files stored in the adapter repository (`peft_model_id`).
tokenizer = AutoTokenizer.from_pretrained(
    peft_model_id,
)

tokenizer_config.json:   0%|          | 0.00/1.25k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.27M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

In [12]:

# Hub repository name stored in `hf_model_repo`; later cells read it when
# publishing and reloading the merged model.
hf_model_repo = "dhanishetty/albert-xxlarge-v2-Merged"

In [13]:
# Publish the merged model and its tokenizer to the Hub repository named by
# `hf_model_repo`.
#
# Authentication: no access token is embedded in the notebook. Both calls use
# the credentials resolved by huggingface_hub, i.e. a prior
# `huggingface-cli login` / `notebook_login()` or the HF_TOKEN environment
# variable.
# SECURITY: never paste a token literal into this cell; any token that has
# been committed in a notebook must be treated as leaked and revoked.
merged_model_id = hf_model_repo
model.push_to_hub(merged_model_id)
tokenizer.push_to_hub(merged_model_id)

model.safetensors:   0%|          | 0.00/890M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


CommitInfo(commit_url='https://huggingface.co/dhanishetty/albert-xxlarge-v2-Merged/commit/b0c16d431f4e58109787ae124c34db2b5de47bf6', commit_message='Upload tokenizer', commit_description='', oid='b0c16d431f4e58109787ae124c34db2b5de47bf6', pr_url=None, pr_revision=None, pr_num=None)

In [14]:
# Label vocabulary for the three sentiment classes. The reverse mapping is
# derived from `id2label` so the two dictionaries cannot drift apart.
id2label = {0: 'Negative', 1: 'Neutral', 2: 'Positive'}
label2id = {name: idx for idx, name in id2label.items()}

# Reload the merged checkpoint from the Hub, passing the label mappings so
# predictions can be reported by name.
model = AutoModelForSequenceClassification.from_pretrained(
    merged_model_id,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

config.json:   0%|          | 0.00/1.07k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/890M [00:00<?, ?B/s]

In [15]:
# Reload the tokenizer from the merged-model repository.
# NOTE(review): `add_prefix_space` is normally a byte-level BPE option -
# confirm it has any effect for this ALBERT tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    merged_model_id,
    add_prefix_space=True,
)

tokenizer_config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.27M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.02k [00:00<?, ?B/s]

In [17]:
# Download the tweet-sentiment dataset and pool its train and test splits into
# a single dataset used for the prediction preview below.
dataset_1 = load_dataset("mteb/tweet_sentiment_extraction")

dataset_2 = dataset_1['train']
dataset_3 = dataset_1['test']

dataset = concatenate_datasets([dataset_2, dataset_3])

# Shuffle with a fixed seed so the row order (and therefore every example
# printed later) is the same on each Restart & Run All.
dataset = dataset.shuffle(seed=42)

In [19]:
# Display the dataset summary (feature names and row count) as the cell output.
dataset

Dataset({
    features: ['id', 'text', 'label', 'label_text'],
    num_rows: 31015
})

In [24]:
# Spot-check the classifier on the first 31 rows of the shuffled dataset.
# Each printed line is: tweet text, gold label id, predicted label name.
# NOTE(review): `model` was reloaded from the merged repository above, so the
# "Untrained" wording in the header may be stale - confirm before sharing.
print("Untrained model predictions:")
print("----------------------------")

model.eval()  # make sure dropout is disabled for inference
with torch.no_grad():  # inference only: skip building the autograd graph
    for x in range(31):
        # Fetch a single row instead of materialising a whole column per step.
        row = dataset[x]
        # tokenize text
        inputs = tokenizer.encode(row['text'], return_tensors="pt")
        # compute logits
        logits = model(inputs).logits
        # convert logits to a class id (batch size is 1, so one value results)
        predictions = torch.argmax(logits, dim=-1)
        print(row['text'], row['label'], id2label[predictions.item()])

Untrained model predictions:
----------------------------
spending a `girly` day wif my four big sisters! shopping. pedicures and dyeing eachother`s hair. ohh yeah 1 Neutral
Baby baby, it`s gonna be all right when I`m by your side and the whole world turns against you... I ? it when my Ipod randoms BSB songs 1 Neutral
apple fritters are like the perfect after food...don`t you think? 2 Positive
 Sometimes I wish Twitter had Facebook`s 'like' just so I could give you a thumbs up. Great job either way  - I suck too btw 2 Neutral
Just getting an additional free day 1 Neutral
_ I had curry shrimp yesterday lol I love seafood 2 Positive
 I have my 830 class  **** school! what time you work wed? if jade comes ima have to get her in antioch at amtrak. 0 Neutral
 and added it to his collection. i was too much of a punk **** to ask for it back. 0 Negative
just got home from Tillie`s dance recital. lol, it was spectacular! 2 Positive
has plopped TWICE! TWICE!  http://plurk.com/p/svs0y 0 Neutral
 