In [5]:
# Install Hugging Face Transformers, Datasets, and PEFT (for parameter-efficient fine-tuning)
!pip install transformers datasets accelerate peft



In [2]:
from datasets import load_dataset

# Fetch the IMDB reviews dataset through the `datasets` library.
dataset = load_dataset("imdb")

# Sanity check: show the available splits, then the first training record.
print(dataset, dataset["train"][0], sep="\n")

DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 25000
    })
    unsupervised: Dataset({
        features: ['text', 'label'],
        num_rows: 50000
    })
})
{'text': 'I rented I AM CURIOUS-YELLOW from my video store because of all the controversy that surrounded it when it was first released in 1967. I also heard that at first it was seized by U.S. customs if it ever tried to enter this country, therefore being a fan of films considered "controversial" I really had to see this for myself.<br /><br />The plot is centered around a young Swedish drama student named Lena who wants to learn everything she can about life. In particular she wants to focus her attentions to making some sort of documentary on what the average Swede thought about certain political issues such as the Vietnam War and race issues in the United States. In between asking politicians and

In [6]:
!pip install --upgrade numpy


Collecting numpy
  Obtaining dependency information for numpy from https://files.pythonhosted.org/packages/66/a3/4139296b481ae7304a43581046b8f0a20da6a0dfe0ee47a044cade796603/numpy-2.2.2-cp311-cp311-win_amd64.whl.metadata
  Using cached numpy-2.2.2-cp311-cp311-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.2.2-cp311-cp311-win_amd64.whl (12.9 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\hp\\anaconda3\\Lib\\site-packages\\~%mpy.libs\\libscipy_openblas64_-caad452230ae4ddb57899b8b3a33c55c.dll'
Consider using the `--user` option or check the permissions.



In [8]:
## Tokenization
import os

# This notebook only uses PyTorch. When TensorFlow is installed, transformers also loads
# its TensorFlow utilities, and a broken TensorFlow install then makes
# `from transformers import Trainer` fail. Tell transformers to skip TensorFlow.
# This only takes effect if it runs before the first `import transformers` in the kernel.
os.environ.setdefault("USE_TF", "0")

from transformers import AutoTokenizer

# Choose a pre-trained model checkpoint
model_checkpoint = "distilbert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)


def tokenize_function(examples):
    """Tokenize the "text" field of a batch of examples.

    Sequences are truncated to at most 128 tokens. `padding=True` pads to the
    longest sequence in the batch this function is called on (per the Hugging Face
    tokenizer documentation).

    Args:
        examples: A batch from `Dataset.map(..., batched=True)` with a "text" column.

    Returns:
        The tokenizer output for the batch.
    """
    return tokenizer(examples["text"], truncation=True, padding=True, max_length=128)


# Apply the tokenizer to every split of the dataset
tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Rename the "label" column to "labels", the name the Hugging Face Trainer expects
tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

RuntimeError: Failed to import transformers.models.auto.tokenization_auto because of the following error (look up to see its traceback):
Failed to import transformers.generation.utils because of the following error (look up to see its traceback):
cannot import name 'ComplexWarning' from 'numpy.core.numeric' (C:\Users\hp\anaconda3\Lib\site-packages\numpy\core\numeric.py)

In [4]:
import torch
from transformers import AutoModelForSequenceClassification #automatically loads a pre-trained Transformer model specifically tailored for sequence classification tasks.
from peft import LoraConfig, get_peft_model, TaskType #get_peft_model: A function that wraps your base model with LoRA modules.

# Base checkpoint with a two-label (positive/negative) sequence-classification head.
base_model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=2)

# LoRA settings: which linear layers receive adapters, and the adapter hyper-parameters.
lora_config = LoraConfig(
    target_modules=["q_lin", "k_lin", "v_lin", "out_lin"],
    task_type=TaskType.SEQ_CLS,  # sequence classification
    r=8,                         # LoRA rank
    lora_alpha=32,               # adjusts how LoRA updates are blended with existing weights
    lora_dropout=0.1,            # dropout inside the LoRA modules
    bias="none",                 # whether to include bias parameters in LoRA modules
)

# Wrap the base model with LoRA modules; this wrapped model is the one to train.
peft_model = get_peft_model(base_model, lora_config)

# Report how many parameters will receive gradients compared with the total.
param_sizes = [(p.numel(), p.requires_grad) for _, p in peft_model.named_parameters()]
all_params = sum(size for size, _ in param_sizes)
trainable_params = sum(size for size, is_trainable in param_sizes if is_trainable)

print(f"Trainable params: {trainable_params} | All params: {all_params} | Trainable%: {100 * trainable_params/all_params:.2f}%")





Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Trainable params: 887042 | All params: 67842052 | Trainable%: 1.31%


In [10]:
!pip install evaluate



In [None]:
## Training loop using the Trainer API

In [5]:
from transformers import TrainingArguments, Trainer

import inspect

# For demonstration, train on 2,000 and evaluate on 1,000 examples, each taken
# from a seeded shuffle of the corresponding split.
small_train_dataset = tokenized_dataset["train"].shuffle(seed=42).select(range(2000))
small_eval_dataset  = tokenized_dataset["test"].shuffle(seed=42).select(range(1000))


def _supported_kwarg(target, preferred, legacy):
    """Return `preferred` if `target` accepts it as a keyword argument, else `legacy`."""
    accepted = inspect.signature(target).parameters
    return preferred if preferred in accepted else legacy


# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy` and
# Trainer's `tokenizer` argument to `processing_class`. The installed version is not
# fixed by this notebook, so use whichever spelling it accepts.
eval_strategy_kwarg = _supported_kwarg(TrainingArguments, "eval_strategy", "evaluation_strategy")
tokenizer_kwarg = _supported_kwarg(Trainer, "processing_class", "tokenizer")

# Define training arguments
training_args = TrainingArguments(
    output_dir="./lora-distilbert-imdb",
    save_strategy="epoch",          # Saves a checkpoint at the end of every epoch
    logging_strategy="steps",       # Logs training metrics every few steps
    logging_steps=50,
    per_device_train_batch_size=8,  # Batch size per device
    per_device_eval_batch_size=8,
    num_train_epochs=2,  # Number of full passes through the training data.
    weight_decay=0.01,   # A regularization hyperparameter to help prevent overfitting.
    learning_rate=1e-4,  # Step size for the optimizer.
    push_to_hub=False,   # Disable pushing to Hugging Face Hub
    report_to="none",    # Disable logging to W&B
    **{eval_strategy_kwarg: "epoch"},  # Evaluates the model after each epoch.
)

# Define a simple accuracy metric
import evaluate
metric_accuracy = evaluate.load("accuracy")


def compute_metrics(eval_pred):
    """Compute accuracy from the `(logits, labels)` pair supplied by the Trainer.

    The predicted class is the argmax over the last axis of the logits.
    """
    logits, labels = eval_pred
    predictions = torch.argmax(torch.tensor(logits), dim=-1)
    return metric_accuracy.compute(predictions=predictions, references=labels)


# Initialize the Trainer
trainer = Trainer(
    model=peft_model,
    args=training_args,
    train_dataset=small_train_dataset,
    eval_dataset=small_eval_dataset,
    compute_metrics=compute_metrics,
    **{tokenizer_kwarg: tokenizer},
)


RuntimeError: Failed to import transformers.trainer because of the following error (look up to see its traceback):
Failed to import transformers.integrations.integration_utils because of the following error (look up to see its traceback):
Failed to import transformers.modeling_tf_utils because of the following error (look up to see its traceback):
module 'tensorflow._api.v2.compat.v2.__internal__' has no attribute 'register_load_context_function'

In [6]:
!pip install --upgrade tensorflow


Collecting keras>=3.5.0 (from tensorflow-intel==2.18.0->tensorflow)
  Obtaining dependency information for keras>=3.5.0 from https://files.pythonhosted.org/packages/fe/cf/aea9087c4d7fafe956a0cc0ff6c3327d10fb8442cda50f992a2186921fa0/keras-3.8.0-py3-none-any.whl.metadata
  Using cached keras-3.8.0-py3-none-any.whl.metadata (5.8 kB)
Collecting numpy<2.1.0,>=1.26.0 (from tensorflow-intel==2.18.0->tensorflow)
  Obtaining dependency information for numpy<2.1.0,>=1.26.0 from https://files.pythonhosted.org/packages/eb/57/3a3f14d3a759dcf9bf6e9eda905794726b758819df4663f217d658a58695/numpy-2.0.2-cp311-cp311-win_amd64.whl.metadata
  Using cached numpy-2.0.2-cp311-cp311-win_amd64.whl.metadata (59 kB)
Using cached keras-3.8.0-py3-none-any.whl (1.3 MB)
Using cached numpy-2.0.2-cp311-cp311-win_amd64.whl (15.9 MB)
Installing collected packages: numpy, keras
  Attempting uninstall: numpy
    Found existing installation: numpy 1.24.4
    Uninstalling numpy-1.24.4:
      Successfully uninstalled numpy-1.2

ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\hp\\anaconda3\\Lib\\site-packages\\~0mpy\\.libs\\libopenblas64__v0.3.21-gcc_10_3_0.dll'
Consider using the `--user` option or check the permissions.



In [None]:
!pip install transformers==4.25.1


In [None]:
!pip uninstall transformers

In [None]:
!pip install transformers