In [1]:
#import torch and cuda

import torch
import torch.nn.functional as F

print("Number of GPU: ", torch.cuda.device_count())
print("GPU Name: ", torch.cuda.get_device_name())

if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)} is available.")
else:
    print("No GPU available. Training will run on CPU.")

Number of GPU:  1
GPU Name:  NVIDIA GeForce GTX 1650
GPU: NVIDIA GeForce GTX 1650 is available.


In [2]:
#Configure device: Set the device configuration to GPU using the torch.device class in PyTorch:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
##test fine tune

#load
from datasets import load_dataset
imdb = load_dataset("imdb")


In [4]:
#preprocess

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert/distilbert-base-uncased")

In [5]:
def preprocess_data(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=128)

tokenized_datasets = imdb.map(preprocess_data, batched=True)

In [6]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

In [7]:
import evaluate

accuracy = evaluate.load("accuracy")

In [8]:
import numpy as np


def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return accuracy.compute(predictions=predictions, references=labels)

In [9]:
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {"NEGATIVE": 0, "POSITIVE": 1}

In [10]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert/distilbert-base-uncased", num_labels=2, id2label=id2label, label2id=label2id
)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [11]:
from transformers import DataCollatorWithPadding

# Adjust training arguments for GTX 1650
training_args = TrainingArguments(
    output_dir="my_awesome_model",          # Directory to save the model
    learning_rate=3e-5,                     # Fine-tuning learning rate
    per_device_train_batch_size=8,          # Reduce batch size for GPU memory
    per_device_eval_batch_size=8,           # Reduce evaluation batch size
    num_train_epochs=1,                     # Number of epochs (adjust if needed)
    weight_decay=0.01,                      # Weight decay
    eval_strategy="epoch",                  # Evaluate at the end of each epoch
    save_strategy="epoch",                  # Save checkpoint at the end of each epoch
    load_best_model_at_end=True,            # Load the best model
    push_to_hub=False,                      # Disable pushing to the Hugging Face Hub
    fp16=True,                              # Enable mixed precision training
    gradient_accumulation_steps=2,          # Accumulate gradients to simulate larger batch size
    logging_dir="./logs",                   # Directory for logs
    logging_steps=50,                       # Adjust logging frequency
)

# Data collator for dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Use a smaller dataset for quick experimentation
small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(4000))
small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))

# Define trainer with optimizations
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=small_train_dataset,     # Use smaller training dataset
    eval_dataset=small_eval_dataset,       # Use smaller evaluation dataset
    tokenizer=tokenizer,                   # Required for Trainer
    data_collator=data_collator,           # Dynamic padding
    compute_metrics=compute_metrics,       # Custom metrics (accuracy, etc.)
)

# Start training
trainer.train()


  trainer = Trainer(


  0%|          | 0/250 [00:00<?, ?it/s]

{'loss': 0.5851, 'grad_norm': 8.399691581726074, 'learning_rate': 2.4120000000000003e-05, 'epoch': 0.2}
{'loss': 0.4185, 'grad_norm': 10.592251777648926, 'learning_rate': 1.824e-05, 'epoch': 0.4}
{'loss': 0.4138, 'grad_norm': 14.494620323181152, 'learning_rate': 1.236e-05, 'epoch': 0.6}
{'loss': 0.3846, 'grad_norm': 15.497050285339355, 'learning_rate': 6.36e-06, 'epoch': 0.8}
{'loss': 0.4067, 'grad_norm': 8.180569648742676, 'learning_rate': 3.6e-07, 'epoch': 1.0}


  0%|          | 0/125 [00:00<?, ?it/s]

{'eval_loss': 0.3664637804031372, 'eval_accuracy': 0.836, 'eval_runtime': 37.0818, 'eval_samples_per_second': 26.967, 'eval_steps_per_second': 3.371, 'epoch': 1.0}
{'train_runtime': 485.838, 'train_samples_per_second': 8.233, 'train_steps_per_second': 0.515, 'train_loss': 0.4417188491821289, 'epoch': 1.0}


TrainOutput(global_step=250, training_loss=0.4417188491821289, metrics={'train_runtime': 485.838, 'train_samples_per_second': 8.233, 'train_steps_per_second': 0.515, 'total_flos': 132467398656000.0, 'train_loss': 0.4417188491821289, 'epoch': 1.0})

In [None]:
eval_results = trainer.evaluate()

In [None]:
trainer.save_model("my_awesome_model")

In [None]:
text = "I love watching rent a girlfriend because the character especially Chizuru Ichinose is beautiful, and elegant"

In [None]:
from transformers import pipeline

classifier = pipeline("sentiment-analysis", model="my_awesome_model")
classifier(text)

In [None]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("my_awesome_model")
inputs = tokenizer(text, return_tensors="pt")



In [None]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained("my_awesome_model")
with torch.no_grad():
    logits = model(**inputs).logits

In [None]:
predicted_class_id = logits.argmax().item()
model.config.id2label[predicted_class_id]

In [None]:
from datasets.utils.logging import get_logger
import os

# Get the cache directory
cache_dir = os.getenv('HF_DATASETS_CACHE', os.path.expanduser("~/.cache/huggingface/datasets"))
print("Hugging Face datasets cache directory:", cache_dir)

In [None]:
import os
from transformers import AutoTokenizer

# Retrieve the cache directory used by Hugging Face
cache_dir = os.getenv('HF_HOME', os.path.expanduser("~/.cache/huggingface/transformers"))
print("Cache directory:", cache_dir)
