In [1]:
from pathlib import Path
import pandas as pd

BASE_DIR = Path.cwd()   # current notebook directory
DATA_PATH = BASE_DIR / "data" / "feedback_dataset.csv"

df = pd.read_csv(DATA_PATH)
df.head()


Unnamed: 0,text,label
0,The instructor explains concepts clearly,positive
1,The module is confusing and rushed,negative
2,The course is okay,neutral
3,I really enjoyed the interactive sessions,positive
4,The pace of the course is too fast,negative


In [2]:
import torch
print(torch.cuda.is_available())

False


In [3]:
import torch, transformers, accelerate, ipywidgets

print("Torch:", torch.__version__)
print("Transformers:", transformers.__version__)
print("Accelerate:", accelerate.__version__)
print("ipywidgets:", ipywidgets.__version__)


Torch: 2.10.0+cpu
Transformers: 5.1.0
Accelerate: 1.12.0
ipywidgets: 8.1.8


In [4]:
label_map = {"negative": 0, "neutral": 1, "positive": 2}
df["label"] = df["label"].map(label_map)


In [5]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")



In [6]:
def tokenize_data(texts):
    return tokenizer(
        texts,
        padding="max_length",
        truncation=True,
        max_length=128
    )


In [7]:
from sklearn.model_selection import train_test_split

train_texts, val_texts, train_labels, val_labels = train_test_split(
    df["text"].tolist(),
    df["label"].tolist(),
    test_size=0.2
)



In [8]:
import torch

class FeedbackDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels):
        self.encodings = tokenize_data(texts)
        self.labels = labels

    def __getitem__(self, idx):
        item = {k: torch.tensor(v[idx]) for k, v in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = FeedbackDataset(train_texts, train_labels)
val_dataset = FeedbackDataset(val_texts, val_labels)


In [9]:
from transformers import AutoModelForSequenceClassification

model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased-finetuned-sst-2-english", 
    num_labels=3, 
    ignore_mismatched_sizes=True
)

Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]

[1mDistilBertForSequenceClassification LOAD REPORT[0m from: distilbert-base-uncased-finetuned-sst-2-english
Key               | Status   |                                                                                       
------------------+----------+---------------------------------------------------------------------------------------
classifier.weight | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([2, 768]) vs model:torch.Size([3, 768])
classifier.bias   | MISMATCH | Reinit due to size mismatch - ckpt: torch.Size([2]) vs model:torch.Size([3])          

[3mNotes:
- MISMATCH[3m	:ckpt weights were loaded, but they did not match the original empty weight shapes.[0m


In [10]:
import accelerate
import transformers

print(accelerate.__version__)  # should be >=0.26.0
print(transformers.__version__)  # ideally >=4.30
print(transformers.__file__)

1.12.0
5.1.0
c:\Users\Gabriel Esperanza\Documents\Projects\UPang-Student-Feedback-and-Module-Evaluation-System\.venv\Lib\site-packages\transformers\__init__.py


In [11]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    save_strategy="epoch",           # Save every epoch to find the best one
    learning_rate=1e-5,              # Slightly faster learning
    per_device_train_batch_size=8,   # Smaller batches for small data
    num_train_epochs=15,              # More rounds to learn patterns
    weight_decay=0.1,                # Stronger protection against memorization
    load_best_model_at_end=True,     # Keep the best version
    metric_for_best_model="f1",      # Optimize for balance, not just accuracy
    logging_steps=10                 # Log more often since the dataset is small
)

print(training_args)

TrainingArguments(
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=True,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
enable_jit_checkpoint=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=IntervalStrategy.EPOCH,
eval_use_gather_object=Fals

In [12]:
import numpy as np
from sklearn.metrics import f1_score, accuracy_score

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    # Convert the model's raw output (logits) into predicted labels (0, 1, or 2)
    predictions = np.argmax(logits, axis=-1)
    
    # Calculate F1 and Accuracy
    # 'weighted' average is best here because it handles the slight imbalance in your data
    f1 = f1_score(labels, predictions, average='weighted')
    acc = accuracy_score(labels, predictions)
    
    return {"f1": f1, "accuracy": acc}

In [13]:
from transformers import Trainer

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics  # ADD THIS LINE
)

trainer.train()

  super().__init__(loader)


Epoch,Training Loss,Validation Loss,F1,Accuracy
1,1.181045,0.865424,0.835586,0.837838
2,0.829589,0.603857,0.91486,0.918919
3,0.645596,0.466287,0.945045,0.945946
4,0.422502,0.363461,0.945045,0.945946
5,0.288751,0.286917,0.945045,0.945946
6,0.188513,0.247784,0.945045,0.945946
7,0.0908,0.198986,0.918203,0.918919
8,0.061154,0.163816,0.945045,0.945946
9,0.040881,0.167765,0.918203,0.918919
10,0.039217,0.158343,0.918203,0.918919


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

  super().__init__(loader)


Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

There were missing keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.weight', 'distilbert.embeddings.LayerNorm.bias'].
There were unexpected keys in the checkpoint model loaded: ['distilbert.embeddings.LayerNorm.beta', 'distilbert.embeddings.LayerNorm.gamma'].


TrainOutput(global_step=285, training_loss=0.24832109526584023, metrics={'train_runtime': 357.5302, 'train_samples_per_second': 6.125, 'train_steps_per_second': 0.797, 'total_flos': 72527194160640.0, 'train_loss': 0.24832109526584023, 'epoch': 15.0})

In [14]:
from sklearn.metrics import classification_report
import numpy as np

predictions = trainer.predict(val_dataset)
y_pred = np.argmax(predictions.predictions, axis=1)

print(classification_report(val_labels, y_pred))


  super().__init__(loader)


              precision    recall  f1-score   support

           0       0.93      1.00      0.97        14
           1       0.91      0.91      0.91        11
           2       1.00      0.92      0.96        12

    accuracy                           0.95        37
   macro avg       0.95      0.94      0.94        37
weighted avg       0.95      0.95      0.95        37



In [15]:
# 1. Save the model using the standard format (avoids the Windows lock)
model.save_pretrained("sentiment_model_final", safe_serialization=False)

# 2. Save the tokenizer
tokenizer.save_pretrained("sentiment_model_final")

print("Model saved successfully in 'sentiment_model_final'!")

Writing model shards:   0%|          | 0/1 [00:00<?, ?it/s]

Model saved successfully in 'sentiment_model_final'!


In [16]:
import os
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# 1. Use a relative path for portability
model_path = "./sentiment_model_final"

# Verify path exists
if not os.path.exists(model_path):
    print(f"Directory NOT found: {model_path}")
    print("Please check if 'sentiment_model_final' is the correct folder name.")
else:
    # 2. Load model and tokenizer
    # Adding local_files_only=True ensures it doesn't try to search the internet
    model = AutoModelForSequenceClassification.from_pretrained(model_path, local_files_only=True)
    tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)

    # 3. Create pipeline
    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

    # 4. Test sentences
    test_sentences = [
        "The class was absolutely wonderful and the examples were clear!",
        "I'm not sure how I feel about this module, it was okay.",
        "This was the best course I have ever taken, the instructor was a genius!"
    ]

    # 5. Get and print predictions
    results = classifier(test_sentences)
    for text, result in zip(test_sentences, results):
        print(f"Text: {text}")
        print(f"Prediction: {result['label']} (Confidence: {result['score']:.2f})\n")

Loading weights:   0%|          | 0/104 [00:00<?, ?it/s]

Text: The class was absolutely wonderful and the examples were clear!
Prediction: LABEL_2 (Confidence: 0.98)

Text: I'm not sure how I feel about this module, it was okay.
Prediction: LABEL_1 (Confidence: 0.97)

Text: This was the best course I have ever taken, the instructor was a genius!
Prediction: LABEL_2 (Confidence: 0.97)



In [17]:
from transformers import pipeline

# Load a zero-shot classification pipeline
theme_classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")

candidate_labels = ["teaching clarity", "course workload", "module materials", "instructor engagement"]

def analyze_themes(text):
    result = theme_classifier(text, candidate_labels)
    # Returns the top category based on confidence
    return result['labels'][0]

config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Loading weights:   0%|          | 0/515 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]