In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BertTokenizer, BertForSequenceClassification, AdamW
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import csv
from transformers import BertTokenizer, BertForSequenceClassification
def load_model(model_path):
    """Rebuild the sentiment classifier from a saved state dict.

    A two-label ``bert-base-uncased`` sequence-classification model is
    instantiated first, and the weights stored at ``model_path`` are then
    loaded into it.

    Args:
        model_path: Location of the state-dict file handed to ``torch.load``.

    Returns:
        The classifier with the saved weights applied, switched to eval mode.
    """
    # Instantiating from the base checkpoint emits a "newly initialized
    # classifier" warning; presumably the head weights come from the state
    # dict loaded just below.
    classifier = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

    # Tensors are mapped onto the CPU regardless of where they were saved.
    # NOTE(review): depending on the torch version, torch.load may unpickle
    # arbitrary objects -- only load checkpoints from a trusted source.
    saved_weights = torch.load(model_path, map_location=torch.device('cpu'))
    classifier.load_state_dict(saved_weights)

    classifier.eval()
    return classifier
def predict(model, tokenizer, text):
    """Classify a single piece of text and return the winning class index.

    Args:
        model: Callable classifier exposing a ``device`` attribute; calling it
            with input ids and an attention mask must return an object that
            has a ``logits`` attribute.
        tokenizer: Tokenizer object providing ``encode_plus``.
        text: The text to classify.

    Returns:
        int: Position of the largest logit along dimension 1.
    """
    # Encode to fixed-length (128) PyTorch tensors: padded, truncated, with
    # special tokens and an attention mask, but without token-type ids.
    encoded = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=128,
        return_token_type_ids=False,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt',
    )

    # The inputs have to live on the same device as the model.
    target_device = model.device
    token_ids = encoded['input_ids'].to(target_device)
    mask = encoded['attention_mask'].to(target_device)

    # Gradients are not needed for inference.
    with torch.no_grad():
        logits = model(token_ids, attention_mask=mask).logits

    return logits.argmax(dim=1).item()
# Load the fine-tuned model and the tokenizer for the same base checkpoint
model_path = 'feedback_sentiment_model.pth'
model = load_model(model_path)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Read the TSV file manually
# NOTE(review): this is an absolute, machine-specific path; consider making it
# configurable (e.g. relative to a data directory) before sharing the notebook.
feedback_tsv_path = '/Users/aaravnoronha/Desktop/Student Feedback Data - Training Model.tsv'
data = []
# newline='' lets the csv module do its own line-ending handling, which it
# needs in order to parse quoted fields containing line breaks correctly.
with open(feedback_tsv_path, 'r', encoding='utf-8', newline='') as file:
    tsv_reader = csv.reader(file, delimiter='\t')
    # Consume the header row; the default avoids StopIteration on an empty file.
    headers = next(tsv_reader, None)
    for row in tsv_reader:
        if len(row) >= 2:  # Ensure we have at least Rating and Feedback
            data.append({'Rating': row[0], 'Feedback': row[1]})

# Convert to DataFrame; naming the columns explicitly keeps 'Rating' and
# 'Feedback' present even when no usable rows were read.
df = pd.DataFrame(data, columns=['Rating', 'Feedback'])

def is_positive(rating, threshold=3.0):
    """Tell whether a rating counts as positive.

    Args:
        rating: The rating to judge; anything ``float()`` accepts, such as a
            numeric string read from the TSV or a number.
        threshold: Lowest numeric value still considered positive. Defaults
            to 3.0.

    Returns:
        bool: True when the rating converts to a float that is at least
        ``threshold``. False otherwise, including when the rating cannot be
        converted at all (non-numeric text, empty string, ``None``).
    """
    try:
        return float(rating) >= threshold
    except (TypeError, ValueError):
        # float() raises ValueError for non-numeric text and TypeError for
        # non-string, non-number inputs such as None; both mean "no usable
        # rating", which is treated as not positive.
        return False

# Classify every feedback entry, print it next to the rating-derived label,
# and keep a running tally for the overall accuracy.
print("Predictions for all student feedback entries:")
total = 0
correct = 0
for i, row in df.iterrows():
    rating = row['Rating']
    feedback = row['Feedback']

    # Entries with missing or blank feedback are neither printed nor scored.
    has_text = not pd.isna(feedback) and feedback.strip() != ''
    if not has_text:
        continue

    # Reference label comes from the rating. is_positive() answers False for
    # ratings it cannot parse, so such rows are scored as "Negative".
    if is_positive(rating):
        actual_sentiment = "Positive"
    else:
        actual_sentiment = "Negative"

    # The loop treats class index 1 as the positive class.
    predicted_label = predict(model, tokenizer, str(feedback))
    predicted_sentiment = "Positive" if predicted_label == 1 else "Negative"

    # One call with sep="\n"; the trailing empty string yields the blank
    # separator line after each entry.
    print(
        f"Feedback {i+1}:",
        f"  Rating: {rating}",
        f"  Feedback: {feedback}",
        f"  Actual Sentiment: {actual_sentiment}",
        f"  Predicted Sentiment: {predicted_sentiment}",
        "",
        sep="\n",
    )

    total += 1
    # Both labels are either "Positive" or "Negative", so a plain equality
    # test is enough to detect a match.
    if actual_sentiment == predicted_sentiment:
        correct += 1

# Guard against division by zero when nothing was scored.
if total > 0:
    accuracy = correct / total
else:
    accuracy = 0
print(f"Overall Accuracy: {accuracy:.2f}")

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Predictions for all student feedback entries:
Feedback 1:
  Rating: 3
  Feedback: Awesome class
  Actual Sentiment: Positive
  Predicted Sentiment: Positive

Feedback 2:
  Rating: 3
  Feedback: Awesome class.
  Actual Sentiment: Positive
  Predicted Sentiment: Positive

Feedback 3:
  Rating: 3.5
  Feedback: Great Class!!
  Actual Sentiment: Positive
  Predicted Sentiment: Positive

Feedback 4:
  Rating: 3
  Feedback: great class
  Actual Sentiment: Positive
  Predicted Sentiment: Positive

Feedback 5:
  Rating: 0.5
  Feedback: nobody showed up.
  Actual Sentiment: Negative
  Predicted Sentiment: Negative

Feedback 6:
  Rating: 1.5
  Feedback: Mentor wasn't present for the class. Hoping to see mentor for the next booked class :)
  Actual Sentiment: Negative
  Predicted Sentiment: Negative

Feedback 7:
  Rating: 0.5
  Feedback: No one show up for class today
  Actual Sentiment: Negative
  Predicted Sentiment: Negative

Feedback 8:
  Rating: 1
  Feedback: Quality could not hear her
  Actu