In [10]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import torch

In [11]:
# Load tokenizer
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')

# Load pre-trained model
model = RobertaForSequenceClassification.from_pretrained('roberta-base', num_labels=21)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [13]:
# Load the data
data = pd.read_csv("training.csv")

# Split data into features (X) and labels (y)
X = data['text']
y = data.drop('text', axis=1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Prepare text data
texts_train = X_train.tolist()
texts_test = X_test.tolist()

# Tokenize text
inputs_train = tokenizer(texts_train, padding=True, truncation=True, return_tensors="pt")
inputs_test = tokenizer(texts_test, padding=True, truncation=True, return_tensors="pt")

In [15]:
#Train the model
model.train()

# Define optimizer and loss function
optimizer = torch.optim.AdamW(model.parameters(), lr=2e-5)
loss_fn = torch.nn.BCEWithLogitsLoss()

# Training loop
epochs = 3
for epoch in range(epochs):
    # Iterate over batches
    for i in range(0, len(inputs_train['input_ids']), 32):  # Assuming batch size of 32
        batch_inputs = {key: val[i:i+32] for key, val in inputs_train.items()}  # Select batch
        batch_labels = y_train[i:i+32]
        
        # Forward pass
        outputs = model(**batch_inputs)
        loss = loss_fn(outputs.logits, torch.tensor(batch_labels.values, dtype=torch.float32))
        
        # Backward pass
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [17]:
#Evaluate the model
model.eval()

# Predict emotions for the testing set
with torch.no_grad():
    outputs_test = model(**inputs_test)

# Convert logits to probabilities
probs_test = torch.sigmoid(outputs_test.logits)

# Convert probabilities to labels
threshold = 0.5  # You can adjust this threshold if needed
predicted_labels_test = (probs_test > threshold).int()

# Convert predicted labels to dataframe
predicted_labels_test_df = pd.DataFrame(predicted_labels_test.numpy(), columns=y.columns)

# Evaluate the model
print("\nAccuracy per Emotion Label on Testing Set:")
print(classification_report(y_test, predicted_labels_test_df, target_names=y.columns))


Accuracy per Emotion Label on Testing Set:
                precision    recall  f1-score   support

           Joy       0.00      0.00      0.00        16
         Anger       0.00      0.00      0.00        21
       Sadness       0.00      0.00      0.00        12
      Surprise       0.00      0.00      0.00        12
          Fear       0.00      0.00      0.00         7
       Disgust       0.00      0.00      0.00        15
          Love       0.00      0.00      0.00        18
     Confusion       0.00      0.00      0.00         8
Disappointment       0.00      0.00      0.00         7
       Neutral       0.00      0.00      0.00        35
   Frustration       0.00      0.00      0.00         2
    Admiration       0.00      0.00      0.00         4
       Dislike       0.00      0.00      0.00         1
     Annoyance       0.00      0.00      0.00         2
 Admiritation        0.00      0.00      0.00         0
    Excitement       0.00      0.00      0.00         0
   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
