In [None]:
import pandas as pd

# Load the raw review export (read with latin-1 encoding).
df = pd.read_csv('employee_reviews.csv', encoding='latin-1')

# Metadata / star-rating columns that are removed before building the text field.
# The unusual spellings ('carrer', 'mangemnet') are the column names this script
# addresses in the CSV; drop() raises KeyError if any of them is missing.
columns_to_drop = [
    'company', 'location', 'dates', 'job-title', 'overall-ratings',
    'work-balance-stars', 'culture-values-stars', 'carrer-opportunities-stars',
    'comp-benefit-stars', 'senior-mangemnet-stars', 'helpful-count', 'link'
]

# Drop the unwanted columns
df = df.drop(columns=columns_to_drop)

# Free-text columns that are merged into a single 'full_text' column.
text_columns = ['summary', 'pros', 'cons', 'advice-to-mgmt']


def _clean_advice(value):
    """Normalise one 'advice-to-mgmt' cell to a stripped string.

    Missing values (NaN/None) and the placeholder text 'none' (any case,
    surrounding whitespace ignored) both become the empty string.
    """
    # Without this guard str(NaN) yields the literal text 'nan', which would
    # then be glued onto the end of full_text.
    if pd.isna(value):
        return ''
    text = str(value).strip()
    return '' if text.lower() == 'none' else text


df['advice-to-mgmt'] = df['advice-to-mgmt'].apply(_clean_advice)

# Combine text fields into one column. astype(str) keeps '. '.join from failing
# on cells that pandas parsed as numbers rather than text.
df['full_text'] = df[text_columns].fillna('').astype(str).agg('. '.join, axis=1)

# Drop the original text columns
df = df.drop(columns=text_columns)

# Save cleaned dataset
df.to_csv('cleaned_dataset.csv', index=False)

print("Dataset cleaned and saved as 'cleaned_dataset.csv'")

Dataset cleaned and saved as 'cleaned_dataset.csv'


In [None]:
# STEP 1: Install Dependencies
!pip install transformers datasets nltk scikit-learn -q

import pandas as pd
import numpy as np
import torch
import nltk
import re
from sklearn.metrics import classification_report, accuracy_score, f1_score
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from datasets import Dataset
from nltk import pos_tag, word_tokenize

# NLTK data packages (tokenizer and POS-tagger resources), fetched in this order.
for nltk_resource in (
    'punkt',
    'averaged_perceptron_tagger',
    'punkt_tab',  # Download the punkt_tab resource
    'averaged_perceptron_tagger_eng',  # Download the resource for english language
):
    nltk.download(nltk_resource)


# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/491.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.2/491.2 kB[0m [31m36.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/116.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/183.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.9/183.9 kB[0m [31m17.8 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/143.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.5/143.5 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


In [None]:
# STEP 2: Load & Clean Dataset
# Read the scored reviews, discard rows with no review text, and renumber
# the remaining rows from 0.
df = (
    pd.read_csv("cleaned_dataset_with_sentiment.csv")
    .dropna(subset=["full_text"])
    .reset_index(drop=True)
)

def pos_chunk_text(text):
    """Return ``text`` as space-separated ``word/TAG`` pairs.

    The text is split with ``word_tokenize`` and each token is labelled
    with ``pos_tag``.
    """
    tagged_tokens = pos_tag(word_tokenize(text))
    return " ".join(f"{token}/{label}" for token, label in tagged_tokens)

# Add a POS-annotated copy of every review's text.
df['pos_tagged'] = df['full_text'].map(pos_chunk_text)

In [None]:
# STEP 3: Tokenization and Dataset Split
def tokenize_dataset(df, tokenizer, max_length=512):
    """Convert a review DataFrame into a tokenized Hugging Face ``Dataset``.

    Args:
        df: DataFrame containing the columns 'full_text', 'sentiment_score'
            and 'label'.
        tokenizer: Callable tokenizer applied to the 'full_text' values.
        max_length: Sequence length every example is padded/truncated to.
            Defaults to 512, the value previously hard-coded here.

    Returns:
        The dataset with the tokenizer's output columns added by ``map``.
    """
    columns = ['full_text', 'sentiment_score', 'label']
    # preserve_index=False: frames coming out of a shuffle/split keep their
    # original pandas index, which would otherwise be carried into the
    # dataset as an extra index column.
    dataset = Dataset.from_pandas(df[columns], preserve_index=False)

    def tokenize_fn(batch):
        # map(batched=True) passes a batch, so batch['full_text'] holds many texts.
        return tokenizer(
            batch['full_text'],
            padding="max_length",
            truncation=True,
            max_length=max_length,
        )

    return dataset.map(tokenize_fn, batched=True)



In [None]:
# STEP 4: Load Models and Tokenizers
# Short name used in this notebook -> checkpoint identifier passed to from_pretrained.
model_names = {
    "roberta-base": "roberta-base",
    "twitter-roberta": "cardiffnlp/twitter-roberta-base-sentiment",
}

models, tokenizers = {}, {}

# For every entry: load the tokenizer first, then the 3-label classification
# model, and move the model onto `device`.
for name, path in model_names.items():
    tokenizers[name] = AutoTokenizer.from_pretrained(path)
    classifier = AutoModelForSequenceClassification.from_pretrained(path, num_labels=3)
    models[name] = classifier.to(device)

# ✅ FIX: Add a label column from sentiment_score (convert float to int class)
def convert_score_to_class(score, negative_max=-0.33, neutral_max=0.33):
    """Map a continuous sentiment score to an integer class id.

    Args:
        score: Numeric sentiment score.
        negative_max: Scores less than or equal to this are Negative (0).
        neutral_max: Scores above ``negative_max`` and less than or equal to
            this are Neutral (1); anything larger is Positive (2).

    Returns:
        0 (Negative), 1 (Neutral) or 2 (Positive).

    Raises:
        ValueError: If ``score`` is NaN. Every comparison with NaN is False,
            so without this check a missing score would silently fall through
            to the Positive class.
    """
    if score != score:  # NaN is the only value that is not equal to itself
        raise ValueError("sentiment score is NaN; cannot assign a class")
    if score <= negative_max:
        return 0  # Negative
    if score <= neutral_max:
        return 1  # Neutral
    return 2  # Positive

from sklearn.model_selection import train_test_split

# Integer class label derived from the continuous sentiment score.
df['label'] = df['sentiment_score'].map(convert_score_to_class)

# 80/20 train/test split with a fixed seed so the partition is repeatable.
train_df, test_df = train_test_split(df, random_state=42, test_size=0.2)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [None]:
# Inspect the held-out test split returned by train_test_split.
print(test_df)

       Unnamed: 0                                          full_text  \
49858       49859  You can have a long time career here. The comp...   
2574         2575  Okay, depends on your division and director. A...   
61739       61740  Microsoft Rules. Flexibility and good benefits...   
53709       53710  Life@Microsoft. Benefits Salary Brand Microsof...   
16072       16073  Prep. 13.75 for fulltime, good benifits, paid ...   
...           ...                                                ...   
49100       49101  My experience has been great in the past, unti...   
3002         3003  Great Place to Work. Great people Open, laidba...   
29857       29858  Data associate. One of the best and growing co...   
11863       11864  Great Place To Work. =-cares about your develo...   
35642       35643  Kickass company. An amazing company to work fo...   

       sentiment_score                                         pos_tagged  \
49858        -0.018771  You/PRP can/MD have/VB a/DT long/J

In [None]:
# STEP 5: Train Function
def train_model(model_name, fine_tune=False):
    """Tokenize the test split for ``model_name`` and optionally fine-tune the model.

    Args:
        model_name: Key into the module-level ``tokenizers`` and ``models`` dicts.
        fine_tune: When True, the model is trained on ``train_df`` with the
            Hugging Face ``Trainer``; when False, it is returned as loaded.

    Returns:
        Tuple ``(model, tokenizer, test_dataset)``, where ``test_dataset`` is the
        tokenized ``test_df`` exposing 'input_ids', 'attention_mask' and 'label'
        as torch tensors.

    Note:
        The model object stored in ``models`` is the one handed to the Trainer,
        so calling this twice with ``fine_tune=True`` keeps training the same
        weights rather than starting from the original checkpoint.
    """
    tokenizer = tokenizers[model_name]
    model = models[model_name]
    tensor_columns = ['input_ids', 'attention_mask', 'label']

    test_dataset = tokenize_dataset(test_df, tokenizer)
    test_dataset.set_format(type='torch', columns=tensor_columns)

    if fine_tune:
        # The training split is only needed here, so it is not tokenized for
        # evaluation-only calls.
        train_dataset = tokenize_dataset(train_df, tokenizer)
        train_dataset.set_format(type='torch', columns=tensor_columns)

        training_args = TrainingArguments(
            output_dir=f'./results_{model_name}',
            eval_strategy="epoch",
            save_strategy="epoch",
            learning_rate=2e-5,
            per_device_train_batch_size=8,
            per_device_eval_batch_size=8,
            num_train_epochs=3,
            weight_decay=0.01,
            logging_dir=f'./logs_{model_name}',
            logging_steps=10,
            load_best_model_at_end=True,
            report_to="none",
            # Mixed precision needs a GPU; enable it only when CUDA is present
            # so the CPU fallback chosen for `device` still works.
            fp16=torch.cuda.is_available(),
        )

        # NOTE(review): the test split is also the evaluation set used by
        # load_best_model_at_end, so checkpoint selection sees the test data.
        # Consider carving a validation split out of train_df.
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
            eval_dataset=test_dataset,
            tokenizer=tokenizer,
        )

        trainer.train()

    return model, tokenizer, test_dataset


In [None]:
# STEP 6: Evaluation Function
def evaluate(model, tokenizer, dataset):
    """Predict a class for every example and print classification metrics.

    Args:
        model: Model called as ``model(input_ids, attention_mask=...)`` whose
            output exposes ``.logits``.
        tokenizer: Not used by this function; kept so existing calls still work.
        dataset: Iterable of examples providing 'input_ids' and
            'attention_mask' tensors plus a 'label'.

    Returns:
        Tuple ``(labels, preds)`` of equal-length lists of plain ints, in
        dataset order.
    """
    class_ids = [0, 1, 2]
    class_names = ["negative", "neutral", "positive"]

    model.eval()
    preds, labels = [], []
    for item in dataset:
        # Examples are scored one at a time, so add the batch dimension by hand.
        input_ids = item["input_ids"].unsqueeze(0).to(device)
        attention_mask = item["attention_mask"].unsqueeze(0).to(device)
        with torch.no_grad():
            output = model(input_ids, attention_mask=attention_mask)
        preds.append(torch.argmax(output.logits, dim=1).cpu().item())
        # int() accepts both a plain int and a 0-dim tensor, so the returned
        # labels are ordinary Python ints either way.
        labels.append(int(item["label"]))

    # Passing `labels` keeps the report aligned with the three class names even
    # when a class never occurs; zero_division=0 reports 0.0 for such classes
    # without emitting a warning for each one.
    print(classification_report(
        labels,
        preds,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,
    ))
    print("Accuracy:", accuracy_score(labels, preds))
    print("F1 Score:", f1_score(labels, preds, average='weighted', zero_division=0))
    return labels, preds


In [None]:
# Print the test split again as a sanity check before the train/evaluate loop.
print(test_df)

       Unnamed: 0                                          full_text  \
49858       49859  You can have a long time career here. The comp...   
2574         2575  Okay, depends on your division and director. A...   
61739       61740  Microsoft Rules. Flexibility and good benefits...   
53709       53710  Life@Microsoft. Benefits Salary Brand Microsof...   
16072       16073  Prep. 13.75 for fulltime, good benifits, paid ...   
...           ...                                                ...   
49100       49101  My experience has been great in the past, unti...   
3002         3003  Great Place to Work. Great people Open, laidba...   
29857       29858  Data associate. One of the best and growing co...   
11863       11864  Great Place To Work. =-cares about your develo...   
35642       35643  Kickass company. An amazing company to work fo...   

       sentiment_score                                         pos_tagged  \
49858        -0.018771  You/PRP can/MD have/VB a/DT long/J

In [None]:

# STEP 8: Error Analysis Example
def error_analysis(df, labels, preds, only_errors=False):
    """Build a table pairing each evaluated row with its true and predicted label.

    Args:
        df: DataFrame with 'full_text' and 'sentiment_score' columns whose row
            order matches ``labels`` / ``preds`` positionally.
        labels: True class ids, one per row.
        preds: Predicted class ids, one per row.
        only_errors: When True, keep only rows where prediction and label
            differ. Defaults to False, which keeps every row.

    Returns:
        DataFrame with columns 'Text', 'Predicted', 'True Label' and
        'Sentiment Score'. The columns are present even when there are no rows.
    """
    # Read from the frame that was passed in (not a module-level global) and
    # pull each column out once instead of building a row Series per lookup.
    texts = df['full_text'].tolist()
    scores = df['sentiment_score'].tolist()

    records = []
    for i, (l, p) in enumerate(zip(labels, preds)):
        if only_errors and int(l) == int(p):
            continue
        records.append({
            'Text': texts[i],
            'Predicted': p,
            'True Label': l,
            'Sentiment Score': scores[i],
        })

    return pd.DataFrame(
        records,
        columns=['Text', 'Predicted', 'True Label', 'Sentiment Score'],
    )

In [None]:
# STEP 7: Train & Compare Models
# (labels, preds) for every run, keyed by '<model>_pretrained' / '<model>_finetuned'.
all_results = {}
for model_name in model_names:
    # Each model is scored twice: first as loaded, then after fine-tuning.
    for fine_tune in (False, True):
        if fine_tune:
            stage = "finetuned"
            print(f"\n🎯 Fine-tuning: {model_name}")
        else:
            stage = "pretrained"
            print(f"\n🚀 Evaluating: {model_name} (pretrained only)")

        model, tokenizer, test_data = train_model(model_name, fine_tune=fine_tune)
        labels, preds = evaluate(model, tokenizer, test_data)
        all_results[f'{model_name}_{stage}'] = (labels, preds)

        # Write the per-row comparison table for this run to CSV.
        error_df = error_analysis(test_df.reset_index(), labels, preds)
        error_df['True Label'] = error_df['True Label'].astype(int)
        error_df.to_csv(f'error_analysis_{model_name}_{stage}.csv', index=False)



🚀 Evaluating: roberta-base (pretrained only)


Map:   0%|          | 0/54023 [00:00<?, ? examples/s]

Map:   0%|          | 0/13506 [00:00<?, ? examples/s]

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

    negative       0.01      0.03      0.01        69
     neutral       0.00      0.00      0.00      8320
    positive       0.38      0.98      0.55      5117

    accuracy                           0.37     13506
   macro avg       0.13      0.34      0.19     13506
weighted avg       0.14      0.37      0.21     13506

Accuracy: 0.37161261661483785
F1 Score: 0.20712972868422588

🎯 Fine-tuning: roberta-base


Map:   0%|          | 0/54023 [00:00<?, ? examples/s]

Map:   0%|          | 0/13506 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.2098,0.328189
2,0.3405,0.249039
3,0.2171,0.269043


              precision    recall  f1-score   support

    negative       0.55      0.91      0.69        69
     neutral       0.96      0.94      0.95      8320
    positive       0.92      0.93      0.93      5117

    accuracy                           0.94     13506
   macro avg       0.81      0.93      0.85     13506
weighted avg       0.94      0.94      0.94     13506

Accuracy: 0.9387679549829705
F1 Score: 0.9392762252686903

🚀 Evaluating: twitter-roberta (pretrained only)


Map:   0%|          | 0/54023 [00:00<?, ? examples/s]

Map:   0%|          | 0/13506 [00:00<?, ? examples/s]

              precision    recall  f1-score   support

    negative       0.03      0.77      0.05        69
     neutral       0.88      0.19      0.31      8320
    positive       0.49      0.93      0.64      5117

    accuracy                           0.47     13506
   macro avg       0.46      0.63      0.33     13506
weighted avg       0.72      0.47      0.43     13506

Accuracy: 0.4704575744113727
F1 Score: 0.4332478840051245

🎯 Fine-tuning: twitter-roberta


Map:   0%|          | 0/54023 [00:00<?, ? examples/s]

Map:   0%|          | 0/13506 [00:00<?, ? examples/s]

  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.2026,0.273892
2,0.3883,0.232514
3,0.0026,0.256418


              precision    recall  f1-score   support

    negative       0.63      0.86      0.73        69
     neutral       0.95      0.96      0.95      8320
    positive       0.94      0.92      0.93      5117

    accuracy                           0.94     13506
   macro avg       0.84      0.91      0.87     13506
weighted avg       0.94      0.94      0.94     13506

Accuracy: 0.9423219309936325
F1 Score: 0.9424188877847604


In [None]:
# manual testing
#checkpoint_path = "cardiffnlp/twitter-roberta-base-sentiment"
checkpoint_path = "./drive/MyDrive/results_twitter-roberta/checkpoint-20259"

# Tokenizer and model are both restored from the same checkpoint directory;
# the model is then moved onto `device`.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_path)
model = model.to(device)

def predict_sentiment(text, model, tokenizer):
    """Classify one text and print the class probabilities, highest first.

    Args:
        text: The text to classify (a single string).
        model: Classification model called as ``model(**inputs)`` whose output
            exposes ``.logits``; class ids 0/1/2 are reported as
            negative/neutral/positive.
        tokenizer: Tokenizer used to encode ``text`` for the model.

    Returns:
        Tuple ``(sentiment, sentiment_score)``: the predicted label name and a
        numpy array of softmax probabilities in class-id order.
    """
    sentiment_labels = ["negative", "neutral", "positive"]

    # Send the inputs to wherever the model's weights live instead of relying
    # on a module-level device that might not match.
    model_device = getattr(model, "device", None)
    if model_device is None:
        model_device = next(model.parameters()).device

    # Inference mode: make sure dropout is off even if the model was just trained.
    model.eval()

    # Tokenize the input text
    inputs = tokenizer(
        text, padding=True, truncation=True, return_tensors="pt", max_length=512
    ).to(model_device)

    # Get model predictions
    with torch.no_grad():
        outputs = model(**inputs)

    # Predicted class id -> label name
    logits = outputs.logits
    predicted_class = torch.argmax(logits, dim=1).item()
    sentiment = sentiment_labels[predicted_class]

    # Softmax turns the raw logits into per-class probabilities; squeeze(0)
    # removes only the batch dimension.
    sentiment_score = torch.softmax(logits, dim=1).squeeze(0).cpu().numpy()

    # Print the classes from most to least probable.
    ranking = np.argsort(sentiment_score)[::-1]
    for i, class_idx in enumerate(ranking):
        l = sentiment_labels[class_idx]
        s = sentiment_score[class_idx]
        print(f"{i+1}) {l} {np.round(float(s), 4)}")

    return sentiment, sentiment_score

# Example of how to use the function
custom_text = "Leaving whilst its dark is fun. #not #sucks"
sentiment, sentiment_score = predict_sentiment(
    text=custom_text,
    model=model,
    tokenizer=tokenizer,
)

# Report the winning label, then the full probability vector.
print(f"Sentiment: {sentiment}")
print(f"Sentiment Scores: {sentiment_score}")

1) neutral 0.9999
2) positive 0.0001
3) negative 0.0
Sentiment: neutral
Sentiment Scores: [3.538896e-05 9.999013e-01 6.332858e-05]
