# **1. Installing Packages**

In [1]:
# Environment setup: install the third-party packages via pip shell escapes.
# NOTE(review): only accelerate is version-pinned here; every other package
# resolves to whatever release is current at install time — consider pinning.
!pip install transformers
!pip install accelerate==0.20.3
!pip install torch
!pip install wget
!pip install gradio
!pip install typing_extensions

Collecting accelerate==0.20.3
  Downloading accelerate-0.20.3-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.6/227.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: accelerate
Successfully installed accelerate-0.20.3
Collecting wget
  Downloading wget-3.2.zip (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9655 sha256=3fea6cd40fc82aa234c8f79b15a3dab3116fd4414efa2ddf82228da7e38aeb9b
  Stored in directory: /root/.cache/pip/wheels/8b/f1/7f/5c94f0a7a505ca1c81cd1d9208ae2064675d97582078e6c769
Successfully built wget
Installing collected packages: wget
Successfully installed wget-3.2
Collecting gradio
  Downloading gradio-4.12.0-py3-none-any.whl (16.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.6/16.6 MB[0m [31

# **2. Importing Libraries**

In [2]:
import pandas as pd
import numpy as np
import gradio as gr
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, AdamW, get_linear_schedule_with_warmup,AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
import warnings
warnings.filterwarnings("ignore")

# **3. Importing Dataset**

In [3]:
# Read the tweet-emotion CSV and discard its tweet_id column in one step.
text = pd.read_csv("/content/tweet_emotions.csv").drop(columns=["tweet_id"])

# **4. Sentiment Mapping**

In [4]:
# Integer class id assigned to each emotion label. The ids are the training
# targets, and the mapping is inverted later to decode predictions.
sentiment_map = {
    'empty': 0,
    'sadness': 1,
    'enthusiasm': 2,
    'neutral': 3,
    'happiness': 4,
    'love': 5,
    'worry': 6,
    'surprise': 7,
    'fun': 8,
    'relief': 9,
    'hate': 10,
    'anger': 11,
    'boredom': 12,
}

# Encode only while the column still holds raw labels: re-running this cell on
# an already-encoded integer column would otherwise map every id to NaN.
if not pd.api.types.is_integer_dtype(text['sentiment']):
    encoded_sentiment = text['sentiment'].map(sentiment_map)
    unmapped_labels = text.loc[encoded_sentiment.isna(), 'sentiment'].unique()
    if len(unmapped_labels) > 0:
        # Fail here instead of letting NaN labels reach the training tensors.
        raise ValueError(
            f"Sentiment labels missing from sentiment_map: {list(unmapped_labels)}"
        )
    text['sentiment'] = encoded_sentiment.astype('int64')

# **5. Training and Testing Data**

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
tweet_contents = text['content'].values
sentiment_ids = text['sentiment'].values
train_texts, test_texts, train_labels, test_labels = train_test_split(
    tweet_contents, sentiment_ids, test_size=0.2, random_state=42
)

# **6. Tokenizing Data**

In [6]:
# Load the pretrained tokenizer for the checkpoint named below.
bert_checkpoint = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_checkpoint)

def tokenize_texts(texts, max_len=64):
    """Encode texts into fixed-width input tensors for the model.

    Each text is encoded with the module-level ``tokenizer``, wrapped in the
    special tokens, and truncated or padded by the tokenizer itself to exactly
    ``max_len`` token ids. Letting the tokenizer truncate keeps the closing
    special token on long texts and avoids padding every row to the
    tokenizer's default maximum before cutting it down.

    Args:
        texts: Iterable of strings to encode.
        max_len: Number of token ids in every encoded row.

    Returns:
        A tuple ``(input_ids, attention_masks)``; both tensors have shape
        ``(number_of_texts, max_len)``.
    """
    input_ids = []
    attention_masks = []

    for single_text in texts:
        encoded_dict = tokenizer.encode_plus(
            single_text,
            add_special_tokens=True,
            padding='max_length',
            truncation=True,
            max_length=max_len,
            return_attention_mask=True,
            return_tensors='pt',
        )
        input_ids.append(encoded_dict['input_ids'])
        attention_masks.append(encoded_dict['attention_mask'])

    if not input_ids:
        # torch.cat rejects an empty list; return correctly shaped empty tensors.
        empty_ids = torch.empty((0, max_len), dtype=torch.long)
        return empty_ids, empty_ids.clone()

    return torch.cat(input_ids, dim=0), torch.cat(attention_masks, dim=0)

# Encode both splits into 64-column id/mask tensors.
train_input_ids, train_attention_masks = tokenize_texts(train_texts, max_len=64)
test_input_ids, test_attention_masks = tokenize_texts(test_texts, max_len=64)

# Convert the label arrays to tensors so they can sit alongside the encodings.
train_label_tensor = torch.tensor(train_labels)
test_label_tensor = torch.tensor(test_labels)

# Each dataset item is the triple (input ids, attention mask, label).
train_dataset = TensorDataset(train_input_ids, train_attention_masks, train_label_tensor)
test_dataset = TensorDataset(test_input_ids, test_attention_masks, test_label_tensor)

tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

# **7. Fine-Tuning the Model**

In [7]:
# Mini-batch size shared by the training and evaluation loaders.
batch_size = 16

# Training batches are drawn in random order; test batches keep dataset order.
train_sampler = RandomSampler(train_dataset)
test_sampler = SequentialSampler(test_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)
test_dataloader = DataLoader(test_dataset, sampler=test_sampler, batch_size=batch_size)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Pretrained checkpoint with a classification head sized to the label set.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(sentiment_map),
)
model.to(device)

# The scheduler is stepped once per batch, so its horizon is
# (batches per epoch) * (number of epochs).
epochs = 5
optimizer = AdamW(model.parameters(), lr=3e-5)
total_steps = len(train_dataloader) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps,
)

# Fine-tune for `epochs` passes over the training loader, printing the mean
# batch loss at the end of every pass.
for epoch in range(epochs):
    model.train()
    total_loss = 0

    for batch in train_dataloader:
        # A batch is (input ids, attention mask, labels); move all three to
        # the device the model lives on.
        batch_input_ids, batch_attention_mask, batch_labels = (
            tensor.to(device) for tensor in batch[:3]
        )

        optimizer.zero_grad()
        loss = model(
            input_ids=batch_input_ids,
            attention_mask=batch_attention_mask,
            labels=batch_labels,
        ).loss
        total_loss += loss.item()
        loss.backward()

        # Clip the gradient norm to 1.0 before applying the update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        optimizer.step()
        scheduler.step()

    avg_train_loss = total_loss / len(train_dataloader)
    print(f'Epoch {epoch + 1}/{epochs} - Average Training Loss: {avg_train_loss}')

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/5 - Average Training Loss: 1.8492357345819472
Epoch 2/5 - Average Training Loss: 1.6179344217479228
Epoch 3/5 - Average Training Loss: 1.3202904869914054
Epoch 4/5 - Average Training Loss: 1.0004409763365985
Epoch 5/5 - Average Training Loss: 0.7674547095745802


# **8. Model Evaluation**

In [8]:
# Evaluate on the held-out split.
# Accuracy is computed as (correct predictions) / (examples evaluated).
model.eval()
correct_predictions = 0
evaluated_examples = 0
for batch in test_dataloader:
    batch_input_ids = batch[0].to(device)
    batch_attention_mask = batch[1].to(device)
    batch_labels = batch[2].to(device)

    # No gradients are needed for scoring.
    with torch.no_grad():
        outputs = model(batch_input_ids, attention_mask=batch_attention_mask)

    preds = torch.argmax(outputs.logits, dim=1)

    # Count per example instead of averaging per-batch accuracies: a plain
    # mean of batch means over-weights a smaller final batch.
    correct_predictions += (preds == batch_labels).sum().item()
    evaluated_examples += batch_labels.size(0)

avg_accuracy = correct_predictions / evaluated_examples
print(f'Accuracy on test set: {avg_accuracy}')

Accuracy on test set: 0.346875


# **9. Prediction Function**

In [9]:
def predict_sentiment(text):
    """Predict the sentiment label for a single piece of text.

    The text is encoded to 64 token ids (truncated or padded), passed through
    the module-level ``model`` without gradient tracking, and the class id
    with the highest logit is translated back to its label.

    Args:
        text: The string to classify.

    Returns:
        The key of ``sentiment_map`` whose id has the highest logit.
    """
    # Do not rely on an earlier cell having left the model in eval mode:
    # calling this right after training would otherwise run in train mode.
    model.eval()

    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=64
    )

    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    predicted_id = torch.argmax(outputs.logits, dim=1).item()

    # Invert label -> id so the predicted id can be reported as a label.
    reverse_sentiment_map = {v: k for k, v in sentiment_map.items()}
    return reverse_sentiment_map[predicted_id]

In [10]:
# Spot-check the predictor on one hand-written sentence.
sample_text = "I'm in state of delusion today!"
print(predict_sentiment(sample_text))

worry


# **10. Gradio API**

In [12]:
# NOTE(review): this redefines predict_sentiment, which section 9 already
# defines; this later definition is the one handed to the Gradio interface.
def predict_sentiment(text):
    """Classify one piece of text and return its sentiment label.

    The text is encoded to 64 token ids (truncated or padded), run through
    the module-level ``model`` with gradients disabled, and the highest-logit
    class id is mapped back to its label name.

    Args:
        text: The string to classify.

    Returns:
        The key of ``sentiment_map`` whose id has the highest logit.
    """
    # Put the model in eval mode on every call so the served predictions do
    # not depend on which cells ran before this one.
    model.eval()

    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        return_tensors='pt',
        padding='max_length',
        truncation=True,
        max_length=64
    )

    input_ids = inputs['input_ids'].to(device)
    attention_mask = inputs['attention_mask'].to(device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)

    predicted_id = torch.argmax(outputs.logits, dim=1).item()

    # Invert label -> id so the predicted id can be reported as a label.
    reverse_sentiment_map = {v: k for k, v in sentiment_map.items()}
    return reverse_sentiment_map[predicted_id]

# Build the UI pieces first: a two-line text box in, a label widget out.
text_input = gr.Textbox(lines=2, label="Enter your text here")
label_output = gr.Label(label="Predicted Sentiment")

# Wire the predictor between the two components.
iface = gr.Interface(
    fn=predict_sentiment,
    inputs=text_input,
    outputs=label_output,
    title="Tweet Sentiment Analyzer",
)

# share=True requests a public URL in addition to serving the app.
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://f42e2aef77e192fe47.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


