In [None]:
pip install transformers datasets pandas torch

In [None]:
pip install scikit-learn



In [None]:
import pandas as pd

# Read the labelled statements CSV into a DataFrame.
# NOTE(review): absolute path looks environment-specific — adjust when running elsewhere.
csv_path = "/content/sentiment_intent_data (3).csv"
df = pd.read_csv(csv_path)

# Preview the first rows to sanity-check the columns
print(df.head())


                                           Statement Sentiment  \
0  I'm a bit worried about my back pain, but I ho...   Anxious   
1      I have a fever and headache since last night.   Anxious   
2         Should I be concerned about my chest pain?   Anxious   
3         My knee has been swollen for two days now.   Anxious   
4        I'm scared this could be something serious.   Anxious   

   Sentiment_Label               Intent  Intent_Label  
0                0  Seeking reassurance             1  
1                0   Reporting symptoms             2  
2                0  Seeking reassurance             1  
3                0   Reporting symptoms             2  
4                0   Expressing concern             0  


In [None]:
# Keep only rows that have a statement and both labels, then cast the two
# label columns to plain integers — done as one chained expression.
df = (
    df
    .dropna(subset=["Statement", "Sentiment_Label", "Intent_Label"])
    .astype({"Sentiment_Label": int, "Intent_Label": int})
)

# Report the (rows, columns) shape after cleaning
print("Dataset size:", df.shape)


Dataset size: (20, 5)


In [None]:
from transformers import BertTokenizer

# Load the pretrained tokenizer for the "bert-base-uncased" checkpoint.
# It is kept at module level because tokenize_data() reads it as a global.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenization function
def tokenize_data(df, max_length=128):
    """Tokenize the ``Statement`` column of ``df`` with the module-level tokenizer.

    Args:
        df: Table-like object whose ``"Statement"`` column exposes ``.tolist()``.
        max_length: Maximum length passed to the tokenizer; truncation is enabled.

    Returns:
        The tokenizer's output for the whole batch, requested as PyTorch tensors.
    """
    statements = df["Statement"].tolist()
    tokenizer_options = {
        "padding": True,
        "truncation": True,
        "max_length": max_length,
        "return_tensors": "pt",  # ask for PyTorch tensors
    }
    return tokenizer(statements, **tokenizer_options)

# Tokenize every statement of the cleaned DataFrame in a single batch;
# the result is kept in `encodings` for the full-data dataset built later.
encodings = tokenize_data(df)
print("Tokenization complete!")


Tokenization complete!


In [None]:
import torch
from torch.utils.data import Dataset

class SentimentIntentDataset(Dataset):
    """Map-style dataset pairing tokenized statements with ONE task's labels.

    Each item is a dict of the per-example encoding tensors plus a single
    ``"labels"`` entry. One key can only carry one label set, so the task
    must be chosen explicitly. Previously both label sets were written to
    ``"labels"`` in turn and the sentiment label was silently discarded.

    Args:
        encodings: Mapping of feature name -> indexable batch (e.g. tokenizer output).
        sentiment_labels: Indexable sequence of sentiment class ids, one per example.
        intent_labels: Indexable sequence of intent class ids, one per example.
        task: ``"sentiment"`` or ``"intent"`` — which label set is served under
            ``"labels"``. Defaults to ``"intent"``, which is what existing
            three-argument callers effectively received before.

    Raises:
        ValueError: If ``task`` is not one of the supported task names.
    """

    _TASKS = ("sentiment", "intent")

    def __init__(self, encodings, sentiment_labels, intent_labels, task="intent"):
        if task not in self._TASKS:
            raise ValueError(f"task must be one of {self._TASKS}, got {task!r}")
        self.encodings = encodings
        self.sentiment_labels = sentiment_labels
        self.intent_labels = intent_labels
        self.task = task

    def __len__(self):
        """Return the number of examples (length of the sentiment label sequence)."""
        return len(self.sentiment_labels)

    def __getitem__(self, idx):
        """Return the encoding features for ``idx`` plus the selected task's label."""
        item = {key: val[idx] for key, val in self.encodings.items()}
        if self.task == "sentiment":
            task_labels = self.sentiment_labels
        else:
            task_labels = self.intent_labels
        item["labels"] = task_labels[idx]
        return item

# Build one label tensor per task from the matching DataFrame column
sentiment_labels, intent_labels = (
    torch.tensor(df[label_column].tolist())
    for label_column in ("Sentiment_Label", "Intent_Label")
)

# Wrap the full-data encodings and both label tensors in a dataset
dataset = SentimentIntentDataset(encodings, sentiment_labels, intent_labels)
print("Dataset created!")


Dataset created!


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)


def _labels_tensor(frame, column):
    """Return the values of ``frame[column]`` as a PyTorch tensor."""
    return torch.tensor(frame[column].tolist())


# Tokenize each split separately
train_encodings = tokenize_data(train_df)
val_encodings = tokenize_data(val_df)

# Wrap each split's encodings and its two label tensors in a PyTorch dataset
train_dataset = SentimentIntentDataset(
    train_encodings,
    _labels_tensor(train_df, "Sentiment_Label"),
    _labels_tensor(train_df, "Intent_Label"),
)
val_dataset = SentimentIntentDataset(
    val_encodings,
    _labels_tensor(val_df, "Sentiment_Label"),
    _labels_tensor(val_df, "Intent_Label"),
)

print("Train/Validation split done!")


Train/Validation split done!


In [None]:
from transformers import BertForSequenceClassification

# Both tasks start from the same pretrained checkpoint; each call creates an
# independent model with a 3-class head (sentiment first, then intent).
sentiment_model, intent_model = (
    BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=3)
    for _ in range(2)
)

print("Models loaded!")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Models loaded!


In [None]:
from transformers import Trainer, TrainingArguments

# Define training parameters (shared by both trainers)
# NOTE(review): newer transformers releases rename `evaluation_strategy` to
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    report_to="none",  # Disable WandB logging
)


class _SingleTaskDataset(Dataset):
    """View over a base dataset that serves one task's labels under ``"labels"``.

    The base datasets expose a single ``"labels"`` entry per item, so handing
    the same dataset to both trainers would train both models on the same
    label set. This wrapper overrides ``"labels"`` with the labels of the
    task the trainer is actually responsible for.
    """

    def __init__(self, base_dataset, task_labels):
        self.base_dataset = base_dataset
        self.task_labels = task_labels

    def __len__(self):
        return len(self.base_dataset)

    def __getitem__(self, idx):
        # Copy so the base dataset's item is never mutated.
        item = dict(self.base_dataset[idx])
        item["labels"] = self.task_labels[idx]
        return item


# Trainer for sentiment classification — fed the sentiment labels
sentiment_trainer = Trainer(
    model=sentiment_model,
    args=training_args,
    train_dataset=_SingleTaskDataset(train_dataset, train_dataset.sentiment_labels),
    eval_dataset=_SingleTaskDataset(val_dataset, val_dataset.sentiment_labels),
)

# Trainer for intent classification — fed the intent labels
intent_trainer = Trainer(
    model=intent_model,
    args=training_args,
    train_dataset=_SingleTaskDataset(train_dataset, train_dataset.intent_labels),
    eval_dataset=_SingleTaskDataset(val_dataset, val_dataset.intent_labels),
)

print("Trainers created!")


Trainers created!


In [None]:
# Fine-tune the sentiment classifier first
print("🔄 Training Sentiment Model...")
sentiment_trainer.train()

# Then fine-tune the intent classifier. Because this call is the cell's last
# expression, its return value is what the notebook displays as cell output.
print("🔄 Training Intent Model...")
intent_trainer.train()


🔄 Training Sentiment Model...


Epoch,Training Loss,Validation Loss
1,No log,1.046536
2,No log,1.031626
3,No log,1.021486


🔄 Training Intent Model...


Epoch,Training Loss,Validation Loss
1,No log,1.108754
2,No log,1.097593
3,No log,1.092268


TrainOutput(global_step=3, training_loss=1.009121338526408, metrics={'train_runtime': 17.0804, 'train_samples_per_second': 2.81, 'train_steps_per_second': 0.176, 'total_flos': 370003243680.0, 'train_loss': 1.009121338526408, 'epoch': 3.0})

In [None]:
# Save fine-tuned sentiment model, and write the tokenizer files into the
# same "sentiment_model" directory so it can be reloaded from one place
sentiment_model.save_pretrained("sentiment_model")
tokenizer.save_pretrained("sentiment_model")

# Save fine-tuned intent model with its own copy of the tokenizer files.
# The final call's return value (the written file paths) is the cell output.
intent_model.save_pretrained("intent_model")
tokenizer.save_pretrained("intent_model")


('intent_model/tokenizer_config.json',
 'intent_model/special_tokens_map.json',
 'intent_model/vocab.txt',
 'intent_model/added_tokens.json')

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load saved sentiment model and the tokenizer stored alongside it.
# These rebind the module-level `sentiment_model` / `tokenizer` names.
sentiment_model = BertForSequenceClassification.from_pretrained("sentiment_model")
tokenizer = BertTokenizer.from_pretrained("sentiment_model")

# Load saved intent model (rebinds `intent_model`); no separate tokenizer is
# loaded for it — classify_text() uses the single `tokenizer` for both models.
intent_model = BertForSequenceClassification.from_pretrained("intent_model")


In [None]:
# Class-id -> name tables. A name's position must equal the integer code the
# model was trained on.
# Sentiment: the dataset preview shows "Anxious" encoded as 0; the positions of
# the other two names are assumed — TODO confirm against the CSV.
SENTIMENT_LABELS = ("Anxious", "Neutral", "Reassured")
# Intent: the dataset preview shows Expressing concern = 0,
# Seeking reassurance = 1, Reporting symptoms = 2.
INTENT_LABELS = ("Expressing concern", "Seeking reassurance", "Reporting symptoms")


def classify_text(text):
    """Predict the sentiment and intent of a single statement.

    Uses the module-level ``tokenizer``, ``sentiment_model`` and
    ``intent_model``.

    Args:
        text: The statement to classify (one example; the prediction is
            reduced with ``.item()``, which requires a single-row batch).

    Returns:
        dict with keys ``"Sentiment"`` and ``"Intent"`` holding the predicted
        class names.
    """
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)

    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        sentiment_logits = sentiment_model(**inputs).logits
        intent_logits = intent_model(**inputs).logits

    # Highest-scoring class id for each task
    sentiment_pred = torch.argmax(sentiment_logits, dim=1).item()
    intent_pred = torch.argmax(intent_logits, dim=1).item()

    return {
        "Sentiment": SENTIMENT_LABELS[sentiment_pred],
        "Intent": INTENT_LABELS[intent_pred],
    }

import json

# Example usage: classify one sample sentence and pretty-print the
# resulting label dict as indented JSON
text = "it is not a  result"
output = classify_text(text)
print(json.dumps(output, indent=2))


{
  "Sentiment": "Neutral",
  "Intent": "Reporting symptoms"
}
