In [68]:
from datasets import load_dataset
import torch
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

# Read the two CSV files into one dataset object keyed by split name
# ("train" / "test"); later cells index it with those keys.
dataset = load_dataset(
    "csv",
    data_files={
        "train": "hospital-review-ai-train.csv",
        "test": "hospital-review-ai-test.csv",
    },
)


In [70]:

# Tokenizer for the same checkpoint the classifier is initialised from
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")


def _tokenize_batch(examples):
    """Tokenize the "text" column of one mapped batch, with truncation and padding."""
    return tokenizer(examples["text"], truncation=True, padding=True)


# Apply the tokenizer batch-wise to every split of the dataset
encoded_dataset = dataset.map(_tokenize_batch, batched=True)

# BERT with a sequence-classification head; assuming 6 sentiment classes
model = AutoModelForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels=6,
)


Map: 100%|██████████| 61/61 [00:00<00:00, 1226.60 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [72]:

# Step 4: Define training arguments (the Trainer object is instantiated right after)
training_args = TrainingArguments(
    # Where results and logs are written
    output_dir="./results",
    logging_dir="./logs",
    # Training schedule and batch sizes
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
)

# Instantiate the Trainer on the tokenized train/test splits.
# An explicit padding collator is passed so that every training/eval batch is
# padded to a common length at collation time. The tokenization step pads only
# within each `map` batch, so rows coming from different map batches may have
# different lengths, which the default collator cannot stack into one tensor.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=encoded_dataset["train"],
    eval_dataset=encoded_dataset["test"],
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
)


In [73]:
# Run fine-tuning with the Trainer configured above. The call's return value
# (a TrainOutput carrying the global step, training loss and runtime metrics)
# is displayed as this cell's output.
trainer.train()


100%|██████████| 39/39 [14:39<00:00, 22.54s/it]

{'train_runtime': 879.0026, 'train_samples_per_second': 0.334, 'train_steps_per_second': 0.044, 'train_loss': 1.438057630490034, 'epoch': 3.0}





TrainOutput(global_step=39, training_loss=1.438057630490034, metrics={'train_runtime': 879.0026, 'train_samples_per_second': 0.334, 'train_steps_per_second': 0.044, 'train_loss': 1.438057630490034, 'epoch': 3.0})

In [74]:
# Write the trainer's model to ./trained_model.
# NOTE(review): the inference cells further down load from ./sentiment_model,
# not from this directory — confirm whether both copies are needed.
trainer.save_model("./trained_model")

In [75]:
import torch

In [81]:

# Save the trained model to ./sentiment_model, the directory the later
# prediction and evaluation cells load from.
# NOTE(review): the tokenizer is not saved here; those cells load it from
# "bert-base-uncased" instead.
model.save_pretrained("./sentiment_model")


In [89]:

# Reload the fine-tuned classifier from disk and switch it to eval mode.
# The class is named explicitly (instead of calling `from_pretrained` through
# an existing `model` instance) so this cell does not depend on the training
# cells having run in the same kernel session.
model = AutoModelForSequenceClassification.from_pretrained("./sentiment_model")
model.eval()
# The tokenizer was not saved with the model, so load the base checkpoint's.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

def predict_sentiment(review_text, max_length=512):
    """Return the index of the highest-scoring class for a review.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        review_text: Text to classify.
        max_length: Maximum number of tokens kept after truncation. Adjust it
            according to the model's maximum input length.

    Returns:
        int: argmax over the logits of the first (only) input sequence.
    """
    inputs = tokenizer(
        review_text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,  # honour the caller's limit instead of a hard-coded 512
    )
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    predicted_class = torch.argmax(outputs.logits[0]).item()
    return predicted_class

# Smoke-test the helper on one hand-written review
review_text = "The hospital staff was very friendly and helpful!"
predicted_sentiment = predict_sentiment(review_text)
print(f"Predicted Sentiment: {predicted_sentiment}")



Predicted Sentiment: 5


In [91]:
import torch
import torch.nn.functional as F

def predict_sentiment(review_text, tokenizer, model):
    """Return the probability of every class for a review.

    Args:
        review_text: Text to classify.
        tokenizer: Callable that turns the text into keyword inputs for
            ``model`` (invoked with ``return_tensors="pt"``, padding and
            truncation enabled).
        model: Callable whose result exposes a ``logits`` tensor.

    Returns:
        list[tuple[int, float]]: ``(class_idx, probability)`` pairs in class
        order; softmax is taken over dimension 1 of the logits.
    """
    # Tokenize the input text
    inputs = tokenizer(review_text, return_tensors="pt", padding=True, truncation=True)

    # Forward pass; gradients are not needed for prediction
    with torch.no_grad():
        outputs = model(**inputs)

    # Apply softmax to convert logits to probabilities
    probs = F.softmax(outputs.logits, dim=1)

    # Drop only the leading batch axis, so a model with a single output class
    # still yields a list rather than a bare float
    probs = probs.squeeze(0).tolist()

    # Pair every class index with its probability
    return list(enumerate(probs))

# Score one sample review and collect the probability of each class
review_text = "The hospital staff was very friendly and helpful!!"
sentiment_probabilities = predict_sentiment(review_text, tokenizer, model)

# Print one line per class
for label_id, label_prob in sentiment_probabilities:
    print(f"Sentiment Class {label_id}: Probability {label_prob}")


Sentiment Class 0: Probability 0.0919962078332901
Sentiment Class 1: Probability 0.06554585695266724
Sentiment Class 2: Probability 0.09690117090940475
Sentiment Class 3: Probability 0.10259004682302475
Sentiment Class 4: Probability 0.2855378985404968
Sentiment Class 5: Probability 0.35742881894111633


In [3]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load test dataset
test_dataset = load_dataset("csv", data_files={"test": "hospital-review-ai-test.csv"})

# Tokenize test dataset
# NOTE(review): the loop below re-tokenizes each example's raw text, so the
# token columns produced here are not consumed by it — confirm whether this
# mapped copy is still needed.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
encoded_test_dataset = test_dataset.map(lambda examples: tokenizer(examples["text"], truncation=True, padding=True), batched=True)

# Load trained model
model = AutoModelForSequenceClassification.from_pretrained("./sentiment_model")
model.eval()

# Run inference on test dataset, one example at a time.
# Gradients are disabled: nothing is back-propagated here, so tracking them
# would only cost memory and time.
predictions = []
with torch.no_grad():
    for example in encoded_test_dataset["test"]:
        inputs = tokenizer(example["text"], return_tensors="pt", truncation=True, padding=True)
        outputs = model(**inputs)
        # argmax over the class axis (not over the flattened logits)
        predicted_class = outputs.logits.argmax(dim=-1).item()
        predictions.append(predicted_class)

# Evaluate predictions: fraction of examples whose predicted class equals the label
true_labels = test_dataset["test"]["label"]
accuracy = sum(pred == label for pred, label in zip(predictions, true_labels)) / len(true_labels)
print("Accuracy:", accuracy)



Accuracy: 0.3770491803278688
