In [None]:
%%capture
pip install transformers torch

In [None]:
from transformers import pipeline

# Load the pre-trained sentiment analysis model
sentiment_analysis = pipeline("sentiment-analysis")

def analyze_sentiment(text):
    # Analyze the sentiment of the provided text
    result = sentiment_analysis(text)
    return result

In [4]:
#  Example usage
if __name__ == "__main__":
    while True:
        text = input("Enter text to analyze sentiment (or type 'exit' to quit): ")
        if text.lower() == 'exit':
            break
        sentiment = analyze_sentiment(text)
        print(f"Sentiment: {sentiment}")

Enter text to analyze sentiment (or type 'exit' to quit): How are you
Sentiment: [{'label': 'NEGATIVE', 'score': 0.6735111474990845}]
Enter text to analyze sentiment (or type 'exit' to quit): exit


#Train

In [29]:
%%capture
!pip install transformers torch pandas scikit-learn datasets tensorboard

In [37]:
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from transformers import DataCollatorWithPadding
from datasets import Dataset
import os

# Disable WandB
os.environ["WANDB_DISABLED"] = "true"

# Load the dataset
df = pd.read_csv('sentiment_data.csv')

# Convert labels to integers
label_mapping = {'positive': 1, 'negative': 0}
df['label'] = df['label'].map(label_mapping)

# Split the dataset into training and validation sets
train_df, val_df = train_test_split(df, test_size=0.2, random_state=42)

# Convert to Hugging Face Dataset
train_dataset = Dataset.from_pandas(train_df)
val_dataset = Dataset.from_pandas(val_df)

# Load the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Tokenize the data
def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True)

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Set the format for PyTorch
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

# Load the model
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,  # Limit the total amount of checkpoints. Delete the older checkpoints.
    logging_dir='./logs',  # Directory for storing logs
    logging_steps=10,
    report_to="none",  # Disable reporting to WandB
)

# Define the data collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Define the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    tokenizer=tokenizer,
)

# Train the model
trainer.train()

# Save the model
model.save_pretrained('./sentiment_model')
tokenizer.save_pretrained('./sentiment_model')


Map:   0%|          | 0/536 [00:00<?, ? examples/s]

Map:   0%|          | 0/134 [00:00<?, ? examples/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


Epoch,Training Loss,Validation Loss
1,0.0997,0.054546
2,0.0216,0.007427
3,0.0062,0.004596


('./sentiment_model/tokenizer_config.json',
 './sentiment_model/special_tokens_map.json',
 './sentiment_model/vocab.txt',
 './sentiment_model/added_tokens.json')

In [41]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# Load the tokenizer and model
tokenizer = BertTokenizer.from_pretrained('./sentiment_model')
model = BertForSequenceClassification.from_pretrained('./sentiment_model')

# Function to predict sentiment
def predict_sentiment(text):
    # Tokenize the input text
    inputs = tokenizer(text, return_tensors='pt', padding=True, truncation=True, max_length=512)

    # Make predictions
    with torch.no_grad():
        outputs = model(**inputs)

    # Get the predicted label
    logits = outputs.logits
    predicted_class_id = logits.argmax().item()

    # Map the predicted class ID to the label
    label_mapping = {0: 'negative', 1: 'positive'}
    predicted_label = label_mapping[predicted_class_id]

    return predicted_label

# Test the model with some example sentences
test_sentences = [
    "You are a shity player.",
    "This is the worst experience I've ever had.",
    "The service was okay, but the food was delicious.",
    "I am not happy with the delivery time.",
    "Bro you are the best.",
    "I love this product! It's amazing."
]

for sentence in test_sentences:
    print(f"Sentence: {sentence}")
    print(f"Predicted Sentiment: {predict_sentiment(sentence)}\n")


Sentence: You are a shity player.
Predicted Sentiment: negative

Sentence: This is the worst experience I've ever had.
Predicted Sentiment: negative

Sentence: The service was okay, but the food was delicious.
Predicted Sentiment: positive

Sentence: I am not happy with the delivery time.
Predicted Sentiment: positive

Sentence: Bro you are the best.
Predicted Sentiment: negative

Sentence: I love this product! It's amazing.
Predicted Sentiment: positive

