## Install and import the necessary libraries, including NLTK’s Twitter Samples corpus

In [None]:
import nltk

# Fetch the "twitter_samples" corpus so it can be read via nltk.corpus.
nltk.download("twitter_samples")

## load the data

In [None]:
from nltk.corpus import twitter_samples

# Read the raw tweet strings of both sample files, positive first.
# NOTE: both names are rebound further down in this file by the synthetic
# eco-tweet generation step.
positive_tweets, negative_tweets = (
    twitter_samples.strings(file_name)
    for file_name in ("positive_tweets.json", "negative_tweets.json")
)

In [None]:
from transformers import pipeline
from transformers import BertTokenizerFast, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
import torch

## Load BERT tokenizer and model (for sentiment classification)

In [None]:
# Off-the-shelf baseline classifier used before any fine-tuning.
# NOTE(review): this checkpoint presumably predicts star ratings rather than
# POSITIVE/NEGATIVE labels - confirm against the model card.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"
classifier = pipeline(task="sentiment-analysis", model=model_name)

## Test Custom Eco-Tweets

In [None]:
# Hand-written eco-related tweets used as a qualitative smoke test.
eco_tweets = [
    "Compostable? It still looks like regular plastic to me.",
    "Kudos for removing plastic completely from their packaging.",
    "Their new eco line is just rebranded old stuff.",
    "Switched to these reusable wipes – never going back!",
    "Loving this reusable water bottle – keeps my drink cold all day!",
    "They said it’s recyclable, but there's no recycling info.",
    "Such a clever design for a compostable phone case!",
    "Why does this 'eco' brand still use plastic wrap?",
    "Great packaging – all cardboard and no plastic.",
    "These so-called green products are way overpriced.",
]

# Classify one tweet at a time and show the raw pipeline output next to it.
for text in eco_tweets:
    prediction = classifier(text)
    print(f"{text},       {prediction}", "\n")


## Generate synthetic positive and negative eco-tweets

In [None]:
import random

# Fixed seed so the synthetic dataset is identical on every run.
SYNTHETIC_SEED = 42
# Number of synthetic tweets generated per sentiment class.
TWEETS_PER_CLASS = 500

positive_templates = [
    "I love this {product}, it's so {adjective} and eco-friendly.",
    "This {product} is incredibly {adjective} and sustainable!",
    "{product} is the best sustainable option I've used.",
    "Using {product} helped reduce my plastic use.",
    "So glad I switched to {product} — totally worth it!",
    "{product} makes going green so much easier.",
    "Can’t believe how {adjective} this {product} is. Plus, it's eco-friendly!",
    "If you care about the environment, try {product}. It’s a game changer!",
    "My new {product} is amazing — no more guilt about plastic!",
    "Big fan of this {adjective} {product}. Sustainable and stylish!",
    "{product} works perfectly and helps the planet. Win-win!",
    "Just bought {product} and I’m impressed by its {adjective} performance.",
    "The {product} is durable, reusable, and really {adjective}.",
    "I feel good using {product} — no waste, no worries.",
    "Who knew sustainability could look this good? Love my new {product}!",
    "Hands down, the best eco purchase I’ve made: {product}.",
    "This {product} is a small change with a big environmental impact.",
    "Couldn’t be happier with my {adjective} {product}.",
    "If everyone used {product}, the planet would be better off.",
    "I highly recommend {product} to anyone trying to go green.",
]

negative_templates = [
    "This {product} broke after one use.",
    "I don't trust {product}, it's just greenwashing.",
    "{product} is not as {adjective} as they claim.",
    "The {product} felt cheap and poorly made.",
    "I regret buying this {product} — not eco at all.",
    "They advertise {product} as sustainable, but it's full of plastic.",
    "{product} didn’t work and felt like a waste of money.",
    "Disappointed in the quality of this {product}.",
    "This {adjective} {product} started falling apart within days.",
    "The {product} looks eco, but it’s just for show.",
    "Honestly, {product} felt like a marketing scam.",
    "This so-called green {product} is worse than regular ones.",
    "I expected more from a {adjective} {product}.",
    "{product} is way too expensive for what you get.",
    "Not sure how this {product} is considered eco-friendly.",
    "The {product} came in plastic packaging — ironic, right?",
    "I returned the {product} because it didn’t match its claims.",
    "It’s just branding — {product} isn’t really sustainable.",
    "There are better alternatives than this {adjective} {product}.",
    "Won’t be recommending {product} to anyone who cares about the planet.",
]

# Adjectives are grouped by polarity so that a positive template is never
# filled with a negative word (e.g. "it's so flimsy and eco-friendly"), which
# would put contradictory samples into the positive class.
positive_adjectives = [
    "great", "amazing", "useful", "durable", "affordable",
    "reliable", "versatile", "innovative", "lightweight", "simple",
]
neutral_adjectives = ["basic", "average"]
negative_adjectives = [
    "weak", "expensive", "ineffective", "overrated",
    "fragile", "unreliable", "flimsy", "disappointing",
]
# Combined list, same contents and order as before the split by polarity.
adjectives = positive_adjectives + neutral_adjectives + negative_adjectives

products = [
    "bamboo toothbrush",
    "reusable shopping bag",
    "compostable phone case",
    "eco-friendly detergent",
    "solar-powered charger",
    "recycled paper notebook",
    "biodegradable cutlery",
    "organic cotton tote",
    "plant-based food wrap",
    "stainless steel straw",
    "natural fiber rug",
    "eco laundry ball",
    "reusable food container",
    "compostable coffee cup",
    "beeswax food wrap",
    "bamboo utensil set",
    "organic skincare kit",
    "recycled plastic sandals",
    "sustainable yoga mat",
    "eco-friendly cleaning spray",
]


def _generate_tweets(templates, adjective_pool, count, rng):
    """Return ``count`` tweets built from randomly filled templates.

    Each tweet picks one template, one adjective from ``adjective_pool`` and
    one entry of ``products`` using ``rng``. Templates without an
    ``{adjective}`` placeholder simply ignore the adjective.
    """
    return [
        rng.choice(templates).format(
            adjective=rng.choice(adjective_pool),
            product=rng.choice(products),
        )
        for _ in range(count)
    ]


_rng = random.Random(SYNTHETIC_SEED)

# Positive templates only draw from positive adjectives. The negative
# templates keep the full list: their sentence structure carries the negative
# sentiment, and several of them ("not as {adjective} as they claim") need a
# positive word to read naturally.
positive_tweets = _generate_tweets(positive_templates, positive_adjectives, TWEETS_PER_CLASS, _rng)
negative_tweets = _generate_tweets(negative_templates, adjectives, TWEETS_PER_CLASS, _rng)



## Fine-tune BERT: retrain the model on the new dataset

In [None]:
# Class ids used for fine-tuning. They are also written into the model config
# so that inference reports readable names instead of LABEL_0 / LABEL_1.
id2label = {0: "NEGATIVE", 1: "POSITIVE"}
label2id = {name: idx for idx, name in id2label.items()}

# Combine both tweet lists; label counts follow the list lengths so the two
# can never get out of sync.
texts = negative_tweets + positive_tweets
labels = (
    [label2id["NEGATIVE"]] * len(negative_tweets)
    + [label2id["POSITIVE"]] * len(positive_tweets)
)

# Train/validation split on the raw text, stratified so both classes keep
# the same ratio in each part; the fixed seed keeps the split reproducible.
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.2, stratify=labels, random_state=42
)

# Tokenization. The full encoding (input ids AND attention mask) is kept so
# the model can ignore the padding tokens.
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
train_encodings = tokenizer(train_texts, truncation=True, padding=True)
val_encodings = tokenizer(val_texts, truncation=True, padding=True)


class EcoTweetDataset(torch.utils.data.Dataset):
    """Map-style dataset that serves tokenized tweets with their labels.

    Args:
        input_ids: Per-example token id sequences (lists or tensors).
        labels: Per-example integer class ids.
        attention_mask: Optional per-example attention masks aligned with
            ``input_ids``. When omitted, items carry no ``attention_mask``
            key.
    """

    def __init__(self, input_ids, labels, attention_mask=None):
        self.input_ids = input_ids
        self.labels = labels
        self.attention_mask = attention_mask

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {
            'input_ids': torch.as_tensor(self.input_ids[idx]),
            'labels': torch.tensor(self.labels[idx]),
        }
        if self.attention_mask is not None:
            item['attention_mask'] = torch.as_tensor(self.attention_mask[idx])
        return item


train_dataset = EcoTweetDataset(
    train_encodings["input_ids"],
    train_labels,
    attention_mask=train_encodings["attention_mask"],
)
val_dataset = EcoTweetDataset(
    val_encodings["input_ids"],
    val_labels,
    attention_mask=val_encodings["attention_mask"],
)

# Load model with an explicit two-class head and readable label names.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# Training args
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=2,
    per_device_train_batch_size=8,
    logging_dir='./logs',
    logging_steps=10
)

# Train
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)

trainer.train()

# Report metrics on the held-out split so the validation data is actually used.
print(trainer.evaluate())





## Re-Analyze with Fine-Tuned Model

In [None]:
from transformers import BertTokenizer, BertForSequenceClassification, pipeline
from transformers.trainer_utils import get_last_checkpoint

# Load the tokenizer from the original BERT base model
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Resolve the newest checkpoint instead of hard-coding a step number: the
# step count in the directory name changes with dataset size, batch size,
# epoch count and save settings.
checkpoint_dir = get_last_checkpoint("./results")
if checkpoint_dir is None:
    raise FileNotFoundError(
        "No 'checkpoint-<step>' directory found in ./results; "
        "run the fine-tuning step first."
    )

# Load the fine-tuned model from the checkpoint
model = BertForSequenceClassification.from_pretrained(checkpoint_dir)

# Create a sentiment analysis pipeline using the fine-tuned model
custom_classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

# Run classification on the eco-related test tweets
for tweet in eco_tweets:
    print(f"{tweet} -> {custom_classifier(tweet)}")