<a href="https://colab.research.google.com/github/imkartik1490/Twitter-sentiment-analysis-project/blob/main/twitter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from scipy.special import softmax

def preprocess_tweet(tweet):
    """
    Normalise a tweet by masking user mentions and links.

    The tweet is split on single space characters. A token that begins with
    '@' and has at least one more character becomes '@user'; a token that
    begins with 'http' becomes 'http'. Every other token is kept unchanged,
    and the tokens are joined back together with single spaces.
    """
    def _mask(token):
        # A lone '@' is not a mention, so it falls through untouched.
        if len(token) > 1 and token.startswith('@'):
            return '@user'
        if token.startswith('http'):
            return "http"
        return token

    return " ".join(map(_mask, tweet.split(' ')))

def load_model_and_tokenizer(model_name="cardiffnlp/twitter-roberta-base-sentiment"):
    """
    Load a sequence-classification model and its matching tokenizer.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            model and the tokenizer. Defaults to the Twitter RoBERTa
            sentiment checkpoint this script was written for.

    Returns:
        A ``(model, tokenizer)`` tuple on success. If anything goes wrong
        while loading, the error is printed and ``(None, None)`` is returned
        so the caller can exit cleanly instead of handling an exception.
    """
    try:
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
    except Exception as e:
        # Deliberately broad: loading can fail for many reasons and the
        # caller only needs a single "could not load" signal.
        print(f"Error loading model or tokenizer: {e}")
        return None, None
    return model, tokenizer

def analyze_sentiment(tweet, model, tokenizer, labels, max_length=512):
    """
    Analyze the sentiment of a tweet using the provided model and tokenizer.

    Args:
        tweet: Raw tweet text; it is passed through ``preprocess_tweet``
            before tokenization.
        model: Callable model invoked as ``model(**encoded_tweet)``.
        tokenizer: Callable tokenizer that returns PyTorch tensors.
        labels: Label names, paired positionally with the model's scores.
        max_length: Maximum number of tokens kept after tokenization.
            The default of 512 is intended to match RoBERTa-base style
            checkpoints (assumption; adjust for other models).

    Returns:
        A dict mapping each label to its softmax probability as a plain
        Python ``float`` rounded to 4 decimal places.
    """
    # Function-scope import: this file does not import torch at module level.
    import torch

    tweet_proc = preprocess_tweet(tweet)
    # Truncate explicitly so over-long input cannot exceed the model's
    # position limit and raise inside the forward pass.
    encoded_tweet = tokenizer(
        tweet_proc,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        output = model(**encoded_tweet)
    # output[0][0]: first output element, first row (presumably the logits
    # for the single input sequence).
    scores = softmax(output[0][0].detach().numpy())

    # float() turns NumPy scalars into built-in floats so the result prints
    # cleanly and is JSON-serialisable.
    return {label: round(float(score), 4) for label, score in zip(labels, scores)}

def main():
    """
    Run an interactive loop that scores tweets typed by the user.

    Loads the model and tokenizer once, then repeatedly prompts for a tweet
    and prints its sentiment scores. The loop ends when the user types
    'exit' (case-insensitive), when input is closed (EOF), or when the user
    interrupts the prompt. Blank input is skipped instead of being scored.
    """
    # Label order must line up with the model's output class indices
    # (presumably 0=Negative, 1=Neutral, 2=Positive; verify against the
    # model card).
    labels = ['Negative', 'Neutral', 'Positive']

    # Load model and tokenizer
    model, tokenizer = load_model_and_tokenizer()
    if model is None or tokenizer is None:
        print("Failed to load model or tokenizer. Exiting.")
        return

    # Allow user to input multiple tweets
    print("Enter tweets for sentiment analysis (type 'exit' to quit):")
    while True:
        try:
            tweet = input("\nEnter a tweet: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted prompt ends the session
            # quietly rather than with a traceback.
            print("\nExiting...")
            break

        if tweet.lower() == 'exit':
            print("Exiting...")
            break
        if not tweet:
            # Nothing to analyze; ask again instead of scoring empty text.
            print("Please enter some text (or type 'exit' to quit).")
            continue

        sentiment_scores = analyze_sentiment(tweet, model, tokenizer, labels)
        print(f"Sentiment Scores: {sentiment_scores}")

# Start the interactive prompt only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/747 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/499M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/150 [00:00<?, ?B/s]

Enter tweets for sentiment analysis (type 'exit' to quit):

Enter a tweet: bad
Sentiment Scores: {'Negative': 0.5938, 'Neutral': 0.3332, 'Positive': 0.073}
