<a href="https://colab.research.google.com/github/VarshaMedisetti/Covid-19-Tweet-Sentiment-Analysis-/blob/main/covid_sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


nltk.download('stopwords')
nltk.download('wordnet')


df = pd.read_excel('/content/Covid-19 Twitter Dataset (Apr-Jun 2020).xlsx')
df.fillna('', inplace=True)


lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

def preprocess_text(text):
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)  # Remove mentions
    text = re.sub(r'#', '', text)  # Remove hashtags
    text = re.sub(r'RT[\s]+', '', text)  # Remove retweets
    text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation and special characters
    text = re.sub(r'\d+', '', text)  # Remove numbers
    text = text.lower().strip()  # Convert to lowercase and strip whitespace

    tokens = text.split()
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words]

    return ' '.join(tokens)


df['clean_text'] = df['original_text'].apply(preprocess_text)


tfidf_vectorizer = TfidfVectorizer(max_features=1000)
X = tfidf_vectorizer.fit_transform(df['clean_text'].astype(str))
y = df['sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)


y_pred = model.predict(X_test)


accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy on the test set: {accuracy:.2f}")


def predict_sentiment_from_input():
    while True:
        new_text = input("Enter a sentence (Enter '0' to exit): ")
        if new_text == '0':
            break

        new_text_clean = preprocess_text(new_text)
        new_text_vectorized = tfidf_vectorizer.transform([new_text_clean])
        prediction = model.predict(new_text_vectorized)

        if prediction[0] == 'pos':
            sentiment = 'Positive'
        else:
            sentiment = 'Negative'

        print(f"Predicted sentiment for '{new_text}': {sentiment}")

predict_sentiment_from_input()


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Accuracy on the test set: 0.79
Enter a sentence (Enter '0' to exit): call leader help protect refuge covid19 provid qualiti health care
Predicted sentiment for 'call leader help protect refuge covid19 provid qualiti health care': Positive
Enter a sentence (Enter '0' to exit): ogun state support cbn nirsal covid19 target credit facil tcf
Predicted sentiment for 'ogun state support cbn nirsal covid19 target credit facil tcf': Positive
Enter a sentence (Enter '0' to exit): covid19 oyo discharg two patient
Predicted sentiment for 'covid19 oyo discharg two patient': Negative
Enter a sentence (Enter '0' to exit): condol famili surviv
Predicted sentiment for 'condol famili surviv': Negative
Enter a sentence (Enter '0' to exit): 0
