In [11]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd

# --- Load and clean the labelled tweets ---------------------------------
data = pd.read_csv('C:/Users/eswar/Downloads/Twitter_Data.csv')

# Rows without a label cannot be used for supervised training.
data = data.dropna(subset=['category'])

# Missing text becomes '', then every entry is lower-cased and trimmed.
data['clean_text'] = data['clean_text'].fillna('').str.lower().str.strip()

# --- Features / target and an 80/20 hold-out split -----------------------
X, y = data['clean_text'], data['category']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- TF-IDF features: fitted on the training split, reused for the test --
vectorizer = TfidfVectorizer(max_features=5000)
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# --- Fit the classifier and score it on the held-out split ---------------
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, y_train)
y_pred = model.predict(X_test_tfidf)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

# Readable names for the numeric class labels the model predicts.
_SENTIMENT_LABELS = {-1.0: "Negative", 0.0: "Neutral", 1.0: "Positive"}


def predict_sentiment(text):
    """Return the predicted sentiment name for a single piece of text.

    The input is normalised the same way as the training text (missing
    value -> '', lower-cased, surrounding whitespace stripped), vectorised
    with the module-level ``vectorizer`` and classified by the module-level
    ``model``.

    Args:
        text: The text to classify. Non-string values are converted with
            ``str()``. ``None`` or a float NaN is treated as an empty
            string rather than being classified as the word "none"/"nan".

    Returns:
        "Negative", "Neutral" or "Positive" for predicted labels -1.0, 0.0
        and 1.0 respectively; "Unknown" for any other predicted label.
    """
    # Mirror the training-time fillna(''): a missing value carries no text.
    if text is None or (isinstance(text, float) and pd.isna(text)):
        text = ''
    text = str(text).lower().strip()

    # transform() expects an iterable of documents, hence the 1-item list.
    text_vect = vectorizer.transform([text])
    pred = model.predict(text_vect)[0]
    return _SENTIMENT_LABELS.get(pred, "Unknown")

# Quick sanity check of predict_sentiment on one hand-written sentence.
example_text = "I love this project!"
predicted_label = predict_sentiment(example_text)
print("Predicted Sentiment:", predicted_label)


Accuracy: 0.9261236385948766

Classification Report:
               precision    recall  f1-score   support

        -1.0       0.91      0.83      0.87      7230
         0.0       0.92      0.98      0.95     10961
         1.0       0.94      0.94      0.94     14404

    accuracy                           0.93     32595
   macro avg       0.92      0.91      0.92     32595
weighted avg       0.93      0.93      0.93     32595

Predicted Sentiment: Positive
