**Real-Time Tweet Sentiment Classifier using Logistic Regression**

**📌 Real-world Scenario:**

You're working with a company that wants to monitor customer feedback on Twitter to identify positive or negative sentiments regarding its product launch. Your task is to build a mini NLP pipeline that includes preprocessing, feature extraction, sentiment classification using logistic regression, and visualizations.

In [None]:
# 📦 Install dependencies
!pip install gradio nltk scikit-learn

# ✅ Imports
import re
import gradio as gr
import nltk
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, classification_report

# ✅ Download NLTK resources (each name is fetched with nltk.download)
for _resource in ('stopwords', 'wordnet', 'omw-1.4'):
    nltk.download(_resource)

# ✅ Setup tools
# The lemmatizer and the English stop-word set are shared by the cleaning step.
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# ✅ Sample dataset (10 samples: 6 positive, 4 negative)
# Each entry pairs a tweet with its label: 1 = positive, 0 = negative
labelled_tweets = [
    ("I love this new phone!", 1),
    ("This is an amazing product.", 1),
    ("Absolutely terrible service.", 0),
    ("I’m so happy with this laptop.", 1),
    ("The battery drains too fast.", 0),
    ("Superb build quality and smooth interface.", 1),
    ("Worst camera ever.", 0),
    ("Fast and reliable, totally worth it!", 1),
    ("It stopped working after one week!", 0),
    ("Customer service was so helpful!", 1),
]

# Split the pairs into the two parallel columns the DataFrame is built from.
data = {
    'tweet': [text for text, _ in labelled_tweets],
    'sentiment': [label for _, label in labelled_tweets],
}
df = pd.DataFrame(data)

# ✅ Preprocessing function
def clean_tweet(tweet):
    """Normalize a raw tweet into a space-joined string of lemmatized tokens.

    Steps: lowercase, blank out URLs / @mentions / non-letter characters,
    split on whitespace, drop English stop words, lemmatize what remains.

    Args:
        tweet: Raw tweet text.

    Returns:
        The surviving tokens joined by single spaces ("" if none survive).
    """
    tweet = tweet.lower()
    # Substitute a space instead of deleting: deletion glues neighbouring
    # words together ("fast,reliable" -> "fastreliable") and turns
    # contractions into junk tokens ("i’m" -> "im"). With a space the
    # contraction splits into "i" / "m", fragments the stop-word filter
    # below is expected to discard (NLTK's English list includes such
    # pieces -- verify against the installed corpus).
    tweet = re.sub(r'http\S+|@\w+|[^a-z\s]', ' ', tweet)
    tokens = tweet.split()  # split() also collapses the extra whitespace
    return " ".join(lemmatizer.lemmatize(w) for w in tokens if w not in stop_words)

# ✅ Clean every tweet; the cleaned text is what the vectorizer is fitted on
df['cleaned'] = df['tweet'].map(clean_tweet)

# ✅ Feature extraction: TF-IDF over unigrams and bigrams
vectorizer = TfidfVectorizer(ngram_range=(1, 2))
y = df['sentiment']
X = vectorizer.fit_transform(df['cleaned'])

# ✅ Train Logistic Regression model (default settings); fit() returns the estimator
model = LogisticRegression().fit(X, y)

# ✅ Gradio prediction function
def predict_sentiment(text):
    """Classify *text* and return a one-line, human-readable sentiment message.

    The text is cleaned with ``clean_tweet``, vectorized with the fitted
    ``vectorizer`` and scored by ``model``. The positive-class probability is
    mapped to Positive (> 0.6), Negative (< 0.4) or Neutral/Uncertain.

    Args:
        text: Raw text from the Gradio textbox. Empty, whitespace-only and
            ``None`` values are handled without calling the model.

    Returns:
        A string with the predicted label and a confidence figure, or a
        Neutral/Uncertain message when the input carries no usable signal.
    """
    # Blank input carries no signal; scoring it would report only the model's
    # intercept as if it were a real prediction (and None would crash in
    # clean_tweet on .lower()).
    if not text or not text.strip():
        return "🟡 Sentiment: Neutral/Uncertain (no text provided)"

    cleaned = clean_tweet(text)
    vec = vectorizer.transform([cleaned])
    # No stored entries means none of the input's terms are in the fitted
    # vocabulary, so again only the intercept would be scored.
    if vec.nnz == 0:
        return "🟡 Sentiment: Neutral/Uncertain (no recognizable words)"

    prob = model.predict_proba(vec)[0][1]  # probability of class 1
    if prob > 0.6:
        return f"🟢 Sentiment: Positive (Confidence: {prob:.2f})"
    elif prob < 0.4:
        # Report confidence in the predicted (negative) class.
        return f"🔴 Sentiment: Negative (Confidence: {1 - prob:.2f})"
    else:
        # NOTE: here the figure is the positive-class probability (0.40-0.60),
        # not a confidence in the "neutral" label itself.
        return f"🟡 Sentiment: Neutral/Uncertain (Confidence: {prob:.2f})"

# ✅ Gradio Interface
# Wires predict_sentiment to a three-line textbox input and a plain-text output.
demo = gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(lines=3, placeholder="Enter a tweet or sentence..."),
    outputs="text",
    title="🔍 Real-Time Tweet Sentiment Classifier",
    # The model is fitted on the small hand-written sample set defined in this
    # file, so the description must not claim it was trained on real tweets.
    description="Uses logistic regression trained on a small set of labeled sample tweets to predict whether sentiment is Positive, Negative, or Neutral. Based on TF-IDF and preprocessing with NLTK."
)

# ✅ Launch Gradio app
demo.launch(share=True)  # Use share=True to get a public link in Colab
