# 📊 Analyse de Sentiments sur des Tweets avec NLP & Gradio

Ce projet de Data Science utilise le **NLP (Natural Language Processing)** pour analyser des tweets et prédire leur **sentiment** (positif ou négatif) à l'aide du dataset *SentimentTweet*. Nous utilisons également **Gradio** pour déployer une application web simple.

## 🛠️ 1. Imports & Préparation

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import nltk
from nltk.corpus import stopwords

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, ConfusionMatrixDisplay

import joblib
from wordcloud import WordCloud
import gradio as gr

# Fetch the NLTK stop-word corpus, then keep the English list as a set so
# the membership tests done during text cleaning are cheap.
nltk.download('stopwords')
stop_words = {*stopwords.words('english')}

## 📥 2. Chargement et Nettoyage des Données

In [None]:
# Load the raw tweet CSV. The file is read without a header row, so the
# column names are supplied explicitly; only the label and the tweet text
# are kept (as an independent copy, so later column assignments are safe).
df = pd.read_csv(
    "sentimentTweet.csv",
    encoding="latin-1",
    header=None,
    names=["sentiment", "id", "date", "query", "user", "text"],
)
df = df[["sentiment", "text"]].copy()

# Recode the labels to binary: 0 stays 0 (negative), 4 becomes 1 (positive).
# Series.map yields NaN for any label outside the mapping, which would make
# the classifier's fit() fail later, so such rows are dropped and the column
# is cast back to integers.
df["sentiment"] = df["sentiment"].map({0: 0, 4: 1})
df = df.dropna(subset=["sentiment"])
df["sentiment"] = df["sentiment"].astype(int)

def clean_text(text):
    """Normalize a raw tweet into a space-separated string of lowercase tokens.

    URLs, @mentions and #hashtags are removed, every character other than
    ASCII letters and whitespace is dropped, the text is lowercased, and
    tokens present in the module-level ``stop_words`` set are filtered out.

    Args:
        text: Raw tweet text. Non-string values (``None``, a NaN coming from
            a missing CSV cell, ...) are treated as an empty tweet.

    Returns:
        The cleaned text; ``""`` when nothing is left or when the input is
        not a string.
    """
    # Missing values would otherwise make re.sub raise TypeError.
    if not isinstance(text, str):
        return ""

    # Tweet-specific noise is stripped first, while the "http", "@" and "#"
    # markers are still present; the generic character filter runs last.
    for pattern in (r"http\S+", r"@\w+", r"#\w+", r"[^A-Za-z\s]"):
        text = re.sub(pattern, "", text)

    tokens = text.lower().split()
    return " ".join(token for token in tokens if token not in stop_words)

# Store the cleaned version of every tweet next to the raw text, then
# preview the first rows.
df["clean_text"] = df["text"].map(clean_text)
df.head()

## ✨ 3. Vectorisation & Split

In [None]:
# Split the cleaned texts BEFORE fitting TF-IDF: the vocabulary and the IDF
# weights must be learned from the training tweets only, otherwise statistics
# from the test set leak into the features and the evaluation is optimistic.
y = df["sentiment"]
text_train, text_test, y_train, y_test = train_test_split(
    df["clean_text"], y, test_size=0.2, random_state=42
)

# Keep at most the 5000 highest-frequency terms of the training corpus.
vectorizer = TfidfVectorizer(max_features=5000)
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-corpus feature matrix, kept under its original name for any other
# cell that still refers to X.
X = vectorizer.transform(df["clean_text"])

## 🤖 4. Entraînement et Évaluation du Modèle Naive Bayes

In [None]:
# Fit a multinomial Naive Bayes classifier on the training features
# (fit() returns the estimator itself, so it can be chained).
nb_model = MultinomialNB().fit(X_train, y_train)

# Predict the held-out tweets and report the usual classification metrics.
y_pred = nb_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy:", test_accuracy)
print("\nClassification Report:\n", test_report)

# Confusion matrix of the fitted model on the test split.
ConfusionMatrixDisplay.from_estimator(
    nb_model,
    X_test,
    y_test,
    cmap="Blues",
)
plt.title("Matrice de Confusion - Naive Bayes")
plt.show()

## ☁️ 5. Visualisation des Mots avec WordCloud

In [None]:
# Build one big string of cleaned tweets per sentiment class.
positive_text = " ".join(df[df["sentiment"] == 1]["clean_text"])
negative_text = " ".join(df[df["sentiment"] == 0]["clean_text"])

# Draw each word cloud explicitly with matplotlib. A bare ``.to_image()``
# expression is only rendered by a notebook when it is the last line of the
# cell (and never when run as a script), so relying on it would hide the
# positive cloud.
for cloud_text, cloud_title in (
    (positive_text, "WordCloud - Tweets positifs"),
    (negative_text, "WordCloud - Tweets négatifs"),
):
    cloud = WordCloud(width=800, height=400).generate(cloud_text)
    plt.figure(figsize=(10, 5))
    plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")  # axes carry no meaning for a word cloud
    plt.title(cloud_title)
    plt.show()

## 🔍 6. Prédiction Manuelle sur un Tweet

In [None]:
def predict_sentiment(tweet):
    """Classify a single tweet with the in-memory Naive Bayes model.

    The tweet is cleaned with ``clean_text``, turned into a TF-IDF row by the
    module-level ``vectorizer`` and passed to ``nb_model``.

    Args:
        tweet: Raw tweet text.

    Returns:
        "🟢 Positif" when the predicted label is 1, otherwise "🔴 Négatif".
    """
    features = vectorizer.transform([clean_text(tweet)])
    label = nb_model.predict(features)[0]
    if label == 1:
        return "🟢 Positif"
    return "🔴 Négatif"

# Quick sanity check on two hand-written example tweets.
for sample_tweet in (
    "I hate this phone, it's terrible!",
    "I love the new features, awesome update!",
):
    print(predict_sentiment(sample_tweet))

## 💾 7. Sauvegarde du Modèle et du Vectorizer

In [None]:
# Persist the fitted classifier and the fitted vectorizer; both are needed
# to score new tweets later.
joblib.dump(value=nb_model, filename="naive_bayes_model.pkl")
joblib.dump(value=vectorizer, filename="tfidf_vectorizer.pkl")

## 🌐 8. Application Gradio

In [None]:
# Reload the model and the vectorizer from disk.
# NOTE: joblib files are pickle-based, so only load artifacts you produced
# yourself or otherwise trust.
model, vectorizer = map(
    joblib.load,
    ("naive_bayes_model.pkl", "tfidf_vectorizer.pkl"),
)

def predict_sentiment(tweet):
    """Classify a single tweet with the model reloaded from disk.

    Used as the Gradio callback: the tweet is cleaned with ``clean_text``,
    vectorized by the reloaded ``vectorizer`` and scored by ``model``.

    Args:
        tweet: Raw tweet text.

    Returns:
        "🟢 Positif" when the predicted label is 1, otherwise "🔴 Négatif".
    """
    tfidf_row = vectorizer.transform([clean_text(tweet)])
    is_positive = model.predict(tfidf_row)[0] == 1
    return "🟢 Positif" if is_positive else "🔴 Négatif"

# Input and output widgets of the demo page.
tweet_box = gr.Textbox(lines=3, placeholder="Enter your tweet here...")
sentiment_box = gr.Text(label="Sentiment prediction")

# Wire the prediction function to the widgets and start the web app.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=tweet_box,
    outputs=sentiment_box,
    title="Tweet Sentiment Analysis",
    description="This model predicts whether a tweet is positive or negative.",
)

interface.launch()