In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import string
import re

# Load the dataset
df = pd.read_csv(r"D:\Python\SentimentAnalysisProject\sentiment_analysis.csv")  # Update path if needed

# Clean the text
def preprocess(text):
    if isinstance(text, str):
        text = text.lower()
        text = re.sub(r"http\S+", "", text)
        text = re.sub(r"@\w+", "", text)
        text = re.sub(r"#\w+", "", text)
        text = text.translate(str.maketrans("", "", string.punctuation))
        return text
    return ""

df['text'] = df['text'].astype(str).apply(preprocess)

# Features and target
X = df['text']
y = df['sentiment']

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Convert text to TF-IDF features
vectorizer = TfidfVectorizer(stop_words='english', max_features=5000)
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Train a Logistic Regression model
model = LogisticRegression()
model.fit(X_train_vec, y_train)

# Predict and evaluate
y_pred = model.predict(X_test_vec)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.67

Classification Report:
               precision    recall  f1-score   support

    negative       0.89      0.44      0.59        36
     neutral       0.51      0.90      0.65        30
    positive       0.83      0.71      0.76        34

    accuracy                           0.67       100
   macro avg       0.74      0.68      0.67       100
weighted avg       0.75      0.67      0.67       100



In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Loading of dataset
df = pd.read_csv(r"D:\Python\SentimentAnalysisProject\sentiment_analysis.csv")

# Check and clean the data
df = df[['text', 'sentiment']].dropna()  # Use only relevant columns
df['sentiment'] = df['sentiment'].str.lower().str.strip()  # Ensure consistency

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.2, random_state=42)

# Build a pipeline: Vectorizer + Classifier
model = make_pipeline(CountVectorizer(), MultinomialNB())

# Training the model
model.fit(X_train, y_train)

# Prediction of a custom text
custom_text = input("Enter your text for sentiment analysis: ")
prediction = model.predict([custom_text])[0]

print(f"Sentiment for custom text: {prediction}")


Enter your text for sentiment analysis:  horrible experience, would never order again


Sentiment for custom text: neutral
