<a href="https://colab.research.google.com/github/duskwood67/codtech-ml-internship/blob/main/task2_sentiment_analysis/task2_sentiment_analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Task 2 - Sentiment Analysis using TF-IDF and Logistic Regression

# 📌 Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 📌 Step 1: Build a tiny in-memory corpus of labelled reviews
# Each entry pairs a review text with its label (1 = Positive, 0 = Negative).
_labelled_reviews = [
    ("I loved the product!", 1),
    ("Worst purchase ever", 0),
    ("Not bad, could be better", 0),
    ("Excellent quality and fast delivery", 1),
    ("Terrible experience", 0),
    ("Absolutely fantastic!", 1),
    ("I hate it", 0),
    ("Very satisfied", 1),
    ("It's okay, not great", 0),
    ("Highly recommended", 1),
]

# Column-oriented view of the same pairs; the keys become the DataFrame columns.
data = {
    'review': [text for text, _ in _labelled_reviews],
    'sentiment': [label for _, label in _labelled_reviews],
}

df = pd.DataFrame(data)

# 📌 Step 2: Splitting the dataset
X = df['review']  # Features (text reviews)
y = df['sentiment']  # Labels (0 or 1)

# Hold out 20% of the reviews for evaluation. `stratify=y` keeps the class
# proportions of `y` in both subsets; without it, a test set this small can end
# up containing a single class, which makes the evaluation metrics
# uninformative. `random_state` fixes the shuffle so reruns give the same split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 📌 Step 3: Vectorise the reviews with TF-IDF
# Vocabulary and IDF weights are learned from the training reviews only; the
# held-out reviews are then encoded with that already-fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

# 📌 Step 4: Fit a Logistic Regression classifier on the training features
# (`fit` returns the estimator itself, so construction and training are chained.)
model = LogisticRegression().fit(X_train_tfidf, y_train)

# 📌 Step 5: Predict a sentiment label for every held-out review
y_pred = model.predict(X_test_tfidf)

# 📌 Step 6: Evaluate the model
# Pin the label set explicitly: with only a couple of held-out reviews, one
# class may be missing from both y_test and y_pred. Without `labels`, the
# report and confusion matrix only cover the labels that happen to appear, so
# their shape would vary from split to split.
class_labels = [0, 1]  # 0 = Negative, 1 = Positive
class_names = ["Negative", "Positive"]  # same order as class_labels

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
# zero_division=0 reports 0.0 (instead of emitting a warning) for a metric whose
# denominator is zero, e.g. a class with no true or no predicted samples.
print(
    "\n📊 Classification Report:\n",
    classification_report(
        y_test,
        y_pred,
        labels=class_labels,
        target_names=class_names,
        zero_division=0,
    ),
)
# Rows are true labels, columns are predicted labels, both ordered as class_labels.
print("\n🧾 Confusion Matrix:\n", confusion_matrix(y_test, y_pred, labels=class_labels))
