In [1]:
# !pip install pandas nltk scikit-learn

import pandas as pd
import re
import nltk
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

print("Downloading NLTK resources...")
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
print("Downloads complete.")

Downloading NLTK resources...
Downloads complete.


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\tarru\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\tarru\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\tarru\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [2]:
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

In [3]:
feedback_data = {
    'text': [
        'This product is absolutely fantastic! I love it.',
        'A complete waste of money. It broke after one day.',
        'I am very happy with my purchase, highly recommended!',
        'The quality is terrible. I would not buy this again.',
        'Excellent customer service and a brilliant item.',
        'Do not buy this product. It is a scam and does not work.',
        'I had a great experience and the shipping was fast.',
        'The worst purchase I have ever made. Very disappointed.'
    ],
    'sentiment': [
        'positive', 'negative', 'positive', 'negative',
        'positive', 'negative', 'positive', 'negative'
    ]
}

df = pd.DataFrame(feedback_data)

print("--- Original Sample Dataset ---")
df

--- Original Sample Dataset ---


Unnamed: 0,text,sentiment
0,This product is absolutely fantastic! I love it.,positive
1,A complete waste of money. It broke after one ...,negative
2,"I am very happy with my purchase, highly recom...",positive
3,The quality is terrible. I would not buy this ...,negative
4,Excellent customer service and a brilliant item.,positive
5,Do not buy this product. It is a scam and does...,negative
6,I had a great experience and the shipping was ...,positive
7,The worst purchase I have ever made. Very disa...,negative


In [4]:
def preprocess_text(text):
    text = text.lower()
    tokens = word_tokenize(text)
    clean_tokens = [word for word in tokens if word.isalpha() and word not in stop_words]
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in clean_tokens]
    return " ".join(lemmatized_tokens)

df['processed_text'] = df['text'].apply(preprocess_text)

print("--- Dataset after Preprocessing ---")
df[['text', 'processed_text', 'sentiment']]

--- Dataset after Preprocessing ---


Unnamed: 0,text,processed_text,sentiment
0,This product is absolutely fantastic! I love it.,product absolutely fantastic love,positive
1,A complete waste of money. It broke after one ...,complete waste money broke one day,negative
2,"I am very happy with my purchase, highly recom...",happy purchase highly recommended,positive
3,The quality is terrible. I would not buy this ...,quality terrible would buy,negative
4,Excellent customer service and a brilliant item.,excellent customer service brilliant item,positive
5,Do not buy this product. It is a scam and does...,buy product scam work,negative
6,I had a great experience and the shipping was ...,great experience shipping fast,positive
7,The worst purchase I have ever made. Very disa...,worst purchase ever made disappointed,negative


In [5]:
X = df['processed_text']
y = df['sentiment']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

tfidf_vectorizer = TfidfVectorizer()
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

print(f"Training data shape: {X_train_tfidf.shape}")
print(f"Testing data shape: {X_test_tfidf.shape}")

Training data shape: (6, 25)
Testing data shape: (2, 25)


In [6]:
classifier = LogisticRegression()
classifier.fit(X_train_tfidf, y_train)

print("Model training complete.")

Model training complete.


In [7]:
y_pred = classifier.predict(X_test_tfidf)

cm = confusion_matrix(y_test, y_pred)
df_cm = pd.DataFrame(cm, index=['Actual Negative', 'Actual Positive'], columns=['Predicted Negative', 'Predicted Positive'])

print("--- Confusion Matrix ---")
print(df_cm)

print("\n--- Classification Report ---")
print(classification_report(y_test, y_pred))

--- Confusion Matrix ---
                 Predicted Negative  Predicted Positive
Actual Negative                   0                   2
Actual Positive                   0                   0

--- Classification Report ---
              precision    recall  f1-score   support

    negative       0.00      0.00      0.00       2.0
    positive       0.00      0.00      0.00       0.0

    accuracy                           0.00       2.0
   macro avg       0.00      0.00      0.00       2.0
weighted avg       0.00      0.00      0.00       2.0



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
