In [1]:
import random
import pandas as pd

good_templates = [
    "Excellent product, highly recommend it.",
    "Very satisfied with the quality and service.",
    "Exceeded my expectations in every way.",
    "Works perfectly and great value for money.",
    "I am extremely happy with this purchase."
]
bad_templates = [
    "Terrible experience, do not buy this.",
    "Very disappointed, broke on first use.",
    "Worst purchase I have ever made.",
    "Low quality and terrible customer support.",
    "I regret buying this product."
]

good_reviews = random.choices(good_templates, k=50)
bad_reviews  = random.choices(bad_templates,  k=50)
reviews      = good_reviews + bad_reviews
labels       = ['good'] * 50 + ['bad'] * 50

df = pd.DataFrame({'Review': reviews, 'Label': labels})
df = df.sample(frac=1, random_state=42).reset_index(drop=True)

print(df.head())


                                       Review Label
0       Terrible experience, do not buy this.   bad
1  Low quality and terrible customer support.   bad
2  Low quality and terrible customer support.   bad
3  Works perfectly and great value for money.  good
4     Excellent product, highly recommend it.  good


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer

vectorizer = TfidfVectorizer(
    max_features=300,
    lowercase=True,
    stop_words='english'
)
X = vectorizer.fit_transform(df['Review'])
y = df['Label']

In [4]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

report = classification_report(y_test, y_pred, target_names=['bad','good'])
print(report)


              precision    recall  f1-score   support

         bad       1.00      1.00      1.00         8
        good       1.00      1.00      1.00        17

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25



In [6]:
def text_preprocess_vectorize(texts, vectorizer):

    return vectorizer.transform(texts)


In [7]:
new_texts = [
    "I love this item, works flawlessly.",
    "Really bad quality, broke immediately."
]
X_new = text_preprocess_vectorize(new_texts, vectorizer)
predictions = model.predict(X_new)
print(predictions)


['good' 'bad']
