## Importing Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import CountVectorizer

## Reading the Data

In [2]:
df_train = pd.read_csv("../artifacts/data_ingestion/train.csv")
df_test = pd.read_csv("../artifacts/data_ingestion/test.csv")

In [3]:
df_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 319999 entries, 0 to 319998
Data columns (total 2 columns):
 #   Column  Non-Null Count   Dtype 
---  ------  --------------   ----- 
 0   label   319999 non-null  int64 
 1   review  319999 non-null  object
dtypes: int64(1), object(1)
memory usage: 4.9+ MB


## Train-Test Split

In [4]:
x_train = df_train.iloc[:, 1:]
x_test = df_test.iloc[:, 1:]

y_train = df_train.iloc[:, 0]
y_test = df_test.iloc[:, 0]

In [5]:
x_train.head()

Unnamed: 0,review
0,All over the place this movie is: full of nois...
1,Listening to this album made me wish I could t...
2,I use to buy the Playtex Cross Your Heart bra ...
3,Tony Hillerman is one of the best descriptive ...
4,I dont no this movie was ok. I was confused ha...


In [6]:
y_train.head()

0    0
1    0
2    0
3    1
4    0
Name: label, dtype: int64

## CountVectorizer

In [4]:
cv = CountVectorizer()
x_train_cv = cv.fit_transform(x_train.astype(str).values.flatten())
x_test_cv = cv.transform(x_test.astype(str).values.flatten())

## Standart Scaler

In [5]:
scaler = StandardScaler(with_mean=False)
x_train_scaled = scaler.fit_transform(x_train_cv)
x_test_scaled = scaler.transform(x_test_cv)

## Logistic Regression

In [6]:
lr = LogisticRegression(solver='saga', max_iter=1000, class_weight='balanced', C=1)
lr.fit(x_train_scaled, y_train)



In [7]:
y_pred = lr.predict(x_test_cv)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.88      0.61      0.72     40229
           1       0.70      0.92      0.79     39771

    accuracy                           0.76     80000
   macro avg       0.79      0.76      0.76     80000
weighted avg       0.79      0.76      0.76     80000



In [21]:
review = """
Everything is perfect, just as shown in the images. The quality is excellent, and the delivery was fast with no delays. I highly recommend it to everyone. Temu, as you said, I received the order on time and in perfect condition. Thank you.
"""
_vector = cv.transform([review])
lr.predict(_vector)


array([1], dtype=int64)

In [22]:
review = """
The wrong product was sent; it's not like the one in the picture.
"""

_vector = cv.transform([review])
lr.predict(_vector)

array([0], dtype=int64)

## Gradient Boosting Classifier

In [11]:
gb_model = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
gb_model.fit(x_train_scaled, y_train)

In [12]:
y_pred = gb_model.predict(x_test_scaled)

In [13]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.75      0.77     40229
           1       0.76      0.81      0.78     39771

    accuracy                           0.78     80000
   macro avg       0.78      0.78      0.78     80000
weighted avg       0.78      0.78      0.78     80000



In [23]:
review = """
Everything is perfect, just as shown in the images. The quality is excellent, and the delivery was fast with no delays. I highly recommend it to everyone. Temu, as you said, I received the order on time and in perfect condition. Thank you.
"""
_vector = cv.transform([review])
_scale = scaler.transform(_vector)
gb_model.predict(_scale)


array([1], dtype=int64)

In [24]:
review = """
The wrong product was sent; it's not like the one in the picture.
"""

_vector = cv.transform([review])
_scale = scaler.transform(_vector)
gb_model.predict(_scale)

array([0], dtype=int64)

## SGD Classifier

In [29]:
sgd = SGDClassifier(loss="hinge", penalty="l2", max_iter=500)
sgd.fit(x_train_scaled, y_train)

In [32]:
y_pred = gb_model.predict(x_test_scaled)

In [33]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.80      0.75      0.77     40229
           1       0.76      0.81      0.78     39771

    accuracy                           0.78     80000
   macro avg       0.78      0.78      0.78     80000
weighted avg       0.78      0.78      0.78     80000



In [34]:
review = """
Everything is perfect, just as shown in the images. The quality is excellent, and the delivery was fast with no delays. I highly recommend it to everyone. Temu, as you said, I received the order on time and in perfect condition. Thank you.
"""
_vector = cv.transform([review])
_scale = scaler.transform(_vector)
sgd.predict(_scale)


array([1], dtype=int64)

In [35]:
review = """
The wrong product was sent; it's not like the one in the picture.
"""

_vector = cv.transform([review])
_scale = scaler.transform(_vector)
sgd.predict(_scale)

array([0], dtype=int64)