In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('/content/Amazon-Product-Reviews-Sentiment-Analysis-in-Python-Dataset.csv')


In [3]:
df.head(5)

Unnamed: 0,Review,Sentiment
0,Fast shipping but this product is very cheaply...,1
1,This case takes so long to ship and it's not e...,1
2,Good for not droids. Not good for iPhones. You...,1
3,The cable was not compatible between my macboo...,1
4,The case is nice but did not have a glow light...,1


In [4]:
df['Sentiment'].unique()

array([1, 2, 3, 4, 5])

In [5]:
df.loc[df['Sentiment']<=3,'Sentiment'] = 0 # values less than equal to 3 is bad
df.loc[df['Sentiment']>3,'Sentiment'] = 1 # values greater than 3 is Good
df['Sentiment'].unique()

array([0, 1])

In [6]:
df_good = df[df['Sentiment'] == 1].sample(n=50, random_state=42)
df_bad = df[df['Sentiment'] == 0].sample(n=50, random_state=42)
new_df = pd.concat([df_good, df_bad]).sample(frac=1, random_state=42).reset_index(drop=True)
new_df


Unnamed: 0,Review,Sentiment
0,I bought this to replace an old cover that I l...,0
1,My granddaughter had her heart set on this. I...,0
2,"Yes, it is hard to turn on, but the real probl...",0
3,"So far so good, it doesnt go as loud as I thou...",1
4,"It came on time, it is well made and it has ma...",1
...,...,...
95,Purchased this to replace my daughter's worn o...,0
96,Came with a missing key on the keyboard and th...,0
97,The Zune Silicone Case works great. It protec...,1
98,I ordered this the Intellinav 3 from the manuf...,0


In [7]:
x = new_df['Review']
y = new_df['Sentiment']


In [8]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test = train_test_split(x,y ,random_state=1,test_size=0.25)
x_train.shape

(75,)

In [9]:
from sklearn.feature_extraction.text import TfidfVectorizer
tf = TfidfVectorizer(max_features=300,stop_words='english',lowercase=True)
tf
x_train_bow = tf.fit_transform(x_train).toarray()
x_train_bow.shape

(75, 300)

In [10]:
x_test_bow = tf.transform(x_test)
x_test_bow.shape

(25, 300)

In [11]:
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression(verbose=2,penalty='l1',solver='saga')
lr.fit(x_train_bow,y_train)

convergence after 41 epochs took 0 seconds


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s finished


In [12]:
from sklearn.metrics import accuracy_score
y_pred = lr.predict(x_test_bow)
accuracy_score(y_test,y_pred)

0.48

In [13]:
def text_preprocess_vectorize(texts, vectorizer):
  return vectorizer.transform(texts).toarray()

In [14]:
arr = ['DO NOT BUY the charging cable does not work and light indicating charging never lights up i am going to return it as soon as possible just do not buy it it is a waste of money and time',
       "This product is alway freezing up and the only thing that will play at time is the CD player when you insert the disc.  music always pause(skip) between track so you can't enjoy your music to the fullest.  lastly this product over heats.  Dont waste your money on this crap"]

In [16]:
text_preprocess_vectorize(arr,tf)

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.54589144, 0.28931262, 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.  