In [99]:
import pandas as pd
import string
from textblob import TextBlob
import nltk
import pickle
from sklearn.preprocessing import LabelEncoder
from nltk import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score , classification_report , confusion_matrix

In [100]:
df = pd.read_csv('clothes_review.csv')
df


Unnamed: 0.1,Unnamed: 0,Clothing ID,Age,Title,Review Text,Rating,Recommended IND,Positive Feedback Count,Division Name,Department Name,Class Name
0,0,767,33,,Absolutely wonderful - silky and sexy and comf...,4,1,0,Initmates,Intimate,Intimates
1,1,1080,34,,Love this dress! it's sooo pretty. i happene...,5,1,4,General,Dresses,Dresses
2,2,1077,60,Some major design flaws,I had such high hopes for this dress and reall...,3,0,0,General,Dresses,Dresses
3,3,1049,50,My favorite buy!,"I love, love, love this jumpsuit. it's fun, fl...",5,1,0,General Petite,Bottoms,Pants
4,4,847,47,Flattering shirt,This shirt is very flattering to all due to th...,5,1,6,General,Tops,Blouses
...,...,...,...,...,...,...,...,...,...,...,...
23481,23481,1104,34,Great dress for many occasions,I was very happy to snag this dress at such a ...,5,1,0,General Petite,Dresses,Dresses
23482,23482,862,48,Wish it was made of cotton,"It reminds me of maternity clothes. soft, stre...",3,1,0,General Petite,Tops,Knits
23483,23483,1104,31,"Cute, but see through","This fit well, but the top was very see throug...",3,0,1,General Petite,Dresses,Dresses
23484,23484,1084,28,"Very cute dress, perfect for summer parties an...",I bought this dress for a wedding i have this ...,3,1,2,General,Dresses,Dresses


In [101]:
df['Title']=df['Title'].fillna('')
df['combined'] = df['Title'].astype(str)+' '+df['Review Text'].astype(str)

In [102]:
df['combined']

0         Absolutely wonderful - silky and sexy and com...
1         Love this dress!  it's sooo pretty.  i happen...
2        Some major design flaws I had such high hopes ...
3        My favorite buy! I love, love, love this jumps...
4        Flattering shirt This shirt is very flattering...
                               ...                        
23481    Great dress for many occasions I was very happ...
23482    Wish it was made of cotton It reminds me of ma...
23483    Cute, but see through This fit well, but the t...
23484    Very cute dress, perfect for summer parties an...
23485    Please make more like this one! This dress in ...
Name: combined, Length: 23486, dtype: object

In [103]:
def remove_punctuations(text):
    punctuation_list = string.punctuation
    text = text.translate(str.maketrans('', '', punctuation_list))
    return text



In [104]:
review = df['combined'].str.lower()
review = review.apply(remove_punctuations)

In [134]:
# A function to measure the polarity
def textPolarity(tweet):
    return TextBlob(tweet).sentiment.polarity

def defineSentiment(polarity_score):
    score = 'neutral'
    if polarity_score < 0:
        score = 'Negative'
    elif polarity_score > 0:
        score = 'Positive'
    return score

df['polarity'] = review.apply(textPolarity)
df['score'] = df['polarity'].apply(defineSentiment)

In [142]:
label = LabelEncoder()
df['score'] = label.fit_transform(df['score'])

x_data = df['combined']
y_data = df['score']

In [143]:
stop_words = stopwords.words('english')
stop_words += list(string.punctuation)

lemmatizer = WordNetLemmatizer()

#A function to tokenize the text
def tokenize_text(text):
    tokens = word_tokenize(text)
    tokens_stopwords_none = [t.lower() for t in tokens if t.lower() not in stop_words]
    return tokens_stopwords_none

#A function to lemmatize text
def lemmatize_text(text):
    lemmatized=[]
    for word in text:
        lemmatized.append(lemmatizer.lemmatize(word))
    return lemmatized

x_data= x_data.apply(tokenize_text)
x_data= x_data.apply(lemmatize_text)
x_data=[" ".join(review) for review in x_data.values]


In [144]:
cv = TfidfVectorizer()
x_data_vect = cv.fit_transform(x_data).toarray()

In [145]:
x_train , x_test , y_train , y_test = train_test_split(x_data_vect , y_data , test_size=0.3 , random_state=1)

In [146]:
x_train.shape ,x_test.shape , y_train.shape , y_test.shape

((16440, 13440), (7046, 13440), (16440,), (7046,))

In [147]:
mnb = MultinomialNB()
mnb.fit(x_train , y_train)
pred = mnb.predict(x_test)


In [148]:
print(accuracy_score(y_test , pred))
print(confusion_matrix(y_test , pred))
print(classification_report(y_test , pred))

0.9480556344024978
[[ 255    0   17]
 [   0    0  349]
 [   0    0 6425]]
              precision    recall  f1-score   support

           0       1.00      0.94      0.97       272
           1       0.00      0.00      0.00       349
           2       0.95      1.00      0.97      6425

    accuracy                           0.95      7046
   macro avg       0.65      0.65      0.65      7046
weighted avg       0.90      0.95      0.92      7046



  _warn_prf(average, modifier, msg_start, len(result))


In [149]:
pickle.dump(cv , open("count-Vectorizer.pkl" , "wb"))
pickle.dump(mnb , open("mnb_sentiment_classifier.pkl" , "wb"))

In [150]:
load_cv = pickle.load(open('count-Vectorizer.pkl','rb'))
model = pickle.load(open("mnb_sentiment_classifier.pkl",'rb'))

In [151]:
def pred_sentence(sentence):
    sen = load_cv.transform([sentence]).toarray()
    res = model.predict(sen)[0]
    print(res)
    if res == 2:
        return 'Positive review'
    elif res==0:
        return 'Negative review'
    elif res==1:
        return 'Positive review'

In [152]:
test_sentence ='this is very good'
print(pred_sentence(test_sentence))

2
Positive review
