In [38]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

from sklearn import metrics

In [39]:
df = pd.read_csv('./tripadvisor_hotel_reviews.csv')
df

Unnamed: 0,Review,Rating
0,nice hotel expensive parking got good deal sta...,4
1,ok nothing special charge diamond member hilto...,2
2,nice rooms not 4* experience hotel monaco seat...,3
3,"unique, great stay, wonderful time hotel monac...",5
4,"great stay great stay, went seahawk game aweso...",5
...,...,...
20486,"best kept secret 3rd time staying charm, not 5...",5
20487,great location price view hotel great quick pl...,4
20488,"ok just looks nice modern outside, desk staff ...",2
20489,hotel theft ruined vacation hotel opened sept ...,1


In [40]:
# Added Sentiment based on rating.
# rating > 3 = Good
# ratingg = 3 = Medium
# rating < 3 = Bad

def set_sentiment(rating):
    if rating > 3:
        return 'Good'
    elif rating == 3:
        return 'Medium'
    return 'Bad'

df['Sentiment'] = df['Rating'].apply(set_sentiment)

In [41]:
# Define X and y

X = df['Review'].values
y = df['Sentiment'].values

In [42]:
# Bags of words

vectorizer = CountVectorizer()
X = vectorizer.fit_transform(X)

In [43]:
# Split data

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.35, random_state=82)

## SVM Model

In [44]:
# SVM model
svm_model = SVC()
svm_model.fit(X_train, y_train)

SVC()

In [45]:
# Using SVM model on X_test
svm_y_pred = svm_model.predict(X_test)

## Random Forest Model

In [46]:
rf_model = RandomForestClassifier()
rf_model.fit(X_train, y_train)

RandomForestClassifier()

In [47]:
rf_y_pred = rf_model.predict(X_test)

## Naive Bayes Model

In [48]:
nb_model = MultinomialNB()
nb_model.fit(X_train, y_train)

MultinomialNB()

In [49]:
nb_y_pred = nb_model.predict(X_test)

## Classification Reports

### SVM Model Report

In [50]:
print(metrics.classification_report(y_test, svm_y_pred))

              precision    recall  f1-score   support

         Bad       0.79      0.72      0.75      1136
        Good       0.86      0.99      0.92      5274
      Medium       0.65      0.06      0.12       762

    accuracy                           0.85      7172
   macro avg       0.77      0.59      0.60      7172
weighted avg       0.83      0.85      0.81      7172



### Random Forest Report

In [51]:
print(metrics.classification_report(y_test, rf_y_pred))

              precision    recall  f1-score   support

         Bad       0.91      0.34      0.50      1136
        Good       0.78      1.00      0.88      5274
      Medium       0.00      0.00      0.00       762

    accuracy                           0.79      7172
   macro avg       0.57      0.45      0.46      7172
weighted avg       0.72      0.79      0.72      7172



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Naive Bayes Report

In [52]:
print(metrics.classification_report(y_test, nb_y_pred))

              precision    recall  f1-score   support

         Bad       0.80      0.74      0.77      1136
        Good       0.86      0.98      0.92      5274
      Medium       0.25      0.03      0.05       762

    accuracy                           0.84      7172
   macro avg       0.64      0.58      0.58      7172
weighted avg       0.78      0.84      0.80      7172



# Evaluation using model.score() ----- Accuracy

## SVM Accurary

In [65]:
svm_model.score(X_test, y_test)

0.8467651979921919

## Random Forest Accuracy

In [66]:
rf_model.score(X_test, y_test)

0.7882041271611824

## Naive Bayes Accuracy

In [67]:
nb_model.score(X_test, y_test)

0.8424428332403793

# Evalution with F1 Score

## SVM F1

In [86]:
svm_f1 = metrics.f1_score(y_test, svm_y_pred, average=None, labels=['Good', 'Medium', 'Bad'])
svm_f1

array([0.91826118, 0.11708483, 0.75438596])

## Random Forest F1

In [87]:
rf_f1 = metrics.f1_score(y_test, rf_y_pred, average=None, labels=['Good', 'Medium', 'Bad'])
rf_f1

array([0.87598769, 0.        , 0.4964721 ])

## Naive Bayes F1

In [88]:
nb_f1 = metrics.f1_score(y_test, nb_y_pred, average=None, labels=['Good', 'Medium', 'Bad'])
nb_f1

array([0.91556262, 0.04756243, 0.76937185])

# New Text

In [93]:
new_text = "I pre-ordered 4 cameras without any reviews having been posted, so I hope that this helps someone. First off, the cameras are pretty nice, very easy to setup and get connected and the price is just about perfect. The app is where things start to fall apart. It clearly was created for the other Blink cameras that are battery dependent, so most of the quirks/problems I found are probably necessary for battery savings on the other Blink cameras. I have had cameras for well over ten years and have gone through several different apps/programs to access and store the video but I have never had an app that did NOT allow me to view all of my cameras at the same time. This one requires you to select a single camera and activate it individually to view real time video. I assume that this was designed for battery savings, but since these cameras are always on, it now serves no purpose. Additionally, not having the ability to view all of my cameras, while activated in a single 4 pane window seems a step in the wrong direction. For years I have been able to see live feeds in a multi-pane format and select the camera I want to zoom in on that one, not with this app.Second, tech support is, at best, not good. I contacted them regarding the IR light that apparently can NOT be turned off (although you can turn it off via the app for the other Blink cameras?!) and the support tech actually stated that he did not know anything about these cameras and had to ask someone else. That was a first for me. The IR light is probably not a big deal for most, but I have for years had an indoor camera pointed out a window since my front yard is pretty well lit at night and the IR light stuck in the ON position make that impossible.I never review anything, but I wanted to get the word out on this. I suspect that these cameras were kind of rushed and the app just re-purposed, so it my sincere hope that they will address the live view concerns in future app updates specific to Blink Mini owners. Soon I hope!!!! Here's to hoping!Update: I was contacted by Blink following regarding my review. They indicated that multiple people had voiced similar concerns about the apps real-time capabilities and they made it sound like they were developing an update to the app to address this. They also offered to send me an additional camera (an older model) that does allow you to turn off the IR at no charge. I asked directly if this would be in exchange for changing my review and they specifically did NOT ask me to amend this review. If nothing else, that is GREAT customer service."

new_vec = vectorizer.transform([new_text])


In [94]:
svm_model.predict(new_vec)

array(['Good'], dtype=object)

In [95]:
rf_model.predict(new_vec)

array(['Good'], dtype=object)

In [96]:
nb_model.predict(new_vec)

array(['Bad'], dtype='<U6')