# U decide ML feature : Intelligent Review Engine 1.0

# Predicting sentiment from business reviews

In [1]:
import graphlab

In [4]:
# Set the GraphLab runtime option GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS to 4.
# NOTE(review): presumably this caps the number of Python lambda worker
# processes GraphLab spawns — confirm against the runtime-config docs.
graphlab.set_runtime_config('GRAPHLAB_DEFAULT_NUM_PYLAMBDA_WORKERS', 4)

# Fetching business review data 

In [5]:
# Load the business-review CSV into an SFrame with an explicit schema.
# Without type hints the loader infers column types from the first rows of
# the file only, which can mis-type a column if later rows look different.
# Pinning the types keeps 'rating' an integer (needed by the rating filters
# and the sentiment threshold below) and the text columns as strings.
businesses = graphlab.SFrame.read_csv(
    'amazon_baby.gl/udecideReview.csv',
    column_type_hints={'name': str, 'review': str, 'rating': int},
)

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,str,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [6]:
# Preview the first rows of the loaded data (columns: name, review, rating).
businesses.head()

name,review,rating
Thai Express,"Agaarjuulagch, AC bgui mash bugchim. Hool ih ...",1
Thai Express,Ymar ch uilchilgeenii soyl bhgui oronguut ...,1
Thai Express,Curry ni curry bish blaa. Poorts mash myy. Dahij ...,2
Oishii Ramen,"Mash amttai, ports saitai bas hamgiin gol n busad ...",5
Oishii Ramen,"Curry bol mash myy bn. Shul shig shingen, bitvv ...",2
Oishii Ramen,Ih saihan tuhalag orchin bas hool uneheer goy ...,5
Oishii Ramen,jinhene Japan rameng bodwol arai ondoo ch ...,4
BBQ chicken Mongolia,Horoollin salbartaa anhaarlaa handulaacee ...,1
BBQ chicken Mongolia,Hogiin gazar. Zahialga ugii geheer dandaa uur ...,1
BBQ chicken Mongolia,Tahian shardag tosoo ern solidiimu tanaih. Arai l ...,1


# Generate a word-count vector for each review

In [7]:
# Add a 'word_count' column: for each review, a dictionary mapping each word
# in the review text to the number of times it occurs. This is the only
# feature the classifier is trained on later.
businesses['word_count'] = graphlab.text_analytics.count_words(businesses['review'])

In [8]:
# Preview again to confirm the new 'word_count' column was added.
businesses.head()

name,review,rating,word_count
Thai Express,"Agaarjuulagch, AC bgui mash bugchim. Hool ih ...",1,"{'ni': 1, 'ih': 1, 'agaarjuulagch,': 1, ..."
Thai Express,Ymar ch uilchilgeenii soyl bhgui oronguut ...,1,"{'ch': 1, 'yvtsgu': 1, 'uneheer': 1, 'sandal': ..."
Thai Express,Curry ni curry bish blaa. Poorts mash myy. Dahij ...,2,"{'ni': 1, 'orohgvee.': 1, 'myy.': 1, 'mash': 1, ..."
Oishii Ramen,"Mash amttai, ports saitai bas hamgiin gol n busad ...",5,"{'zoriulsan': 1, 'gol': 1, 'saitai': 1, 'bas' ..."
Oishii Ramen,"Curry bol mash myy bn. Shul shig shingen, bitvv ...",2,"{'myy': 1, 'ch': 1, 'bitvv': 1, 'hezee': 1, ..."
Oishii Ramen,Ih saihan tuhalag orchin bas hool uneheer goy ...,5,"{'ih': 1, 'mgld': 1, 'bas': 1, 'idej': 1, ..."
Oishii Ramen,jinhene Japan rameng bodwol arai ondoo ch ...,4,"{'hyamdarch': 1, 'blee': 1, 'arai': 1, 'ch': 1, ..."
BBQ chicken Mongolia,Horoollin salbartaa anhaarlaa handulaacee ...,1,"{'bvr': 1, 'hutgaa': 1, 'halbaga': 1, 'tawga' ..."
BBQ chicken Mongolia,Hogiin gazar. Zahialga ugii geheer dandaa uur ...,1,"{'oirhon': 1, 'guihimu': 1, 'ued': 1, 'manaih' ..."
BBQ chicken Mongolia,Tahian shardag tosoo ern solidiimu tanaih. Arai l ...,1,"{'dahij': 1, 'heterhii': 1, 'zahialaltgui': 1, ..."


In [9]:
# Point GraphLab Canvas at the 'ipynb' target so that .show() visualizations
# are rendered inside this notebook.
graphlab.canvas.set_target('ipynb')

In [10]:
# Visualize the 'name' column to see how reviews are spread across businesses.
businesses['name'].show()

# businesses['rating'].show()

# Examining the reviews for a popular business:

In [11]:
# Keep only the rows whose 'name' is exactly 'BBQ chicken Mongolia'.
# This subset is taken before rating-3 reviews are removed from `businesses`,
# so it still contains neutral reviews.
business_reviews = businesses[businesses['name'] == 'BBQ chicken Mongolia']

In [12]:
# Number of reviews for the selected business.
len(business_reviews)

9

In [13]:
# Show the rating distribution for the selected business as a categorical view.
business_reviews['rating'].show(view='Categorical')

# Sentiment classifier

In [14]:
# Show the rating distribution across all businesses as a categorical view.
businesses['rating'].show(view='Categorical')

# Define what's a positive and a negative sentiment

Ignore reviews with rating = 3, because those are neither clearly positive nor clearly negative. Reviews with rating >= 4 are treated as positive; reviews with rating <= 2 are treated as negative.

In [15]:
# Drop neutral reviews (rating == 3); only clearly positive or negative
# reviews are kept for training and evaluation.
businesses = businesses[businesses['rating'] != 3]

In [16]:
# Binary target column: 1 for positive reviews (rating >= 4), 0 for negative
# ones. Ratings of 3 were already removed, so the remaining 0s are ratings <= 2.
businesses['sentiment'] = businesses['rating'] >= 4

In [17]:
# Spot-check the labelling: every 5-star review should have sentiment == 1.
businesses[businesses['rating'] == 5].head()

name,review,rating,word_count,sentiment
Oishii Ramen,"Mash amttai, ports saitai bas hamgiin gol n busad ...",5,"{'zoriulsan': 1, 'gol': 1, 'saitai': 1, 'bas' ...",1
Oishii Ramen,Ih saihan tuhalag orchin bas hool uneheer goy ...,5,"{'ih': 1, 'mgld': 1, 'bas': 1, 'idej': 1, ...",1
BBQ chicken Mongolia,Guurnii door bdag salbariin uilchilgee ...,5,"{'door': 1, 'taalagdlaa': 1, 'salbariin': 1, ...",1
BBQ chicken Mongolia,Chicken dukbab bol nice2 jhn hymd bval humuus ih ...,5,"{'daa': 1, 'nice2': 1, 'dukbab': 1, 'humuus' ...",1
BBQ chicken Mongolia,bbg hamt olond ajiln amjilt orh durtai gazarn ...,5,"{'hamt': 1, 'ajiln': 1, 'bbg': 1, 'neg': 1, ...",1
Bojangles Grill Restaurant & Bar ...,goe yum,5,"{'goe': 1, 'yum': 1}",1
KFC mongolia,goe amtai bas tanaid ajild oroh huselt tawij ...,5,"{'bas': 1, 'tanaid': 1, 'goe': 1, 'huselt': 1, ...",1
Burger King Mongolia,Burer king-d huuhduud maani durtai ger buleere ...,5,"{'ch': 1, 'tsew': 1, 'deer': 1, 'ghiin': 1, ...",1
Food park shangrila mall,Endii pizza-g udur bolgn ideed ch uidkumaa�. ...,5,"{'hamt': 1, 'ch': 1, 'pizza-g': 1, 'ideed' ...",1
Horned Owl Food Symphony,"Saikhan hooltoi uulchulgee sain bsan, ...",5,"{'sain': 1, 'taalagdsan': 1, 'bsan,': 1, 'saikh ...",1


# Train the sentiment classifier

In [18]:
# Randomly split the labelled reviews into a training set (about 80% of rows)
# and a test set (the rest). The fixed seed keeps the split reproducible.
train_data, test_data = businesses.random_split(.8, seed=0)

In [19]:
# Preview the first rows of the training split.
train_data.head()

name,review,rating,word_count,sentiment
Thai Express,"Agaarjuulagch, AC bgui mash bugchim. Hool ih ...",1,"{'ni': 1, 'ih': 1, 'agaarjuulagch,': 1, ...",0
Thai Express,Ymar ch uilchilgeenii soyl bhgui oronguut ...,1,"{'ch': 1, 'yvtsgu': 1, 'uneheer': 1, 'sandal': ...",0
Thai Express,Curry ni curry bish blaa. Poorts mash myy. Dahij ...,2,"{'ni': 1, 'orohgvee.': 1, 'myy.': 1, 'mash': 1, ...",0
Oishii Ramen,"Mash amttai, ports saitai bas hamgiin gol n busad ...",5,"{'zoriulsan': 1, 'gol': 1, 'saitai': 1, 'bas' ...",1
Oishii Ramen,"Curry bol mash myy bn. Shul shig shingen, bitvv ...",2,"{'myy': 1, 'ch': 1, 'bitvv': 1, 'hezee': 1, ...",0
Oishii Ramen,Ih saihan tuhalag orchin bas hool uneheer goy ...,5,"{'ih': 1, 'mgld': 1, 'bas': 1, 'idej': 1, ...",1
Oishii Ramen,jinhene Japan rameng bodwol arai ondoo ch ...,4,"{'hyamdarch': 1, 'blee': 1, 'arai': 1, 'ch': 1, ...",1
BBQ chicken Mongolia,Horoollin salbartaa anhaarlaa handulaacee ...,1,"{'bvr': 1, 'hutgaa': 1, 'halbaga': 1, 'tawga' ...",0
BBQ chicken Mongolia,Hogiin gazar. Zahialga ugii geheer dandaa uur ...,1,"{'oirhon': 1, 'guihimu': 1, 'ued': 1, 'manaih' ...",0
BBQ chicken Mongolia,Tahian shardag tosoo ern solidiimu tanaih. Arai l ...,1,"{'dahij': 1, 'heterhii': 1, 'zahialaltgui': 1, ...",0


In [20]:
# Preview the first rows of the held-out test split.
test_data.head()

name,review,rating,word_count,sentiment
BBQ chicken Mongolia,Guurnii door bdag salbariin uilchilgee ...,5,"{'door': 1, 'taalagdlaa': 1, 'salbariin': 1, ...",1
Bojangles Grill Restaurant & Bar ...,goe yum,5,"{'goe': 1, 'yum': 1}",1
Burger King Mongolia,Burer king-d huuhduud maani durtai ger buleere ...,5,"{'ch': 1, 'tsew': 1, 'deer': 1, 'ghiin': 1, ...",1
Burger King Mongolia,burgernii chin baitsaa bol arai shuu. hool ...,1,"{'ve': 1, 'ideed': 1, 'baitsaa': 3, 'chin': 1, ...",0
Burger King Mongolia,"Bi l lav dahij Burger King orohgui, Amt ntr ...",1,"{'bagts': 1, 'burger': 1, 'aimshigtai': 1, ...",0
Food park shangrila mall,Endii pizza-g udur bolgn ideed ch uidkumaa�. ...,5,"{'hamt': 1, 'ch': 1, 'pizza-g': 1, 'ideed' ...",1
Horned Owl Food Symphony,Goy zuragnuud haraad nzuudiigaa urialaad ...,2,"{'butsaad': 1, 'garah': 1, 'hoolnii': 1, 'ih' ...",0


In [21]:
# Fit a logistic classifier, i.e. a model for a two-valued target
# (negative / positive, hotdog / not hotdog, ...). It predicts the 0/1
# 'sentiment' column from the per-review 'word_count' dictionary.
# NOTE(review): test_data is supplied as the validation set here and is also
# the set the model is evaluated on afterwards — confirm this reuse is intended.
sentiment_model = graphlab.logistic_classifier.create(
    train_data,
    target='sentiment',
    features=['word_count'],
    validation_set=test_data,
)

# Now let's evaluate the trained model 

In [22]:
# Evaluate the model on test_data with the 'roc_curve' metric. The result is a
# table with one row per classification threshold, giving the false-positive
# rate (fpr) and true-positive rate (tpr) at that threshold — the data behind
# a ROC plot for a binary classifier.

sentiment_model.evaluate(test_data, metric='roc_curve')

{'roc_curve': Columns:
 	threshold	float
 	fpr	float
 	tpr	float
 	p	int
 	n	int
 
 Rows: 100001
 
 Data:
 +-----------+-----+-----+---+---+
 | threshold | fpr | tpr | p | n |
 +-----------+-----+-----+---+---+
 |    0.0    | 1.0 | 1.0 | 4 | 3 |
 |   1e-05   | 1.0 | 1.0 | 4 | 3 |
 |   2e-05   | 1.0 | 1.0 | 4 | 3 |
 |   3e-05   | 1.0 | 1.0 | 4 | 3 |
 |   4e-05   | 1.0 | 1.0 | 4 | 3 |
 |   5e-05   | 1.0 | 1.0 | 4 | 3 |
 |   6e-05   | 1.0 | 1.0 | 4 | 3 |
 |   7e-05   | 1.0 | 1.0 | 4 | 3 |
 |   8e-05   | 1.0 | 1.0 | 4 | 3 |
 |   9e-05   | 1.0 | 1.0 | 4 | 3 |
 +-----------+-----+-----+---+---+
 [100001 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.}

In [23]:
# Open the model's 'Evaluation' view to inspect its evaluation visually.
sentiment_model.show(view='Evaluation')

# Use trained model to understand sentiment of other business reviews

In [24]:
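# Note: business_reviews was filtered to 'BBQ chicken Mongolia' in an earlier cell, so the predictions below are for that business's reviews (not Burger King Mongolia's, even though it is the most reviewed business).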

In [25]:
# Score every review of the selected business with the trained model.
# output_type='probability' asks for a probability instead of a hard 0/1
# label; in the output below, values close to 1 line up with 5-star reviews.
business_reviews['predicted_sentiment'] = sentiment_model.predict(business_reviews, output_type='probability')

In [26]:
# Preview the reviews together with their new 'predicted_sentiment' score.
business_reviews.head()

name,review,rating,word_count,predicted_sentiment
BBQ chicken Mongolia,Horoollin salbartaa anhaarlaa handulaacee ...,1,"{'bvr': 1, 'hutgaa': 1, 'halbaga': 1, 'tawga' ...",1.36938385948e-19
BBQ chicken Mongolia,Hogiin gazar. Zahialga ugii geheer dandaa uur ...,1,"{'oirhon': 1, 'guihimu': 1, 'ued': 1, 'manaih' ...",9.882623800199999e-15
BBQ chicken Mongolia,Tahian shardag tosoo ern solidiimu tanaih. Arai l ...,1,"{'dahij': 1, 'heterhii': 1, 'zahialaltgui': 1, ...",0.0001795525193
BBQ chicken Mongolia,Ystoi muuhai haraajin tuuhii mahand n hurtel ...,1,"{'ch': 1, 'mahand': 1, 'shig': 1, 'gd': 1, ...",1.82925092366e-05
BBQ chicken Mongolia,Sous ntree hvn guisiim shg hdg blchjee vneheer ...,1,"{'guisiim': 1, 'hvn': 1, 'ee': 1, 'shgee': 1, ...",7.30063173384e-06
BBQ chicken Mongolia,Guurnii door bdag salbariin uilchilgee ...,5,"{'door': 1, 'taalagdlaa': 1, 'salbariin': 1, ...",0.329986932663
BBQ chicken Mongolia,goe tahia baina,3,"{'tahia': 1, 'goe': 1, 'baina': 1} ...",0.490027046584
BBQ chicken Mongolia,Chicken dukbab bol nice2 jhn hymd bval humuus ih ...,5,"{'daa': 1, 'nice2': 1, 'dukbab': 1, 'humuus' ...",0.999588244237
BBQ chicken Mongolia,bbg hamt olond ajiln amjilt orh durtai gazarn ...,5,"{'hamt': 1, 'ajiln': 1, 'bbg': 1, 'neg': 1, ...",0.999424896494


# Let's sort the reviews 

In [27]:
# Order the reviews from highest to lowest predicted_sentiment, so the first
# row is the most positive prediction and the last row the most negative.
business_reviews = business_reviews.sort('predicted_sentiment', ascending=False)

In [28]:
# Preview the reviews in their sorted order.
business_reviews.head()

name,review,rating,word_count,predicted_sentiment
BBQ chicken Mongolia,Chicken dukbab bol nice2 jhn hymd bval humuus ih ...,5,"{'daa': 1, 'nice2': 1, 'dukbab': 1, 'humuus' ...",0.999588244237
BBQ chicken Mongolia,bbg hamt olond ajiln amjilt orh durtai gazarn ...,5,"{'hamt': 1, 'ajiln': 1, 'bbg': 1, 'neg': 1, ...",0.999424896494
BBQ chicken Mongolia,goe tahia baina,3,"{'tahia': 1, 'goe': 1, 'baina': 1} ...",0.490027046584
BBQ chicken Mongolia,Guurnii door bdag salbariin uilchilgee ...,5,"{'door': 1, 'taalagdlaa': 1, 'salbariin': 1, ...",0.329986932663
BBQ chicken Mongolia,Tahian shardag tosoo ern solidiimu tanaih. Arai l ...,1,"{'dahij': 1, 'heterhii': 1, 'zahialaltgui': 1, ...",0.0001795525193
BBQ chicken Mongolia,Ystoi muuhai haraajin tuuhii mahand n hurtel ...,1,"{'ch': 1, 'mahand': 1, 'shig': 1, 'gd': 1, ...",1.82925092366e-05
BBQ chicken Mongolia,Sous ntree hvn guisiim shg hdg blchjee vneheer ...,1,"{'guisiim': 1, 'hvn': 1, 'ee': 1, 'shgee': 1, ...",7.30063173384e-06
BBQ chicken Mongolia,Hogiin gazar. Zahialga ugii geheer dandaa uur ...,1,"{'oirhon': 1, 'guihimu': 1, 'ued': 1, 'manaih' ...",9.882623800199999e-15
BBQ chicken Mongolia,Horoollin salbartaa anhaarlaa handulaacee ...,1,"{'bvr': 1, 'hutgaa': 1, 'halbaga': 1, 'tawga' ...",1.36938385948e-19


# The most positive review for the selected business

In [29]:
# business_reviews is sorted by descending predicted_sentiment, so row 0 is
# the review the model considers most positive.
index = 0
business_reviews[index]['review']
# Disabled debug output (Python 2 print statement) showing the model's score:
# print "machine rating:" + str(business_reviews[index]['predicted_sentiment'])

'Chicken dukbab bol nice2 jhn hymd bval humuus ih zahialna daa'

# The most negative review for the selected business

In [30]:
# Index -1 selects the last row; with the descending sort applied earlier,
# that is the review with the lowest predicted_sentiment.
index = -1
business_reviews[index]['review']

'Horoollin salbartaa anhaarlaa handulaacee ymar es surthuun hvndll bhgv ih muhai ner toriig cn unagaasn salbar we. Hool n udna zoogc n udaan gants hool idh gj bi 5 6duudlaa brg svvldee oroo orto uilcleh shahsan . Zahialga dendvv udaana. Bvvr bj bj holo ogchood tawga oghgv halbaga hutgaa ogohgvv pizza ntr idh gheer bas l... bibvr ng ngeer n duudaj oroo hlj awsan. Tgd im ym gheer urdaas urlna ogh gj bna ntr ene cn yu gsn vg ym bee?? Iim haritsaatai bj blhuu??? Ajlaa hj cadhgv bgaag n halaacde tsaguur cn l ng ajil haisan gudamj metrlsn zaluus bna yah gj ungj ner toroo unagaan bj iim hvmvvs ajluuldag ym vneheer gomdoltou bna. Uilchilgee bvr muugin muu bsn.'