In [1]:
import pandas as pd

# Read the tab-separated reviews file into a DataFrame.
df = pd.read_csv(
    './Restaurant_Reviews.tsv',
    sep='\t',
)

In [2]:
# Preview the first five rows of the loaded frame.
df.head(5)

Unnamed: 0,Review,Liked
0,Wow... Loved this place.,1
1,Crust is not good.,0
2,Not tasty and the texture was just nasty.,0
3,Stopped by during the late May bank holiday of...,1
4,The selection on the menu was great and so wer...,1


In [4]:
from textblob import TextBlob
import nltk
# Make sure the 'punkt' and 'averaged_perceptron_tagger' NLTK packages are available locally.
for _nltk_resource in ('punkt', 'averaged_perceptron_tagger'):
    nltk.download(_nltk_resource)

def pos_tag(text):
    """Return the part-of-speech tags TextBlob produces for ``text``.

    Parameters
    ----------
    text : str
        The text to tag.

    Returns
    -------
    list of (word, tag) tuples, or None
        ``TextBlob(text).tags`` on success, ``None`` if building the blob or
        tagging it raises an ordinary error.
    """
    try:
        return TextBlob(text).tags
    except Exception:
        # Best-effort tagging: a row that cannot be tagged yields None.
        # Catching Exception (not a bare ``except:``) lets KeyboardInterrupt
        # and SystemExit propagate, so a long-running apply can be interrupted.
        return None

# Tag every review and store the result next to the original columns.
df['pos'] = df['Review'].map(pos_tag)

# Optional export of the tagged frame (left disabled):
# df.to_csv('dataadj.csv', index=False)

[nltk_data] Downloading package punkt to
[nltk_data]     /afs/cs.stanford.edu/u/edjchen/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /afs/cs.stanford.edu/u/edjchen/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


In [5]:
def get_adjectives(text, adjective_tags=("JJ",)):
    """Return the words in ``text`` whose POS tag is an adjective tag.

    Parameters
    ----------
    text : str
        The text to analyse. Non-text values (for example a missing review)
        produce an empty list instead of an error.
    adjective_tags : str or iterable of str, optional
        POS tags treated as adjectives. Defaults to ``("JJ",)``; pass e.g.
        ``("JJ", "JJR", "JJS")`` to also accept other tags.

    Returns
    -------
    list of str
        Matching words, in the order they appear in ``text``.
    """
    # A lone tag given as a string must not be treated as a character set.
    if isinstance(adjective_tags, str):
        adjective_tags = (adjective_tags,)
    # Anything that is not text cannot be tagged; report "no adjectives".
    if not isinstance(text, (str, bytes)):
        return []
    blob = TextBlob(text)
    return [word for (word, tag) in blob.tags if tag in adjective_tags]

def get_adjective_noun_pairs(text, adjective_tags=("JJ",), noun_tags=("NN",)):
    """Return (adjective, noun) pairs for adjectives directly followed by a noun.

    Parameters
    ----------
    text : str
        The text to analyse. Non-text values (for example a missing review)
        produce an empty list instead of an error.
    adjective_tags : str or iterable of str, optional
        POS tags accepted for the first word of a pair. Defaults to ``("JJ",)``.
    noun_tags : str or iterable of str, optional
        POS tags accepted for the second word of a pair. Defaults to ``("NN",)``.

    Returns
    -------
    list of (str, str)
        One ``(adjective, noun)`` tuple per adjacent match, in text order.
    """
    # A lone tag given as a string must not be treated as a character set.
    if isinstance(adjective_tags, str):
        adjective_tags = (adjective_tags,)
    if isinstance(noun_tags, str):
        noun_tags = (noun_tags,)
    # Anything that is not text cannot be tagged; report "no pairs".
    if not isinstance(text, (str, bytes)):
        return []

    tags = TextBlob(text).tags
    # Pair each token with its successor; the last token has none, so it can
    # never start a pair.
    return [
        (word, next_word)
        for (word, tag), (next_word, next_tag) in zip(tags, tags[1:])
        if tag in adjective_tags and next_tag in noun_tags
    ]

# One list of adjectives per review.
df['adjectives'] = df['Review'].map(get_adjectives)
# One list of (adjective, noun) tuples per review.
df['adjective_plus_nouns'] = df['Review'].map(get_adjective_noun_pairs)

In [6]:
# Inspect the first 50 rows, including the newly added columns.
df.head(50)

Unnamed: 0,Review,Liked,pos,adjectives,adjective_plus_nouns
0,Wow... Loved this place.,1,"[(Wow, NNS), (Loved, VBN), (this, DT), (place,...",[],[]
1,Crust is not good.,0,"[(Crust, NNP), (is, VBZ), (not, RB), (good, JJ)]",[good],[]
2,Not tasty and the texture was just nasty.,0,"[(Not, RB), (tasty, JJ), (and, CC), (the, DT),...","[tasty, nasty]",[]
3,Stopped by during the late May bank holiday of...,1,"[(Stopped, VBN), (by, IN), (during, IN), (the,...",[late],[]
4,The selection on the menu was great and so wer...,1,"[(The, DT), (selection, NN), (on, IN), (the, D...",[great],[]
5,Now I am getting angry and I want my damn pho.,0,"[(Now, RB), (I, PRP), (am, VBP), (getting, VBG...",[angry],[]
6,Honeslty it didn't taste THAT fresh.),0,"[(Honeslty, NN), (it, PRP), (did, VBD), (n't, ...",[fresh],[]
7,The potatoes were like rubber and you could te...,0,"[(The, DT), (potatoes, NNS), (were, VBD), (lik...",[],[]
8,The fries were great too.,1,"[(The, DT), (fries, NNS), (were, VBD), (great,...",[great],[]
9,A great touch.,1,"[(A, DT), (great, JJ), (touch, NN)]",[great],"[(great, touch)]"


In [7]:
# Flatten the per-review adjective lists into one list (duplicates kept).
all_adjectives = [
    adjective
    for row_adjectives in df['adjectives']
    for adjective in row_adjectives
]

# Flatten the per-review (adjective, noun) tuples and render each one as a
# single space-separated string, e.g. ('great', 'touch') -> 'great touch'.
all_adjective_noun_pairs = [
    ' '.join(pair)
    for row_pairs in df['adjective_plus_nouns']
    for pair in row_pairs
]

In [8]:
# Display the flattened list of adjectives gathered from every review (repeats are kept).
all_adjectives

['good',
 'tasty',
 'nasty',
 'late',
 'great',
 'angry',
 'fresh',
 'great',
 'great',
 'prompt',
 'sure',
 'human',
 'little',
 'slow',
 'worth',
 'cute',
 'beautiful',
 'right',
 'red',
 'good',
 'great',
 'friendly',
 'overwhelmed',
 'salmon',
 'decent',
 'final',
 'good',
 'quick',
 'familiar',
 'Overall',
 'only',
 'redeeming',
 'inexpensive',
 'Ample',
 'good',
 'stupid',
 'first',
 'good',
 'good',
 'sick',
 'positive',
 'attentive',
 'great',
 'only',
 'prime',
 'dessert',
 'bad',
 'damn',
 'good',
 'Greek',
 'tasty',
 'refreshing',
 'rare',
 'pink',
 'nice',
 'good',
 'horrible',
 'huge',
 'friendly',
 'great',
 'wonderful',
 'imaginative',
 'flat-lined',
 'much',
 'right',
 'ripped',
 'petrified',
 'delicious',
 'glad',
 'huge',
 'military',
 'great',
 'second',
 'amazing',
 'great',
 'great',
 'good',
 'common',
 'scallop',
 'sweet',
 'good',
 'seasoned',
 'second',
 'good',
 'much',
 'good',
 'other',
 'old',
 'sugary',
 'old',
 'poor',
 'chicken',
 'unsatisfying',
 'fresh

In [9]:
# Display the adjective-noun pairs from every review, each as one space-joined string.
all_adjective_noun_pairs

['great touch',
 'human hair',
 'red velvet',
 'friendly staff',
 'salmon sashimi',
 'decent deal',
 'final blow',
 'quick place',
 'familiar pub',
 'redeeming quality',
 'first visit',
 'positive note',
 'great service',
 'only thing',
 'prime rib',
 'dessert section',
 'damn generic',
 'good beef',
 'Greek dressing',
 'nice char',
 'great food',
 'imaginative menu',
 'flat-lined excuse',
 'much seafood',
 'right amount',
 'ripped banana',
 'military discount',
 'great time',
 'second time',
 'great way',
 'good rice',
 'common sense',
 'scallop dish',
 'sweet potato',
 'second time',
 'good food',
 'other person',
 'old grease',
 'sugary disaster',
 'poor batter',
 'sexy party',
 'up side',
 'good food',
 'good thing',
 'fantastic afternoon',
 'great deal',
 'next trip',
 'REAL sushi',
 'actual experience',
 'Indian cuisine',
 'good sushi',
 'Phenomenal food',
 'next time',
 'vodka excellent',
 'good selection',
 'massive meatloaf',
 'delish tuna',
 'solid breakfast',
 'great menu',
