# AirBnB Review Topics

## Load Data

In [2]:
import pandas as pd
import numpy as np

from gensim.models.ldamulticore import LdaMulticore

  utils.PersistentlyDeprecated2018,


In [3]:
def display_results(results):
    """Print every topic in ``results`` as ``<index>: word, word, ...``.

    ``results`` is iterated as ``(index, text)`` pairs. The words printed for
    a topic are the double-quoted pieces of its ``text``.
    """
    for topic_id, topic_text in results:
        # Splitting on '"' leaves the quoted words at the odd positions.
        quoted_words = topic_text.split('"')[1::2]
        print('{}: {}'.format(topic_id, ', '.join(quoted_words)))
        
def display_topics(results, topics):
    """Display Selected and Labeled Topics

    For every label in ``topics`` a blank line and the label are printed,
    followed by one ``<index>: word, word, ...`` line for each entry of
    ``results`` whose index is listed under that label. Matching entries are
    printed in the order they appear in ``results``. A label with no matching
    entries prints only its header.

    Parameters
    ----------
    results : iterable of (index, text) pairs
        The words shown for a topic are the double-quoted pieces of ``text``.
        Any iterable is accepted, including one-shot iterators.
    topics : mapping of str -> collection of indices
        Label to display, mapped to the topic indices grouped under it.
    """
    # ``results`` is scanned once per label; materialize it so a one-shot
    # iterable (generator, zip, map) is not exhausted after the first label.
    results = list(results)

    for topic, topic_ids in topics.items():
        print('\n' + topic)

        for index, result in results:
            if index in topic_ids:
                # Quoted words sit at the odd positions after splitting on '"'.
                print(str(index) + ': ' + ', '.join(result.split('"')[1::2]))

# Full Review Tokens

Model built with full tokenization of reviews.

In [None]:
# Selected topic indices of the full-token model, grouped under the
# category label they are displayed with.
full_review_topics = {
    'Accuracy': [33],
    'Cleanliness': [10, 38, 47],
    'Checkin': [17, 23],
    'Communication': [7, 27],
    'Location': [4, 22, 37, 44],
    'Transport': [34, 35],
    'Value': [26, 30, 41, 49],
}

In [137]:
# Load the saved LDA model built from the fully tokenized reviews
# (path is relative to the notebook's working directory).
ldamodel_full = LdaMulticore.load('models/ldam_reviews_50topics_10words_50passes_full.model')

# Request all 50 topics with 10 words each; the result is later iterated
# by display_topics as (index, text) pairs.
results_full = ldamodel_full.print_topics(num_topics=50, num_words=10)

In [145]:
# Print each category label followed by the full-token model topics
# selected for it in full_review_topics.
display_topics(results_full, full_review_topics)


Cleanliness
10: room, bathroom, private, bedroom, clean, kitchen, living, space, bed, shared
38: wonderful, host, stay, comfortable, clean, beautiful, home, location, lovely, would
47: nice, place, clean, really, super, room, stay, bed, house, comfortable

Communication
7: great, gave, local, tip, recommendation, host, city, area, helpful, provided
27: quick, question, always, quickly, respond, response, available, responded, message, john

Checkin
17: check, late, even, early, arrived, time, accommodating, let, flight, last
23: easy, check, communication, access, check-in, clean, super, communicate, made, instruction

Value
26: best, one, stayed, ever, place, airbnb, everything, 've, amazing, could
30: star, without, five, joe, easier, hesitation, recommending, joy, trader, leo
41: stay, back, place, definitely, would, time, come, next, trip, visit
49: recommend, would, place, highly, stay, definitely, great, clean, staying, anyone

Location
4: city, place, quiet, great, perfect, nei

# Non-Name Entities

Many of the topics in the "Full Review Tokens" model were dominated by people and place names, which are not useful for categorizing reviews in this project. I therefore identified the named entities in the initial tokens and removed them before building this model.

In [None]:
# Selected topic indices of the model built without name entities, grouped
# under the category label they are displayed with.
no_ne_topics = {
    'Accuracy': [],
    'Cleanliness': [15, 27, 42],
    'Checkin': [35, 45],
    'Communication': [1, 8, 38],
    'Location': [12, 25, 30, 32],
    'Transport': [3, 9, 22],
    'Value': [0, 18, 28, 46],
}

In [88]:
# Load the saved LDA model built from tokens with name entities removed
# (path is relative to the notebook's working directory).
ldamodel_non_ne = LdaMulticore.load('models/ldam_reviews_50topics_10words_50passes_no_ner.model')

# Request all 50 topics with 10 words each; the result is later iterated
# by display_topics as (index, text) pairs.
results_non_ne = ldamodel_non_ne.print_topics(num_topics=50, num_words=10)

In [107]:
# Print each category label followed by the no-name-entity model topics
# selected for it in no_ne_topics.
display_topics(results_non_ne, no_ne_topics)


Cleanliness
15: room, clean, bathroom, private, nice, comfortable, shared, bed, spacious, guest
27: hotel, price, service, affordable, cleanliness, expect, amazingly, rate, compared, sense
42: nice, really, place, good, stay, enjoyed, clean, location, host, quiet

Communication
1: question, quick, always, quickly, respond, available, response, responded, message, help
8: great, gave, host, tip, recommendation, local, helpful, city, stay, went
38: last, minute, hidden, gem, booked, holiday, accommodate, lady, understanding, authentic

Checkin
35: flight, hosts, updated, checkin, checkout, larger, valley, delayed, noe, tasteful
45: easy, check, communication, check-in, described, exactly, clean, great, stay, location

Value
0: back, stay, come, definitely, place, next, would, time, great, love
18: best, experience, one, airbnb, first, place, ever, host, time, stayed
28: mission, value, district, heart, apt, star, money, san, five, right
46: recommend, would, stay, place, definitely, hig

# No Name Entities Plus Adjectives

After building models with different parts of speech, I found that adjectives were important in grouping the reviews, so this model combines the tokens without name entities with adjectives.

In [5]:
# Selected topic indices of the "no name entities plus adjectives" model,
# grouped under the category label they are displayed with.
no_ne_plus_adj_topics = {
    'Accuracy': [26],
    'Cleanliness': [18, 43],
    'Checkin': [11, 34],
    'Communication': [8, 19, 32, 33, 37],
    'Location': [20, 21, 30, 35, 36],
    'Transport': [6, 41, 44],
    'Value': [22, 27, 28, 46],
}

In [131]:
# Load the saved LDA model built from tokens without name entities plus
# adjectives (path is relative to the notebook's working directory).
ldam_no_ne_plus_adj = LdaMulticore.load('models/ldam_reviews_50topics_10words_50passes_no_ner_plus_adj.model')

# Request all 50 topics with 10 words each; the result is later iterated
# by display_topics as (index, text) pairs.
results_non_ne_plus_adj = ldam_no_ne_plus_adj.print_topics(num_topics=50, num_words=10)

In [132]:
# Print each category label followed by the topics selected for it in
# no_ne_plus_adj_topics.
display_topics(results_non_ne_plus_adj, no_ne_plus_adj_topics)


Cleanliness
18: comfortable, clean, convenient, bed, room, stay, place, location, space, everything
43: nice, clean, place, really, stay, room, house, host, neighborhood, everything

Communication
8: little, responsive, cozy, communicative, cheap, question, knowledgeable, space, host, spot
19: gave, provided, tip, simple, recommendation, fast, lot, information, suggestion, city
32: super, helpful, friendly, clean, cute, host, place, location, room, really
33: last, flexible, second, minute, positive, least, time, accommodated, communal, accommodating
37: quick, available, able, question, hospitable, always, respond, quickly, responded, response

Checkin
11: easy, check-in, check, access, get, made, communication, location, communicate, place
34: next, early, late, time, sweet, check, favorite, adorable, even, dog

Value
22: much, back, stay, come, place, would, definitely, enjoyed, future, thank
27: great, place, stay, location, host, would, time, definitely, really, everything
28: cl

# Other LDA Models I Tried

## Nouns

In [11]:
# Selected topic indices of the nouns-only model, grouped under the
# category label they are displayed with (empty list: nothing selected).
nouns_topics = {
    'Accuracy': [],
    'Cleanliness': [],
    'Checkin': [10],
    'Communication': [6, 11, 42, 43],
    'Location': [1, 24, 30],
    'Value': [23, 25],
}

In [12]:
# Load the saved LDA model built from nouns only
# (path is relative to the notebook's working directory).
ldamodel_nouns = LdaMulticore.load('models/ldam_reviews_50topics_10words_50passes_nouns.model')

# Request all 50 topics with 10 words each; the result is later iterated
# by display_topics as (index, text) pairs.
results_nouns = ldamodel_nouns.print_topics(num_topics=50, num_words=10)

In [13]:
# Print each category label followed by the nouns-model topics selected
# for it in nouns_topics.
display_topics(results_nouns, nouns_topics)


Cleanliness

Communication
6: photo, mike, flat, amy, tony, communicator, email, thanks, pete, fast
11: michael, t, response, process, question, star, awesome, wa, booking, quick
42: responsive, rental, jonathan, share, game, app, date, knowledge, word, thought
43: check, thing, tip, recommendation, host, stay, room, information, lot, area

Checkin
10: unit, check-in, flight, door, luggage, breeze, hour, bag, stair, key

Value
23: location, great, host, value, stay, perfect, communication, room, accommodation, everything
25: highly, recommend, easy, jeff, host, destination, scott, immaculate, bob, wonderful

Location
1: place, location, host, sf, stay, everything, clean, time, nice, neighborhood
24: restaurant, place, shop, distance, location, bar, neighborhood, lot, food, street
30: city, place, uber, part, fran, access, town, area, transport, ride

Accuracy


## Adjectives

The adjectives-only model is good at identifying Cleanliness and Value topics.

In [6]:
# Selected topic indices of the adjectives-only model, grouped under the
# category label they are displayed with (empty list: nothing selected).
adjective_topics = {
    'Accuracy': [],
    'Cleanliness': [2, 9, 16, 18, 45],
    'Checkin': [19],
    'Communication': [7, 11],
    'Location': [48],
    'Value': [14, 28, 33, 42],
}

In [9]:
# Load the saved LDA model built from adjectives only
# (path is relative to the notebook's working directory).
ldam_adj = LdaMulticore.load('models/ldam_reviews_50topics_5words_50passes_adjectives.model')

# Request all 50 topics with 5 words each (the other models in this
# notebook use 10); the result is later iterated by display_topics as
# (index, text) pairs.
results_adj = ldam_adj.print_topics(num_topics=50, num_words=5)

In [10]:
# Print each category label followed by the adjectives-model topics
# selected for it in adjective_topics.
display_topics(results_adj, adjective_topics)


Cleanliness
2: nice, clean, comfortable, small, few
9: stylish, clean, soft, reasonable, comfortable
16: cozy, peaceful, clean, accommodating, tidy
18: beautiful, enough, comfortable, delightful, clean
45: perfect, clean, neat, comfortable, outstanding

Communication
7: responsive, great, clean, helpful, comfortable
11: helpful, friendly, great, clean, welcoming

Checkin
19: check-in, flexible, prompt, less, muni

Value
14: accessible, communicative, walkable, ideal, affordable
28: ready, major, spectacular, recomendable, magical
33: next, incredible, fabulous, possible, lucky
42: quick, big, worth, clean, recommended

Location
48: easy, convenient, clean, public, comfortable

Accuracy


## Verbs

The verbs-only model yields little usable topic information.

In [16]:
# Selected topic indices of the verbs-only model, grouped under the
# category label they are displayed with (empty list: nothing selected).
verbs_topics = {
    'Accuracy': [],
    'Cleanliness': [],
    'Checkin': [],
    'Communication': [1, 37],
    'Location': [],
    'Value': [19],
}

In [17]:
# Load the saved LDA model built from verbs only
# (path is relative to the notebook's working directory).
ldam_verbs = LdaMulticore.load('models/ldam_reviews_50topics_10words_50passes_verbs.model')

# Request all 50 topics with 10 words each; the result is later iterated
# by display_topics as (index, text) pairs.
results_verbs = ldam_verbs.print_topics(num_topics=50, num_words=10)

In [18]:
# Print each category label followed by the verbs-model topics selected
# for it in verbs_topics.
display_topics(results_verbs, verbs_topics)


Cleanliness

Communication
1: help, share, describe, invite, play, sightsee, read, serve, introduce, noise
37: communicate, bring, talk, furnish, mention, start, les, price, learn, pied

Checkin

Value
19: recommend, cook, choose, connect, perfect, finish, present, warm, organise, seek

Location

Accuracy
