<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Import-libraries-and-packages" data-toc-modified-id="Import-libraries-and-packages-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Import libraries and packages</a></span><ul class="toc-item"><li><span><a href="#Load-in-review-data" data-toc-modified-id="Load-in-review-data-1.1"><span class="toc-item-num">1.1&nbsp;&nbsp;</span>Load in review data</a></span></li><li><span><a href="#Clean-data" data-toc-modified-id="Clean-data-1.2"><span class="toc-item-num">1.2&nbsp;&nbsp;</span>Clean data</a></span></li></ul></li><li><span><a href="#add-user-inputs-into-dataframe" data-toc-modified-id="add-user-inputs-into-dataframe-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>add user inputs into dataframe</a></span></li><li><span><a href="#Combine-dataframes" data-toc-modified-id="Combine-dataframes-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Combine dataframes</a></span></li><li><span><a href="#create-separate-dataframes-for-places-and-users-after-applying-cosine-similarity" data-toc-modified-id="create-separate-dataframes-for-places-and-users-after-applying-cosine-similarity-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>create separate dataframes for places and users after applying cosine similarity</a></span></li><li><span><a href="#Recommender-functions:" data-toc-modified-id="Recommender-functions:-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Recommender functions:</a></span></li><li><span><a href="#Example-of-top-recommendations-for-a-place" data-toc-modified-id="Example-of-top-recommendations-for-a-place-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Example of top recommendations for a place</a></span><ul class="toc-item"><li><span><a href="#Plotly" data-toc-modified-id="Plotly-6.1"><span class="toc-item-num">6.1&nbsp;&nbsp;</span>Plotly</a></span></li></ul></li></ul></div>

# Import libraries and packages

In [8]:
import glob
import os

import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import pairwise_distances

import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot
import cufflinks as cf
import matplotlib.pyplot as plt 
import seaborn as sns 
  
sns.set_style('white') 
%matplotlib inline 

## Load in review data

In [9]:
df = pd.concat([pd.read_csv(file, index_col=0) for file in glob.glob('./reviews/text_*.csv')])

## Clean data

In [10]:
df = df[df['rating'] != 'no rating']
df = df[~df['place_name'].str.contains('-- CLOSED')]
df = df[~df['category'].str.contains('City')]
df['rating'] = df['rating'].map(lambda x: int(x.split(' ')[0]))
df = df.reset_index(drop=True)

In [11]:
df

Unnamed: 0,place_name,category,rating,user_name,review_text
0,Ann and Roy Butler Hike and Bike Trail,Attraction,5,what-moms-think,This Hike or Bike trail is just wonderful! The...
1,Mozart's Coffee Roasters,Restaurant,5,what-moms-think,Mozart's is a great place to sit by the beauti...
2,McKinney Falls State Park,Attraction,5,what-moms-think,Although it’s Fall in Austin right now and the...
3,Waller Creek,Attraction,5,what-moms-think,The art show at Waller Creek does not disappoi...
4,Vandegrift High School,Attraction,5,what-moms-think,This local highschool holds some amazing commu...
5,Steiner Ranch Hiking Trails,Attraction,5,what-moms-think,"22 Miles of pristine trails, streams, swimming..."
6,Austin City Limits Music Festival,Attraction,5,what-moms-think,My experience at ACL the last 2 yrs has been i...
7,Mayfield Park Cottage and Gardens,Attraction,5,what-moms-think,Nice hiking and walking grounds for families a...
8,360 Bridge Lookout,Attraction,5,what-moms-think,There’s a little place to pull your car off th...
9,Restoration Hardware,Attraction,5,what-moms-think,Just WOW! This giant 4 story building has it a...


In [12]:
df.dtypes

place_name     object
category       object
rating          int64
user_name      object
review_text    object
dtype: object

# add user inputs into dataframe

In [13]:
def add_new_user(un='alex_nguyen', rt='no text'):
    rows_to_add = int(input("Enter the number of rows to add:"))

    for num in range(rows_to_add):
        places = input('Enter name of place: ')
        cat = input('Enter category Type: ')
        rat = input('Enter rating: ')
        #un = input('Enter username')
        #rt = input('Enter review')
        #df1 = pd.DataFrame(data=[[places,cat,rat, un, rt]],columns=['place_name', 'category', 'rating', 'user_name', 'review_text'])
        df1.loc[len(df1)] = [places,cat,rat, un, rt]
#     #df1 = add_new_user()
#     df = pd.concat([df,df1], axis=0)
#     df.index = range(len(df.index))
#     df['rating'] = df['rating'].astype(int)

In [14]:
df1 = pd.DataFrame(columns=['place_name', 'category', 'rating', 'user_name', 'review_text'])
add_new_user()

Enter the number of rows to add:2
Enter name of place: Uchi
Enter category Type: Restaurant
Enter rating: 5
Enter name of place: Zilker Metropolitan Park
Enter category Type: Attraction
Enter rating: 5


In [15]:
df1

Unnamed: 0,place_name,category,rating,user_name,review_text
0,Uchi,Restaurant,5,alex_nguyen,no text
1,Zilker Metropolitan Park,Attraction,5,alex_nguyen,no text


# Combine dataframes

In [16]:
df = pd.concat([df,df1], axis=0)
df.index = range(len(df.index))
df['rating'] = df['rating'].astype(int)

In [17]:
df.tail()

Unnamed: 0,place_name,category,rating,user_name,review_text
5281,Mayfield Park and Nature Preserve,Attraction,5,teresa-d_4,peacocks roam the park and trails. fantastic p...
5282,Mt Bonnell,Attraction,5,teresa-d_4,fantastic views of lake austin. walk up quite ...
5283,McKinney Falls State Park,Attraction,5,teresa-d_4,beautiful natural falls and hiking trails loca...
5284,Uchi,Restaurant,5,alex_nguyen,no text
5285,Zilker Metropolitan Park,Attraction,5,alex_nguyen,no text


In [18]:
df.pivot_table(values='rating', index ='place_name', columns = 'user_name')

user_name,alan-g_5,alex_nguyen,alexandra-richmond,alice-chase,alicia-moylan,allan-z,amit-anandwala,amy-simoes,andi-w,andrew-w_3,...,travis-katz,trinidad-m_2,tyson-bramer,van-le,vicki-t,vivian-c,wanderlustoneer,wendy-p_6,what-moms-think,yelena-konetchy
place_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Comics & More,,,,,,,,,,,...,,,,,,,,,,
(512) Brewing Company,,,,,,,,,,,...,,,,,,,,,,
***Reduced! Heart of SXSW***; 3BR 2BA Downtown Home Easy Walk to Most Everything,,,,,,,,,,,...,,,,,,,,,,
"**SoCo BUNGALOW** Modern House Downtown, 3 bedroom/2 bath, sleeps 8",,,,,,,,,,,...,,,,,,,,,,
"1 World of Tennis Sq Austin, TX 78738-1101 United States",,,,,,,,,,,...,,,,,,,,,3.0,
1110 Carriage House Inn,,,,,,,,,,,...,,,,,,,,,,
15 Locks,,,,,,,,,,,...,,,,,,,,,,
1886 Cafe & Bakery,,,,,,,,,,,...,4.0,,,,,,,,,
24 Diner,5.0,,,,,,,,5.0,,...,4.0,,,5.0,,,5.0,,,
360 Bridge,,,,,,,,,,,...,,,,,,,,,5.0,


# create separate dataframes for places and users after applying cosine similarity

In [19]:
# create pivot tables
places_pivot = df.pivot_table(values='rating', index ='place_name', columns = 'user_name')
user_pivot = df.pivot_table(values='rating', index ='place_name', columns = 'user_name').T

# create sparse matrixes
places_sparse = sparse.csr_matrix(places_pivot.fillna(0))
user_sparse = sparse.csr_matrix(user_pivot.fillna(0))

# create pairwise distances
places_recommender = pairwise_distances(places_sparse, metric='cosine')
user_recommender = pairwise_distances(user_sparse, metric='cosine')

# create dataframes after performing pairwise distance using cosine similarity
places_recommender_df = pd.DataFrame(places_recommender, index=places_pivot.index, columns=places_pivot.index)
user_recommender_df = pd.DataFrame(user_recommender, index=user_pivot.index, columns=user_pivot.index)

# Recommender functions:
- similar places
- similar users
- combined

In [20]:
def place_recommender(places_recommender_df):
    search = input("Enter the name of place\n")
    print('')
    try:
        for place in df[df['place_name'].str.contains(search)]['place_name']:
            print('Average Rating', round(df[df['place_name'] == place]['rating'].mean(),4))
            print('Number of Rating', df[df['place_name'] == place].shape[0])
            print('')
            print('Recommendations')
            display(places_recommender_df[place].sort_values()[1:11])
            print('')
    except:
        print("Please check the place name")

In [21]:
def user_recommender():
    search = input("Enter username\n")
    print ('')
    try:
        for name in df[df['user_name'].str.contains(search)]['user_name']:
            print('Average Rating', round(df[df['user_name'] == name]['rating'].mean(),4))
            print('Number of Reviews', df[df['user_name'] == name].shape[0])
            print('')
            print('Recommendations')
            display(user_recommender_df[name].sort_values()[1:11])
            print('')
    except:
        print("Please check the username")

In [22]:
def recommender(places_recommender_df, user_recommender_df):
    while True:
        Query = input("\nSelect one or type 'done' to quit:\n 1) Recommendations for similar users \n 2) Recommendations for similar places\n")
        if Query == '1':
            try:
                user_recommender()
            except:
                os.system('clear')
                print("Please check the username")

        elif Query == '2':
            try:
                place_recommender()

            except:
                print("please check the place name")

        elif Query == 'done':

            break

        else:
            print("please try again")

In [23]:
recommender(places_recommender_df, user_recommender_df)


Select one or type 'done' to quit:
 1) Recommendations for similar users 
 2) Recommendations for similar places
1
Enter username
alex_nguyen

Average Rating 5.0
Number of Reviews 2

Recommendations


user_name
angie-m_9        0.532731
david-k_25       0.539520
shelby-s         0.628846
ashley-d         0.646888
satish-talim     0.661357
trinidad-m_2     0.696551
holly-w_4        0.699035
kathleen-r_13    0.702632
richard-james    0.709381
shonda           0.711325
Name: alex_nguyen, dtype: float64


Average Rating 5.0
Number of Reviews 2

Recommendations


user_name
angie-m_9        0.532731
david-k_25       0.539520
shelby-s         0.628846
ashley-d         0.646888
satish-talim     0.661357
trinidad-m_2     0.696551
holly-w_4        0.699035
kathleen-r_13    0.702632
richard-james    0.709381
shonda           0.711325
Name: alex_nguyen, dtype: float64



Select one or type 'done' to quit:
 1) Recommendations for similar users 
 2) Recommendations for similar places
done


In [24]:
df[df['user_name'] == 'shonda'].head(20)

Unnamed: 0,place_name,category,rating,user_name,review_text
4671,Rainey Street,Attraction,5,shonda,no text
4672,Franklin Barbecue,Restaurant,5,shonda,no text
4673,6th Street,Attraction,5,shonda,no text
4674,Texas State Capitol,Attraction,5,shonda,no text
4675,Zilker Metropolitan Park,Attraction,5,shonda,no text
4676,Barton Springs Pool,Attraction,5,shonda,no text


# Example of top recommendations for a place

In [25]:
places_recommender_df['Uchi'] # has to be exact name of the place

place_name
 Comics & More                                                                      1.000000
(512) Brewing Company                                                               1.000000
***Reduced! Heart of SXSW***; 3BR 2BA Downtown Home Easy Walk to Most Everything    1.000000
**SoCo BUNGALOW** Modern House Downtown, 3 bedroom/2 bath, sleeps 8                 0.784635
1 World of Tennis Sq Austin, TX  78738-1101 United States                           1.000000
1110 Carriage House Inn                                                             1.000000
15 Locks                                                                            1.000000
1886 Cafe & Bakery                                                                  0.873967
24 Diner                                                                            0.769078
360 Bridge                                                                          1.000000
360 Bridge Lookout                                         

## Plotly

In [26]:
# cf.go_offline()
# # Set the global them 
# cf.set_config_file(world_readable=True, theme='pearl', offline=True)

In [27]:
# df.pivot(columns='place_name',values='rating').iplot(kind='scatter',xTitle='Places',yTitle='Ratings')

In [28]:
# trace0 = go.Scatter(x=df['place_name'],y=df['rating'],name='Places')
# trace1 = go.Scatter(x=df['user_name'], y=df['rating'],name='Users')

In [29]:
# trace1 = go.Scatter(x=df['user_name'], y=df['rating'],name='Users',
#     mode = 'markers', 
#     marker = dict(
#         color = '#FFBAD2',
#         line = dict(width = 1)
#     )
# )
# data = [trace1]

# py.iplot(data)

In [30]:
# df['rating'].mean()

In [31]:
# trace0 = go.Histogram(
#     x=df['user_name'], y=df['rating'].mean(),name='Users'
# )
# trace1 = go.Histogram(
#     x=df['place_name'],y=df['rating'].mean(),name='Places'
# )
# data = [trace0, trace1]
# layout = go.Layout(barmode='stack')
# fig = go.Figure(data=data, layout=layout)

# py.iplot(fig)

In [32]:
# trace1 = go.Scatter(x=df['user_name'], y=df['rating'],name='Users',
#     mode='markers',
#     marker=dict(
#         size=16,
#         color = np.random.randn(6000), #set color equal to a variable
#         colorscale='Viridis',
#         showscale=True
#     )
# )
# data = [trace1]

# py.iplot(data)

In [33]:
# data = [trace0,trace1]
# #layout = go.Layout(barmode='group')

# fig = go.Figure(data=data, layout=layout)
# py.iplot(fig)
# py.iplot(fig, filename='grouped-bar')

In [34]:
# df.iplot(kind='scatter',title='Ratings of Places',xTitle='Places',yTitle='Ratings')

In [35]:
# df.iplot(kind='hist',title='Ratings of Places',xTitle='Ratings',yTitle='Places')