In [1]:
import json, ast
import pandas as pd
from django.utils.encoding import smart_str, smart_unicode
import requests_cache
import requests
import re
from bs4 import BeautifulSoup
from IPython.display import display
from ipywidgets import widgets
import sqlite3 as sql

In [2]:
requests_cache.install_cache("cache")

def modify(city):
    """
    modify the input
    
    Argument: city
    
    Return: soup from the url link
    """
    # capitalize the first letter of each word in a string
    city = city.title()
    # replace the empty space with the _
    city = city.strip().replace(' ', '_')
    url = "https://en.wikipedia.org/wiki/" + str(city) +",_California"
    info = requests.get(url).content
    wikisoup = BeautifulSoup(info, 'lxml')
    return wikisoup

In [3]:
def extract_overview(city):
    """
    extract a brief overview from the wikipedia page
    
    Argument: city
    
    Return: print the overview description
    """
    wikisoup = modify(city)
    content = wikisoup.findAll("p")
    if content[0].text == "California":
        brief = content[1].text
    else:
        brief = content[0].text
    print brief

In [4]:
def extract_info(city):
    """
    extract the basic information of the city
    
    Argument: city
    
    Return: a dataframe with the basic information
    """
    wikisoup = modify(city)
    data1 = wikisoup.findAll("tr", {"class": "mergedrow"})
    data = [dat.text for dat in data1]
    split = [dat.strip("\n") for dat in data]
    pd_info = pd.DataFrame(split)
    pd_info.rename(columns = {list(pd_info)[0]: 'info'}, inplace = True)

    #remove []
    pd_info['info'] = pd_info['info'].map(lambda x: re.sub("(\[).*?([\]])", "", x))
    #remove unwanted characters
    pd_info['info'] = pd_info['info'].map(lambda x: re.sub(ur'[\•]', ' ', x))
    pd_info['info'] = pd_info['info'].str.strip("\n")
    pd_info['info'] = pd_info['info'].str.strip()
    pd_info['info'] = pd_info['info'].str.replace("\n", " : ")
    pd_info['info'] = pd_info['info'].str.replace(" [: ]+", " : ")
    return pd_info

In [5]:
# create the interactive function to ask for user input
inputText = widgets.Text()

def modify_input(sender):
    outputText.value = "Welcome to The City of " + inputText.value.title() + "!"

inputText.on_submit(modify_input)
inputText


In [6]:
outputText = widgets.Text()
outputText

In [23]:
city = raw_input("Please enter the city that you want to visit: ")

Please enter the city that you want to visit: San Francisco


Here are some basic info of the city that you want to visit.

In [8]:
extract_overview(city)
extract_info(city)

San Francisco (SF) (/sæn frənˈsɪskoʊ/, Spanish for Saint Francis; Spanish: [san fran.ˈθis.ko]), officially the City and County of San Francisco, is the cultural, commercial, and financial center of Northern California. It is the birthplace of the United Nations.[23][24][25] Located at the north end of the San Francisco Peninsula, San Francisco is about 47.9 square miles (124 km2)[17] in area, making it the smallest county—and the only consolidated city-county[26]—within the state of California. With a density of about 18,451 people per square mile (7,124 people per km2), San Francisco is the most densely settled large city (population greater than 200,000) in California and the second-most densely populated major city in the United States after New York City.[27] San Francisco is the fourth-most populous city in California, after Los Angeles, San Diego, and San Jose, and the 13th-most populous city in the United States—with a census-estimated 2015 population of 864,816.[20] The city an

Unnamed: 0,info
0,"Motto: Oro en Paz, Fierro en Guerra (Spanish) ..."
1,State : California
2,CSA : San Jose–San Francisco–Oakland
3,Metro : San Francisco–Oakland–Hayward
4,"Incorporated : April 15, 1850"
5,Founded by : José Joaquin Moraga : Francisco P...
6,Named for : St. Francis of Assisi
7,Type : Mayor-council
8,Body : Board of Supervisors
9,Mayor : Edwin M. Lee (D)


### Please answer the following questions so that we can recommend the best restaurants, hotels, landmarks to you

### Let's find the best restaurants first!

In [87]:
res_cat = raw_input("What is your favorite food? ")

What is your favorite food? Japanese


In [88]:
res_rating = raw_input("What is your preferred range of rating for restaurants? ")

What is your preferred range of rating for restaurants? 3, 4


In [89]:
hotel_cat = raw_input("What kind of hotel do you prefer to stay in? For example, inns, hotels or resorts? ")

What kind of hotel do you prefer to stay in? For example, inns, hotels or resorts? inns


In [90]:
hotel_rating = raw_input("What is your preferred range of rating for hotels? ")

What is your preferred range of rating for hotels? 3, 4


In [91]:
land_cat = raw_input("What kind of landmarks are you looking for?")

What kind of landmarks are you looking for?clubs


In [None]:
land_rating = raw_input("What is your preferred range of rating for land? ")

In [92]:
def recommend_res(city, res_cat, res_rating):
    visit_city = city + ", CA"
    rat_1 = res_rating[0]
    rat_2 = res_rating[-1]
    conn = sql.connect("accommodation.sqlite")
    res_sql = '''SELECT categories, city, name, rating, review_count, snippet_text,url FROM Best_Restaurants WHERE city = '%s' 
                    AND categories = '%s' AND rating BETWEEN '%s' AND '%s' ''' %(visit_city, res_cat, rat_1, rat_2)
    restaurants = pd.read_sql(res_sql, conn)
    if restaurants.empty:
        print("We are sorry. We can not find a restaurant that meets your criterias in our database. Please try a different kinds of food or rating. ")
    else:
        return restaurants

In [93]:
# Find the best restaurants
recommend_res(city, res_cat, res_rating)

Unnamed: 0,categories,city,name,rating,review_count,snippet_text,url
0,Japanese,"San Francisco, CA",Kui Shin Bo,4.0,478,At this point the wife and I have been out of ...,https://www.yelp.com/biz/kui-shin-bo-san-franc...


In [105]:
# how to pass a parameter in for the like
def recommend_hotel(city, hotel_cat, res_rating):
    visit_city = city + ", CA"
    rat_1 = hotel_rating[0]
    rat_2 = hotel_rating[-1]
    conn = sql.connect("accommodation.sqlite")
    hotel_sql = '''SELECT categories, city, name, rating, review_count, snippet_text,url FROM Best_Hotels WHERE city = '%s' 
                    AND categories LIKE '%'+%s+'%' AND rating BETWEEN '%s' AND '%s' ''' %(visit_city, hotel_cat, rat_1, rat_2)
    hotels = pd.read_sql(hotel_sql, conn)
    if hotels.empty:
        print("We are sorry. We can not find a hotel that meets your criterias in our database. Please try a different kinds of hotel category or rating. ")
    else:
        return hotels

In [104]:
recommend_hotel(city, hotel_cat, res_rating)

ERROR:root:An unexpected error occurred while tokenizing input
The following traceback may be corrupted or invalid
The error message is: ('EOF in multi-line string', (1, 84))



ValueError: unsupported format character ''' (0x27) at index 148

In [108]:
hotel_sql = '''SELECT categories, city, name, rating, review_count, snippet_text,url FROM Best_Hotels WHERE city = '%s' 
                    AND rating BETWEEN '%s' AND '%s' ''' %(visit_city, rat_1, rat_2)

In [110]:
hotels = pd.read_sql(hotel_sql, conn)

DatabaseError: Execution failed on sql 'SELECT categories, city, name, rating, review_count, snippet_text,url FROM Best_Hotels WHERE city = 'San Francisco, CA' 
                    AND rating BETWEEN '3' AND '2' ': no such table: Best_Hotels

In [None]:
AND categories LIKE '%'+%s+'%' 

In [None]:
conn.close()