# Yelp Restaurant Data NY

#### Final variables/df being used: 

1. __new_list__ for list of yelp menu page
2. __final_list__ for cleaned list of yelp restaurant site url
3. __df_list__ for dataframe of information gathered from yelp restaurant where each row is a unique restaurant
4. __df_insp__ for dataframe of health inspection information gathered from NYC OpenData where each row is a unique restaurant


In [157]:
import re


In [186]:
import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup

In [230]:
# generate random page numbers to scrape on yelp
np.random.seed(9001)
random_list = np.random.randint(0,100,size=50)
random_list 

array([57, 37, 30, 71, 39, 62, 88, 57, 85, 61, 75, 86, 87, 14, 13, 80, 88,
       75, 88, 80, 61,  2, 43, 83,  4, 55,  3, 61, 33, 54, 33, 68, 37, 65,
       49, 67, 46,  8, 45, 44, 91, 15, 23, 17, 94, 83, 28, 54, 47, 21])

In [231]:
# multiply by 10 to get page number by 10s (how yelp is designed)
random_list = random_list*10
random_list

array([570, 370, 300, 710, 390, 620, 880, 570, 850, 610, 750, 860, 870,
       140, 130, 800, 880, 750, 880, 800, 610,  20, 430, 830,  40, 550,
        30, 610, 330, 540, 330, 680, 370, 650, 490, 670, 460,  80, 450,
       440, 910, 150, 230, 170, 940, 830, 280, 540, 470, 210])

In [232]:
# create list of urls to scrape 300 restaurants
new_list = []
for k in random_list:
    new_list.append('http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=' + str(k))
    
new_list

['http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=570',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=370',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=300',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=710',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=390',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=620',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=880',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=570',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=850',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=610',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=750',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+Y

In [233]:
new_list = set(list(new_list))
new_list

{'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=130',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=140',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=150',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=170',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=20',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=210',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=230',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=280',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=30',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=300',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=330',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+Yor

In [234]:
# grab each url in the menu page url
final_url_list =[]
for i in new_list:
    tmp = requests.get(i).text
    tmp = BeautifulSoup(tmp)
    
    url_list = []
    
    for a in tmp.select('a[href^="/biz/"]'):
        url_list.append(a['href'])
    url_list = sorted(set(url_list),key=url_list.index)  
    string = 'http://www.yelp.com'
    string2 = '?sort_by=date_desc'
    url_list = [string + x + string2 for x in url_list]
    final_url_list.append(url_list)

In [235]:
final_url_list = reduce(lambda x, y: x + y, final_url_list, [])


In [236]:
final_url_list

['http://www.yelp.com/biz/gristmill-brooklyn?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/gristmill-brooklyn?hrid=_FJs0VwSIX3dnifvy2ewWA&osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/surfish-peruvian-bistro-brooklyn-2?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/surfish-peruvian-bistro-brooklyn-2?hrid=J9Bet9-SEkNXSZ30Ia2pDA&osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/kilo-astoria-astoria?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/kilo-astoria-astoria?hrid=GbPfBFPwCOO2JVB-Onah2A&osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/casa-del-chef-woodside?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/casa-del-chef-woodside?hrid=eXh2FOXJiQ__iHxfD3xJfQ&osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/le-fond-brooklyn?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/le-fond-brooklyn?hrid=NiQNX-JCvFW6nARyoVnbww&osq=Restaurants?sort_by=date_desc',
 'http://w

In [237]:
# get rid of duplicate urls with longer id in them 
word = 'hrid='
final_list = []
for k in final_url_list:
    if word not in k:
        final_list.append(k)
    
        

In [238]:
final_list

['http://www.yelp.com/biz/gristmill-brooklyn?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/surfish-peruvian-bistro-brooklyn-2?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/kilo-astoria-astoria?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/casa-del-chef-woodside?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/le-fond-brooklyn?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/cookshop-new-york?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/izakaya-mew-new-york-3?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/char-sue-new-york?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/simple-nyc-new-york-3?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/joju-elmhurst?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/haymaker-bar-and-kitchen-new-york?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/freemans-new-york-3?osq=Restaurants?sort_by=date

In [239]:
df_list = pd.DataFrame(columns = ['restaurant'])
for k in final_list:        
        reviews = requests.get(k).text
        reviews = BeautifulSoup(reviews)
        restname1 = reviews.find('h1').get_text().strip()
        rating_value = reviews.find_all('meta', {'itemprop': 'ratingValue'})
        rating_string = [tag['content'] for tag in rating_value]
        rating_string = rating_string[0]
        review_count = reviews.find('span', {'itemprop': 'reviewCount'}).get_text().strip()
        price_range = reviews.find_all('meta', {'itemprop': 'priceRange'})
        price_string = [tag['content'] for tag in price_range]
        price_string = price_string[0]
        address_locality = reviews.find('span', {'itemprop': 'addressLocality'}).get_text().strip()
        category = reviews.find('span', {'class': 'category-str-list'}).get_text().strip()
        postal_code = reviews.find('span', {'itemprop': 'postalCode'}).get_text().strip()
        phone = reviews.find('span', {'itemprop': 'telephone'}).get_text().strip()     
        phone = "".join(_ for _ in phone if _ in "1234567890") 
        
        df = zip('0')
        df = pd.DataFrame(df)
        df['restaurant'] = restname1
        df['rating_value'] = rating_string
        df['review_count'] = review_count
        df['price_range'] = price_string
        df['area'] = address_locality
        df['category'] = category
        df['postal_code'] = postal_code
        df['phone'] = phone

        df_list = df_list.append(df)

In [240]:
df_list

Unnamed: 0,0,area,category,phone,postal_code,price_range,rating_value,restaurant,review_count
0,0,Brooklyn,"Pizza,\n American (New),\n ...",7184992424,11215,$11-30,4.5,Gristmill,94
0,0,Brooklyn,"Latin American,\n Peruvian",7187888070,11215,$11-30,4.0,Surfish Peruvian Bistro,158
0,0,Astoria,"Brazilian,\n Steakhouses",7186069300,11105,$11-30,4.5,Kilo Astoria,21
0,0,Woodside,"American (New),\n Desserts,...",7184579000,11377,$11-30,4.5,Casa Del Chef,146
0,0,Brooklyn,French,7183896859,11222,$11-30,4.5,Le Fond,99
0,0,New York,"American (New),\n Breakfast...",2129244440,10011,$11-30,4.0,Cookshop,1577
0,0,New York,"Sushi Bars,\n Izakaya,\n ...",6463689384,10001,$11-30,4.0,Izakaya MEW,1339
0,0,New York,"Asian Fusion,\n Pan Asian,\...",6464849241,10002,$11-30,5.0,Char Sue,17
0,0,New York,"Japanese,\n Hawaiian,\n ...",6468708292,10002,$11-30,4.5,Simple NYC,198
0,0,Elmhurst,"Vietnamese,\n Asian Fusion",3478080887,11373,Under $10,4.5,JoJu,1174


In [241]:
df_list.tail()

Unnamed: 0,0,area,category,phone,postal_code,price_range,rating_value,restaurant,review_count
0,0,Woodside,Filipino,3477249586,11377,$11-30,4.5,Papa’s Kitchen,164
0,0,Brooklyn,"Cocktail Bars,\n Italian,\n...",3475839961,11211,$11-30,4.5,Naive,35
0,0,New York,American (New),2128102880,10017,$11-30,4.0,Mulberry & Vine,8
0,0,New York,Seafood,2127471700,10004,$11-30,4.5,Luke’s Lobster FiDi,778
0,0,New York,"Korean,\n Barbeque",2124410005,10001,$11-30,4.0,Five Senses,623


In [242]:
df_list.count(0)

0               393
area            393
category        393
phone           393
postal_code     393
price_range     393
rating_value    393
restaurant      393
review_count    393
dtype: int64

In [243]:
df_list.describe()

Unnamed: 0,0,area,category,phone,postal_code,price_range,rating_value,restaurant,review_count
count,393,393,393,393.0,393,393,393.0,393,393
unique,1,15,301,381.0,75,5,5.0,390,279
top,0,New York,American (New),,11215,$11-30,4.0,Xi’an Famous Foods,8
freq,393,180,16,10.0,21,295,230.0,2,6


# Inspection Rating Data


In [275]:
df_insp = pd.read_csv("inspection_data.csv")

In [276]:
df_insp

Unnamed: 0,phone,DBA,BORO,STREET,ZIPCODE,CUISINE DESCRIPTION,Score Count,Score Recent,Score Max,Score Min,...,Grade B Count,Grade C Count,Not Yet Graded,P Count,Z Count,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22
0,0,SUITE PANTRY SUITE 30,BRONX,EAST 161 STREET,10451,American,3,0.0,2,0,...,0,0,0,0,0,,,,,
1,1646644665,STATUS Q,BROOKLYN,3RD AVE,11209,Hotdogs,6,11.0,22,11,...,0,0,0,0,0,,,,,
2,1646670060,HONEY BADGER IN LEFFERTS GARDENS,BROOKLYN,FENIMORE ST,11225,American,1,5.0,5,5,...,0,0,1,0,0,,,,,
3,1729986806,TOUS LES JOURS,QUEENS,PRINCE ST,11354,Bakery,3,12.0,12,5,...,0,0,0,0,0,,,,,
4,2012054054,ATO,MANHATTAN,GRAND ST,10013,Other,4,32.0,32,32,...,0,0,4,0,0,,,,,
5,2012207599,MORRIS HEIGHTS PIZZA,BRONX,UNIVERSITY AVE,10453,American,10,28.0,28,3,...,0,0,0,0,0,,,,,
6,2012335992,CHARLEY'S GRILLED SUBS,QUEENS,QUEENS BLVD,11373,Sandwiches/Salads/Mixed Buffet,5,9.0,13,9,...,0,0,0,0,0,,,,,
7,2012565165,HOUSE OF THAI,QUEENS,40TH AVE,11101,Thai,9,8.0,24,2,...,0,0,0,0,0,,,,,
8,2013042844,ALPHAVILLE,BROOKLYN,WILSON AVE,11237,American,31,26.0,51,7,...,5,0,0,0,0,,,,,
9,2014031014,CHARLEYS PHILLY STEAK,BRONX,BAYCHESTER AVE,10475,American,5,17.0,17,13,...,3,0,0,0,0,,,,,


In [277]:
df_insp = df_insp.drop(df_insp.columns[[18,19,20,21,22]], axis=1)
df_insp

Unnamed: 0,phone,DBA,BORO,STREET,ZIPCODE,CUISINE DESCRIPTION,Score Count,Score Recent,Score Max,Score Min,Score Average,Score StdDev,Grade A Count,Grade B Count,Grade C Count,Not Yet Graded,P Count,Z Count
0,0,SUITE PANTRY SUITE 30,BRONX,EAST 161 STREET,10451,American,3,0.0,2,0,0.666667,1.154700538,3,0,0,0,0,0
1,1646644665,STATUS Q,BROOKLYN,3RD AVE,11209,Hotdogs,6,11.0,22,11,18.333333,5.680375574,2,0,0,0,0,0
2,1646670060,HONEY BADGER IN LEFFERTS GARDENS,BROOKLYN,FENIMORE ST,11225,American,1,5.0,5,5,5.000000,#DIV/0!,0,0,0,1,0,0
3,1729986806,TOUS LES JOURS,QUEENS,PRINCE ST,11354,Bakery,3,12.0,12,5,9.666667,4.041451884,2,0,0,0,0,0
4,2012054054,ATO,MANHATTAN,GRAND ST,10013,Other,4,32.0,32,32,32.000000,0,0,0,0,4,0,0
5,2012207599,MORRIS HEIGHTS PIZZA,BRONX,UNIVERSITY AVE,10453,American,10,28.0,28,3,18.600000,9.143303561,2,0,0,0,0,0
6,2012335992,CHARLEY'S GRILLED SUBS,QUEENS,QUEENS BLVD,11373,Sandwiches/Salads/Mixed Buffet,5,9.0,13,9,11.400000,2.19089023,5,0,0,0,0,0
7,2012565165,HOUSE OF THAI,QUEENS,40TH AVE,11101,Thai,9,8.0,24,2,16.444444,9.193898943,3,0,0,0,0,0
8,2013042844,ALPHAVILLE,BROOKLYN,WILSON AVE,11237,American,31,26.0,51,7,29.774194,14.54810338,6,5,0,0,0,0
9,2014031014,CHARLEYS PHILLY STEAK,BRONX,BAYCHESTER AVE,10475,American,5,17.0,17,13,15.400000,2.19089023,0,3,0,0,0,0


In [288]:
df_yelp = df_list.merge(df_insp,on='phone')

In [289]:
df_yelp

Unnamed: 0,0,area,category,phone,postal_code,price_range,rating_value,restaurant,review_count,DBA,...,Score Max,Score Min,Score Average,Score StdDev,Grade A Count,Grade B Count,Grade C Count,Not Yet Graded,P Count,Z Count
0,0,Brooklyn,"Pizza,\n American (New),\n ...",7184992424,11215,$11-30,4.5,Gristmill,94,GRISTMILL,...,18,13,16.125000,2.587745848,3,0,0,0,0,0
1,0,Brooklyn,"Latin American,\n Peruvian",7187888070,11215,$11-30,4.0,Surfish Peruvian Bistro,158,SURFISH BISTRO,...,9,0,6.000000,4.242640687,4,0,0,0,0,0
2,0,Astoria,"Brazilian,\n Steakhouses",7186069300,11105,$11-30,4.5,Kilo Astoria,21,KILO ASTORIA,...,8,8,8.000000,#DIV/0!,0,0,0,1,0,0
3,0,Woodside,"American (New),\n Desserts,...",7184579000,11377,$11-30,4.5,Casa Del Chef,146,CASA DEL CHEF BISTRO,...,21,0,13.555556,6.109031137,6,0,0,0,0,0
4,0,Brooklyn,French,7183896859,11222,$11-30,4.5,Le Fond,99,LE FOND,...,19,5,14.750000,4.482421808,7,4,0,0,0,0
5,0,New York,"American (New),\n Breakfast...",2129244440,10011,$11-30,4.0,Cookshop,1577,10TH AVENUE COOKSHOP,...,27,9,15.722222,5.406932646,12,8,0,0,0,0
6,0,New York,"Sushi Bars,\n Izakaya,\n ...",6463689384,10001,$11-30,4.0,Izakaya MEW,1339,MEW,...,35,17,24.813953,6.103300489,0,15,6,0,0,0
7,0,New York,"Japanese,\n Hawaiian,\n ...",6468708292,10002,$11-30,4.5,Simple NYC,198,SIMPLE ELDRIDGE,...,4,2,3.333333,1.154700538,2,0,0,0,0,0
8,0,Elmhurst,"Vietnamese,\n Asian Fusion",3478080887,11373,Under $10,4.5,JoJu,1174,JOJU,...,65,0,31.233333,21.90840389,8,3,0,0,0,0
9,0,New York,"Gastropubs,\n Beer Bar",6464298237,10001,$11-30,4.0,Haymaker Bar and Kitchen,163,HAYMAKER BAR & KITCHEN,...,32,7,20.416667,10.79948091,4,0,0,0,0,0


In [290]:
df_yelp.describe()

Unnamed: 0,ZIPCODE,Score Count,Score Recent,Score Max,Score Min,Score Average,Grade A Count,Grade B Count,Grade C Count,Not Yet Graded,P Count,Z Count
count,272.0,272.0,272.0,272.0,272.0,272.0,272.0,272.0,272.0,272.0,272.0,272.0
mean,10718.400735,14.308824,11.367647,23.492647,6.897059,15.74622,5.595588,1.180147,0.272059,0.150735,0.029412,0.018382
std,593.206707,10.479651,6.812522,13.045193,5.337024,7.346834,3.492488,2.570176,1.142709,0.656559,0.225371,0.30317
min,10001.0,1.0,0.0,2.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,10016.75,5.0,8.0,13.0,4.0,10.653846,2.0,0.0,0.0,0.0,0.0,0.0
50%,11201.0,13.0,11.0,22.0,7.0,14.696429,6.0,0.0,0.0,0.0,0.0,0.0
75%,11219.25,22.0,13.0,30.0,9.0,19.166667,8.0,0.0,0.0,0.0,0.0,0.0
max,11385.0,49.0,59.0,69.0,45.0,48.555556,15.0,17.0,6.0,4.0,2.0,5.0


In [295]:
df_yelp = df_yelp.drop(['DBA','BORO','ZIPCODE'],1)

In [296]:
df_yelp

Unnamed: 0,0,area,category,phone,postal_code,price_range,rating_value,restaurant,review_count,STREET,...,Score Max,Score Min,Score Average,Score StdDev,Grade A Count,Grade B Count,Grade C Count,Not Yet Graded,P Count,Z Count
0,0,Brooklyn,"Pizza,\n American (New),\n ...",7184992424,11215,$11-30,4.5,Gristmill,94,5TH AVE,...,18,13,16.125000,2.587745848,3,0,0,0,0,0
1,0,Brooklyn,"Latin American,\n Peruvian",7187888070,11215,$11-30,4.0,Surfish Peruvian Bistro,158,3RD AVE,...,9,0,6.000000,4.242640687,4,0,0,0,0,0
2,0,Astoria,"Brazilian,\n Steakhouses",7186069300,11105,$11-30,4.5,Kilo Astoria,21,DITMARS BLVD,...,8,8,8.000000,#DIV/0!,0,0,0,1,0,0
3,0,Woodside,"American (New),\n Desserts,...",7184579000,11377,$11-30,4.5,Casa Del Chef,146,64TH ST,...,21,0,13.555556,6.109031137,6,0,0,0,0,0
4,0,Brooklyn,French,7183896859,11222,$11-30,4.5,Le Fond,99,NORMAN AVENUE,...,19,5,14.750000,4.482421808,7,4,0,0,0,0
5,0,New York,"American (New),\n Breakfast...",2129244440,10011,$11-30,4.0,Cookshop,1577,10 AVENUE,...,27,9,15.722222,5.406932646,12,8,0,0,0,0
6,0,New York,"Sushi Bars,\n Izakaya,\n ...",6463689384,10001,$11-30,4.0,Izakaya MEW,1339,W 35TH ST,...,35,17,24.813953,6.103300489,0,15,6,0,0,0
7,0,New York,"Japanese,\n Hawaiian,\n ...",6468708292,10002,$11-30,4.5,Simple NYC,198,ELDRIDGE ST,...,4,2,3.333333,1.154700538,2,0,0,0,0,0
8,0,Elmhurst,"Vietnamese,\n Asian Fusion",3478080887,11373,Under $10,4.5,JoJu,1174,BROADWAY,...,65,0,31.233333,21.90840389,8,3,0,0,0,0
9,0,New York,"Gastropubs,\n Beer Bar",6464298237,10001,$11-30,4.0,Haymaker Bar and Kitchen,163,W 29TH ST,...,32,7,20.416667,10.79948091,4,0,0,0,0,0


# Other...

In [159]:
# 
master_list = set(list(['http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=0',                 
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=10',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=20',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=30',                 
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=40',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=50',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=60',                 
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=70',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=80',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=90',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=100',                 
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=110',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=120',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=130',                 
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=140',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=150',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=160',                 
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=170',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=180',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=190',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=200',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=210',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=220',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=230',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=240',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=250',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=260',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=270',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=280',
                     'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=290']))

In [160]:
master_list

{'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=0',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=10',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=100',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=110',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=120',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=130',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=140',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=150',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=160',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=170',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York,+NY&start=180',
 'http://www.yelp.com/search?find_desc=Restaurants&find_loc=New+York

In [121]:
restname1

u'Thai Villa'

In [107]:
reviews = requests.get(final_list[0]).text
reviews = BeautifulSoup(reviews)
restname1 = reviews.find_all('meta', {'itemprop': 'name'})
restnamestring = [tag['content'] for tag in restname1]
restnamestring


['Yelp', 'Thai Villa']

In [61]:
title = reviews.find('title').get_text().strip()
title

u'Thai Villa - Order Food Online - 1609 Photos & 495 Reviews - Thai - Flatiron - New York, NY - Phone Number - Menu - Yelp'

In [65]:
name = reviews.find('h1').get_text().strip()
name

u'Thai Villa'

In [145]:
rating_value = reviews.find_all('meta', {'itemprop': 'ratingValue'})
rating_string = [tag['content'] for tag in rating_value]
rating_string = rating_string[0]
rating_string

'4.0'

In [75]:
review_count = reviews.find('span', {'itemprop': 'reviewCount'}).get_text().strip()
review_count

u'495'

In [79]:
price_range = reviews.find_all('meta', {'itemprop': 'priceRange'})
price_string = [tag['content'] for tag in price_range]
price_string

['$11-30']

In [81]:
address_locality = reviews.find('span', {'itemprop': 'addressLocality'}).get_text().strip()
address_locality

u'New York'

In [83]:
category = reviews.find('span', {'class': 'category-str-list'}).get_text().strip()
category

u'Thai'

In [85]:
postal_code = reviews.find('span', {'itemprop': 'postalCode'}).get_text().strip()
postal_code

u'10003'

In [87]:
phone = reviews.find('span', {'itemprop': 'telephone'}).get_text().strip()     
phone = "".join(_ for _ in phone if _ in "1234567890") 
phone

u'2128029999'

In [142]:
final_list_5 = final_list[0:5]
final_list_5

['http://www.yelp.com/biz/thai-villa-new-york-2?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/rocket-pig-new-york?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/the-spaniard-new-york?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/gramercy-tavern-new-york?osq=Restaurants?sort_by=date_desc',
 'http://www.yelp.com/biz/chikarashi-new-york?osq=Restaurants?sort_by=date_desc']