In [1]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml


## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# 1. Example 1: no credentials; no wrapper

Site: National Assessment of Education Progress (NAEP)

Documentation: https://www.nationsreportcard.gov/api_documentation.aspx

Base link: https://www.nationsreportcard.gov/DataService/GetAdhocData.aspx 

## 1.1 Query to pull some data

In [2]:
## using their example query of 2011 writing scores separated by gender
## based on here - https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## using the ( ) syntax to formulate a long
## string without linebreaks added
example_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')


example_naep_query


'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

In [3]:
## use requests to call the api
naep_resp = requests.get(example_naep_query)
naep_resp
print(type(naep_resp))

## get the json contents of the response 
## here, we're assuming valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## with result, turn it into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

<Response [200]>

<class 'requests.models.Response'>


{'status': 200,
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isStatDisplayable': 1,
   'errorFlag': 0}]}

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


## 1.2 What happens if there's an error in our query?

In [4]:
## here's a query that from the documentation we know
## won't work since i modified year to 2025 which doesnt
## exist in the data
wrong_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025')

wrong_naep_query

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025'

In [5]:
## use requests to call the api
naep_wrong_resp = requests.get(wrong_naep_query)
naep_wrong_resp

<Response [200]>

In [None]:
## in the case of this particular api,
## the call returns some response but
## when we try to extract the json containing
## status or results, we get in an error
#naep_wrong_resp.json() # uncomment to see error

In [8]:
dir(wrong_naep_query)

['__add__',
 '__class__',
 '__contains__',
 '__delattr__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__getnewargs__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__len__',
 '__lt__',
 '__mod__',
 '__mul__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__rmod__',
 '__rmul__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 'capitalize',
 'casefold',
 'center',
 'count',
 'encode',
 'endswith',
 'expandtabs',
 'find',
 'format',
 'format_map',
 'index',
 'isalnum',
 'isalpha',
 'isascii',
 'isdecimal',
 'isdigit',
 'isidentifier',
 'islower',
 'isnumeric',
 'isprintable',
 'isspace',
 'istitle',
 'isupper',
 'join',
 'ljust',
 'lower',
 'lstrip',
 'maketrans',
 'partition',
 'removeprefix',
 'removesuffix',
 'replace',
 'rfind',
 'rindex',
 'rjust',
 'rpartition',
 'rsplit',
 'rstrip',
 'split',
 'splitlines',
 'startswith',
 'stri

### 1.2.2 More all-purpose way of allowing remainder of calls to run: try, except

In [6]:
## putting it in a try; except as general error catching
try:
    results = naep_wrong_resp.json()['result']
except Exception as e:
    print('Failed to get result from API due to error:')
    print(e) # or just: pass

Failed to get result from API due to error:
Invalid control character at: line 1 column 289 (char 288)


### 1.2.3 Can usually also find more targeted way but that varies more across APIs

In [7]:
## if we wanted do more specific error catching,
## see that the status == 400 actually appears here
## so could write if else along those lines
naep_wrong_resp.text
naep_resp.text

if "System.Exception" in naep_wrong_resp.text:
    print("NAEP results not found")

'{"status":400,"result": "System.Exception: The query \'SELECT DISTINCT Framework FROM Cycles WHERE Subject=\'WRI\' AND Cohort=2 AND CONVERT(VARCHAR(10),Year)+Sample IN (\'2025R3\')\' did not return exactly 1 framework. Make sure you can trend the years defined for the given subject and cohort.\r\n   at NRCDataService3.GetAdhocData.GetFramework(NDEContext& ndeContext, String subjectCode, List`1 yearSamples, String cohort) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 2091\r\n   at NRCDataService3.GetAdhocData.PopulateBaseOrchestratorRequest() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 1781\r\n   at NRCDataService3.GetAdhocData.ConstructRequest_Datapoint() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 628\r\n   at NRCDataService3.GetAdhocData.Page_Load(Object sender, EventArgs e) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 179"}'

'{"status":200,"result": [{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"1","varValueLabel":"Male","value":139.099504632971,"isStatDisplayable":1,"errorFlag":0},{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"2","varValueLabel":"Female","value":158.567104984955,"isStatDisplayable":1,"errorFlag":0}]}'

NAEP results not found


## Activity 1: writing a function to make multiple, sequential calls

- Say we want to pull the data for grades 4, 8, and 12
- How can we write a function that iterates over a list of those grades and pulls the data for each grade?

**Note**: an ideal function would have arguments for each parameter in the API like subject, subscale, etc. Here we can leave those other parts constant

In [39]:
# your code here

def pull_multiple(args):
    dflist = []
    
    for arg in args:
#         print(arg)
        
        try:
            naep_query = ('https://www.nationsreportcard.gov/'
                          'Dataservice/GetAdhocData.aspx?'
                          'type=data&subject=writing&grade=' + str(arg) + '&'
                          'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')
#             print(naep_query)
            naep_resp = requests.get(naep_query)
            naep_resp_j = naep_resp.json()
#             print(naep_resp_j)
            naep_resp_d = pd.DataFrame(naep_resp_j['result'])
            
#             display(naep_resp_d)
            dflist.append(naep_resp_d)
            
        except Exception as e:
            print('Failed to get result of arg \"' + str(arg) + '\" from API due to error')
#             print(e) # or just: pass
            pass
            
    display(pd.concat(dflist))
            
pull_multiple([4, 8, 12])

Failed to get result of arg "4" from API due to error


Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0
0,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,1,Male,141.256978,1,0
1,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,2,Female,155.385917,1,0


In [22]:
## using their example query of 2011 writing scores separated by gender
## based on here - https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## using the ( ) syntax to formulate a long
## string without linebreaks added
example_naep_query = (
'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=4&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'
)
example_naep_query

## use requests to call the api
naep_resp = requests.get(example_naep_query)
naep_resp
print(type(naep_resp))

## get the json contents of the response 
## here, we're assuming valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## with result, turn it into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=4&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

<Response [200]>

<class 'requests.models.Response'>


JSONDecodeError: Invalid control character at: line 1 column 289 (char 288)

# 2. Example 2: needs credentials; no wrapper

Create an account here: https://www.yelp.com/developers/v3/manage_app

In [40]:
## get the key
API_KEY = "1Gh9eKod5_-vX1otu_KEzcFphmJC58Kwb2qsJbcKPn76WUI7eW6biYfFe6R6Pmx_7NRaaQraF-Co_eLwcjvLl-DGQ4A0wHE8Z6WlFm9Bum6lEFvh9wiLufavouMtZXYx"

In [41]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Hanover,NH,03755"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()


<Response [200]>

In [42]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': 'XVGEEIH5rVB2QzW-qywcJw',
 'alias': 'base-camp-cafe-hanover',
 'name': 'Base Camp Cafe',
 'image_url': 'https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiEQI7p_JF82eaWa9Xg/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/base-camp-cafe-hanover?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=AYAiHNSGxz_RRHzq3cO46w',
 'review_count': 238,
 'categories': [{'alias': 'himalayan', 'title': 'Himalayan/Nepalese'}],
 'rating': 4.5,
 'coordinates': {'latitude': 43.700626, 'longitude': -72.2887803},
 'transactions': ['delivery'],
 'price': '$$',
 'location': {'address1': '3 Lebanon St',
  'address2': 'Ste 13',
  'address3': '',
  'city': 'Hanover',
  'zip_code': '03755',
  'country': 'US',
  'state': 'NH',
  'display_address': ['3 Lebanon St', 'Ste 13', 'Hanover, NH 03755']},
 'phone': '+16036432007',
 'display_phone': '(603) 643-2007',
 'distance': 196.1397581876442}

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiE...,False,https://www.yelp.com/biz/base-camp-cafe-hanove...,238,"[{'alias': 'himalayan', 'title': 'Himalayan/Ne...",4.5,"{'latitude': 43.700626, 'longitude': -72.2887803}",[delivery],$$,"{'address1': '3 Lebanon St', 'address2': 'Ste ...",16036432007,(603) 643-2007,196.139758
1,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA...,False,https://www.yelp.com/biz/mollys-restaurant-and...,500,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 43.701144, 'longitude': -72.2894249}",[delivery],$$,"{'address1': '43 South Main St', 'address2': '...",16036432570,(603) 643-2570,250.83016
2,5WW4g_LRwau29KyjZGLyAA,sawtooth-kitchen-hanover,Sawtooth Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/61MNG4...,False,https://www.yelp.com/biz/sawtooth-kitchen-hano...,19,"[{'alias': 'chickenshop', 'title': 'Chicken Sh...",4.0,"{'latitude': 43.70158, 'longitude': -72.289641}",[],,"{'address1': '33 S Main St', 'address2': '', '...",16036435134,(603) 643-5134,242.607552
3,neBEWvgHNhsXIBImCYx_6A,murphy-s-on-the-green-hanover,Murphy’s on the Green,https://s3-media2.fl.yelpcdn.com/bphoto/76DCQr...,False,https://www.yelp.com/biz/murphy-s-on-the-green...,173,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 43.702, 'longitude': -72.2894949}",[delivery],$$,"{'address1': '11 S Main St', 'address2': '', '...",16036434075,(603) 643-4075,295.207523
4,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/VAx8H9...,False,https://www.yelp.com/biz/lous-restaurant-and-b...,374,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 43.70143, 'longitude': -72.289001}",[delivery],$$,"{'address1': '30 S Main St', 'address2': '', '...",16036433321,(603) 643-3321,244.006059


In [43]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)


Unnamed: 0,id,alias,name,image_url,url,price,phone,display_phone
XVGEEIH5rVB2QzW-qywcJw,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiE...,https://www.yelp.com/biz/base-camp-cafe-hanove...,$$,16036432007,(603) 643-2007
8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA...,https://www.yelp.com/biz/mollys-restaurant-and...,$$,16036432570,(603) 643-2570
5WW4g_LRwau29KyjZGLyAA,5WW4g_LRwau29KyjZGLyAA,sawtooth-kitchen-hanover,Sawtooth Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/61MNG4...,https://www.yelp.com/biz/sawtooth-kitchen-hano...,,16036435134,(603) 643-5134
neBEWvgHNhsXIBImCYx_6A,neBEWvgHNhsXIBImCYx_6A,murphy-s-on-the-green-hanover,Murphy’s on the Green,https://s3-media2.fl.yelpcdn.com/bphoto/76DCQr...,https://www.yelp.com/biz/murphy-s-on-the-green...,$$,16036434075,(603) 643-4075
KA8yhrd-ClVYMyOefXdVYg,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/VAx8H9...,https://www.yelp.com/biz/lous-restaurant-and-b...,$$,16036433321,(603) 643-3321
34j_2nRCVQBTKafJvncZlg,34j_2nRCVQBTKafJvncZlg,pine-restaurant-hanover-2,PINE Restaurant,https://s3-media2.fl.yelpcdn.com/bphoto/rD7w_r...,https://www.yelp.com/biz/pine-restaurant-hanov...,$$$,16036468000,(603) 646-8000
vMyN7JL5cJExJORgIobbQg,vMyN7JL5cJExJORgIobbQg,tuk-tuk-thai-cuisine-hanover,Tuk Tuk Thai Cuisine,https://s3-media4.fl.yelpcdn.com/bphoto/--bUG3...,https://www.yelp.com/biz/tuk-tuk-thai-cuisine-...,$$,16032779192,(603) 277-9192


# Activity 2: pull restaurants in a different location

- Try running a business search query for your hometown or another place by constructing a query similar to `yelp_genquery` but changing the location parameter
- Other endpoints require feeding what's called the business' fusion id into the API. Take an id from `yelp_stronly.id` and use the documentation here to pull the reviews for that business: https://docs.developer.yelp.com/reference/v3_business_reviews
- **Challenge**: generalize the previous step by writing a function that (1) takes a list of business ids as an input, (2) calls the reviews API for each id, (3) returns the results, and (4) rowbinds all results, i.e. turns them into a single, usable DataFrame

In [44]:
# your code here
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "McLean,VA,22102"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()


<Response [200]>

In [45]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):
    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
            if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)


Unnamed: 0,id,alias,name,image_url,url,price,phone,display_phone
1i0gf9awbeXMkrkpjWOaJg,1i0gf9awbeXMkrkpjWOaJg,aracosia-mclean-mclean,Aracosia McLean,https://s3-media1.fl.yelpcdn.com/bphoto/LT0pmv...,https://www.yelp.com/biz/aracosia-mclean-mclea...,$$$,17032693820.0,(703) 269-3820
0mNdAXmNy9ZqxZPVc2I1Ow,0mNdAXmNy9ZqxZPVc2I1Ow,yu-noodles-mclean,Yu Noodles,https://s3-media2.fl.yelpcdn.com/bphoto/ZanJHZ...,https://www.yelp.com/biz/yu-noodles-mclean?adj...,$$,,
OP5VkWXLTp4nxyvCJYutMA,OP5VkWXLTp4nxyvCJYutMA,roots-kitchen-and-bar-west-mclean,Roots Kitchen & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/9JiPYC...,https://www.yelp.com/biz/roots-kitchen-and-bar...,$$,17037127850.0,(703) 712-7850
h2k1JX0-YfKwvFJqEOYSeA,h2k1JX0-YfKwvFJqEOYSeA,joon-vienna,Joon,https://s3-media3.fl.yelpcdn.com/bphoto/jkBsAG...,https://www.yelp.com/biz/joon-vienna?adjust_cr...,$$$,15713781390.0,(571) 378-1390
b8dRf3aexGfBqGeaXlQByA,b8dRf3aexGfBqGeaXlQByA,pikoteo-mclean,Pikoteo,https://s3-media3.fl.yelpcdn.com/bphoto/JXbApH...,https://www.yelp.com/biz/pikoteo-mclean?adjust...,,17038910123.0,(703) 891-0123
oh_lkEz-iMydob_oqlaUgA,oh_lkEz-iMydob_oqlaUgA,kusshi-ko-vienna-3,Kusshi Ko,https://s3-media3.fl.yelpcdn.com/bphoto/mMp993...,https://www.yelp.com/biz/kusshi-ko-vienna-3?ad...,,15717771997.0,(571) 777-1997
hxhmK2KSZovFPjf3zqC-Cw,hxhmK2KSZovFPjf3zqC-Cw,cafe-ile-mclean,Cafe Ile,https://s3-media4.fl.yelpcdn.com/bphoto/riNo3a...,https://www.yelp.com/biz/cafe-ile-mclean?adjus...,,15713781168.0,(571) 378-1168


In [49]:
# curl --request GET \
#      --url 'https://api.yelp.com/v3/businesses/1i0gf9awbeXMkrkpjWOaJg/reviews?limit=20&sort_by=yelp_sort' \
#      --header 'Authorization: Bearer 1Gh9eKod5_-vX1otu_KEzcFphmJC58Kwb2qsJbcKPn76WUI7eW6biYfFe6R6Pmx_7NRaaQraF-Co_eLwcjvLl-DGQ4A0wHE8Z6WlFm9Bum6lEFvh9wiLufavouMtZXYx' \
#      --header 'accept: application/json'

In [47]:
{
  "reviews": [
    {
      "id": "45AklVdUryQaNMnq4zQVzA",
      "url": "https://www.yelp.com/biz/aracosia-mclean-mclean?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=45AklVdUryQaNMnq4zQVzA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=AYAiHNSGxz_RRHzq3cO46w",
      "text": "One of my favorite places I've been to this year. I've had Afghan cuisine before but don't recall ever dining in at a restaurant. The decoration here is...",
      "rating": 5,
      "time_created": "2023-09-07 04:19:54",
      "user": {
        "id": "pRpG-CfgQmPgu2QHg0I7nQ",
        "profile_url": "https://www.yelp.com/user_details?userid=pRpG-CfgQmPgu2QHg0I7nQ",
        "image_url": "https://s3-media3.fl.yelpcdn.com/photo/2Kr21VDDyEuecHxoAWx54w/o.jpg",
        "name": "James H."
      }
    },
    {
      "id": "MvxZ5hh8qMT18X54ZXuMKw",
      "url": "https://www.yelp.com/biz/aracosia-mclean-mclean?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=MvxZ5hh8qMT18X54ZXuMKw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=AYAiHNSGxz_RRHzq3cO46w",
      "text": "Great ambiance and servers. Friendly staff. But the food is mediocre if you know authentic afghani food. Good selections. We shall definitely be back for...",
      "rating": 3,
      "time_created": "2023-09-09 20:01:19",
      "user": {
        "id": "LCQkl51EpPnwEZrv57QVwA",
        "profile_url": "https://www.yelp.com/user_details?userid=LCQkl51EpPnwEZrv57QVwA",
        "image_url": "https://s3-media2.fl.yelpcdn.com/photo/Aw4XTPXbxy4VwRKpS500KQ/o.jpg",
        "name": "Punga I."
      }
    },
    {
      "id": "kIkbmZqnobu4AQ17AhjE0A",
      "url": "https://www.yelp.com/biz/aracosia-mclean-mclean?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=kIkbmZqnobu4AQ17AhjE0A&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=AYAiHNSGxz_RRHzq3cO46w",
      "text": "Aracosia was one of my favorite restaurant places I've been to in a long time. This was an incredible dining experience from beginning to end. I'll start...",
      "rating": 5,
      "time_created": "2023-09-04 19:32:15",
      "user": {
        "id": "LWfakWRcaOYddGGS4vaRxw",
        "profile_url": "https://www.yelp.com/user_details?userid=LWfakWRcaOYddGGS4vaRxw",
        "image_url": "https://s3-media1.fl.yelpcdn.com/photo/c-tEKInIpASwyXFm2fGbkA/o.jpg",
        "name": "Mitch S."
      }
    }
  ],
  "total": 358,
  "possible_languages": [
    "en"
  ]
}

{'reviews': [{'id': '45AklVdUryQaNMnq4zQVzA',
   'url': 'https://www.yelp.com/biz/aracosia-mclean-mclean?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=45AklVdUryQaNMnq4zQVzA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=AYAiHNSGxz_RRHzq3cO46w',
   'text': "One of my favorite places I've been to this year. I've had Afghan cuisine before but don't recall ever dining in at a restaurant. The decoration here is...",
   'rating': 5,
   'time_created': '2023-09-07 04:19:54',
   'user': {'id': 'pRpG-CfgQmPgu2QHg0I7nQ',
    'profile_url': 'https://www.yelp.com/user_details?userid=pRpG-CfgQmPgu2QHg0I7nQ',
    'image_url': 'https://s3-media3.fl.yelpcdn.com/photo/2Kr21VDDyEuecHxoAWx54w/o.jpg',
    'name': 'James H.'}},
  {'id': 'MvxZ5hh8qMT18X54ZXuMKw',
   'url': 'https://www.yelp.com/biz/aracosia-mclean-mclean?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=MvxZ5hh8qMT18X54ZXuMKw&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=AYAiHNSGxz_RRHzq3cO46w',
   

In [76]:
# curl --request GET \
#      --url 'https://api.yelp.com/v3/businesses/1i0gf9awbeXMkrkpjWOaJg/reviews?limit=20&sort_by=yelp_sort' \
#      --header 'Authorization: Bearer 1Gh9eKod5_-vX1otu_KEzcFphmJC58Kwb2qsJbcKPn76WUI7eW6biYfFe6R6Pmx_7NRaaQraF-Co_eLwcjvLl-DGQ4A0wHE8Z6WlFm9Bum6lEFvh9wiLufavouMtZXYx' \
#      --header 'accept: application/json'

# your code here
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
def pull_restaurants_reviews(list_of_biz_ids):
    df_list=[]
    
    for biz_id in list_of_biz_ids:
        yelp_genquery = "https://api.yelp.com/v3/businesses/" + biz_id + "/reviews?limit=20&sort_by=yelp_sort"
        

        header = {'Authorization': f"Bearer {API_KEY}",
                 'accept' : 'application/json'}
        yelp_genresp = requests.get(yelp_genquery, headers = header)
        print(yelp_genresp)

        ## then, look at structure of response
        yelp_genjson = yelp_genresp.json().get('reviews')

        df_list.append(pd.DataFrame(yelp_genjson))
        
    binded = pd.concat(df_list)
    return binded


In [None]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):
    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
            if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)

In [78]:
test = pull_restaurants_reviews(['1i0gf9awbeXMkrkpjWOaJg', '0mNdAXmNy9ZqxZPVc2I1Ow', 'OP5VkWXLTp4nxyvCJYutMA', 'h2k1JX0-YfKwvFJqEOYSeA'])

display(test)

<Response [200]>
<Response [200]>
<Response [200]>
<Response [200]>


Unnamed: 0,id,url,text,rating,time_created,user
0,45AklVdUryQaNMnq4zQVzA,https://www.yelp.com/biz/aracosia-mclean-mclea...,One of my favorite places I've been to this ye...,5,2023-09-07 04:19:54,"{'id': 'pRpG-CfgQmPgu2QHg0I7nQ', 'profile_url'..."
1,MvxZ5hh8qMT18X54ZXuMKw,https://www.yelp.com/biz/aracosia-mclean-mclea...,Great ambiance and servers. Friendly staff. Bu...,3,2023-09-09 20:01:19,"{'id': 'LCQkl51EpPnwEZrv57QVwA', 'profile_url'..."
2,kIkbmZqnobu4AQ17AhjE0A,https://www.yelp.com/biz/aracosia-mclean-mclea...,Aracosia was one of my favorite restaurant pla...,5,2023-09-04 19:32:15,"{'id': 'LWfakWRcaOYddGGS4vaRxw', 'profile_url'..."
0,GupSHVLxxpW9hlc5QbRnnA,https://www.yelp.com/biz/yu-noodles-mclean?adj...,pros:\nsaturday noon - restaurant is busy and ...,5,2023-09-16 10:10:22,"{'id': 'nf-lpwZXsjle7VE_RnhhIA', 'profile_url'..."
1,qT73fzaXplgtYm1aNWwNdQ,https://www.yelp.com/biz/yu-noodles-mclean?adj...,We stopped at Yu's to try their dumplings and ...,5,2023-09-14 11:49:55,"{'id': 'CYHUeJEvS8P-Z9LdTR8b8Q', 'profile_url'..."
2,jaBSn5j8_fTUDuPXPAogSg,https://www.yelp.com/biz/yu-noodles-mclean?adj...,"I really like noodles, but I thought their noo...",4,2023-08-29 18:28:34,"{'id': 'tCczhwSxoivFMXLc6qsQKQ', 'profile_url'..."
0,nv--PY2n2CKfMdM4GVRNiQ,https://www.yelp.com/biz/roots-kitchen-and-bar...,I had some colleagues in from out of state thi...,4,2023-08-07 17:30:55,"{'id': 'rX83Ej2C0HuBQErJhy-48A', 'profile_url'..."
1,lI1sNx6c2GKjyCu-f1OjmQ,https://www.yelp.com/biz/roots-kitchen-and-bar...,We really enjoyed the food. It's a cool place ...,4,2023-07-27 06:53:26,"{'id': '0Vvy9rg2WQgCtfAA33Ux2A', 'profile_url'..."
2,lBI2cJjYodyVMkV11zGxgQ,https://www.yelp.com/biz/roots-kitchen-and-bar...,Loved it! Food is delicious and the restaurant...,5,2023-10-11 11:51:25,"{'id': 'TBZG901kjBnYH6EP2e_fWA', 'profile_url'..."
0,78S8uSumgJ48Vb-wYDYKhg,https://www.yelp.com/biz/joon-vienna?adjust_cr...,"Great sharing appetizers. So favorable, great ...",5,2023-10-13 19:03:19,"{'id': 'jZ-yEpJrjBgKYy_5aKfBrQ', 'profile_url'..."
