In [None]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml


## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# 1. Example 1: no credentials; no wrapper

Site: National Assessment of Education Progress (NAEP)

Documentation: https://www.nationsreportcard.gov/api_documentation.aspx

Base link: https://www.nationsreportcard.gov/DataService/GetAdhocData.aspx 

## 1.1 Query to pull some data

In [4]:
## using their example query of 2011 writing scores separated by gender
## based on here - https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## using the ( ) syntax to formulate a long
## string without linebreaks added
example_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')


example_naep_query


'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

In [5]:
## use requests to call the api
naep_resp = requests.get(example_naep_query)
naep_resp
print(type(naep_resp))

## get the json contents of the response 
## here, we're assuming valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## with result, turn it into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

<Response [200]>

<class 'requests.models.Response'>


{'status': 200,
 'serviceVersion': '3.28.2025.1',
 'dwellTimeMS': '15.6214',
 'avgWebHostCPUTotalLoad': 'N/A',
 'dataHitType': 'FROM_MEMORY',
 'Source': 'B11B',
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isSt

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


In [6]:
for i in range(5):
    print("Our current number is {}, the next is {}".format(i, i+1))

Our current number is 0, the next is 1
Our current number is 1, the next is 2
Our current number is 2, the next is 3
Our current number is 3, the next is 4
Our current number is 4, the next is 5


## 1.2 What happens if there's an error in our query?

In [8]:
## here's a query that from the documentation we know
## won't work since i modified year to 2025 which doesnt
## exist in the data
wrong_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025')

wrong_naep_query

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2028'

In [9]:
## use requests to call the api
naep_wrong_resp = requests.get(wrong_naep_query)
naep_wrong_resp

<Response [400]>

In [10]:
## in the case of this particular api,
## the call returns some response but
## when we try to extract the json containing
## status or results, we get in an error
#naep_wrong_resp.json() # uncomment to see error

### 1.2.2 More all-purpose way of allowing remainder of calls to run: try, except

In [12]:
## putting it in a try; except as general error catching
try:
    results = naep_wrong_resp.json()['result']
except Exception as e:
    print('Failed to get result from API due to error:')
    print(e) # or just: pass

Failed to get result from API due to error:
Invalid control character at: line 1 column 293 (char 292)


### 1.2.3 Can usually also find more targeted way but that varies more across APIs

In [14]:
## if we wanted do more specific error catching,
## see that the status == 400 actually appears here
## so could write if else along those lines
naep_wrong_resp.text
naep_resp.text

if "System.Exception" in naep_wrong_resp.text:
    print("NAEP results not found")

'{"statusCode":400,"result": "System.Exception: The query \'SELECT DISTINCT Framework FROM Cycles WHERE Subject=\'WRI\' AND Cohort=2 AND CONVERT(VARCHAR(10),Year)+Sample IN (\'2028R3\')\' did not return exactly 1 framework. Make sure you can trend the years defined for the given subject and cohort.\r\n   at NRCDataService3.GetAdhocData.GetFramework(NDEContext& ndeContext, String subjectCode, List`1 yearSamples, String cohort) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 2547\r\n   at NRCDataService3.GetAdhocData.PopulateBaseOrchestratorRequest() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 2168\r\n   at NRCDataService3.GetAdhocData.ConstructRequest_Datapoint() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 807\r\n   at NRCDataService3.GetAdhocData.Page_Load(Object sender, EventArgs e) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 261"}'

'{"status":200,"serviceVersion":"3.28.2025.1","dwellTimeMS":"15.6214","avgWebHostCPUTotalLoad":"N/A","dataHitType":"FROM_MEMORY","Source":"B11B","result": [{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"1","varValueLabel":"Male","value":139.099504632971,"isStatDisplayable":1,"errorFlag":0},{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","jurisLabel":"National public","variable":"GENDER","variableLabel":"Gender","varValue":"2","varValueLabel":"Female","value":158.567104984955,"isStatDisplayable":1,"errorFlag":0}]}'

NAEP results not found


## Activity 1: writing a function to make multiple, sequential calls

- Say we want to pull the data for grades 4, 8, and 12
- How can we write a function that iterates over a list of those grades and pulls the data for each grade?

**Note**: an ideal function would have arguments for each parameter in the API like subject, subscale, etc. Here we can leave those other parts constant

In [16]:
# your code here

In [17]:
# your code here

grades = ['4','8','12']

R = []

for grade in grades: 
    example_naep_query = (
        'https://www.nationsreportcard.gov/'
        'Dataservice/GetAdhocData.aspx?'
        f'type=data&subject=writing&grade={grade}&'
        'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')
    naep_resp = requests.get(example_naep_query)
    
    try: 
        naep_resp_j = naep_resp.json()
        naep_resp_j
        naep_resp_d = pd.DataFrame(naep_resp_j['result'])
        # result = pd.concat([result,naep_resp_d]) 
        R.append(naep_resp_d)
    
    except Exception as e:
        print(e)
        
pd.concat(R)

Invalid control character at: line 1 column 293 (char 292)


{'status': 200,
 'serviceVersion': '3.28.2025.1',
 'dwellTimeMS': '0',
 'avgWebHostCPUTotalLoad': 'N/A',
 'dataHitType': 'FROM_MEMORY',
 'Source': 'B11B',
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isStatDisp

{'status': 200,
 'serviceVersion': '3.28.2025.1',
 'dwellTimeMS': '0',
 'avgWebHostCPUTotalLoad': 'N/A',
 'dataHitType': 'FROM_MEMORY',
 'Source': 'B11B',
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 3,
   'CohortLabel': 'Grade 12',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 12,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 141.256977963264,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 3,
   'CohortLabel': 'Grade 12',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 12,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 155.385916780351,
   'isStat

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0
0,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,1,Male,141.256978,1,0
1,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,National public,GENDER,Gender,2,Female,155.385917,1,0


# 2. Example 2: needs credentials; no wrapper

Create an account here: https://www.yelp.com/developers/v3/manage_app

In [20]:
## get the key
API_KEY = "PASTE KEY HERE"

In [21]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Hanover,NH,03755"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()


<Response [200]>

In [22]:
yelp_genjson['businesses'][0]

{'id': 'wyV_NfYn4ZOfp_sHMDPcAw',
 'alias': 'bistro-at-six-hanover',
 'name': 'Bistro at Six',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/i4jvssUsxa79VYVLOD3TmQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/bistro-at-six-hanover?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=AYAiHNSGxz_RRHzq3cO46w',
 'review_count': 2,
 'categories': [{'alias': 'lounges', 'title': 'Lounges'},
  {'alias': 'cocktailbars', 'title': 'Cocktail Bars'},
  {'alias': 'tradamerican', 'title': 'American'}],
 'rating': 4.0,
 'coordinates': {'latitude': 43.7001146, 'longitude': -72.2877078},
 'transactions': [],
 'price': '$$',
 'location': {'address1': '6 E South St',
  'address2': None,
  'address3': '',
  'city': 'Hanover',
  'zip_code': '03755',
  'country': 'US',
  'state': 'NH',
  'display_address': ['6 E South St', 'Hanover, NH 03755']},
 'phone': '+16036430600',
 'display_phone': '(603) 643-0600',
 'distance': 198.6517

In [23]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': 'wyV_NfYn4ZOfp_sHMDPcAw',
 'alias': 'bistro-at-six-hanover',
 'name': 'Bistro at Six',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/i4jvssUsxa79VYVLOD3TmQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/bistro-at-six-hanover?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=AYAiHNSGxz_RRHzq3cO46w',
 'review_count': 2,
 'categories': [{'alias': 'lounges', 'title': 'Lounges'},
  {'alias': 'cocktailbars', 'title': 'Cocktail Bars'},
  {'alias': 'tradamerican', 'title': 'American'}],
 'rating': 4.0,
 'coordinates': {'latitude': 43.7001146, 'longitude': -72.2877078},
 'transactions': [],
 'price': '$$',
 'location': {'address1': '6 E South St',
  'address2': None,
  'address3': '',
  'city': 'Hanover',
  'zip_code': '03755',
  'country': 'US',
  'state': 'NH',
  'display_address': ['6 E South St', 'Hanover, NH 03755']},
 'phone': '+16036430600',
 'display_phone': '(603) 643-0600',
 'distance': 198.6517

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,wyV_NfYn4ZOfp_sHMDPcAw,bistro-at-six-hanover,Bistro at Six,https://s3-media2.fl.yelpcdn.com/bphoto/i4jvss...,False,https://www.yelp.com/biz/bistro-at-six-hanover...,2,"[{'alias': 'lounges', 'title': 'Lounges'}, {'a...",4.0,"{'latitude': 43.7001146, 'longitude': -72.2877...",[],$$,"{'address1': '6 E South St', 'address2': None,...",16036430600,(603) 643-0600,198.651788
1,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiE...,False,https://www.yelp.com/biz/base-camp-cafe-hanove...,256,"[{'alias': 'himalayan', 'title': 'Himalayan/Ne...",4.4,"{'latitude': 43.700626, 'longitude': -72.2887803}",[delivery],$$,"{'address1': '3 Lebanon St', 'address2': 'Ste ...",16036432007,(603) 643-2007,196.139758
2,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA...,False,https://www.yelp.com/biz/mollys-restaurant-and...,561,"[{'alias': 'tradamerican', 'title': 'American'...",3.9,"{'latitude': 43.701144, 'longitude': -72.2894249}",[delivery],$$,"{'address1': '43 South Main St', 'address2': '...",16036432570,(603) 643-2570,250.83016
3,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/VAx8H9...,False,https://www.yelp.com/biz/lous-restaurant-and-b...,430,"[{'alias': 'tradamerican', 'title': 'American'...",4.2,"{'latitude': 43.70143, 'longitude': -72.289001}",[delivery],$$,"{'address1': '30 S Main St', 'address2': '', '...",16036433321,(603) 643-3321,245.079232
4,5WW4g_LRwau29KyjZGLyAA,sawtooth-kitchen-hanover,Sawtooth Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/61MNG4...,False,https://www.yelp.com/biz/sawtooth-kitchen-hano...,30,"[{'alias': 'chickenshop', 'title': 'Chicken Sh...",4.2,"{'latitude': 43.70158, 'longitude': -72.289641}",[],,"{'address1': '33 S Main St', 'address2': '', '...",16036435134,(603) 643-5134,242.607552


In [24]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)


Unnamed: 0,id,alias,name,image_url,url,price,phone,display_phone
wyV_NfYn4ZOfp_sHMDPcAw,wyV_NfYn4ZOfp_sHMDPcAw,bistro-at-six-hanover,Bistro at Six,https://s3-media2.fl.yelpcdn.com/bphoto/i4jvss...,https://www.yelp.com/biz/bistro-at-six-hanover...,$$,16036430600,(603) 643-0600
XVGEEIH5rVB2QzW-qywcJw,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/p8_YiE...,https://www.yelp.com/biz/base-camp-cafe-hanove...,$$,16036432007,(603) 643-2007
8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media4.fl.yelpcdn.com/bphoto/TJLrrA...,https://www.yelp.com/biz/mollys-restaurant-and...,$$,16036432570,(603) 643-2570
KA8yhrd-ClVYMyOefXdVYg,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media3.fl.yelpcdn.com/bphoto/VAx8H9...,https://www.yelp.com/biz/lous-restaurant-and-b...,$$,16036433321,(603) 643-3321
5WW4g_LRwau29KyjZGLyAA,5WW4g_LRwau29KyjZGLyAA,sawtooth-kitchen-hanover,Sawtooth Kitchen,https://s3-media3.fl.yelpcdn.com/bphoto/61MNG4...,https://www.yelp.com/biz/sawtooth-kitchen-hano...,,16036435134,(603) 643-5134
neBEWvgHNhsXIBImCYx_6A,neBEWvgHNhsXIBImCYx_6A,murphy-s-on-the-green-hanover,Murphy’s on the Green,https://s3-media2.fl.yelpcdn.com/bphoto/76DCQr...,https://www.yelp.com/biz/murphy-s-on-the-green...,$$,16036434075,(603) 643-4075
34j_2nRCVQBTKafJvncZlg,34j_2nRCVQBTKafJvncZlg,pine-restaurant-hanover-2,PINE Restaurant,https://s3-media3.fl.yelpcdn.com/bphoto/c2SgUR...,https://www.yelp.com/biz/pine-restaurant-hanov...,$$$,16036468000,(603) 646-8000


In [49]:
# change location
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"

my_location_tpe = "Taipei,Taiwan,03456"
yelp_genquery_tpe = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location_tpe)

## use requests to call the API
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp_tpe = requests.get(yelp_genquery_tpe, headers = header)
yelp_genresp_tpe

yelp_genresp_tpe.json()

<Response [200]>

{'businesses': [{'id': '5gNHAIyajmJ_4i5vXwOVRw',
   'alias': '永和豆漿大王-大安區-2',
   'name': 'Yong He Soy Milk King',
   'image_url': 'https://s3-media4.fl.yelpcdn.com/bphoto/wXiJOju-YwgwdpYuzm2D1A/o.jpg',
   'is_closed': False,
   'url': 'https://www.yelp.com/biz/%E6%B0%B8%E5%92%8C%E8%B1%86%E6%BC%BF%E5%A4%A7%E7%8E%8B-%E5%A4%A7%E5%AE%89%E5%8D%80-2?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=AYAiHNSGxz_RRHzq3cO46w',
   'review_count': 238,
   'categories': [{'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'},
    {'alias': 'gourmet', 'title': 'Specialty Food'},
    {'alias': 'taiwanese', 'title': 'Taiwanese'}],
   'rating': 4.4,
   'coordinates': {'latitude': 25.029722, 'longitude': 121.543248},
   'transactions': [],
   'price': '$',
   'location': {'address1': '復興南路二段102號',
    'address2': '1樓',
    'address3': None,
    'city': "Da'an District",
    'zip_code': '106',
    'country': 'TW',
    'state': 'TPE',
    'di

In [26]:
yelp_genjson_tpe = yelp_genresp_tpe.json()

## turn JSON into usable data (DF)
yelp_gendf = pd.DataFrame(yelp_genjson_tpe['businesses'])
#list(yelp_gendf_marlow) # list columns
yelp_gendf['location1'] = yelp_gendf.location.apply(lambda loclist: loclist['address1'])
yelp_gendf[['alias', 'name', 'url', 'review_count', 'rating', 'location1', 'price']]

Unnamed: 0,alias,name,url,review_count,rating,location1,price
0,永和豆漿大王-大安區-2,Yong He Soy Milk King,https://www.yelp.com/biz/%E6%B0%B8%E5%92%8C%E8...,238,4.4,復興南路二段102號,$
1,上引水產-中山區,Addiction Aquatic Development,https://www.yelp.com/biz/%E4%B8%8A%E5%BC%95%E6...,428,4.3,民族東路410巷2弄18號,$$
2,麥而美早餐店-台北市萬華區,麥而美早餐店,https://www.yelp.com/biz/%E9%BA%A5%E8%80%8C%E7...,4,4.3,成都路119號,
3,原西園橋下-萬華區,原西園橋下,https://www.yelp.com/biz/%E5%8E%9F%E8%A5%BF%E5...,5,4.8,西園路一段242號,
4,阜杭豆漿-中正區,Fuhang Soy Milk,https://www.yelp.com/biz/%E9%98%9C%E6%9D%AD%E8...,429,4.2,忠孝東路一段108號,$
5,金峰魯肉飯-中正區,Jin Feng Braised Pork Rice,https://www.yelp.com/biz/%E9%87%91%E5%B3%B0%E9...,158,4.3,羅斯福路一段10-1號,$
6,大腕燒肉專門店-台北市大安區,Da-Wan Yakiniku Dining Restaurant,https://www.yelp.com/biz/%E5%A4%A7%E8%85%95%E7...,57,4.7,敦化南路一段177巷22號,$$$
7,馬辣-萬華區-2,Mala Spicy Hot Pot,https://www.yelp.com/biz/%E9%A6%AC%E8%BE%A3-%E...,56,4.5,西寧南路157號,$$
8,西門金峰-台北市萬華區,Xi Men Jin Feng,https://www.yelp.com/biz/%E8%A5%BF%E9%96%80%E9...,9,4.2,昆明街89號,$
9,豪大大雞排-萬華區,Hot-Star Large Fried Chicken,https://www.yelp.com/biz/%E8%B1%AA%E5%A4%A7%E5...,52,4.2,漢中街121-1號,$


In [35]:
# change location
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"

my_location_tpe = "Pacific Palisades, 90272"
yelp_genquery_tpe = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location_tpe)

## use requests to call the API
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp_tpe = requests.get(yelp_genquery_tpe, headers = header)
# yelp_genresp_tpe

# yelp_genresp_tpe.json()

yelp_genjson_tpe = yelp_genresp_tpe.json()

## turn JSON into usable data (DF)
yelp_gendf = pd.DataFrame(yelp_genjson_tpe['businesses'])
#list(yelp_gendf_marlow) # list columns
yelp_gendf['location1'] = yelp_gendf.location.apply(lambda loclist: loclist['address1'])
yelp_gendf[['alias', 'name', 'url', 'review_count', 'rating', 'location1', 'price']]

Unnamed: 0,alias,name,url,review_count,rating,location1,price
0,palisades-garden-cafe-pacific-palisades,Palisades Garden Cafe,https://www.yelp.com/biz/palisades-garden-cafe...,315,4.2,15231 La Cruz Dr,$
1,spruzzo-restaurant-and-bar-pacific-palisades,Spruzzo Restaurant & Bar,https://www.yelp.com/biz/spruzzo-restaurant-an...,78,4.6,538 Palisades Dr,
2,cafe-vida-pacific-palisades-2,Cafe Vida,https://www.yelp.com/biz/cafe-vida-pacific-pal...,853,4.4,15317 Antioch St,$$
3,armav-los-angeles,Armav,https://www.yelp.com/biz/armav-los-angeles?adj...,85,4.7,970 Monument St,
4,juicy-ladies-organic-cafe-pacific-palisades,Juicy Ladies,https://www.yelp.com/biz/juicy-ladies-organic-...,301,4.0,17361 Sunset Blvd,$$$
5,gladstones-pacific-palisades-4,Gladstones,https://www.yelp.com/biz/gladstones-pacific-pa...,2447,3.0,17300 Pacific Coast Hwy,$$$$
6,prima-cantina-los-angeles,Prima Cantina,https://www.yelp.com/biz/prima-cantina-los-ang...,38,3.7,15246 W Sunset Blvd,
7,hanks-los-angeles-2,Hank's,https://www.yelp.com/biz/hanks-los-angeles-2?a...,321,3.6,1033 N Swarthmore Ave,$$
8,moku-sushi-pacific-palisades,Moku Sushi,https://www.yelp.com/biz/moku-sushi-pacific-pa...,229,4.3,524 Palisades Dr,$$
9,angelini-ristorante-and-bar-los-angeles,Angelini Ristorante & Bar,https://www.yelp.com/biz/angelini-ristorante-a...,106,3.7,1038 N Swarthmore Ave,$$$


# Activity 2: pull restaurants in a different location

- Try running a business search query for your hometown or another place by constructing a query similar to `yelp_genquery` but changing the location parameter
- Other endpoints require feeding what's called the business' fusion id into the API. Take an id from `yelp_stronly.id` and use the documentation here to pull the reviews for that business: https://docs.developer.yelp.com/reference/v3_business_reviews
- **Challenge**: generalize the previous step by writing a function that (1) takes a list of business ids as an input, (2) calls the reviews API for each id, (3) returns the results, and (4) rowbinds all results, i.e. turns them into a single, usable DataFrame

In [29]:
# your code here

In [31]:
# look at reviews of business with this id
base_url_reviews = f'https://api.yelp.com/v3/businesses/{biz_id}/reviews'
yelp_genquery_reviews = (base_url_reviews)

## use requests to call the API
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp_reviews = requests.get(yelp_genquery_reviews, headers = header)
yelp_genresp_reviews

## then, look at structure of response
yelp_genjson_reviews = yelp_genresp_reviews.json()
yelp_genjson_reviews

<Response [200]>

{'reviews': [{'id': '8vZyIz2hl6GcjazfvPTaSA',
   'url': 'https://www.yelp.com/biz/%E4%B8%8A%E5%BC%95%E6%B0%B4%E7%94%A2-%E4%B8%AD%E5%B1%B1%E5%8D%80?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=8vZyIz2hl6GcjazfvPTaSA&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=AYAiHNSGxz_RRHzq3cO46w',
   'text': 'This one of our top meals on our trip to Taiwan.\n\nExcellent service, beautiful place, accompanied by some of the freshest, best tasting seafood you can...',
   'rating': 5,
   'time_created': '2025-01-11 00:18:17',
   'user': {'id': 'rhnFDusn-D-fDu3DeyY2kQ',
    'profile_url': 'https://www.yelp.com/user_details?userid=rhnFDusn-D-fDu3DeyY2kQ',
    'image_url': 'https://s3-media2.fl.yelpcdn.com/photo/MCERkNe7zxLyM2QQ19pFkQ/o.jpg',
    'name': 'Felix G.'}},
  {'id': 'urVeePoK7c4WL4UfDfGDiQ',
   'url': 'https://www.yelp.com/biz/%E4%B8%8A%E5%BC%95%E6%B0%B4%E7%94%A2-%E4%B8%AD%E5%B1%B1%E5%8D%80?adjust_creative=AYAiHNSGxz_RRHzq3cO46w&hrid=urVeePoK7c4WL4UfDfGDiQ&utm_campaign=y