In [1]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml


## repeated printouts
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# 1. Example 1: no credentials; no wrapper

Site: National Assessment of Education Progress (NAEP)

Documentation: https://www.nationsreportcard.gov/api_documentation.aspx

Base link: https://www.nationsreportcard.gov/DataService/GetAdhocData.aspx 

## 1.1 Query to pull some data

In [2]:
## using their example query of 2011 writing scores separated by gender
## based on here - https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## using the ( ) syntax to formulate a long
## string without linebreaks added
example_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011')


example_naep_query


'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

In [4]:
## use requests to call the api
naep_resp = requests.get(example_naep_query)
naep_resp
print(type(naep_resp))

## get the json contents of the response 
## here, we're assuming valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## with result, turn it into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

<Response [200]>

<class 'requests.models.Response'>


{'status': 200,
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'jurisLabel': 'National public',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isStatDisplayable': 1,
   'errorFlag': 0}]}

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,jurisLabel,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,National public,GENDER,Gender,2,Female,158.567105,1,0


## 1.2 What happens if there's an error in our query?

In [3]:
## here's a query that from the documentation we know
## won't work since i modified year to 2025 which doesnt
## exist in the data
wrong_naep_query = (
'https://www.nationsreportcard.gov/'
'Dataservice/GetAdhocData.aspx?'
'type=data&subject=writing&grade=8&'
'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025')

wrong_naep_query

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025'

In [None]:
## use requests to call the api
naep_wrong_resp = requests.get(wrong_naep_query)
naep_wrong_resp

In [None]:
## in the case of this particular api,
## the call returns some response but
## when we try to extract the json containing
## status or results, we get in an error
#naep_wrong_resp.json() # uncomment to see error

### 1.2.2 More all-purpose way of allowing remainder of calls to run: try, except

In [None]:
## putting it in a try; except as general error catching
try:
    results = naep_wrong_resp.json()['result']
except Exception as e:
    print('Failed to get result from API due to error:')
    print(e) # or just: pass

### 1.2.3 Can usually also find more targeted way but that varies more across APIs

In [None]:
## if we wanted do more specific error catching,
## see that the status == 400 actually appears here
## so could write if else along those lines
naep_wrong_resp.text
naep_resp.text

if "System.Exception" in naep_wrong_resp.text:
    print("NAEP results not found")

## Activity 1: writing a function to make multiple, sequential calls

- Say we want to pull the data for grades 4, 8, and 12
- How can we write a function that iterates over a list of those grades and pulls the data for each grade?

**Note**: an ideal function would have arguments for each parameter in the API like subject, subscale, etc. Here we can leave those other parts constant

In [7]:
# your code here
# your code here
def doGrade(grade):
    link = 'https://www.nationsreportcard.gov/''Dataservice/GetAdhocData.aspx?''type=data&subject=writing&grade=' + str(grade) + '&''subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'
    naep_resp1 = requests.get(link)
    print(naep_resp1)
    print(type(naep_resp1))
    try:
        results = naep_resp1.json()['result']
        return results
    except Exception as e:
        print('Failed to get result from API due to error:')
        print(e) # or just: pass
doGrade(4)
doGrade(8)
doGrade(12)

<Response [200]>
<class 'requests.models.Response'>
Failed to get result from API due to error:
Invalid control character at: line 1 column 289 (char 288)
<Response [200]>
<class 'requests.models.Response'>


[{'year': 2011,
  'sample': 'R3',
  'yearSampleLabel': '2011',
  'Cohort': 2,
  'CohortLabel': 'Grade 8',
  'stattype': 'MN:MN',
  'subject': 'WRI',
  'grade': 8,
  'scale': 'WRIRP',
  'jurisdiction': 'NP',
  'jurisLabel': 'National public',
  'variable': 'GENDER',
  'variableLabel': 'Gender',
  'varValue': '1',
  'varValueLabel': 'Male',
  'value': 139.099504632971,
  'isStatDisplayable': 1,
  'errorFlag': 0},
 {'year': 2011,
  'sample': 'R3',
  'yearSampleLabel': '2011',
  'Cohort': 2,
  'CohortLabel': 'Grade 8',
  'stattype': 'MN:MN',
  'subject': 'WRI',
  'grade': 8,
  'scale': 'WRIRP',
  'jurisdiction': 'NP',
  'jurisLabel': 'National public',
  'variable': 'GENDER',
  'variableLabel': 'Gender',
  'varValue': '2',
  'varValueLabel': 'Female',
  'value': 158.567104984955,
  'isStatDisplayable': 1,
  'errorFlag': 0}]

<Response [200]>
<class 'requests.models.Response'>


[{'year': 2011,
  'sample': 'R3',
  'yearSampleLabel': '2011',
  'Cohort': 3,
  'CohortLabel': 'Grade 12',
  'stattype': 'MN:MN',
  'subject': 'WRI',
  'grade': 12,
  'scale': 'WRIRP',
  'jurisdiction': 'NP',
  'jurisLabel': 'National public',
  'variable': 'GENDER',
  'variableLabel': 'Gender',
  'varValue': '1',
  'varValueLabel': 'Male',
  'value': 141.256977963264,
  'isStatDisplayable': 1,
  'errorFlag': 0},
 {'year': 2011,
  'sample': 'R3',
  'yearSampleLabel': '2011',
  'Cohort': 3,
  'CohortLabel': 'Grade 12',
  'stattype': 'MN:MN',
  'subject': 'WRI',
  'grade': 12,
  'scale': 'WRIRP',
  'jurisdiction': 'NP',
  'jurisLabel': 'National public',
  'variable': 'GENDER',
  'variableLabel': 'Gender',
  'varValue': '2',
  'varValueLabel': 'Female',
  'value': 155.385916780351,
  'isStatDisplayable': 1,
  'errorFlag': 0}]

# 2. Example 2: needs credentials; no wrapper

Create an account here: https://www.yelp.com/developers/v3/manage_app

In [11]:
## get the key
API_KEY = "hDmREkKL9dz5eDOpQ6AKykDFKpZDlfqKoRP6N9R6e4N3-35RRIcgoF7-HKfZbDiEOm6gPZueckvz6nE8o1bEEVdU2Qpo0M_SNvQ5IcEGiJGmCJ7PMTw5x0JY7q4zZnYx"

In [15]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query 
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Boulder,CO,80303"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're
## passing it our credentials (structure varies
## by API and telling it to only return 10 results
## (max is 50 at once)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()


<Response [200]>

In [16]:
## example business
yelp_genjson['businesses'][0]

## more automatic way of summarizing but things end up in lists
## within columns for things like categories
yelp_gendf = pd.DataFrame(yelp_genjson['businesses'])
yelp_gendf.head()

{'id': 'GygGWp-ODQcqnizOxr5VLg',
 'alias': 'postino-boulder-boulder-2',
 'name': 'Postino Boulder',
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/qs98C86gzRNDcJ8I7GqemA/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/postino-boulder-boulder-2?adjust_creative=cVBMMgOprygzhvGPlqJb5w&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=cVBMMgOprygzhvGPlqJb5w',
 'review_count': 119,
 'categories': [{'alias': 'wine_bars', 'title': 'Wine Bars'},
  {'alias': 'breakfast_brunch', 'title': 'Breakfast & Brunch'},
  {'alias': 'catering', 'title': 'Caterers'}],
 'rating': 4.7,
 'coordinates': {'latitude': 40.01843, 'longitude': -105.27622},
 'transactions': ['delivery', 'pickup'],
 'location': {'address1': '1468 Pearl St',
  'address2': 'Ste 110',
  'address3': '',
  'city': 'Boulder',
  'zip_code': '80302',
  'country': 'US',
  'state': 'CO',
  'display_address': ['1468 Pearl St', 'Ste 110', 'Boulder, CO 80302']},
 'phone': '+13032853755',
 'display_phone':

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,location,phone,display_phone,distance,attributes,price
0,GygGWp-ODQcqnizOxr5VLg,postino-boulder-boulder-2,Postino Boulder,https://s3-media2.fl.yelpcdn.com/bphoto/qs98C8...,False,https://www.yelp.com/biz/postino-boulder-bould...,119,"[{'alias': 'wine_bars', 'title': 'Wine Bars'},...",4.7,"{'latitude': 40.01843, 'longitude': -105.27622}","[delivery, pickup]","{'address1': '1468 Pearl St', 'address2': 'Ste...",13032853755,(303) 285-3755,8365.566524,"{'business_temp_closed': None, 'menu_url': 'ht...",
1,PPFY_lw5KkPNfJwpI8dbbw,king-dumpling-louisville,King Dumpling,https://s3-media3.fl.yelpcdn.com/bphoto/BSjNo3...,False,https://www.yelp.com/biz/king-dumpling-louisvi...,14,"[{'alias': 'chinese', 'title': 'Chinese'}, {'a...",4.8,"{'latitude': 39.96548077308674, 'longitude': -...",[restaurant_reservation],"{'address1': '316 McCaslin Blvd', 'address2': ...",17207285903,(720) 728-5903,3773.646571,"{'business_temp_closed': None, 'menu_url': Non...",
2,_8vtC2Jo29HZwyNhQ3PWrg,blackbelly-market-boulder-2,Blackbelly Market,https://s3-media2.fl.yelpcdn.com/bphoto/R0Fcme...,False,https://www.yelp.com/biz/blackbelly-market-bou...,635,"[{'alias': 'butcher', 'title': 'Butcher'}, {'a...",4.2,"{'latitude': 40.015306, 'longitude': -105.227417}",[delivery],"{'address1': '1606 Conestoga St', 'address2': ...",13032471000,(303) 247-1000,5782.037744,"{'business_temp_closed': None, 'menu_url': 'ht...",$$
3,mbrY3doqkZoNDjzLFfXnyQ,parma-trattoria-and-mozzarella-bar-louisville,Parma Trattoria & Mozzarella Bar,https://s3-media4.fl.yelpcdn.com/bphoto/iFSEUe...,False,https://www.yelp.com/biz/parma-trattoria-and-m...,1014,"[{'alias': 'italian', 'title': 'Italian'}, {'a...",4.4,"{'latitude': 39.961607, 'longitude': -105.167235}","[delivery, pickup]","{'address1': '1132 W Dillon Rd', 'address2': '...",13032842741,(303) 284-2741,3319.287565,"{'business_temp_closed': None, 'menu_url': 'ht...",$$
4,1LMe5UqMS2ei_ubt46FbNA,the-buff-restaurant-boulder,The Buff Restaurant,https://s3-media4.fl.yelpcdn.com/bphoto/Hll4hZ...,False,https://www.yelp.com/biz/the-buff-restaurant-b...,1864,"[{'alias': 'breakfast_brunch', 'title': 'Break...",4.4,"{'latitude': 40.017245, 'longitude': -105.2602...",[delivery],"{'address1': '2600 Canyon Blvd', 'address2': '...",13034429150,(303) 442-9150,7335.257038,"{'business_temp_closed': None, 'menu_url': Non...",$$


In [17]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to extract things
def clean_yelp_json(one_biz):

    ## restrict to str cols
    d_str = {key:value for key, value in one_biz.items()
             if type(value) == str}
    
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return(df_str)

yelp_stronly = [clean_yelp_json(one_b) for one_b in yelp_genjson['businesses']]
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(7)


Unnamed: 0,id,alias,name,image_url,url,phone,display_phone,price
GygGWp-ODQcqnizOxr5VLg,GygGWp-ODQcqnizOxr5VLg,postino-boulder-boulder-2,Postino Boulder,https://s3-media2.fl.yelpcdn.com/bphoto/qs98C8...,https://www.yelp.com/biz/postino-boulder-bould...,13032853755,(303) 285-3755,
PPFY_lw5KkPNfJwpI8dbbw,PPFY_lw5KkPNfJwpI8dbbw,king-dumpling-louisville,King Dumpling,https://s3-media3.fl.yelpcdn.com/bphoto/BSjNo3...,https://www.yelp.com/biz/king-dumpling-louisvi...,17207285903,(720) 728-5903,
_8vtC2Jo29HZwyNhQ3PWrg,_8vtC2Jo29HZwyNhQ3PWrg,blackbelly-market-boulder-2,Blackbelly Market,https://s3-media2.fl.yelpcdn.com/bphoto/R0Fcme...,https://www.yelp.com/biz/blackbelly-market-bou...,13032471000,(303) 247-1000,$$
mbrY3doqkZoNDjzLFfXnyQ,mbrY3doqkZoNDjzLFfXnyQ,parma-trattoria-and-mozzarella-bar-louisville,Parma Trattoria & Mozzarella Bar,https://s3-media4.fl.yelpcdn.com/bphoto/iFSEUe...,https://www.yelp.com/biz/parma-trattoria-and-m...,13032842741,(303) 284-2741,$$
1LMe5UqMS2ei_ubt46FbNA,1LMe5UqMS2ei_ubt46FbNA,the-buff-restaurant-boulder,The Buff Restaurant,https://s3-media4.fl.yelpcdn.com/bphoto/Hll4hZ...,https://www.yelp.com/biz/the-buff-restaurant-b...,13034429150,(303) 442-9150,$$
nClMzSME01CZn0MacQVl6w,nClMzSME01CZn0MacQVl6w,south-side-walnut-cafe-boulder,South Side Walnut Cafe,https://s3-media1.fl.yelpcdn.com/bphoto/P_0lYU...,https://www.yelp.com/biz/south-side-walnut-caf...,17203048118,(720) 304-8118,$$
pErXNL3l0GZiOIvGvgN0Hg,pErXNL3l0GZiOIvGvgN0Hg,boulder-swim-club-no-title,Boulder Swim Club,https://s3-media2.fl.yelpcdn.com/bphoto/Ynkn_u...,https://www.yelp.com/biz/boulder-swim-club-no-...,13034446444,(303) 444-6444,


# Activity 2: pull restaurants in a different location

- Try running a business search query for your hometown or another place by constructing a query similar to `yelp_genquery` but changing the location parameter
- Other endpoints require feeding what's called the business' fusion id into the API. Take an id from `yelp_stronly.id` and use the documentation here to pull the reviews for that business: https://docs.developer.yelp.com/reference/v3_business_reviews
- **Challenge**: generalize the previous step by writing a function that (1) takes a list of business ids as an input, (2) calls the reviews API for each id, (3) returns the results, and (4) rowbinds all results, i.e. turns them into a single, usable DataFrame

In [19]:
# your code here
selected_id = ' search'

# Construct the API request URL
api_url = f'https://api.yelp.com/v3/businesses/{selected_id}/reviews'

# Define your Yelp API key
api_key = 'hDmREkKL9dz5eDOpQ6AKykDFKpZDlfqKoRP6N9R6e4N3-35RRIcgoF7-HKfZbDiEOm6gPZueckvz6nE8o1bEEVdU2Qpo0M_SNvQ5IcEGiJGmCJ7PMTw5x0JY7q4zZnYx'

# Set up authentication headers
headers = {'Authorization': f'Bearer {api_key}'}

# Make the API request
response = requests.get(api_url, headers=headers)

# Check if the request was successful
if response.status_code == 200:
    # Extract and process the reviews data from the response
    reviews_data = response.json()
    # Process reviews_data as needed
    # Print or further analyze the reviews
    print(reviews_data)
else:
    print(f'Error: {response.status_code}')

Error: 400
