<a href="https://colab.research.google.com/github/andrew66882011/qss20_slides_activities/blob/main/activities/07_apis_examplecode_solutions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
## imports
import pandas as pd
import numpy as np
import re
import requests
import yaml


## repeated printouts: with ast_node_interactivity set to "all", a cell
## displays every bare expression it contains rather than only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Example 1: no credentials; no wrapper

Site: National Assessment of Educational Progress (NAEP)

Documentation: https://www.nationsreportcard.gov/api_documentation.aspx

Base link: https://www.nationsreportcard.gov/DataService/GetAdhocData.aspx 

## Query returns something

In [None]:
## NAEP's own example query: 2011 grade 8 writing scores split by gender
## long-string trick from https://stackoverflow.com/questions/40836749/pythonic-way-of-writing-a-single-line-long-string
## adjacent string literals wrapped in ( ) are glued together into a
## single string with no linebreaks or spaces added; here there is one
## literal for the endpoint and one per query parameter
example_naep_query = (
    'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx'
    '?type=data'
    '&subject=writing'
    '&grade=8'
    '&subscale=WRIRP'
    '&variable=GENDER'
    '&jurisdiction=NP'
    '&stattype=MN:MN'
    '&Year=2011'
)

example_naep_query


'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011'

In [None]:
## use requests to call the api; the timeout keeps the cell from
## hanging forever if the server never answers
naep_resp = requests.get(example_naep_query, timeout = 30)
naep_resp
print(type(naep_resp))

## get the json contents of the response
## here, we're assuming a valid response
naep_resp_j = naep_resp.json()
naep_resp_j

## the records live under the 'result' key; turn them into a dataframe
naep_resp_d = pd.DataFrame(naep_resp_j['result'])
naep_resp_d

<Response [200]>

<class 'requests.models.Response'>


{'status': 200,
 'result': [{'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '1',
   'varValueLabel': 'Male',
   'value': 139.099504632971,
   'isStatDisplayable': 1,
   'errorFlag': 0},
  {'year': 2011,
   'sample': 'R3',
   'yearSampleLabel': '2011',
   'Cohort': 2,
   'CohortLabel': 'Grade 8',
   'stattype': 'MN:MN',
   'subject': 'WRI',
   'grade': 8,
   'scale': 'WRIRP',
   'jurisdiction': 'NP',
   'variable': 'GENDER',
   'variableLabel': 'Gender',
   'varValue': '2',
   'varValueLabel': 'Female',
   'value': 158.567104984955,
   'isStatDisplayable': 1,
   'errorFlag': 0}]}

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,GENDER,Gender,2,Female,158.567105,1,0


## What happens if there's an error in our query?

In [None]:
## same query as the working example except that Year is set to 2025,
## which does not exist in the data, so we know from the documentation
## that it won't work
wrong_naep_query = (
    'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx'
    '?type=data'
    '&subject=writing'
    '&grade=8'
    '&subscale=WRIRP'
    '&variable=GENDER'
    '&jurisdiction=NP'
    '&stattype=MN:MN'
    '&Year=2025'
)

wrong_naep_query

'https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2025'

In [None]:
## use requests to call the api (with a timeout so a stalled
## server can't hang the cell)
naep_wrong_resp = requests.get(wrong_naep_query, timeout = 30)
naep_wrong_resp

## in the case of this particular api, the call still returns
## some response, but when we try to extract the json containing
## status or results we get an error, so the line below is
## left commented out
# naep_wrong_resp.json()

<Response [200]>

### More all-purpose way of allowing remainder of calls to run: try, except

In [None]:
## putting it in a try/except so the rest of the notebook keeps running;
## catch only the errors we expect here instead of using a bare except:
##   ValueError           - the body is not valid json (json decode errors subclass it)
##   KeyError / TypeError - the json parsed but has no 'result' entry
try:
    results = naep_wrong_resp.json()['result']
except (ValueError, KeyError, TypeError) as err:
    ## define results explicitly so later code can check for None instead
    ## of hitting a NameError, and report the problem rather than hide it
    results = None
    print("Could not extract results: " + repr(err))

### Can usually also find a more targeted way, but that varies more across APIs

In [None]:
## for more targeted error catching, compare the raw text of the
## failed and the successful response: the failed one carries
## "status":400 and a System.Exception message in its body, so we
## could write an if/else along those lines
naep_wrong_resp.text
naep_resp.text

query_failed = "System.Exception" in naep_wrong_resp.text
if query_failed:
    print("not found")

'{"status":400,"result": "System.Exception: The query \'SELECT DISTINCT Framework FROM Cycles WHERE Subject=\'WRI\' AND Cohort=2 AND CONVERT(VARCHAR(10),Year)+Sample IN (\'2025R3\')\' did not return exactly 1 framework. Make sure you can trend the years defined for the given subject and cohort.\r\n   at NRCDataService3.GetAdhocData.GetFramework(NDEContext& ndeContext, String subjectCode, List`1 yearSamples, String cohort) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 1735\r\n   at NRCDataService3.GetAdhocData.PopulateBaseOrchestratorRequest() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 1495\r\n   at NRCDataService3.GetAdhocData.ConstructRequest_Datapoint() in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 482\r\n   at NRCDataService3.GetAdhocData.Page_Load(Object sender, EventArgs e) in C:\\projects\\ndecore2015\\NRCDataService2\\GetAdhocData.aspx.cs:line 136"}'

'{"status":200,"result": [{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","variable":"GENDER","variableLabel":"Gender","varValue":"1","varValueLabel":"Male","value":139.099504632971,"isStatDisplayable":1,"errorFlag":0},{"year":2011,"sample":"R3","yearSampleLabel":"2011","Cohort":2,"CohortLabel":"Grade 8","stattype":"MN:MN","subject":"WRI","grade":8,"scale":"WRIRP","jurisdiction":"NP","variable":"GENDER","variableLabel":"Gender","varValue":"2","varValueLabel":"Female","value":158.567104984955,"isStatDisplayable":1,"errorFlag":0}]}'

not found


## Writing a function to make multiple, sequential calls

- Say we want to pull the data for grades 4, 8, and 12
- How can we write a function that iterates over a list of those grades and pulls the data for each grade?

**Note**: an ideal function would have arguments for each parameter in the API like subject, subscale, etc. Here we can leave those other parts constant

In [None]:
def pull_one_grade(which_grade):
    """Pull 2011 NAEP writing scores by gender for one grade.

    Everything in the query other than the grade (subject, subscale,
    variable, jurisdiction, stat type, year) is held constant.

    Parameters
    ----------
    which_grade : int or str
        Grade to request; it is inserted into the ``grade=`` parameter.

    Returns
    -------
    pandas.DataFrame
        Built from the 'result' entry of the json response. An empty
        DataFrame is returned when the request fails or the response
        cannot be turned into a table, so that a loop over several
        grades keeps going.
    """
    ## define query
    create_query = (
    'https://www.nationsreportcard.gov/'
    'Dataservice/GetAdhocData.aspx?'
    'type=data&subject=writing&grade={grade}&'
    'subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011').format(grade =
                                    str(which_grade))
    print("Pulling for this query: " + create_query)

    ## the request itself sits inside the try so that a network failure or
    ## timeout for one grade does not stop the pulls for the other grades
    try:
        naep_resp = requests.get(create_query, timeout = 30)
        naep_j = naep_resp.json()
        naep_df = pd.DataFrame(naep_j['result'])

    ## only the failures we expect: request problems, a body that is not
    ## json or a 'result' that is not tabular (ValueError), or a response
    ## without a 'result' entry (KeyError / TypeError)
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as err:
        print("No data for grade " + str(which_grade) + ": " + repr(err))
        naep_df = pd.DataFrame()

    return naep_df
         
    

In [None]:
## pull each grade; a grade whose pull fails comes back as an empty DataFrame
all_res = [pull_one_grade(grade) for grade in [4, 8, 12]]
all_res

## rowbind only the non-empty pulls. pd.concat raises a ValueError when
## handed an empty list, so fall back to an empty DataFrame if every pull
## failed; ignore_index gives the combined rows a fresh 0..n-1 index
## instead of repeating each pull's own 0, 1 labels
nonempty_res = [res for res in all_res if not res.empty]
valid_res = pd.concat(nonempty_res, ignore_index = True) if nonempty_res else pd.DataFrame()
valid_res

https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=4&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011
Pulling for this query: https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=4&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011
https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011
Pulling for this query: https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=8&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011
https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&subject=writing&grade=12&subscale=WRIRP&variable=GENDER&jurisdiction=NP&stattype=MN:MN&Year=2011
Pulling for this query: https://www.nationsreportcard.gov/Dataservice/GetAdhocData.aspx?type=data&sub

[Empty DataFrame
 Columns: []
 Index: [],
    year sample yearSampleLabel  Cohort CohortLabel stattype subject  grade  \
 0  2011     R3            2011       2     Grade 8    MN:MN     WRI      8   
 1  2011     R3            2011       2     Grade 8    MN:MN     WRI      8   
 
    scale jurisdiction variable variableLabel varValue varValueLabel  \
 0  WRIRP           NP   GENDER        Gender        1          Male   
 1  WRIRP           NP   GENDER        Gender        2        Female   
 
         value  isStatDisplayable  errorFlag  
 0  139.099505                  1          0  
 1  158.567105                  1          0  ,
    year sample yearSampleLabel  Cohort CohortLabel stattype subject  grade  \
 0  2011     R3            2011       3    Grade 12    MN:MN     WRI     12   
 1  2011     R3            2011       3    Grade 12    MN:MN     WRI     12   
 
    scale jurisdiction variable variableLabel varValue varValueLabel  \
 0  WRIRP           NP   GENDER        Gender   

Unnamed: 0,year,sample,yearSampleLabel,Cohort,CohortLabel,stattype,subject,grade,scale,jurisdiction,variable,variableLabel,varValue,varValueLabel,value,isStatDisplayable,errorFlag
0,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,GENDER,Gender,1,Male,139.099505,1,0
1,2011,R3,2011,2,Grade 8,MN:MN,WRI,8,WRIRP,NP,GENDER,Gender,2,Female,158.567105,1,0
0,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,GENDER,Gender,1,Male,141.256978,1,0
1,2011,R3,2011,3,Grade 12,MN:MN,WRI,12,WRIRP,NP,GENDER,Gender,2,Female,155.385917,1,0


# Example 2: needs credentials; no wrapper

In [None]:
## load creds from a yaml file kept outside the notebook so the
## key itself is never written into the code
with open("../private_data/my_cred.yaml", 'r') as stream:
    try:
        creds = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        ## report the parse problem, then re-raise: without creds the
        ## lookup below could only fail with a confusing NameError
        print(exc)
        raise

## get the key
API_KEY = creds['yelp_api']['api_key']

In [None]:
## use documentation to define what to search
## doc: https://www.yelp.com/developers/documentation/v3/business_search
## write the query
base_url = "https://api.yelp.com/v3/businesses/search?"
my_name = "restaurants"
my_location = "Hanover,NH,03755"
yelp_genquery = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = my_name,
                loc = my_location)

## use requests to call the API; here, we're passing it our
## credentials in the header (structure varies by API).
## note that the query sets no limit parameter, so the API decides
## how many results come back; to cap it, add e.g. '&limit=10'
## to the query (max is 50 at once).
## the timeout stops the cell from hanging on a stalled server
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_genresp = requests.get(yelp_genquery, headers = header, timeout = 30)
yelp_genresp

## then, look at structure of response
yelp_genjson = yelp_genresp.json()


<Response [200]>

In [None]:
## peek at the first business in the response
yelp_genjson['businesses'][0]

## quick automatic summary: one row per business, though nested
## fields end up as lists/dicts inside their columns
yelp_gendf = pd.DataFrame(data = yelp_genjson['businesses'])
yelp_gendf.head(n = 5)

{'id': '8ybF6YyRldtZmU9jil4xlg',
 'alias': 'mollys-restaurant-and-bar-hanover',
 'name': "Molly's Restaurant & Bar",
 'image_url': 'https://s3-media2.fl.yelpcdn.com/bphoto/1YkJFic4Czt9b2FsZyOrwQ/o.jpg',
 'is_closed': False,
 'url': 'https://www.yelp.com/biz/mollys-restaurant-and-bar-hanover?adjust_creative=ABQTB3e9fTiSiyqs0c-3Bg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=ABQTB3e9fTiSiyqs0c-3Bg',
 'review_count': 403,
 'categories': [{'alias': 'tradamerican', 'title': 'American (Traditional)'},
  {'alias': 'burgers', 'title': 'Burgers'},
  {'alias': 'pizza', 'title': 'Pizza'}],
 'rating': 4.0,
 'coordinates': {'latitude': 43.701144, 'longitude': -72.2894249},
 'transactions': ['delivery'],
 'price': '$$',
 'location': {'address1': '43 South Main St',
  'address2': '',
  'address3': '',
  'city': 'Hanover',
  'zip_code': '03755',
  'country': 'US',
  'state': 'NH',
  'display_address': ['43 South Main St', 'Hanover, NH 03755']},
 'phone': '+16036432570',
 'disp

Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance
0,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/1YkJFi...,False,https://www.yelp.com/biz/mollys-restaurant-and...,403,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 43.701144, 'longitude': -72.2894249}",[delivery],$$,"{'address1': '43 South Main St', 'address2': '...",16036432570,(603) 643-2570,250.83016
1,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media2.fl.yelpcdn.com/bphoto/jpAl7T...,False,https://www.yelp.com/biz/base-camp-cafe-hanove...,186,"[{'alias': 'himalayan', 'title': 'Himalayan/Ne...",4.5,"{'latitude': 43.700626, 'longitude': -72.2887803}",[delivery],$$,"{'address1': '3 Lebanon St', 'address2': 'Ste ...",16036432007,(603) 643-2007,196.139758
2,neBEWvgHNhsXIBImCYx_6A,murphys-on-the-green-hanover,Murphy's On the Green,https://s3-media1.fl.yelpcdn.com/bphoto/wv3hxs...,False,https://www.yelp.com/biz/murphys-on-the-green-...,136,"[{'alias': 'newamerican', 'title': 'American (...",4.0,"{'latitude': 43.702, 'longitude': -72.2894949}",[delivery],$$,"{'address1': '11 S Main St', 'address2': '', '...",16036434075,(603) 643-4075,295.934683
3,WFvBi7bA6JZ-rMzm2Ipa_g,candela-tapas-lounge-hanover,Candela Tapas Lounge,https://s3-media1.fl.yelpcdn.com/bphoto/7sd_y_...,False,https://www.yelp.com/biz/candela-tapas-lounge-...,69,"[{'alias': 'lounges', 'title': 'Lounges'}, {'a...",4.0,"{'latitude': 43.70076, 'longitude': -72.287631...",[pickup],$$$,"{'address1': '15 Lebanon St', 'address2': '', ...",16032779094,(603) 277-9094,103.183327
4,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media1.fl.yelpcdn.com/bphoto/QCIlbB...,False,https://www.yelp.com/biz/lous-restaurant-and-b...,265,"[{'alias': 'tradamerican', 'title': 'American ...",4.0,"{'latitude': 43.70143, 'longitude': -72.289001}",[delivery],$$,"{'address1': '30 S Main St', 'address2': '', '...",16036433321,(603) 643-3321,244.97721


In [None]:
## more data-specific way of summarizing
## we're doing a simple approach and just retaining
## cols that have a simple str structure
## if doing for real, would want to also extract the nested
## fields (categories, coordinates, location, ...)
def clean_yelp_json(one_biz: dict) -> pd.DataFrame:
    """Turn one Yelp business record into a one-row DataFrame of its string fields.

    Parameters
    ----------
    one_biz : dict
        One element of the 'businesses' list in a business search response.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by the business id, with one column per key
        whose value is a str. Non-str values (numbers, booleans, lists,
        dicts) are dropped.

    Raises
    ------
    KeyError
        If one_biz has no str-valued 'id' key.
    """
    ## restrict to str cols; isinstance is the idiomatic type check and,
    ## unlike type(value) == str, also accepts subclasses of str
    d_str = {key: value for key, value in one_biz.items() if isinstance(value, str)}

    ## one row, labelled with the business id
    df_str = pd.DataFrame(d_str, index = [d_str['id']])
    return df_str

## run the cleaner over every business, then rowbind the one-row frames
yelp_stronly = list(map(clean_yelp_json, yelp_genjson['businesses']))
yelp_stronly_df = pd.concat(yelp_stronly)

yelp_stronly_df.head(n = 7)


Unnamed: 0,id,alias,name,image_url,url,price,phone,display_phone
8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover,Molly's Restaurant & Bar,https://s3-media2.fl.yelpcdn.com/bphoto/1YkJFi...,https://www.yelp.com/biz/mollys-restaurant-and...,$$,16036432570,(603) 643-2570
XVGEEIH5rVB2QzW-qywcJw,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover,Base Camp Cafe,https://s3-media2.fl.yelpcdn.com/bphoto/jpAl7T...,https://www.yelp.com/biz/base-camp-cafe-hanove...,$$,16036432007,(603) 643-2007
neBEWvgHNhsXIBImCYx_6A,neBEWvgHNhsXIBImCYx_6A,murphys-on-the-green-hanover,Murphy's On the Green,https://s3-media1.fl.yelpcdn.com/bphoto/wv3hxs...,https://www.yelp.com/biz/murphys-on-the-green-...,$$,16036434075,(603) 643-4075
WFvBi7bA6JZ-rMzm2Ipa_g,WFvBi7bA6JZ-rMzm2Ipa_g,candela-tapas-lounge-hanover,Candela Tapas Lounge,https://s3-media1.fl.yelpcdn.com/bphoto/7sd_y_...,https://www.yelp.com/biz/candela-tapas-lounge-...,$$$,16032779094,(603) 277-9094
KA8yhrd-ClVYMyOefXdVYg,KA8yhrd-ClVYMyOefXdVYg,lous-restaurant-and-bakery-hanover,Lou's Restaurant & Bakery,https://s3-media1.fl.yelpcdn.com/bphoto/QCIlbB...,https://www.yelp.com/biz/lous-restaurant-and-b...,$$,16036433321,(603) 643-3321
vMyN7JL5cJExJORgIobbQg,vMyN7JL5cJExJORgIobbQg,tuk-tuk-thai-cuisine-hanover,Tuk Tuk Thai Cuisine,https://s3-media2.fl.yelpcdn.com/bphoto/Aanb8l...,https://www.yelp.com/biz/tuk-tuk-thai-cuisine-...,$$,16032779192,(603) 277-9192
34j_2nRCVQBTKafJvncZlg,34j_2nRCVQBTKafJvncZlg,pine-restaurant-hanover-2,PINE Restaurant,https://s3-media1.fl.yelpcdn.com/bphoto/zWSReI...,https://www.yelp.com/biz/pine-restaurant-hanov...,$$$,16036468000,(603) 646-8000


# Activity

- Try running a business search query for your hometown or another place by constructing a query similar to `yelp_genquery` but changing the location parameter
- Other endpoints require feeding what's called the fusion id into the API. Take an id from `yelp_stronly_df.id` and use the documentation here to pull the reviews for that business: https://www.yelp.com/developers/documentation/v3/business_reviews
- **Challenge**: generalize the previous step by writing a function that (1) takes a list of ids as an input, (2) calls the reviews API for each id, (3) returns the results, and (4) rowbinds all results


In [None]:
## change location parameter and see results
yelp_mytown = ('{base_url}'
                'term={name}'
                '&location={loc}').format(base_url = base_url,
                name = "ice cream",
                loc = "Winnetka, IL, 60093")

## use requests to call the API (timeout so a stalled server
## can't hang the cell)
header = {'Authorization': f"Bearer {API_KEY}"}
yelp_resp = requests.get(yelp_mytown, headers = header, timeout = 30)

## parse the response once and reuse the list of businesses
mytown_businesses = yelp_resp.json()['businesses']

## pick out Homer's by its alias rather than by its position in the
## results, since the search ranking can change between runs
homers_id = next((biz['id'] for biz in mytown_businesses
                  if biz['alias'].startswith('homers-ice-cream')), None)
if homers_id is None:
    raise ValueError("Homer's Ice Cream is not in the search results")


In [None]:
## get reviews
new_base = "https://api.yelp.com/v3/businesses/"
reviews_query = ('{base_url}'
                '{yelp_id}'
                '/reviews').format(base_url = new_base,
                yelp_id = homers_id)
one_review = requests.get(reviews_query, headers = header, timeout = 30)

## parse the json once and reuse it instead of re-parsing on every line
one_review_list = one_review.json()['reviews']
one_review_list
len(one_review_list) # seems max 3- here's discussion of default order: https://www.yelp-support.com/article/How-is-the-order-of-reviews-determined?
one_review_df = pd.DataFrame(one_review_list)
one_review_df.head()

[{'id': 'G03jqutpguTEp4PiAVPlpg',
  'url': 'https://www.yelp.com/biz/homers-ice-cream-wilmette?adjust_creative=ABQTB3e9fTiSiyqs0c-3Bg&hrid=G03jqutpguTEp4PiAVPlpg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=ABQTB3e9fTiSiyqs0c-3Bg',
  'text': "After the first dinner out, post Quarantine, we took a nice drive up the north shore and found ourselves pulling up to Homer's. The patio was full with...",
  'rating': 5,
  'time_created': '2020-05-30 18:44:49',
  'user': {'id': 'tfHtsYCvpvYv13JETfHABQ',
   'profile_url': 'https://www.yelp.com/user_details?userid=tfHtsYCvpvYv13JETfHABQ',
   'image_url': 'https://s3-media3.fl.yelpcdn.com/photo/1N7vVT9Giwweel-BMxvVGQ/o.jpg',
   'name': 'Dustin M.'}},
 {'id': 'azhHmb2CgiI6dOVeQ4Knqg',
  'url': 'https://www.yelp.com/biz/homers-ice-cream-wilmette?adjust_creative=ABQTB3e9fTiSiyqs0c-3Bg&hrid=azhHmb2CgiI6dOVeQ4Knqg&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_reviews&utm_source=ABQTB3e9fTiSiyqs0c-3Bg',
  'text': "Homer's 

3

Unnamed: 0,id,url,text,rating,time_created,user
0,G03jqutpguTEp4PiAVPlpg,https://www.yelp.com/biz/homers-ice-cream-wilm...,"After the first dinner out, post Quarantine, w...",5,2020-05-30 18:44:49,"{'id': 'tfHtsYCvpvYv13JETfHABQ', 'profile_url'..."
1,azhHmb2CgiI6dOVeQ4Knqg,https://www.yelp.com/biz/homers-ice-cream-wilm...,Homer's is the best place I have ever been. It...,5,2020-04-05 16:54:22,"{'id': 'YPM09eQImaIzEMWMjnD4ag', 'profile_url'..."
2,tbiWi15lQ64ryYId0pnsCQ,https://www.yelp.com/biz/homers-ice-cream-wilm...,I returned for the Veggie Burger and Apple Cin...,5,2021-05-08 12:29:39,"{'id': '976w1TuMGQeW9alwGiguRA', 'profile_url'..."


In [None]:
## generalize into function
def getrev_oneid(one_id: str) -> pd.DataFrame:
    """Pull the reviews for one business and return them as a DataFrame.

    Relies on two notebook-level names defined in earlier cells:
    `new_base` (base url of the businesses endpoint) and `header`
    (the authorization header).

    Parameters
    ----------
    one_id : str
        Yelp business id to look up.

    Returns
    -------
    pandas.DataFrame
        Built from the 'reviews' entry of the json response, plus a
        'biz_id' column holding one_id. An empty DataFrame is returned
        if the request fails or the response has no usable 'reviews'
        entry.
    """
    reviews_query = ('{base_url}'
                '{yelp_id}'
                '/reviews').format(base_url = new_base,
                yelp_id = one_id)
    try:
        ## the request sits inside the try so a network failure or timeout
        ## for one id does not stop the pulls for the remaining ids
        one_review = requests.get(reviews_query, headers = header, timeout = 30)
        one_review_df = pd.DataFrame(one_review.json()['reviews'])
        one_review_df['biz_id'] = one_id
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError) as err:
        ## expected failures only: request problems, a body that is not
        ## json, or a payload without 'reviews'; report it rather than hide it
        print("No reviews pulled for " + str(one_id) + ": " + repr(err))
        one_review_df = pd.DataFrame()
    return one_review_df

In [None]:
## call the reviews function once per business id
all_rev_list = list(map(getrev_oneid, yelp_stronly_df['id']))

## stack the per-business frames with pd.concat, then join on the
## business id to pick up each business's alias from the original
## yelp data
all_rev_df = pd.concat(all_rev_list).merge(
    yelp_stronly_df[['id', 'alias']],
    left_on = "biz_id",
    right_on = "id",
    suffixes = ['_reviews', '_business'])
all_rev_df.head()

Unnamed: 0,id_reviews,url,text,rating,time_created,user,biz_id,id_business,alias
0,BMCP-JBLhQlYhdMKYOBWLg,https://www.yelp.com/biz/mollys-restaurant-and...,Navigating an opening night in the time of COV...,5,2020-06-12 18:44:27,"{'id': 'dCQ6UxVTij0oL9BQ7qjyag', 'profile_url'...",8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover
1,GZZ_bpSFkWM9YCha2XMtNw,https://www.yelp.com/biz/mollys-restaurant-and...,"Thursday late afternoon August 13, 2020\nWe we...",5,2020-08-16 09:45:49,"{'id': 'HTK2_m375MBlD76DhMX8Ug', 'profile_url'...",8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover
2,p2QpTUqSCWMv16tKFD7PUg,https://www.yelp.com/biz/mollys-restaurant-and...,"Got my Moderna #2 today, so decided to head ba...",5,2021-04-24 16:26:49,"{'id': '5FJSrMA8z-ZxkXPhV-72OA', 'profile_url'...",8ybF6YyRldtZmU9jil4xlg,8ybF6YyRldtZmU9jil4xlg,mollys-restaurant-and-bar-hanover
3,8JgEfN_DFYZAv4Vo8Rlntg,https://www.yelp.com/biz/base-camp-cafe-hanove...,I miss sitting and having a meal here but the ...,5,2020-08-27 18:39:00,"{'id': 'tikC9_rdMwiSOW-Hoq9-_A', 'profile_url'...",XVGEEIH5rVB2QzW-qywcJw,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover
4,tN-kTUHHb6pbOCuxDQ334A,https://www.yelp.com/biz/base-camp-cafe-hanove...,I grew up in Hanover but haven't lived in the ...,5,2020-08-08 12:56:13,"{'id': '5bIwqpc6p0mCfx8oxe9qYg', 'profile_url'...",XVGEEIH5rVB2QzW-qywcJw,XVGEEIH5rVB2QzW-qywcJw,base-camp-cafe-hanover
