In [12]:
import os
import requests
import pandas as pd
from time import sleep

In [2]:
# Read the Yelp credential from the environment so it never lives in the notebook.
API_KEY = os.getenv("YELP_API_KEY")
# `not API_KEY` rejects both an unset variable (None) and one that is set but
# empty, which would otherwise build a useless "Bearer " header.
if not API_KEY:
    raise ValueError("YELP_API_KEY environment variable not set.")

# Authorization header sent with every Yelp request made in this notebook.
HEADERS = {"Authorization": f"Bearer {API_KEY}"}

In [3]:

# --- PAGINATED SEARCH FUNCTION ---
def search_yelp_paginated(term="thrift_stores", location="Provo, UT",
                          limit=50, max_results=1000):
    """
    Collect up to ``max_results`` Yelp businesses by paging through the search endpoint.

    Parameters
    ----------
    term : str
        Value sent as the ``term`` query parameter (e.g. 'thrift_stores').
        NOTE(review): the example looks like a category alias; if category
        filtering is intended, Yelp's ``categories`` parameter may fit better
        -- confirm against the API docs.
    location : str
        Value sent as the ``location`` query parameter (e.g. 'Provo, UT').
    limit : int
        Page size per request (Yelp allows at most 50 per request).
    max_results : int
        Upper bound on the number of businesses returned
        (Yelp Search caps near 1000 items).

    Returns
    -------
    list[dict]
        The raw ``businesses`` entries from every page, in response order.
        Never longer than ``max_results``.

    Raises
    ------
    ValueError
        If ``limit`` is smaller than 1 (the offset could never advance).
    requests.HTTPError
        If any page returns a non-success status (via ``raise_for_status``).
    """
    if limit < 1:
        raise ValueError("limit must be at least 1")

    url = "https://api.yelp.com/v3/businesses/search"

    all_businesses = []
    offset = 0

    while offset < max_results:
        # Shrink the last page so the total never exceeds max_results.
        page_size = min(limit, max_results - offset)
        params = {
            "term": term,
            "location": location,
            "limit": page_size,
            "offset": offset,
        }

        # A timeout keeps a stalled connection from hanging the cell forever.
        response = requests.get(url, headers=HEADERS, params=params, timeout=30)
        response.raise_for_status()

        businesses = response.json().get("businesses", [])

        # Stop if no more results
        if not businesses:
            break

        all_businesses.extend(businesses)

        # A short page means this was the last one; skip the extra request.
        if len(businesses) < page_size:
            break

        offset += page_size
        if offset >= max_results:
            break

        # Avoid hitting rate limits (uses the `sleep` imported from `time`).
        sleep(0.25)

    return all_businesses

In [4]:
# --- CITIES TO SEARCH ---
# Locations are grouped by county and then joined into the single `cities`
# list; list order is preserved (Utah County first, then Salt Lake County).
_UTAH_COUNTY_CITIES = [
    "Provo, UT",
    "Orem, UT",
    "Lehi, UT",
    "American Fork, UT",
    "Pleasant Grove, UT",
    "Spanish Fork, UT",
    "Springville, UT",
    "Lindon, UT",
    "Highland, UT",
]

_SALT_LAKE_COUNTY_CITIES = [
    "Salt Lake City, UT",
    "West Valley City, UT",
    "Sandy, UT",
    "Draper, UT",
    "West Jordan, UT",
    "South Jordan, UT",
    "Midvale, UT",
    "Murray, UT",
]

cities = [*_UTAH_COUNTY_CITIES, *_SALT_LAKE_COUNTY_CITIES]


In [7]:
# --- COLLECT DATA ---
# Query every city and flatten each returned business into one flat row.
# The same business can show up under several cities; rows are only
# de-duplicated later, when the DataFrame is built.
all_data = []
for city in cities:
    print(f"Collecting thrift stores from: {city} ...")
    results = search_yelp_paginated(
        term="thrift_stores",
        location=city,
        limit=50,
        max_results=1000
    )

    for b in results:
        # `dict.get(key, default)` only covers a *missing* key. The `or`
        # fallback also covers a key that is present with a JSON null value,
        # which would otherwise break the nested `.get` / `join` calls below.
        categories = b.get("categories") or []
        coordinates = b.get("coordinates") or {}
        display_address = (b.get("location") or {}).get("display_address") or []

        all_data.append({
            "name": b.get("name"),
            "rating": b.get("rating"),
            "review_count": b.get("review_count"),
            "price": b.get("price"),
            "categories": ", ".join(c["title"] for c in categories),
            "latitude": coordinates.get("latitude"),
            "longitude": coordinates.get("longitude"),
            "address": " ".join(display_address),
            "city_queried": city
        })

Collecting thrift stores from: Provo, UT ...
Collecting thrift stores from: Orem, UT ...
Collecting thrift stores from: Lehi, UT ...
Collecting thrift stores from: American Fork, UT ...
Collecting thrift stores from: Pleasant Grove, UT ...
Collecting thrift stores from: Spanish Fork, UT ...
Collecting thrift stores from: Springville, UT ...
Collecting thrift stores from: Lindon, UT ...
Collecting thrift stores from: Highland, UT ...
Collecting thrift stores from: Salt Lake City, UT ...
Collecting thrift stores from: West Valley City, UT ...
Collecting thrift stores from: Sandy, UT ...
Collecting thrift stores from: Draper, UT ...
Collecting thrift stores from: West Jordan, UT ...
Collecting thrift stores from: South Jordan, UT ...
Collecting thrift stores from: Midvale, UT ...
Collecting thrift stores from: Murray, UT ...


In [8]:

# --- SAVE RESULTS ---
# Build the frame from the collected rows, then keep the first occurrence of
# each (name, address) pair before writing the CSV.
df = pd.DataFrame(all_data)
df = df.drop_duplicates(subset=["name", "address"])
df.to_csv("utah_thrift_stores.csv", index=False)

# Preview the first rows as the cell output.
df.head()

Unnamed: 0,name,rating,review_count,price,categories,latitude,longitude,address,city_queried
0,Thrifthood,4.3,12,,Thrift Stores,40.23167,-111.66129,"160 S 100 W St Ste 1 Provo, UT 84601","Provo, UT"
1,Making Space Thrift,4.3,3,,"Thrift Stores, Fabric Stores",40.24012,-111.662942,"475 N Freedom Blvd Provo, UT 84601","Provo, UT"
2,Get Thrifty,3.4,8,,Thrift Stores,40.256653,-111.670738,"1700 N State Street Ste 20 Provo, UT 84604","Provo, UT"
3,Curveture,5.0,2,,"Plus Size Fashion, Thrift Stores, Women's Clot...",40.252912,-111.66156,"1394 N Freedom Blvd Provo, UT 84604","Provo, UT"
4,Preloved Provo,4.9,7,,"Thrift Stores, Men's Clothing, Women's Clothing",40.250892,-111.662831,"230 Cougar Blvd Provo, UT 84604","Provo, UT"


In [10]:
def yelp_search_paginated(term, location, max_per_city=200):
    """
    Collect Yelp search results for one location, at most ``max_per_city`` of them.

    Pages through the search endpoint 50 results at a time and stops as soon
    as any of the following happens:

    - a response has a non-success status (a message is printed and whatever
      was collected so far is returned -- deliberate best-effort behavior);
    - a page comes back empty or shorter than requested (no more results);
    - ``max_per_city`` results have been collected.

    Parameters
    ----------
    term : str
        Value sent as the ``term`` query parameter.
    location : str
        Value sent as the ``location`` query parameter.
    max_per_city : int
        Upper bound on the number of businesses returned (default 200).

    Returns
    -------
    list[dict]
        The raw ``businesses`` entries from every fetched page, in response
        order. Never longer than ``max_per_city``.
    """
    url = "https://api.yelp.com/v3/businesses/search"
    all_results = []
    limit = 50
    offset = 0

    while offset < max_per_city:
        # Shrink the last page so the ceiling is respected even when
        # max_per_city is not a multiple of the page size.
        page_size = min(limit, max_per_city - offset)
        params = {
            "term": term,
            "location": location,
            "limit": page_size,
            "offset": offset
        }

        # A timeout keeps a stalled connection from hanging the cell forever.
        response = requests.get(url, headers=HEADERS, params=params, timeout=30)

        if not response.ok:
            print(f"Stopping early: {response.status_code} for {location} at offset {offset}")
            break

        businesses = response.json().get("businesses", [])

        if not businesses:
            break  # No more results

        all_results.extend(businesses)

        # Fewer than requested -> no more pages
        if len(businesses) < page_size:
            break

        offset += page_size

        # Safety ceiling
        if offset >= max_per_city:
            break

        # Brief pause between pages to stay clear of rate limits.
        sleep(0.2)

    return all_results

In [13]:
term = "icecream"
all_data = []
for city in cities:
    print(f"Collecting {term} from {city}...")
    results = yelp_search_paginated(term, city)
    # Tag every record with the city whose search returned it, then add the
    # whole page set to the running list in one step.
    for r in results:
        r["search_city"] = city
    all_data.extend(results)

# Flatten the nested business records into a DataFrame
df = pd.json_normalize(all_data)

Collecting icecream from Provo, UT...
Collecting icecream from Orem, UT...
Collecting icecream from Lehi, UT...
Collecting icecream from American Fork, UT...
Collecting icecream from Pleasant Grove, UT...
Collecting icecream from Spanish Fork, UT...
Collecting icecream from Springville, UT...
Collecting icecream from Lindon, UT...
Collecting icecream from Highland, UT...
Collecting icecream from Salt Lake City, UT...
Collecting icecream from West Valley City, UT...
Collecting icecream from Sandy, UT...
Collecting icecream from Draper, UT...
Collecting icecream from West Jordan, UT...
Collecting icecream from South Jordan, UT...
Collecting icecream from Midvale, UT...
Collecting icecream from Murray, UT...


In [14]:
# ---------- SAVE TO CSV ----------
# Write the flattened results without the index column, report the row
# count, and preview the first rows as the cell output.
output_filename = "yelp_icecream_shops_utah.csv"
df.to_csv(path_or_buf=output_filename, index=False)

print(f"\nSaved {df.shape[0]} rows to {output_filename}")
df.head()


Saved 2729 rows to yelp_icecream_shops_utah.csv


Unnamed: 0,id,alias,name,image_url,is_closed,url,review_count,categories,rating,transactions,...,location.address3,location.city,location.zip_code,location.country,location.state,location.display_address,attributes.business_temp_closed,attributes.menu_url,attributes.waitlist_reservation,attributes.open24_hours
0,PmHPzLNvws4DspOIWnkUUA,sub-zero-nitrogen-ice-cream-provo,Sub Zero Nitrogen Ice Cream,https://s3-media0.fl.yelpcdn.com/bphoto/bHlLp1...,False,https://www.yelp.com/biz/sub-zero-nitrogen-ice...,53,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",3.4,"[delivery, pickup]",...,,Provo,84601,US,UT,"[62 West Center St, Provo, UT 84601]",,https://subzeroicecream.com,,
1,fmDKNQCeRNDqDVmo34yksQ,rockwell-ice-cream-provo-2,Rockwell Ice Cream,https://s3-media0.fl.yelpcdn.com/bphoto/rGvN1Y...,False,https://www.yelp.com/biz/rockwell-ice-cream-pr...,383,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.5,"[delivery, pickup]",...,,Provo,84601,US,UT,"[43 N University Ave, Provo, UT 84601]",,,,
2,yjzLBkPam1a3mLMTjL-SkQ,the-twisted-cow-provo,The Twisted Cow,https://s3-media0.fl.yelpcdn.com/bphoto/tvrTKQ...,False,https://www.yelp.com/biz/the-twisted-cow-provo...,29,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.7,[],...,,Provo,84601,US,UT,"[250 N University Ave, Provo, UT 84601]",,https://www.thetwistedcowshakes.com/menu,,
3,Lhr0Wz9SHYRcQBH0hMLNyg,brooker-s-founding-flavors-ice-cream-provo,Brooker’s Founding Flavors Ice Cream,https://s3-media0.fl.yelpcdn.com/bphoto/BkD7wm...,False,https://www.yelp.com/biz/brooker-s-founding-fl...,77,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.8,"[delivery, pickup]",...,,Provo,84606,US,UT,"[748 E 820 N, Provo, UT 84606]",,,,
4,aKniDfANeYQaTh4JQfcU6g,byu-creamery-on-ninth-provo-2,BYU Creamery on Ninth,https://s3-media0.fl.yelpcdn.com/bphoto/MbXLAB...,False,https://www.yelp.com/biz/byu-creamery-on-ninth...,202,"[{'alias': 'icecream', 'title': 'Ice Cream & F...",4.1,[delivery],...,,Provo,84604,US,UT,"[1209 900th E, Provo, UT 84604]",,,,
