The Foursquare dataset tracks the venues visited by a user in a day. However, the assigned venue categories are too granular to infer meaningful patterns from them: there are about 250 different kinds!

In [1]:
import numpy as np
import pyarrow as pa
import pyarrow.parquet

In [2]:
# Load the Foursquare table from the local parquet file
# (the path is relative to the notebook's working directory).
dataset = pa.parquet.read_table("../datasets/foursquare_nyc.parquet")

In [4]:
# Dictionary-encode the venue category column and pull out the dictionary,
# i.e. the list of distinct category names.
# The ChunkedArray method is used instead of `pa.compute.dictionary_encode(...)`
# because the `pa.compute` attribute only exists once `pyarrow.compute` has been
# imported, and this notebook never imports it explicitly.
catg_array = dataset["venueCategory"].dictionary_encode()
categories = catg_array.combine_chunks().dictionary.tolist()

In [7]:
# Number of distinct venue categories; uncomment the next line to list them all.
# categories
len(categories)

251

The cell below uses ipywidgets to build a small UI that iterates over all the event categories and maps each one to a smaller set of categories.

**Spin up a small UI to iterate over all events and put them into categories.**

In [6]:
# ipywidgets was pretty complicated beyond simple controls. We want components!

In [8]:
import ipywidgets as wg

# Shared mutable state for the widget callbacks defined below.
# `data["index"]` is the position in `categories` of the entry being labelled.
data = dict(categories=categories, index=0)
# Venue name -> original categories assigned to it.
venues: dict[str, list[str]] = dict()
# Venue button -> label widget listing the categories assigned so far.
btn_to_label = dict()


def add_new_venue(text):
    """Create a new venue from the text box and assign the current category to it.

    Used as the submit handler of the "new venue" text box. Adds a
    button/label pair for the new venue to ``layout`` and then advances
    ``current_entry`` to the next category.

    Args:
        text: The submitted widget; its ``value`` is the new venue name
            (stripped and lower-cased before use). It is cleared afterwards.

    Raises:
        AssertionError: If a venue with the same name already exists.
    """
    # Nothing left to label: ignore further submissions instead of indexing
    # past the end of the category list or re-adding the last category.
    if data["index"] >= len(data["categories"]):
        return

    val = text.value.strip().lower()
    if not val:
        # A blank submission would create a nameless venue button.
        return
    assert val not in venues, "added new venue same as older one"

    venues[val] = [current_entry.value]

    new_button = wg.Button(description=val)
    new_label = wg.Label(data["categories"][data["index"]])
    btn_to_label[new_button] = new_label
    new_button.on_click(append_to_venue)
    layout.children = (*layout.children, new_button, new_label)

    # Reset the text box so the next venue name can be typed straight away.
    text.value = ""

    data["index"] += 1
    if data["index"] < len(data["categories"]):
        current_entry.value = data["categories"][data["index"]]
    else:
        # Every category has been handled; show that instead of raising IndexError.
        current_entry.value = "All categories assigned"


def append_to_venue(btn):
    """Assign the current category to the existing venue behind ``btn``.

    Used as the click handler of every venue button. Appends the current
    category to the venue's list and to the label next to the button, then
    advances ``current_entry`` to the next category.

    Args:
        btn: The clicked button; its ``description`` (lower-cased) is the
            key into ``venues``.
    """
    # Nothing left to label: ignore stray clicks instead of indexing past the
    # end of the category list or re-adding the last category.
    if data["index"] >= len(data["categories"]):
        return

    val = btn.description.lower()
    venues[val].append(current_entry.value)

    matching_label = btn_to_label[btn]
    matching_label.value += f", {current_entry.value}"

    data["index"] += 1
    if data["index"] < len(data["categories"]):
        current_entry.value = data["categories"][data["index"]]
    else:
        # Every category has been handled; show that instead of raising IndexError.
        current_entry.value = "All categories assigned"


# Build and show the labelling UI: the category currently being labelled,
# a text box for creating a new venue, and a separator line under which
# add_new_venue() appends one button/label pair per venue.
current_entry = wg.Label(value=data["categories"][0])

# NOTE(review): `on_submit` is reportedly deprecated in ipywidgets 8 — confirm
# against the installed version before upgrading.
new_venue = wg.Text(placeholder="new venue")
new_venue.on_submit(add_new_venue)

layout = wg.VBox(
    children=[
        current_entry,
        new_venue,
        wg.Label("------------------------------------------------------------------"),
    ]
)

display(layout)

VBox(children=(Label(value='Arts & Crafts Store'), Text(value='', placeholder='new venue'), Label(value='-----…

In [22]:
# Normalise the mapping collected through the widget: an entry that is a list
# is reduced to its first element; every other entry must already be a string.
clean_venues = {}
for venue_name, entries in venues.items():
    flattened = clean_venues[venue_name] = []
    for entry in entries:
        if not isinstance(entry, list):
            assert isinstance(entry, str)
            flattened.append(entry)
            continue
        flattened.append(entry[0])
# clean_venues

This is the final output, after swapping the assigned category for some of the venues.

In [8]:
# Hard-coded mapping from each coarse venue group to the original venue
# category names assigned to it (nine groups, 251 category names in total).
# This literal is what gets written to the JSON mapping file at the end of
# the notebook.
final_venues = {
    "chores": [
        "Funeral Home",
        "Storage Facility",
        "Recycling Facility",
        "Car Wash",
        "Internet Cafe",
        "Salon / Barbershop",
        "Laundry Service",
        "Tanning Salon",
        "Gas Station / Garage",
        "Spa / Massage",
        "Nail Salon",
        "Animal Shelter",
        "Gym / Fitness Center",
        "Pet Service",
    ],
    "shopping": [
        "Arts & Crafts Store",
        "Electronics Store",
        "Mobile Phone Shop",
        "Automotive Shop",
        "Sporting Goods Shop",
        "Jewelry Store",
        "Clothing Store",
        "Department Store",
        "Tattoo Parlor",
        "Mall",
        "Music Store",
        "Miscellaneous Shop",
        "Hardware Store",
        "Convenience Store",
        "Bookstore",
        "Bike Shop",
        "Video Game Store",
        "Toy / Game Store",
        "Paper / Office Supplies Store",
        "Candy Store",
        "Camera Store",
        "Cosmetics Shop",
        "Plaza",
        "Pet Store",
        "Smoke Shop",
        "Record Shop",
        "Furniture / Home Store",
        "Bridal Shop",
        "Antique Shop",
        "Thrift / Vintage Store",
        "Car Dealership",
        "Flea Market",
        "Gift Shop",
        "Flower Shop",
        "Hobby Shop",
        "Board Shop",
        "Shop & Service",
        "Video Store",
        "Motorcycle Shop",
        "Newsstand",
        "Market",
    ],
    "outdoors": [
        "Cemetery",
        "Bridge",
        "Other Great Outdoors",
        "Park",
        "Road",
        "Neighborhood",
        "Playground",
        "Moving Target",
        "Campground",
        "Parking",
        "Athletic & Sport",
        "Scenic Lookout",
        "Pool",
        "Garden",
        "Pool Hall",
        "Harbor / Marina",
        "Outdoors & Recreation",
        "River",
        "Beach",
        "Historic Site",
        "Rest Area",
        "Sculpture Garden",
        "City",
        "Ski Area",
        "Shrine",
        "College Stadium",
        "Mosque",
        "Public Art",
        "Garden Center",
        "Castle",
        "Church",
        "Temple",
        "Synagogue",
    ],
    "residential": [
        "Home (private)",
        "Residential Building (Apartment / Condo)",
        "Fraternity House",
        "Hotel",
        "Sorority House",
    ],
    "medical": ["Medical Center", "Drugstore / Pharmacy", "Medical School"],
    "food_n_drinks": [
        "Breakfast Spot",
        "Food Truck",
        "Food & Drink Shop",
        "Coffee Shop",
        "Gastropub",
        "Café",
        "Restaurant",
        "American Restaurant",
        "Mexican Restaurant",
        "Burger Joint",
        "Sandwich Place",
        "Ice Cream Shop",
        "Soup Place",
        "Deli / Bodega",
        "Diner",
        "Cuban Restaurant",
        "BBQ Joint",
        "Italian Restaurant",
        "Bar",
        "Spanish Restaurant",
        "Asian Restaurant",
        "Burrito Place",
        "Fast Food Restaurant",
        "Dumpling Restaurant",
        "Cupcake Shop",
        "Wings Joint",
        "Caribbean Restaurant",
        "French Restaurant",
        "Salad Place",
        "Vegetarian / Vegan Restaurant",
        "Sushi Restaurant",
        "Chinese Restaurant",
        "Latin American Restaurant",
        "Southern / Soul Food Restaurant",
        "Fried Chicken Joint",
        "Dessert Shop",
        "Bagel Shop",
        "Middle Eastern Restaurant",
        "Tea Room",
        "Seafood Restaurant",
        "Donut Shop",
        "Japanese Restaurant",
        "German Restaurant",
        "Indian Restaurant",
        "Hot Dog Joint",
        "Steakhouse",
        "Thai Restaurant",
        "Bakery",
        "Food",
        # NOTE(review): the double space below presumably mirrors the dataset's
        # own spelling of this category — verify before "fixing" it.
        "Ramen /  Noodle House",
        "Mediterranean Restaurant",
        "Beer Garden",
        "African Restaurant",
        "Malaysian Restaurant",
        "Snack Place",
        "Taco Place",
        "South American Restaurant",
        "Brazilian Restaurant",
        "Winery",
        "Greek Restaurant",
        "Falafel Restaurant",
        "Tapas Restaurant",
        "Eastern European Restaurant",
        "Korean Restaurant",
        "Portuguese Restaurant",
        "Brewery",
        "Cajun / Creole Restaurant",
        "Mac & Cheese Joint",
        "Vietnamese Restaurant",
        "Dim Sum Restaurant",
        "Swiss Restaurant",
        "Australian Restaurant",
        "Peruvian Restaurant",
        "Filipino Restaurant",
        "Arepa Restaurant",
        "Turkish Restaurant",
        "Scandinavian Restaurant",
        "Fish & Chips Shop",
        "Afghan Restaurant",
        "Ethiopian Restaurant",
        "Distillery",
        "Gluten-free Restaurant",
        "Argentinian Restaurant",
        "Moroccan Restaurant",
        "Molecular Gastronomy Restaurant",
        "Pizza Place",
    ],
    "transit": [
        "Bus Station",
        "Airport",
        "Ferry",
        "Subway",
        "Light Rail",
        "Train Station",
        "General Travel",
        "Taxi",
        "Rental Car Location",
        "Travel & Transport",
        "Travel Lounge",
        "Bike Rental / Bike Share",
    ],
    "workplace": [
        "Bank",
        "Government Building",
        "Office",
        "Building",
        "Student Center",
        "College Academic Building",
        "University",
        "General College & University",
        "Factory",
        "School",
        "Community College",
        "Post Office",
        "Housing Development",
        "High School",
        "College & University",
        "Library",
        "Professional & Other Places",
        "Nursery School",
        "Law School",
        "Elementary School",
        "Design Studio",
        "Trade School",
        "Embassy / Consulate",
        "Middle School",
        "Financial or Legal Service",
        "Military Base",
        "Music School",
    ],
    "recreation": [
        "Music Venue",
        "Other Nightlife",
        "Stadium",
        "Performing Arts Venue",
        "Art Gallery",
        "Event Space",
        "Convention Center",
        "Theater",
        "Movie Theater",
        "Comedy Club",
        "General Entertainment",
        "Arts & Entertainment",
        "College Theater",
        "Casino",
        "Nightlife Spot",
        "Art Museum",
        "Museum",
        "History Museum",
        "Spiritual Center",
        "Racetrack",
        "Zoo",
        "Fair",
        "Planetarium",
        "Photography Lab",
        "Aquarium",
        "Arcade",
        "Concert Hall",
        "Bowling Alley",
        "Science Museum",
        "Gaming Cafe",
    ],
}

In [11]:
import json

# Serialise the group -> categories mapping and write it next to the dataset.
with open("../datasets/foursquare_events_mapping.json", "w") as mapping_file:
    mapping_file.write(json.dumps(final_venues))