## Finding every Chinese restaurant in North America

In this notebook I load the Foursquare Open Places dataset from Hugging Face and filter it in batches to find all the Chinese restaurants in Canada and the US.  The data is exported to CSV to bring into ArcGIS for mapping and spatial analysis.  

Foursquare's dataset covers the entire world and has 105 million rows, so I filtered first for location and then for restaurant IDs.  Because the dataset is so large, I knew I didn't have enough RAM and asked generative AI to help me come up with a way to process it in batches.  It also provided some aggressive error handling that was helpful.  I couldn't dataset streaming to work, so this ended up being a good option.  

While I'd like to cover all of North America eventually, I'm testing out the workflow in the NY-NJ-PA-CT area around New York City.  

In [1]:
from datasets import load_dataset, load_dataset_builder
import pandas as pd

import os
import time
import numpy as np
from pathlib import Path

# lowercase comments: things i did
# Capitalized comments: things Cursor did

In [3]:
# load foursquare open places dataset builder to inspect it
ds_builder = load_dataset_builder("foursquare/fsq-os-places", "places")

Resolving data files:   0%|          | 0/100 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/100 [00:00<?, ?it/s]

In [4]:
# inspect dataset features
ds_builder.info.features

{'fsq_place_id': Value('string'),
 'name': Value('string'),
 'latitude': Value('float64'),
 'longitude': Value('float64'),
 'address': Value('string'),
 'locality': Value('string'),
 'region': Value('string'),
 'postcode': Value('string'),
 'admin_region': Value('string'),
 'post_town': Value('string'),
 'po_box': Value('string'),
 'country': Value('string'),
 'date_created': Value('string'),
 'date_refreshed': Value('string'),
 'date_closed': Value('string'),
 'tel': Value('string'),
 'website': Value('string'),
 'email': Value('string'),
 'facebook_id': Value('int64'),
 'instagram': Value('string'),
 'twitter': Value('string'),
 'fsq_category_ids': List(Value('string')),
 'fsq_category_labels': List(Value('string')),
 'placemaker_url': Value('string'),
 'unresolved_flags': List(Value('string')),
 'geom': Value('binary'),
 'bbox': {'xmin': Value('float64'),
  'ymin': Value('float64'),
  'xmax': Value('float64'),
  'ymax': Value('float64')}}

In [None]:
# casting a wide net with filtering because some values in 'country' and 'region' are null or messed up

# us and canada ('country' column)
countries_include = ['US', 'CA']

# countries to exclude ('country' column)
countries_exclude = [
    "AF", "AX", "AL", "DZ", "AS", "AD", "AO", "AI", "AQ", "AG", "AR", "AM", "AW", "AU", "AT", "AZ",
    "BS", "BH", "BD", "BB", "BY", "BE", "BZ", "BJ", "BM", "BT", "BO", "BQ", "BA", "BW", "BV", "BR",
    "IO", "BN", "BG", "BF", "BI", "CV", "KH", "CM", "KY", "CF", "TD", "CL", "CN", "CX", "CC", "CO",
    "KM", "CG", "CD", "CK", "CR", "CI", "HR", "CU", "CW", "CY", "CZ", "DK", "DJ", "DM", "DO", "EC",
    "EG", "SV", "GQ", "ER", "EE", "SZ", "ET", "FK", "FO", "FJ", "FI", "FR", "GF", "PF", "TF", "GA",
    "GM", "GE", "DE", "GH", "GI", "GR", "GL", "GD", "GP", "GU", "GT", "GG", "GN", "GW", "GY", "HT",
    "HM", "VA", "HN", "HK", "HU", "IS", "IN", "ID", "IR", "IQ", "IE", "IM", "IL", "IT", "JM", "JP",
    "JE", "JO", "KZ", "KE", "KI", "KP", "KR", "KW", "KG", "LA", "LV", "LB", "LS", "LR", "LY", "LI",
    "LT", "LU", "MO", "MG", "MW", "MY", "MV", "ML", "MT", "MH", "MQ", "MR", "MU", "YT", "MX", "FM",
    "MD", "MC", "MN", "ME", "MS", "MA", "MZ", "MM", "NA", "NR", "NP", "NL", "NC", "NZ", "NI", "NE",
    "NG", "NU", "NF", "MK", "MP", "NO", "OM", "PK", "PW", "PS", "PA", "PG", "PY", "PE", "PH", "PN",
    "PL", "PT", "PR", "QA", "RE", "RO", "RU", "RW", "BL", "SH", "KN", "LC", "MF", "PM", "VC", "WS",
    "SM", "ST", "SA", "SN", "RS", "SC", "SL", "SG", "SX", "SK", "SI", "SB", "SO", "ZA", "GS", "SS",
    "ES", "LK", "SD", "SR", "SJ", "SE", "CH", "SY", "TW", "TJ", "TZ", "TH", "TL", "TG", "TK", "TO",
    "TT", "TN", "TR", "TM", "TC", "TV", "UG", "UA", "AE", "GB", "UY", "UZ", "VU", "VE", "VN", "VG",
    "VI", "WF", "EH", "YE", "ZM", "ZW", "UM",
]

# us states ('region' column)
us_states_include = [
    "AL", "AK", "AZ", "AR", "AS", "CA", "CO", "CT", "DE", "DC", "FL", "GA", "GU", "HI", "ID", "IL", "IN",
    "IA", "KS", "KY", "LA", "ME", "MD", "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", "NM", 
    "NY", "NC", "ND", "MP", "OH", "OK", "OR", "PA", "PR", "RI", "SC", "SD", "TN", "TX", "TT", "UT", "VT", 
    "VA", "VI", "WA", "WV", "WI", "WY",
]

# canadian provinces ('region' column)
ca_provinces_include = [
    "NL", "PE", "NS", "NB", "QC", "ON", "MB", "SK", "AB", "BC", "YT", "NT", "NU",
]



In [2]:
# fsq chinese restaurant category ids
# after filtering function was applied (below), some restaurants are just labeled "chinese" with no subcategory
# this may not have been the case before filtering, but hopefully that doesn't matter for this project


chinese_category_ids = [
    "4bf58dd8d48988d145941735",  # Chinese Restaurant (main category)
    "52af3a723cf9994f4e043bec",  # Beijing Restaurant
    "52af3a7c3cf9994f4e043bed",  # Cantonese Restaurant
    "58daa1558bbb0b01f18ec1d3",  # Cha Chaan Teng
    "52af3a673cf9994f4e043beb",  # Chinese Aristocrat Restaurant
    "52af3a903cf9994f4e043bee",  # Chinese Breakfast Restaurant
    "4bf58dd8d48988d1f5931735",  # Dim Sum Restaurant
    "52af3a9f3cf9994f4e043bef",  # Dongbei Restaurant
    "52af3aaa3cf9994f4e043bf0",  # Fujian Restaurant
    "52af3ab53cf9994f4e043bf1",  # Guizhou Restaurant
    "52af3abe3cf9994f4e043bf2",  # Hainan Restaurant
    "52af3ac83cf9994f4e043bf3",  # Hakka Restaurant
    "52af3ad23cf9994f4e043bf4",  # Henan Restaurant
    "52af3add3cf9994f4e043bf5",  # Hong Kong Restaurant
    "52af3af23cf9994f4e043bf7",  # Huaiyang Restaurant
    "52af3ae63cf9994f4e043bf6",  # Hubei Restaurant
    "52af3afc3cf9994f4e043bf8",  # Hunan Restaurant
    "52af3b053cf9994f4e043bf9",  # Imperial Restaurant
    "52af3b213cf9994f4e043bfa",  # Jiangsu Restaurant
    "52af3b293cf9994f4e043bfb",  # Jiangxi Restaurant
    "52af3b343cf9994f4e043bfc",  # Macanese Restaurant
    "52af3b3b3cf9994f4e043bfd",  # Manchu Restaurant
    "52af3b463cf9994f4e043bfe",  # Peking Duck Restaurant
    "52af3b633cf9994f4e043c01",  # Shaanxi Restaurant
    "52af3b513cf9994f4e043bff",  # Shandong Restaurant
    "52af3b593cf9994f4e043c00",  # Shanghai Restaurant
    "52af3b6e3cf9994f4e043c02",  # Shanxi Restaurant
    "52af3b773cf9994f4e043c03",  # Szechuan Restaurant
    "52af3b813cf9994f4e043c04",  # Taiwanese Restaurant
    "52af3b893cf9994f4e043c05",  # Tianjin Restaurant
    "52af3b913cf9994f4e043c06",  # Xinjiang Restaurant
    "52af3b9a3cf9994f4e043c07"   # Yunnan Restaurant
]


In [None]:
# count the number of categories
len(chinese_category_ids)

32

In [None]:
# load full(!!!!!) dataset (11GB)

# couldn't get streaming to work
# fsq-os-places only has "train" split
# latest release: 2025-09-09

dataset = load_dataset("foursquare/fsq-os-places", "places", split="train")

Resolving data files:   0%|          | 0/100 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/100 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/88 [00:00<?, ?it/s]

In [2]:
# define output dir separately from the batch filtering function below
output_dir = Path("chinese_restaurants_filtered")
output_dir.mkdir(exist_ok=True)

In [18]:
# extract all the chinese restaurants in us and canada

# based on country (country code) or region (state/province) variables
# include chinese restaurants with null country or region just in case

# Create output directory
output_dir = Path("chinese_restaurants_filtered")
output_dir.mkdir(exist_ok=True)

# Clear any existing files
for f in output_dir.glob("*.csv"):
    f.unlink()

batch_size = 500000
total_chinese_restaurants = 0

print("Starting batch processing with filtering...")

def is_chinese_restaurant(category_ids):
    """Check if any category ID matches Chinese restaurant categories"""
    try:
        # Check for null values
        if pd.isna(category_ids) or category_ids is None:
            return False
        
        # apparently 'fsq-category-ids' holds numpy arrays, not lists
        # Convert numpy arrays to lists
        if isinstance(category_ids, np.ndarray):
            category_ids = category_ids.tolist()
        
        # Check if it's a list and contains Chinese restaurant categories
        if isinstance(category_ids, list):
            return any(cat in chinese_category_ids for cat in category_ids)
        return False
    except:
        # Fallback: if anything goes wrong, return False
        return False

def is_us_ca_location(country, region):
    # Check if location is in US/Canada, handling nulls properly
    # If both country and region are null, return True (we'll include these)
    if pd.isna(country) and pd.isna(region):
        return True
    
    # If country is in our include list
    if not pd.isna(country) and country in countries_include:
        return True
    
    # If country is in exclude list, return False
    if not pd.isna(country) and country in countries_exclude:
        return False
    
    # If region is a US state or Canadian province
    if not pd.isna(region):
        if region in us_states_include or region in ca_provinces_include:
            return True
    
    # If we have country but it's not in include/exclude lists, be conservative
    if not pd.isna(country):
        return False
    
    # If we only have region and it's not US/CA, return False
    if not pd.isna(region):
        return False
    
    # Default case: include if we can't determine (null country/region)
    return True

try:
    start_time = time.time()
    
    for i, df_batch in enumerate(dataset.to_pandas(batch_size=batch_size, batched=True)):
        print(f"Processing batch {i+1} with {len(df_batch)} rows...")
        
        if 'country' in df_batch.columns and 'fsq_category_ids' in df_batch.columns:
            # Get region column if it exists
            region_col = 'region' if 'region' in df_batch.columns else None
            
            # Apply location filtering
            if region_col:
                location_mask = df_batch.apply(
                    lambda row: is_us_ca_location(row['country'], row[region_col]), 
                    axis=1
                )
            else:
                location_mask = df_batch['country'].apply(
                    lambda x: is_us_ca_location(x, None)
                )
            
            location_filtered = df_batch[location_mask]
            print(f"  → {len(location_filtered)} rows after location filtering")
            
            # Apply Chinese restaurant filtering
            chinese_mask = location_filtered['fsq_category_ids'].apply(is_chinese_restaurant)
            filtered_batch = location_filtered[chinese_mask]
            
            if len(filtered_batch) > 0:
                output_file = output_dir / f"chinese_restaurants_batch_{i}.csv"
                filtered_batch.to_csv(output_file, index=False)
                total_chinese_restaurants += len(filtered_batch)
                print(f"  → Found {len(filtered_batch)} Chinese restaurants")
            else:
                print(f"  → No Chinese restaurants in this batch")
        else:
            print(f"  → Skipping batch - missing required columns")
            print(f"  → Available columns: {list(df_batch.columns)}")
        
        if (i + 1) % 10 == 0:
            elapsed = time.time() - start_time
            print(f"Completed {i + 1} batches in {elapsed:.1f} seconds")
            print(f"Total Chinese restaurants found so far: {total_chinese_restaurants}")
            
except Exception as e:
    print(f"Error processing batch {i + 1}: {e}")
    import traceback
    traceback.print_exc()

print(f"Processing complete. Total Chinese restaurants found: {total_chinese_restaurants}")


Starting corrected batch processing with proper filtering...
Processing batch 1 with 500000 rows...
  → 431893 rows after location filtering
  → Found 1469 Chinese restaurants
Processing batch 2 with 500000 rows...
  → 499999 rows after location filtering
  → Found 2064 Chinese restaurants
Processing batch 3 with 500000 rows...
  → 499998 rows after location filtering
  → Found 1015 Chinese restaurants
Processing batch 4 with 500000 rows...
  → 500000 rows after location filtering
  → Found 2495 Chinese restaurants
Processing batch 5 with 500000 rows...
  → 397211 rows after location filtering
  → Found 901 Chinese restaurants
Processing batch 6 with 500000 rows...
  → 499967 rows after location filtering
  → Found 1227 Chinese restaurants
Processing batch 7 with 500000 rows...
  → 67232 rows after location filtering
  → Found 109 Chinese restaurants
Processing batch 8 with 500000 rows...
  → 0 rows after location filtering
  → No Chinese restaurants in this batch
Processing batch 9 wi

In [None]:
# Combine all the csv files into one

csv_files = sorted(Path(output_dir).glob("chinese_restaurants_batch_*.csv"))
dfs = []
for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    dfs.append(df)

if dfs:
    combined_df = pd.concat(dfs, ignore_index=True)
    combined_output_file = Path(output_dir) / "chinese_restaurants_combined.csv"
    combined_df.to_csv(combined_output_file, index=False)
    print(f"Combined {len(csv_files)} CSV files into {combined_output_file}")
    print(f"Total rows in combined file: {len(combined_df)}")
else:
    print("No batch CSV files found to combine.")


Combined 61 CSV files into chinese_restaurants_filtered/chinese_restaurants_combined.csv
Total rows in combined file: 64361


In [3]:
chinese_restaurants = pd.read_csv(output_dir / "chinese_restaurants_combined.csv")
chinese_restaurants.head()

Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
0,4c8c4e92cf3ea1434a7af451,Lee's Chinese,39.444009,-123.804738,154 E Redwood Ave,Fort Bragg,CA,95437.0,,,...,info@bbbemail.org,162608400000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xf3\x80\xd5\xb0\xd...,"{'xmin': -123.80473844785541, 'ymin': 39.44400..."
1,4f442d5819836ed00192b620,China Express,39.433506,-123.805474,660 S Main St,Fort Bragg,CA,95437.0,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xf3\x8c\xe3\xdd\xf...,"{'xmin': -123.80547424960108, 'ymin': 39.43350..."
2,a194a71384e241d278559b60,Hill House Restaurant,39.310533,-123.79836,10701 Palette Dr,Mendocino,CA,95460.0,,,...,frontdesk@mendocinohotels.com,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xf3\x18T\x8a\x9b\x...,"{'xmin': -123.79836, 'ymin': 39.310533, 'xmax'..."
3,5dffd431f4a5f60008cfa1ce,Panda Express,39.130484,-123.198543,1236 Airport Park Blvd,Ukiah,CA,95482.0,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xcc\xb4\xed|X;@C\x...,"{'xmin': -123.19854294913323, 'ymin': 39.13048..."
4,4f32482419836c91c7c8471b,New Dragon,39.140123,-123.205708,765 S State St,Ukiah,CA,95482.0,,,...,,111568100000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xcd*Pt\xa0@@C\x91\...,"{'xmin': -123.20570765866523, 'ymin': 39.14012..."


### Examining the data and isolating a subset for a geoprocessing test

The resulting CSV still needs a little bit of clean up, but thankfully everything has latitude and longitude.  I cleaned up and unified misspelled place names and removed a few rows with no country listed at all.  

In [4]:
chinese_restaurants.shape

(64361, 27)

In [5]:
chinese_restaurants.columns

Index(['fsq_place_id', 'name', 'latitude', 'longitude', 'address', 'locality',
       'region', 'postcode', 'admin_region', 'post_town', 'po_box', 'country',
       'date_created', 'date_refreshed', 'date_closed', 'tel', 'website',
       'email', 'facebook_id', 'instagram', 'twitter', 'fsq_category_ids',
       'fsq_category_labels', 'placemaker_url', 'unresolved_flags', 'geom',
       'bbox'],
      dtype='object')

In [6]:
# find all the rows with no country listed

country_null = chinese_restaurants[chinese_restaurants['country'].isna()]
print(country_null.shape)
country_null.head()

(13, 27)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
5180,57ba8b46498ed8856ca33475,Great Wei Restaurant,2.195557,102.237099,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,"b'\x00\x00\x00\x00\x01@Y\x8f,\xa1H\xba\x84@\x0...","{'xmin': 102.237099, 'ymin': 2.195557, 'xmax':..."
7661,57c5e9ca498eea2cadb12bc1,RoomFree,43.45938,39.900918,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01@C\xf3QG\xf10Y@E\xba\xcc...,"{'xmin': 39.900918, 'ymin': 43.45938, 'xmax': ..."
7662,57c5e96d498ecbc36caea471,бар вок,43.458568,39.901039,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01@C\xf3U>\xf6\xb5\xd4@E\x...,"{'xmin': 39.901039, 'ymin': 43.458568, 'xmax':..."
8001,57bada85498e61797111af4a,эээ,59.87001,29.855366,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01@=\xda\xf9D$\x1c?@M\xef\...,"{'xmin': 29.855366, 'ymin': 59.87001, 'xmax': ..."
8002,57badab2498e4fd6c6a3d773,Япония,59.869533,29.854272,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01@=\xda\xb1\x91\xdd\xe3v@...,"{'xmin': 29.854272, 'ymin': 59.869533, 'xmax':..."


In [None]:
# remove rows with no country listed (only 13 rows, not gonna bother looking into them)
chinese_restaurants = chinese_restaurants[chinese_restaurants['country'].notna()]
chinese_restaurants.shape

# save new csv
chinese_restaurants.to_csv(output_dir / "chinese_restaurants_cleaned.csv", index=False)


In [31]:
# find all the rows with no region (state or province) listed
# none found, but everything has lat/long coords so it's fine (see below)

region_null = chinese_restaurants[chinese_restaurants['region'].isna()]
print(region_null.shape)
region_null.head()

(1640, 27)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
26,4bd881f40b779c74b95507a0,Fu Hing,39.520284,-122.193704,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\x8ce\xa5\x17\nO@C\...,"{'xmin': -122.19370391130472, 'ymin': 39.52028..."
43,4f5bb94ee4b01f8f4c30222c,Rice Wok,39.761965,-121.82338,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^t\xb2@\x9c\xde\xb0@...,"{'xmin': -121.82337966269802, 'ymin': 39.76196..."
237,4f6b7272e4b0d8154c2fb5ef,Manchu Wok,38.269847,-121.948936,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,"b'\x00\x00\x00\x00\x01\xc0^|\xbb^\xce[\xfb@C""\...","{'xmin': -121.94893617775226, 'ymin': 38.26984..."
391,4e79745852b1fc6a4ae6a494,MR. NOODLE,37.973042,-122.33957,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,"b'\x00\x00\x00\x00\x01\xc0^\x95\xbb\x83\x03*,@...","{'xmin': -122.33956981, 'ymin': 37.97304232, '..."
488,4d55bdd1a05c3704ca8dbc87,Emmo's Kitchen,37.851761,-122.273421,,,,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\x91\x7f\xbb\xa8DB@...,"{'xmin': -122.27342120582856, 'ymin': 37.85176..."


In [34]:
# count rows with no lat/long coords

print(len(chinese_restaurants[chinese_restaurants['latitude'].isna() | chinese_restaurants['longitude'].isna()]))

# answer is 0 rows, yay!

0


In [43]:
# count rows where 'date_closed' is null

open_restaurants = len(chinese_restaurants[chinese_restaurants['date_closed'].isna()])
print(f'open restaurants: {open_restaurants}')

open restaurants: 50712


In [42]:
total_restaurants = len(chinese_restaurants)
open_restaurants = len(chinese_restaurants[chinese_restaurants['date_closed'].isna()])

print(f'closed restaurants: {total_restaurants - open_restaurants}')

closed restaurants: 13636


In [44]:
# check for duplicate rows by 'fsq_place_id'
# no duplicates found!

duplicate_ids = chinese_restaurants[chinese_restaurants.duplicated(subset=['fsq_place_id'])]
print(duplicate_ids.shape)
duplicate_ids.head()

(0, 27)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox


In [49]:
# count restaurants by country and region

counts_by_country = chinese_restaurants['country'].value_counts()
counts_by_country

country
US    57536
CA     6812
Name: count, dtype: int64

In [81]:
counts_by_region = chinese_restaurants['region'].value_counts()
counts_by_region

region
CA    8578
NY    7056
TX    3540
FL    3522
ON    2833
PA    2693
NJ    2407
IL    2182
GA    1818
OH    1542
VA    1519
MA    1456
BC    1438
NC    1432
MI    1229
MD    1162
MO    1122
WA    1064
IN     815
SC     810
AZ     804
CT     803
TN     757
CO     710
WI     699
MN     673
AB     670
OR     656
AL     633
LA     596
NV     582
OK     530
QC     503
KY     484
IA     427
KS     424
HI     390
UT     384
AR     318
MS     294
NH     242
RI     234
ME     234
NM     231
NE     220
WV     204
ID     203
DE     188
MB     177
SK     174
DC     167
NS     152
VT     125
MT     104
AK      98
NB      97
SD      75
ND      67
WY      62
NL      58
PE      20
YT      10
NT       7
NU       1
Name: count, dtype: int64

In [82]:
# turn value_counts into pandas df
counts_by_region = counts_by_region.to_frame()


In [83]:
counts_by_region.head()

Unnamed: 0_level_0,count
region,Unnamed: 1_level_1
CA,8578
NY,7056
TX,3540
FL,3522
ON,2833


In [84]:
pd.set_option('display.max_rows', None)
counts_by_region


Unnamed: 0_level_0,count
region,Unnamed: 1_level_1
CA,8578
NY,7056
TX,3540
FL,3522
ON,2833
PA,2693
NJ,2407
IL,2182
GA,1818
OH,1542


In [75]:
# done

fixregion_monk = chinese_restaurants[chinese_restaurants['region'] == 'Monk']
fixregion_monk

Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
12592,53655355498ebeabc3bfc92c,Chuan Saveur Masion,45.45698,-73.593145,2110 Rue Jolicoeur,Montreal,Monk,H4E 1X6,,,...,,,,,['52af3b773cf9994f4e043c03'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Re\xf6\x15VXx@F\xba~...,"{'xmin': -73.59314473564325, 'ymin': 45.456979..."


In [77]:
# done
# display all rows where 'region' is 'ケベック', '安大略省', '安大略', 'Mexico', or 'New York'
fixregion_misc = chinese_restaurants[chinese_restaurants['region'].isin(['ケベック', '安大略省', '安大略', 'Mexico', 'New York'])]
fixregion_misc


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
12445,4e2eec2ad4c058fdbeee58ab,FORTUNE 2004,46.810879,-71.215122,,ケベック,ケベック,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Q\xcd\xc4\x8c\xfb=\x...,"{'xmin': -71.21512150321217, 'ymin': 46.810878..."
12569,51729107498ebf7fc424ef49,Sensai,45.447928,-73.572102,Ocean Coral H10 Resort,Riviera Maya,Mexico,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Rd\x9dP\x08\x82\xbb@...,"{'xmin': -73.57210160094375, 'ymin': 45.447927..."
27764,4e63fc62c65b2dc8a04cb2fa,Fu Kee Chinese Cuisine,42.980028,-81.211768,601 Hamilton Rd,伦敦,安大略省,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0TM\x8d\x9dF\xe8(@E}q...,"{'xmin': -81.21176845478533, 'ymin': 42.980028..."
50767,4c84d07b2f1c236ac7f74e43,"Happy Lamb Hot Pot, Flushing 快乐小羊",40.762194,-73.828995,136-59 37th Avenue,Queens,New York,11354,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Ru\x0eA\x0bc\x0b@Da\...,"{'xmin': -73.828995, 'ymin': 40.762194, 'xmax'..."
58243,588d327a4988da585259114f,Old Beijing BBQ,43.784412,-79.27767,,Toronto,安大略,M1S,,,...,,,,,['52af3b463cf9994f4e043bfe'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0S\xd1\xc5Y\xd3\xcb\x...,"{'xmin': -79.27767034228535, 'ymin': 43.784411..."


In [None]:
# done
# delete rows where index is 12445, 12569, 27764
chinese_restaurants = chinese_restaurants.drop(index=[12445, 12569, 27764])

In [None]:
# done
chinese_restaurants.loc[chinese_restaurants['region'] == 'New York', 'region'] = 'NY'

In [None]:
# done
chinese_restaurants.loc[chinese_restaurants['region'] == '安大略', 'region'] = 'ON'


In [None]:
# done

chinese_restaurants.loc[chinese_restaurants['region'] == 'Monk', 'region'] = 'QC'

In [None]:
# done
# correct misspellings in 'region'

chinese_restaurants.loc[chinese_restaurants['region'] == 'Ontario', 'region'] = 'ON'
chinese_restaurants.loc[chinese_restaurants['region'] == 'British Columbia', 'region'] = 'BC'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Saskatchewan', 'region'] = 'SK'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Calif', 'region'] = 'CA'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Alberta', 'region'] = 'AB'
chinese_restaurants.loc[chinese_restaurants['region'] == 'On', 'region'] = 'ON'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Saakatchewan', 'region'] = 'SK'
chinese_restaurants.loc[chinese_restaurants['region'] == 'On.', 'region'] = 'ON'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Québec', 'region'] = 'QC'
chinese_restaurants.loc[chinese_restaurants['region'] == 'BC Columbia', 'region'] = 'BC'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Bc', 'region'] = 'BC'
chinese_restaurants.loc[chinese_restaurants['region'] == 'M1G 1P9', 'region'] = 'ON'
chinese_restaurants.loc[chinese_restaurants['region'] == 'Onrtario', 'region'] = 'ON'

In [None]:
# done
# display all rows where 'region' is 'Canada' (should be a province instead)
# these all turned out to be locations I recognized in British Columbia so it was a simple fix

fixregion_canada = chinese_restaurants[chinese_restaurants['region'] == 'Canada']
fixregion_canada



Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
15953,4bcfe1da462cb7130195d707,Terracotta - Modern Chinese,49.28348,-123.103001,52 Alexander St,Vancouver,Canada,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xc6\x97\x91PNZ@H\x...,"{'xmin': -123.10300095407857, 'ymin': 49.28348..."
16107,4b04a6fbf964a520f35522e3,Chop & Wok,49.265765,-123.121365,786 W. 6th Ave.,Vancouver,Canada,,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,['closed'],b'\x00\x00\x00\x00\x01\xc0^\xc7\xc4r\xa37&@H\x...,"{'xmin': -123.12136522234923, 'ymin': 49.26576..."
16306,4aec8791f964a52053c821e3,Cafe seventh heaven,49.183088,-123.13393,"2792 aberdeen centre, 4151 hazelbridge",Richmond,Canada,V6x 4j7,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0^\xc8\x92NX*@@H\x97o...,"{'xmin': -123.13392981157176, 'ymin': 49.18308..."


In [89]:
# save after more cleaning
chinese_restaurants.to_csv(output_dir / "chinese_restaurants_cleaned.csv", index=False)

In [85]:
pd.reset_option('display.max_rows')

### Exporting a small subset for mapping

I chose to look at the New York City Metropolitan area and the 4 states that it's attached to, NY-NJ-PA-CT.  I'm called it the tri-state area for now, even though that might not be the most accurate name for it.  I also took a peek at what the data looks like for NYS only.  

In [91]:
state_ny = chinese_restaurants[chinese_restaurants['region'] == 'NY']
print(state_ny.shape)
state_ny.head()

(7056, 27)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,email,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox
12400,511acbade4b006d2e9bdd439,Happy Star Chinese Restaurant,43.848748,-73.42651,87 Montcalm St,Ticonderoga,NY,12883,,,...,,108183300000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R[K\xf2\x02\x11N@E\x...,"{'xmin': -73.42651033594049, 'ymin': 43.848748..."
12950,4e98cbecb8f77b7a98e89113,China Buffet,44.979332,-73.444442,876 Champlain St.,Champlain,NY,12919,,,...,,213517100000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R\\q\xbd\x16\x12\x8c...,"{'xmin': -73.44444205432427, 'ymin': 44.979331..."
12951,5f0d03125c674838b4fc0af9,Empire Buffet,44.979439,-73.444732,876 US Highway 11,Champlain,NY,12919,,,...,,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,"b'\x00\x00\x00\x00\x01\xc0R\\v~vc""@F}^C\x81U\xbb'","{'xmin': -73.44473229943517, 'ymin': 44.979439..."
12952,573cf7ac498e5642fc277b92,China Cafe,44.703619,-73.491966,60 Smithfield Blvd,Plattsburgh,NY,12901,,,...,jiggyteddy@gmail.com,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R_|_1@\x98@FZ\x10.(%G',"{'xmin': -73.49196605500981, 'ymin': 44.703618..."
12953,4d23a15f2ac6f04d588c2e45,No. 1 Chinese,44.69753,-73.480845,344 Cornelia St.,Plattsburgh,NY,12901,,,...,info@hongkongjadebuffet.com,117362000000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R^\xc6+\x9d\xbf\xa6@...,"{'xmin': -73.48084535985126, 'ymin': 44.697529..."


In [90]:
# save out NY state restaurants for geoprocessing test
state_ny.to_csv(output_dir / "state_ny.csv", index=False)

In [122]:
# tri-state area
tri_state = chinese_restaurants[chinese_restaurants['region'].isin(['NY', 'NJ', 'CT', 'PA'])]
print(tri_state.shape)
tri_state.head()

(12959, 28)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox,year_created
12400,511acbade4b006d2e9bdd439,Happy Star Chinese Restaurant,43.848748,-73.42651,87 Montcalm St,Ticonderoga,NY,12883,,,...,108183300000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R[K\xf2\x02\x11N@E\x...,"{'xmin': -73.42651033594049, 'ymin': 43.848748...",2013
12950,4e98cbecb8f77b7a98e89113,China Buffet,44.979332,-73.444442,876 Champlain St.,Champlain,NY,12919,,,...,213517100000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R\\q\xbd\x16\x12\x8c...,"{'xmin': -73.44444205432427, 'ymin': 44.979331...",2011
12951,5f0d03125c674838b4fc0af9,Empire Buffet,44.979439,-73.444732,876 US Highway 11,Champlain,NY,12919,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,"b'\x00\x00\x00\x00\x01\xc0R\\v~vc""@F}^C\x81U\xbb'","{'xmin': -73.44473229943517, 'ymin': 44.979439...",2019
12952,573cf7ac498e5642fc277b92,China Cafe,44.703619,-73.491966,60 Smithfield Blvd,Plattsburgh,NY,12901,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R_|_1@\x98@FZ\x10.(%G',"{'xmin': -73.49196605500981, 'ymin': 44.703618...",2016
12953,4d23a15f2ac6f04d588c2e45,No. 1 Chinese,44.69753,-73.480845,344 Cornelia St.,Plattsburgh,NY,12901,,,...,117362000000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0R^\xc6+\x9d\xbf\xa6@...,"{'xmin': -73.48084535985126, 'ymin': 44.697529...",2011


In [123]:
# save out tri-state area for geoprocessing
tri_state.to_csv("tri_state.csv", index=False)

In [93]:
locality_ny = state_ny['locality'].value_counts()
locality_ny

locality
New York         1368
Brooklyn         1146
Flushing          460
Bronx             407
Staten Island     126
                 ... 
NY                  1
Paterson            1
Shanghai            1
Jersey City         1
Jamesport           1
Name: count, Length: 681, dtype: int64

In [94]:
zipcode_ny = state_ny['postcode'].value_counts()
zipcode_ny

postcode
11354      347
11355      189
11220.0    114
10002      112
10013      107
          ... 
11963.0      1
11930.0      1
12986        1
11954.0      1
12946        1
Name: count, Length: 853, dtype: int64

In [95]:
closed_ny = state_ny['date_closed'].value_counts()
closed_ny

date_closed
2018-05-05    72
2025-05-03    65
2025-05-04    46
2025-05-05    35
2025-05-06    34
              ..
2023-07-26     1
2024-11-26     1
2019-01-18     1
2015-01-22     1
2020-11-08     1
Name: count, Length: 725, dtype: int64

In [96]:
earliest_date = chinese_restaurants['date_created'].min()
print("Earliest date_created in chinese_restaurants:", earliest_date)


Earliest date_created in chinese_restaurants: 2003-12-10


In [None]:
# Convert 'date_created' to datetime if not already
chinese_restaurants['date_created'] = pd.to_datetime(chinese_restaurants['date_created'], errors='coerce')

# Extract year from 'date_created'
chinese_restaurants['year_created'] = chinese_restaurants['date_created'].dt.year

# Count number of restaurants added per year
restaurants_per_year = chinese_restaurants['year_created'].value_counts().sort_index()

print("Number of restaurants added per year:")
for year, count in restaurants_per_year.items():
    print(f"{year}: {count}")


# these numbers are probably influenced by foursquare's popularity which peaked around 2010


Number of restaurants added per year:
2003: 40
2004: 11
2005: 17
2006: 26
2007: 23
2008: 23
2009: 2529
2010: 22076
2011: 11748
2012: 11060
2013: 2538
2014: 1922
2015: 1536
2016: 1621
2017: 1405
2018: 1239
2019: 1430
2020: 619
2021: 907
2022: 1065
2023: 897
2024: 908
2025: 705


In [None]:
# Count all 'name' values in chinese_restaurants that are not unique
# i'd like to analyze these separately using NLP
name_counts = chinese_restaurants['name'].value_counts()
non_unique_names = name_counts[name_counts > 1]
print("Number of non-unique 'name' values:", non_unique_names.sum())
print("Non-unique 'name' values and their counts:")
print(non_unique_names)


Number of non-unique 'name' values: 35026
Non-unique 'name' values and their counts:
name
Panda Express           1683
China Wok                698
China King               520
China Garden             519
China Buffet             475
                        ... 
Foo Lai                    2
Ming Kong Kitchen          2
Mr Wong's Restaurant       2
Maple Gardens              2
Kam Fung Restaurant        2
Name: count, Length: 5000, dtype: int64


In [105]:
non_unique_names.head(50)

name
Panda Express                    1683
China Wok                         698
China King                        520
China Garden                      519
China Buffet                      475
China House                       395
China Star                        306
Great Wall                        290
China Express                     286
New China                         278
China One                         201
Great Wall Chinese Restaurant     196
China Kitchen                     169
Golden Dragon                     161
New China Restaurant              144
Panda Garden                      143
New China Buffet                  142
Manchu Wok                        141
Golden China                      140
China Chef                        135
China Dragon                      133
Jade Garden                       130
China Taste                       127
Hong Kong Express                 125
China Palace                      125
Hong Kong Restaurant              124
China M

In [None]:
# breakdown of chinese cuisine types via 'fsq_category_labels'
# 25 of 32 categories represented
label_counts = chinese_restaurants['fsq_category_labels'].value_counts()
label_counts

fsq_category_labels
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant']                                    60956
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Dim Sum Restaurant']                1034
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Szechuan Restaurant']                661
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Cantonese Restaurant']               565
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Taiwanese Restaurant']               507
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Shanghai Restaurant']                172
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Hunan Restaurant']                   146
['Dining and Drinking > Restaurant > Asian Restaurant > Chinese Restaurant > Hakka Restaurant']                    62
['Dining and Drinking > Restaurant >

In [115]:
chinese_restaurants.shape

(64345, 28)

In [None]:
# starting to separate french speaking and bilingual provinces to in case i decidec to analyze french restaurant names differently
province_qc = chinese_restaurants[chinese_restaurants['region'] == 'QC']
print(province_qc.shape)
province_qc

(503, 28)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox,year_created
11994,23c2d8c020e1409c56e37834,Mets Chinois Express,50.026873,-66.888606,46 Elie-Rochefort Rue,Port-Cartier,QC,G5B 1N2,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P\xb8\xde\xeb\x7fS\x...,"{'xmin': -66.88860595163708, 'ymin': 50.026873...",2015
11995,6ff377df7c394af325c087c0,Mets Chinois Express,49.196445,-68.265163,1133 Lafleche Blvd,Baie-Comeau,QC,G5C 1E1,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Q\x10\xf8pD\x9e\xd0@...,"{'xmin': -68.26516348553855, 'ymin': 49.196444...",2014
11996,4ee92d2b29c220d20d8ef761,Restaurant la maison de Hong-Kong,48.736443,-69.090418,46 138 Rte O,Forestville,QC,G0T 1E0,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0QE\xc9hJ\xee\x19@H^C...,"{'xmin': -69.09041793172254, 'ymin': 48.736442...",2011
11997,4e459c9914959d51c157fae6,Fu Dat,48.511275,-68.460416,1015 Boulevard Sainte-Anne,Rimouski,QC,G5M 1X3,,,...,1.273494e+14,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Q\x1dwr\xc1\xac7@HAq...,"{'xmin': -68.46041554368354, 'ymin': 48.511274...",2011
11998,4cc205b1b2beb1f7432a0d4c,Buffet Chinois,48.452638,-68.524752,Rue Saint Germain Est,Rimouski,QC,,,,...,1.371462e+14,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Q!\x95\x89\x1f\x1f\x...,"{'xmin': -68.52475193061355, 'ymin': 48.452637...",2010
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13324,4eb2c6294690370e0caa7add,Buffet Plus,48.548187,-71.648954,,Alma,QC,,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Q\xe9\x88v\x8f\xdd\x...,"{'xmin': -71.6489540486276, 'ymin': 48.5481870...",2011
13325,4f29ae2d108106f47f093508,Panda Royal,48.448898,-71.086105,"907, boul Sainte-Geneviève",Chicoutimi,QC,G7G 2G3,,,...,3.549194e+14,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Q\xc5\x82\xbd\x0f\xb...,"{'xmin': -71.08610464606033, 'ymin': 48.448898...",2012
13327,516b02a3e4b056efa1401872,Buffet Oriental,48.134700,-78.123816,741 Royale Rue,Malartic,QC,J0Y 1Z0,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0S\x87\xec\x981=\xb7@...,"{'xmin': -78.12381558235835, 'ymin': 48.134700...",2013
13329,510bc81719a9c18822c17259,Mlle Dang,48.228461,-79.012935,"527, av. Larivière",Rouyn-Noranda,QC,J9X 4J3,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0S\xc0\xd3\xed\xddSY@...,"{'xmin': -79.0129351292975, 'ymin': 48.2284606...",2013


In [None]:
# new brunswick is a bilingual province, unlike quebec which is only francophone
# there are other provinces with significant french speaking populations so i'll add more to this later
province_nb = chinese_restaurants[chinese_restaurants['region'] == 'NB']
print(province_nb.shape)
province_nb.head(50)

(97, 28)


Unnamed: 0,fsq_place_id,name,latitude,longitude,address,locality,region,postcode,admin_region,post_town,...,facebook_id,instagram,twitter,fsq_category_ids,fsq_category_labels,placemaker_url,unresolved_flags,geom,bbox,year_created
12000,1555574921fd4f5a02611df0,Canton Place,47.997769,-66.691361,127 Val d'Amour Rd,Campbellton,NB,E3N 5B9,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P\xac?@\xe6\xf2n@G\x...,"{'xmin': -66.69136068871896, 'ymin': 47.997769...",2014
12001,5026e771e4b0f23b01a3781e,China Coast,47.997718,-66.69152,127 Val d'Amour Rd,Campbellton,NB,,,,...,132555400000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P\xacA\xdc\xa16\xf3@...,"{'xmin': -66.69151988738649, 'ymin': 47.997717...",2012
12002,ac9835025b854b04f84373de,Canton Palace,48.066534,-66.378081,442A William St,Dalhousie,NB,E8C 2X6,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P\x982y\xf1\xaa\t@H\...,"{'xmin': -66.3780808315279, 'ymin': 48.0665339...",2011
12003,4d936fddb3273704698b9925,Lucky Dragon,47.69282,-65.701617,956 Principale Rue,Beresford,NB,E8K 2H6,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Pl\xe7J~3\xb0@G\xd8\...,"{'xmin': -65.70161688160329, 'ymin': 47.692819...",2011
12004,f5b93cb55f1945bd6b4373c5,Lis Restaurant and Take Out Inc,47.629322,-65.665942,515 St Peter Ave,Bathurst,NB,E2A 2Y5,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Pj\x9e\xcby2v@G\xd0\...,"{'xmin': -65.66594206653266, 'ymin': 47.629321...",2016
12005,4d1b99618860a0932881b434,House of Lee,47.620258,-65.649617,315 Main St,Bathurst,NB,E2A 1B1,,,...,119482400000000.0,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0Pi\x93S\xdf\xff\x91@...,"{'xmin': -65.6496171653255, 'ymin': 47.6202582...",2010
12006,4ca20b4cd3c2b60cedb2eabc,Cantine Du Village,47.665957,-65.119548,1371 rue des Fondateurs,Paquetville,NB,E8R 1A3,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0PG\xa6\xad\xf6\xb01@...,"{'xmin': -65.11954831209802, 'ymin': 47.665957...",2010
12007,0daef753aa684e65cac33890,Saigon Maxim Restaurant,47.744363,-64.720968,146 Premiere,Shippagan,NB,E8S 1A1,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P.$W$+&@G\xdfGK\x1b\...,"{'xmin': -64.72096804172597, 'ymin': 47.744363...",2011
12008,5ab4dedeaa864986ec696907,Saigon Maxim Restaurant,47.740274,-64.721118,146 1re Rue,Shippagan,NB,E8S 1A4,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P.&\xcd\x87\xe8n@G\x...,"{'xmin': -64.72111833832034, 'ymin': 47.740273...",2010
12010,4baca134f964a52067ff3ae3,Great Wall - Mets chinois,47.514858,-64.911552,3550 rue Principal,Tracadie-Sheila,NB,E1X 1C9,,,...,,,,['4bf58dd8d48988d145941735'],['Dining and Drinking > Restaurant > Asian Res...,https://foursquare.com/placemakers/review-plac...,,b'\x00\x00\x00\x00\x01\xc0P:V\xde\xfb\xc2\xb4@...,"{'xmin': -64.91155218683235, 'ymin': 47.514858...",2010
