This script builds a mapping of Craigslist region codes to regions. The resulting table will be uploaded to GBQ to serve as a lookup table for a cloud function.

In [4]:
import requests
import time
import csv
import re
import json

# Crawl the Craigslist search API once per candidate region code, record the
# area name and location metadata returned for each code, and write everything
# to a CSV file. Codes that fail for any reason are still recorded, with
# region 'none' and the remaining columns left empty.

# --- Tunable parameters -----------------------------------------------------
FIRST_REGION_CODE = 1
LAST_REGION_CODE = 1000              # inclusive
REQUEST_TIMEOUT_SECONDS = 30         # without a timeout a stalled connection hangs the whole crawl
PAUSE_BETWEEN_REQUESTS_SECONDS = 1   # pause between consecutive requests
OUTPUT_CSV = 'region_to_area_name_extended.csv'

# Error message the API returns for a region code it does not recognise.
BAD_AREA_MESSAGE = "That url is unsupported (bad area)"

# Captures everything after the first comma inside the cl.jsonp(...) wrapper;
# that captured text is what gets parsed as JSON.
JSONP_PATTERN = re.compile(r'cl\.jsonp\(.*?,\s*(.*)\)')

FIELDNAMES = ['code', 'region', 'areaId', 'city', 'country', 'lat', 'lon', 'postal', 'radius', 'regionCode', 'url']

# Location keys reported by the API; used to build an all-None placeholder.
LOCATION_KEYS = ('areaId', 'city', 'country', 'lat', 'lon', 'postal', 'radius', 'region', 'url')

# One dict per region code, in crawl order.
results = []

for region_code in range(FIRST_REGION_CODE, LAST_REGION_CODE + 1):
    url = f"https://sapi.craigslist.org/web/v8/postings/search/full?CC=US&batch={region_code}-0-360-0-0-1&lang=en&searchPath=apa"
    try:
        # Timeout errors are RequestException subclasses, so they are handled
        # by the first except clause below.
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code == 200:
            match = JSONP_PATTERN.search(response.text)
            if match is None:
                # Fail with a readable message instead of an AttributeError on None.
                raise ValueError("response body is not in the expected cl.jsonp(...) format")

            # Extract the JSON data from the response
            data = json.loads(match.group(1))

            # A "bad area" error means the code does not map to any region.
            # .get('message') tolerates error entries that lack a message key.
            errors = data.get('errors') or []
            if any(error.get('message') == BAD_AREA_MESSAGE for error in errors):
                area_name = 'none'
                location_info = dict.fromkeys(LOCATION_KEYS)
            else:
                # The areas mapping is keyed by the region code as a string.
                area_name = data['data']['areas'].get(str(region_code), {}).get('name', 'none')
                # "or {}" also covers a location that is present but null.
                location_info = data['data'].get('location') or {}

            results.append({
                "code": region_code,
                "region": area_name,
                "areaId": location_info.get('areaId'),
                "city": location_info.get('city'),
                "country": location_info.get('country'),
                "lat": location_info.get('lat'),
                "lon": location_info.get('lon'),
                "postal": location_info.get('postal'),
                "radius": location_info.get('radius'),
                "regionCode": location_info.get('region'),
                "url": location_info.get('url')
            })
            print(f"Region Code: {region_code}, Area Name: {area_name}, Location Info: {location_info}")
        else:
            print(f"Failed to fetch data for region code {region_code}: HTTP Status {response.status_code}")
            results.append({"code": region_code, "region": 'none'})

    except requests.RequestException as e:
        print(f"Request error for region code {region_code}: {e}")
        results.append({"code": region_code, "region": 'none'})
    except KeyError as e:
        print(f"Key error for region code {region_code}: {e}")
        results.append({"code": region_code, "region": 'none'})
    except Exception as e:
        # Deliberate best-effort: one bad region must not abort the crawl.
        print(f"Unexpected error for region code {region_code}: {e}")
        results.append({"code": region_code, "region": 'none'})

    # Pause before the next request
    time.sleep(PAUSE_BETWEEN_REQUESTS_SECONDS)

# Write the results to a CSV file. The explicit UTF-8 encoding keeps the file
# independent of the platform's default encoding (pandas reads UTF-8 by default).
# Rows that only carry 'code' and 'region' get empty strings for the other columns.
with open(OUTPUT_CSV, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=FIELDNAMES)
    writer.writeheader()
    writer.writerows(results)

print("CSV file created successfully.")

Region Code: 1, Area Name: sfbay, Location Info: {'areaId': 1, 'city': 'SF bay area', 'country': 'US', 'lat': 37.5, 'lon': -122.25, 'postal': '', 'radius': 60, 'region': 'CA', 'url': 'sfbay.craigslist.org'}
Region Code: 2, Area Name: seattle, Location Info: {'areaId': 2, 'city': 'seattle-tacoma', 'country': 'US', 'lat': 47.6064, 'lon': -122.331001, 'postal': '', 'radius': 60, 'region': 'WA', 'url': 'seattle.craigslist.org'}
Region Code: 3, Area Name: newyork, Location Info: {'areaId': 3, 'city': 'new york city', 'country': 'US', 'lat': 40.714199, 'lon': -74.006401, 'postal': '', 'radius': 60, 'region': 'NY', 'url': 'newyork.craigslist.org'}
Region Code: 4, Area Name: boston, Location Info: {'areaId': 4, 'city': 'boston', 'country': 'US', 'lat': 42.358299, 'lon': -71.060303, 'postal': '', 'radius': 60, 'region': 'MA', 'url': 'boston.craigslist.org'}
Failed to fetch data for region code 5: HTTP Status 400
Failed to fetch data for region code 6: HTTP Status 400
Region Code: 7, Area Name: 

In [7]:
import pandas as pd

# Load the CSV written by the scraping cell. That cell writes
# 'region_to_area_name_extended.csv'; the previously referenced
# 'region_to_area_name.csv' is not produced anywhere in this notebook, so a
# fresh Restart & Run All would fail or read a stale file.
# NOTE(review): confirm that no separate, older file was intended here.
df = pd.read_csv('region_to_area_name_extended.csv')

# Per-column count of non-null values among rows whose country is 'US'.
df[df['country'] == 'US'].count()


code          415
region        415
areaId        415
city          415
country       415
lat           415
lon           415
postal          0
radius        415
regionCode    415
url           415
dtype: int64