In [5]:
import pandas as pd
import json
import os
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut, GeocoderServiceError
from geopy.extra.rate_limiter import RateLimiter

In [6]:
# Select which portfolio-company export to process. Keep exactly one of the
# assignments below active: `read_file` is the CSV file stem and
# `location_col` is the column holding the headquarters location in that file.
read_file, location_col = "Second Sight Ventures Portcos-Table 1", "HQ City"
# read_file, location_col = "Drive Capital Portcos-Table 1", "HQ Location"
# read_file, location_col = "GC Active Portcos-Table 1", "HQ City"

# Read the selected export from the V6 input folder.
# Alternative source folder: pd.read_csv("in/Map Project V4/"+read_file+".csv")
df = pd.read_csv(f"in/Map Project V6/{read_file}.csv")

# For a quick trial on a small sample: df = df.head(10)
# To process several exports at once, concatenate their frames instead:
#   df = pd.concat([df1, df2], ignore_index=True)

In [7]:
# Print the column labels of the loaded table so the value assigned to
# `location_col` can be checked against the columns the CSV actually has.
print(df.columns)

Index(['Unnamed: 0', 'Company Name', 'Industry', 'HQ City', 'Employee Size',
       'Valuation ($mm)', 'Last Financing Size ($mm)', 'Year Founded',
       'First Financing Date', 'Website'],
      dtype='object')


In [8]:
# Geocode every distinct headquarters location and attach coordinates to `df`.
#
# Inputs from earlier cells: `df`, `location_col`, `read_file`.
# Side effects: reads and rewrites the JSON cache `location_coords.json`,
# adds "Latitude"/"Longitude" columns to `df`, and writes
# "<read_file>_latlon.csv".

coords_file = 'location_coords.json'


def _load_coords_cache(path):
    """Return the cached {location: [lat, lon]} mapping, or {} if `path` does not exist."""
    if not os.path.exists(path):
        return {}
    with open(path, 'r') as f:
        return json.load(f)


def _save_coords_cache(path, coords):
    """Write the {location: [lat, lon]} mapping to `path` as JSON."""
    with open(path, 'w') as f:
        json.dump(coords, f)


# Set up geopy with OpenStreetMap Nominatim: 5 s request timeout and at least
# 1 s between calls. RateLimiter swallows geocoder errors by default and
# returns None, which would make a transient failure indistinguishable from
# "no match"; swallow_exceptions=False lets the error reach the handler below
# once RateLimiter's own retries are exhausted.
geolocator = Nominatim(user_agent="company_geocoder", timeout=5)
geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1, swallow_exceptions=False)

# Load existing results if available
location_coords = _load_coords_cache(coords_file)

# Get unique locations to minimize API calls
unique_locations = df[location_col].dropna().unique()

# Locations whose lookup failed with a timeout/service error during this run.
failed_locations = []

# Loop with caching and saving progress
for loc in unique_locations:
    if loc in location_coords:
        continue  # Skip if already done (including cached "not found" entries)
    try:
        geo = geocode(loc)
    except (GeocoderTimedOut, GeocoderServiceError) as e:
        # Transient failure: deliberately NOT written to the cache, so the
        # location is retried the next time this cell runs instead of being
        # stored permanently as [None, None].
        print(f"Timeout or service error for {loc}: {e}")
        failed_locations.append(loc)
        continue
    if geo:
        location_coords[loc] = [geo.latitude, geo.longitude]
        print(loc, geo.latitude, geo.longitude)
    else:
        # The service answered but found no match; cache that so the same
        # query is not repeated on every run.
        location_coords[loc] = [None, None]
        print(loc, "none found")
    # Save progress after every lookup so an interrupted run can resume
    _save_coords_cache(coords_file, location_coords)

if failed_locations:
    print(f"{len(failed_locations)} location(s) failed and will be retried on the next run")

# Map back the lat/lon to the dataframe; rows whose location is missing,
# unmatched or failed get None in both columns.
df["Latitude"] = df[location_col].map(lambda x: location_coords.get(x, (None, None))[0])
df["Longitude"] = df[location_col].map(lambda x: location_coords.get(x, (None, None))[1])

# Save to a new CSV
# NOTE(review): the input is read from "in/Map Project V6/" but the result is
# written to "in/Map Project V4/" -- confirm this is the intended folder.
output_path = "in/Map Project V4/"+read_file+"_latlon.csv"
df.to_csv(output_path, index=False)
print(f"Saved as {output_path}")

Saved as portcos_with_latlon.csv
