In [2]:
import pandas as pd

# Load the clinic table from health.csv; the bare `df` on the last line renders it as the cell output.
# NOTE(review): the loaded frame has an 'Unnamed: 0' column, which looks like a previously
# saved index — confirm whether it should be kept, dropped, or read as the index.
df = pd.read_csv('health.csv')
df

Unnamed: 0,Clinic Name,Times,Address,Info
0,Morrisania Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","1309 Fulton Avenue, Second Floor, Bronx",The clinic may close early once capacity is re...
1,Jamaica Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","90-37 Parsons Boulevard, First Floor, Queens",The clinic may close early once capacity is re...
2,Corona Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","34-33 Junction Boulevard, First Floor, Queens",The clinic may close early once capacity is re...
3,Fort Greene Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","295 Flatbush Avenue Extension, Second Floor, B...",The clinic may close early once capacity is re...
4,Fort Greene Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","295 Flatbush Avenue Extension, First Floor, Br...",Also open Tuesdays from 5 p.m. to 7 p.m.\nThe ...
5,Chelsea Sexual Health Clinic,"Monday to Friday, 8:30 a.m. to 3:30 p.m.","303 9th Avenue, First Floor, Manhattan",The clinic may close earlier if capacity is re...
6,Chelsea Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","303 9th Avenue, First Floor, Manhattan",Also open Tuesdays from 5 p.m. to 7 p.m. for S...
7,Central Harlem Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","2238 Fifth Avenue, First Floor, Manhattan",The clinic may close early once capacity is re...


In [3]:
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from tqdm import tqdm
import time
import re

# Initialize the Nominatim geocoder client used by the lookups below.
# NOTE(review): "my_app" is a generic User-Agent; Nominatim's usage policy asks for a value
# that identifies the application — consider a project-specific string.
geolocator = Nominatim(user_agent="my_app")

# Function to clean the address by removing floor information
def clean_address(address):
    """Return ``address`` with its "... Floor" segment removed.

    A floor segment is a comma-delimited part that ends in the word "Floor"
    (case-insensitive), e.g. ``", Second Floor"``. It is removed whether it
    sits between two other parts or is the last part of the address.

    Parameters
    ----------
    address : str
        Comma-separated address such as ``"303 9th Avenue, First Floor, Manhattan"``.

    Returns
    -------
    str
        The address without the floor segment; unchanged if none is present.
    """
    # The lookahead keeps the following comma (if any) in place, so the
    # remaining parts stay separated by ", " exactly as in the input.
    return re.sub(r',\s*[^,]*Floor\s*(?=,|$)', '', address, flags=re.IGNORECASE)

# Clean the address column (floor segment removed) and append ', NY' to each value
df['CLEAN_ADDRESS'] = df['Address'].apply(clean_address) + ', NY'
# Start the coordinate columns as float NaN rather than None: None would create
# object-dtype columns, whereas NaN keeps them numeric and still marks
# "not geocoded yet".
df['LONGITUDE'] = float('nan')
df['LATITUDE'] = float('nan')

# Function to get coordinates with retry logic
def get_coordinates(address, geolocator, retries=3, timeout=10, pause=1):
    """Geocode ``address`` and return ``(latitude, longitude)``.

    Parameters
    ----------
    address : str
        Address string passed to ``geolocator.geocode``.
    geolocator : object
        Any object exposing ``geocode(address, timeout=...)`` whose result has
        ``latitude`` and ``longitude`` attributes (e.g. a geopy geocoder).
    retries : int, optional
        Maximum number of attempts when the request times out.
    timeout : float, optional
        Per-request timeout in seconds forwarded to ``geocode``.
    pause : float, optional
        Seconds to wait between a timed-out attempt and the next one.

    Returns
    -------
    tuple
        ``(latitude, longitude)`` on success, or ``(None, None)`` when the
        service finds no match or every attempt timed out.
    """
    for attempt in range(retries):
        try:
            location = geolocator.geocode(address, timeout=timeout)
        except GeocoderTimedOut:
            # Pause before retrying. After the final attempt there is no retry
            # to wait for, so return promptly instead of sleeping.
            if attempt < retries - 1:
                time.sleep(pause)
            continue
        if location:
            return location.latitude, location.longitude
        # The service answered but found nothing: retrying would not help.
        return None, None
    return None, None

# Apply geocoding with progress bar.
# Each distinct address is looked up only once (several clinics share a building),
# and consecutive requests are spaced one second apart to respect Nominatim's
# limit of one request per second.
coords_by_address = {}
unique_addresses = df['CLEAN_ADDRESS'].unique()
for position, address in enumerate(tqdm(unique_addresses, desc="Geocoding Addresses")):
    if position:
        time.sleep(1)
    coords_by_address[address] = get_coordinates(address, geolocator)

# Map results back by address value rather than by row position, so the
# assignment is correct whatever the DataFrame's index looks like.
df['LATITUDE'] = df['CLEAN_ADDRESS'].map(lambda addr: coords_by_address[addr][0])
df['LONGITUDE'] = df['CLEAN_ADDRESS'].map(lambda addr: coords_by_address[addr][1])

# NOTE(review): spot-check the returned coordinates — the latitude recorded for
# '2238 Fifth Avenue, Manhattan, NY' (about 40.7515) looks too far south for a
# Central Harlem clinic; confirm the geocoder matched the right building.

# Print the updated DataFrame
print(df[['CLEAN_ADDRESS', 'LATITUDE', 'LONGITUDE']])

Geocoding Addresses: 100%|██████████| 8/8 [00:04<00:00,  1.91it/s]

                                 CLEAN_ADDRESS   LATITUDE  LONGITUDE
0                1309 Fulton Avenue, Bronx, NY  40.832752  -73.90346
1          90-37 Parsons Boulevard, Queens, NY  40.704006 -73.800216
2         34-33 Junction Boulevard, Queens, NY   40.75423 -73.871978
3  295 Flatbush Avenue Extension, Brooklyn, NY  40.691812 -73.982048
4  295 Flatbush Avenue Extension, Brooklyn, NY  40.691812 -73.982048
5                303 9th Avenue, Manhattan, NY  40.749583 -73.999811
6                303 9th Avenue, Manhattan, NY  40.749583 -73.999811
7             2238 Fifth Avenue, Manhattan, NY  40.751518 -73.981566





In [4]:
# Display the full DataFrame, now including CLEAN_ADDRESS, LONGITUDE and LATITUDE
df

Unnamed: 0,Clinic Name,Times,Address,Info,CLEAN_ADDRESS,LONGITUDE,LATITUDE
0,Morrisania Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","1309 Fulton Avenue, Second Floor, Bronx",The clinic may close early once capacity is re...,"1309 Fulton Avenue, Bronx, NY",-73.90346,40.832752
1,Jamaica Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","90-37 Parsons Boulevard, First Floor, Queens",The clinic may close early once capacity is re...,"90-37 Parsons Boulevard, Queens, NY",-73.800216,40.704006
2,Corona Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","34-33 Junction Boulevard, First Floor, Queens",The clinic may close early once capacity is re...,"34-33 Junction Boulevard, Queens, NY",-73.871978,40.75423
3,Fort Greene Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","295 Flatbush Avenue Extension, Second Floor, B...",The clinic may close early once capacity is re...,"295 Flatbush Avenue Extension, Brooklyn, NY",-73.982048,40.691812
4,Fort Greene Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","295 Flatbush Avenue Extension, First Floor, Br...",Also open Tuesdays from 5 p.m. to 7 p.m.\nThe ...,"295 Flatbush Avenue Extension, Brooklyn, NY",-73.982048,40.691812
5,Chelsea Sexual Health Clinic,"Monday to Friday, 8:30 a.m. to 3:30 p.m.","303 9th Avenue, First Floor, Manhattan",The clinic may close earlier if capacity is re...,"303 9th Avenue, Manhattan, NY",-73.999811,40.749583
6,Chelsea Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","303 9th Avenue, First Floor, Manhattan",Also open Tuesdays from 5 p.m. to 7 p.m. for S...,"303 9th Avenue, Manhattan, NY",-73.999811,40.749583
7,Central Harlem Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","2238 Fifth Avenue, First Floor, Manhattan",The clinic may close early once capacity is re...,"2238 Fifth Avenue, Manhattan, NY",-73.981566,40.751518


In [5]:
# Function to extract floor information and clean the address
def split_address(address):
    """Split a comma-separated address into a cleaned address and floor info.

    Parameters
    ----------
    address : str
        Address such as ``"303 9th Avenue, First Floor, Manhattan"``.

    Returns
    -------
    tuple
        ``(clean_address, floor)`` where ``floor`` is ``None`` when the
        address carries no floor information.

        * three parts: the middle part is taken as the floor
          (``"street, floor, borough"``);
        * more than three parts: every part mentioning "floor" is pulled out
          as the floor info and the rest is kept as the address;
        * one or two parts: the address is returned as is (whitespace around
          the parts normalised) with no floor info.
    """
    parts = [part.strip() for part in address.split(',')]

    if len(parts) == 3:
        street, floor, borough = parts
        # An empty or blank middle part means "no floor info", not ''.
        return f"{street}, {borough}", floor or None

    if len(parts) > 3:
        # Position alone can't identify the floor here, so pick it by content.
        floor_parts = [part for part in parts if 'floor' in part.lower()]
        kept_parts = [part for part in parts if 'floor' not in part.lower()]
        return ', '.join(kept_parts), ', '.join(floor_parts) or None

    return ', '.join(parts), None


# Extract floor information into a new column
# Apply the function to split the address and extract floor info.
# The columns are built from a list of (clean_address, floor) pairs rather than
# by unpacking zip(*...), so an empty DataFrame yields two empty columns
# instead of an unpacking error.
address_pairs = df['Address'].apply(split_address).tolist()
df['CLEAN_ADDRESS'] = [clean for clean, _floor in address_pairs]
df['FLOOR_INFO'] = [floor for _clean, floor in address_pairs]


In [6]:
# Display the DataFrame again to check the rewritten CLEAN_ADDRESS and the new FLOOR_INFO column
df

Unnamed: 0,Clinic Name,Times,Address,Info,CLEAN_ADDRESS,LONGITUDE,LATITUDE,FLOOR_INFO
0,Morrisania Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","1309 Fulton Avenue, Second Floor, Bronx",The clinic may close early once capacity is re...,"1309 Fulton Avenue, Bronx",-73.90346,40.832752,Second Floor
1,Jamaica Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","90-37 Parsons Boulevard, First Floor, Queens",The clinic may close early once capacity is re...,"90-37 Parsons Boulevard, Queens",-73.800216,40.704006,First Floor
2,Corona Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","34-33 Junction Boulevard, First Floor, Queens",The clinic may close early once capacity is re...,"34-33 Junction Boulevard, Queens",-73.871978,40.75423,First Floor
3,Fort Greene Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","295 Flatbush Avenue Extension, Second Floor, B...",The clinic may close early once capacity is re...,"295 Flatbush Avenue Extension, Brooklyn",-73.982048,40.691812,Second Floor
4,Fort Greene Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","295 Flatbush Avenue Extension, First Floor, Br...",Also open Tuesdays from 5 p.m. to 7 p.m.\nThe ...,"295 Flatbush Avenue Extension, Brooklyn",-73.982048,40.691812,First Floor
5,Chelsea Sexual Health Clinic,"Monday to Friday, 8:30 a.m. to 3:30 p.m.","303 9th Avenue, First Floor, Manhattan",The clinic may close earlier if capacity is re...,"303 9th Avenue, Manhattan",-73.999811,40.749583,First Floor
6,Chelsea Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","303 9th Avenue, First Floor, Manhattan",Also open Tuesdays from 5 p.m. to 7 p.m. for S...,"303 9th Avenue, Manhattan",-73.999811,40.749583,First Floor
7,Central Harlem Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.","2238 Fifth Avenue, First Floor, Manhattan",The clinic may close early once capacity is re...,"2238 Fifth Avenue, Manhattan",-73.981566,40.751518,First Floor


In [8]:
# Replace the raw address column with the cleaned one.
# The guard makes the cell safe to re-run: once CLEAN_ADDRESS has been renamed
# to Address, dropping 'Address' again would delete the only address column.
cols = ['Address']
if 'CLEAN_ADDRESS' in df.columns:
    df = df.drop(columns=cols).rename(columns={'CLEAN_ADDRESS': 'Address'})
df

Unnamed: 0,Clinic Name,Times,Info,Address,LONGITUDE,LATITUDE,FLOOR_INFO
0,Morrisania Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",The clinic may close early once capacity is re...,"1309 Fulton Avenue, Bronx",-73.90346,40.832752,Second Floor
1,Jamaica Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",The clinic may close early once capacity is re...,"90-37 Parsons Boulevard, Queens",-73.800216,40.704006,First Floor
2,Corona Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",The clinic may close early once capacity is re...,"34-33 Junction Boulevard, Queens",-73.871978,40.75423,First Floor
3,Fort Greene Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",The clinic may close early once capacity is re...,"295 Flatbush Avenue Extension, Brooklyn",-73.982048,40.691812,Second Floor
4,Fort Greene Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",Also open Tuesdays from 5 p.m. to 7 p.m.\nThe ...,"295 Flatbush Avenue Extension, Brooklyn",-73.982048,40.691812,First Floor
5,Chelsea Sexual Health Clinic,"Monday to Friday, 8:30 a.m. to 3:30 p.m.",The clinic may close earlier if capacity is re...,"303 9th Avenue, Manhattan",-73.999811,40.749583,First Floor
6,Chelsea Express Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",Also open Tuesdays from 5 p.m. to 7 p.m. for S...,"303 9th Avenue, Manhattan",-73.999811,40.749583,First Floor
7,Central Harlem Sexual Health Clinic,"Monday through Friday, 8:30 a.m. to 3:30 p.m.",The clinic may close early once capacity is re...,"2238 Fifth Avenue, Manhattan",-73.981566,40.751518,First Floor


In [9]:
# Write the cleaned, geocoded table to CSV without the pandas index.
# NOTE(review): this overwrites the same 'health.csv' that the first cell reads, so a later
# Restart & Run All would start from already-transformed data — confirm this is intended,
# or write to a separate output file.
df.to_csv('health.csv', index=False)