In [1]:
import pandas as pd
from models import get_conn
import geopandas as gpd
import googlemaps
import os
from dotenv import load_dotenv
# Keep wide DataFrames on a single line when printed instead of wrapping columns.
pd.set_option('display.expand_frame_repr', False) # display full data in terminal

# Load variables from a .env file into the environment; main() later reads
# GOOGLE_API_KEY via os.getenv.
load_dotenv()
# get_conn() is provided by the project's models module. `engine` is handed to
# GeoDataFrame.to_postgis as the connection, and `Session` is called in main().
engine, Session = get_conn()

In [None]:
# Given a file in csv_files, which was manually exported from Google Maps for the Van Trip, read the file and insert the data into the database
# appending new data can be done by adding rows to additional_van_stops.csv and running this script again.


# Use the actual Google Maps API instead of just web scraping. Issues: I'm using the 'Title' of each pin, which didn't return an exact match for all locations. I manually edited ~20 locations in the CSV,
# adding extra keywords to the title (often a state) to get the Google Maps API to return the right result. I still have 4 rows that Google couldn't match. Since some titles may return multiple
# results, this doesn't guarantee an exact match.

def get_lat_long(title, gmaps):
    """Look up the coordinates of a place by its title.

    Passes ``title`` to ``gmaps.geocode`` and reads the location of the first
    result that comes back.

    Args:
        title: Text describing the place to geocode.
        gmaps: Client object exposing a ``geocode(title)`` method.

    Returns:
        A ``(lat, lng)`` tuple taken from the first geocoding result, or
        ``(None, None)`` when the lookup returns nothing.
    """
    matches = gmaps.geocode(title)
    if not matches:
        return None, None

    # Only the first candidate is used, even if several were returned.
    point = matches[0]['geometry']['location']
    return point['lat'], point['lng']


def fetch_all_lat_long(df, gmaps):
    """Geocode every row of ``df`` and attach the coordinates as new columns.

    Each row's ``title`` is passed to ``get_lat_long``. The results are stored
    in ``latitude`` and ``longitude`` columns on ``df`` itself (the frame is
    modified in place), with ``None`` for rows that could not be geocoded.

    Args:
        df: DataFrame whose rows expose a ``title`` field.
        gmaps: Client object forwarded to ``get_lat_long``.

    Returns:
        The same ``df`` object, now carrying the two coordinate columns.
    """
    # One (lat, lng) pair per row, in row order.
    coords = [get_lat_long(row.title, gmaps) for _, row in df.iterrows()]

    df['latitude'] = [lat for lat, _ in coords]
    df['longitude'] = [lng for _, lng in coords]
    return df

def main():
    """Geocode the stops listed in the CSV and append them to ``waypoints``.

    Reads ``csv_files/additional_van_stops.csv``, fills empty ``notes`` with a
    default label, drops the ``comment`` and ``url`` columns, looks up
    coordinates for every row, and appends the rows that have no missing
    values to the ``waypoints`` table as EPSG:4326 points. Rows that are
    dropped are reported on stdout instead of disappearing silently.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is not set in the environment.
    """
    api_key = os.getenv('GOOGLE_API_KEY')
    if not api_key:
        # Fail early with a message that names the missing variable.
        raise ValueError("GOOGLE_API_KEY is not set; add it to the environment or the .env file")
    gmaps = googlemaps.Client(key=api_key)

    # The original trip export is 'csv_files/2023_2024_Van_Trip.csv'; swap the
    # path below to re-import it.
    stops = pd.read_csv('csv_files/additional_van_stops.csv')
    stops['notes'] = stops['notes'].fillna("2023 2024 Van Trip")
    stops = stops.drop(columns=['comment', 'url'])

    geocoded = fetch_all_lat_long(stops, gmaps)

    # Same rows a plain dropna() would discard (any missing value), but kept
    # around long enough to tell the user which stops were skipped.
    incomplete = geocoded.isna().any(axis=1)
    if incomplete.any():
        skipped_titles = geocoded.loc[incomplete, 'title'].tolist()
        print(f"Skipping {len(skipped_titles)} row(s) with missing data or no geocoding match: {skipped_titles}")
    locations_df = geocoded.loc[~incomplete].copy()

    if locations_df.empty:
        print("No rows left to insert into 'waypoints'.")
        return

    gdf = gpd.GeoDataFrame(
        locations_df,
        geometry=gpd.points_from_xy(locations_df['longitude'], locations_df['latitude']),
        crs="EPSG:4326",
    )

    # to_postgis writes through the engine directly, so no ORM session is needed.
    gdf.to_postgis(name='waypoints', con=engine, if_exists='append')
    
# Only run the import when this file is executed directly (script or notebook
# cell), not when it is imported from another module.
if __name__ == "__main__":
    main()