In [14]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

In [15]:
# Read the GTFS stop table and the stop-times table from disk.
stops = pd.read_csv("/Users/abeburton/Desktop/web_mapping/WMfinal/GTFS/stops.txt")
stop_times = pd.read_csv("/Users/abeburton/Desktop/web_mapping/WMfinal/GTFS/stop_times.txt")

# Inner join on stop_id: every stop-time row is paired with the attributes of
# its stop; rows whose stop_id appears in only one of the two tables are dropped.
stop_info = stops.merge(stop_times, how='inner', on='stop_id')

In [16]:
def adjust_time(time_str):
    """Wrap an ``H:M:S`` time string onto a 24-hour clock.

    Hour values of 24 or more are reduced modulo 24, so ``"24:01:32"``
    becomes ``"00:01:32"`` and ``"48:15:00"`` becomes ``"00:15:00"``.
    Minutes and seconds are passed through unchanged.

    Parameters
    ----------
    time_str : str
        Time in ``H:M:S`` form with integer fields separated by ``:``.

    Returns
    -------
    str
        The time formatted as zero-padded ``HH:MM:SS`` with the hour in 0-23.

    Raises
    ------
    ValueError
        If the string does not split into exactly three integer fields.
    """
    # Split the time string into hours, minutes, and seconds
    h, m, s = map(int, time_str.split(':'))

    # Use modulo rather than a single subtraction so hours of 48 or more
    # also land in 0-23; a leftover "24" would not parse with '%H' later on.
    h %= 24

    # Reconstruct the time string, formatted as HH:MM:SS
    return f"{h:02}:{m:02}:{s:02}"

# Normalise 'departure_time': wrap hours onto a 24-hour clock, round-trip the
# values through datetime parsing (which rejects anything not HH:MM:SS), order
# the rows by time of day, then store the column as plain strings again.
wrapped_times = stop_info['departure_time'].apply(adjust_time).str.strip()
stop_info['departure_time'] = pd.to_datetime(wrapped_times, format='%H:%M:%S').dt.time
stop_info = stop_info.sort_values(by='departure_time', ascending=True)
stop_info['departure_time'] = stop_info['departure_time'].astype(str)

In [17]:
# Keep only the first row for each (stop_id, departure_time) pair.
stop_info = stop_info.drop_duplicates(subset=['stop_id', 'departure_time'])

In [14]:
# Quick size check of the stop/time table (rows, columns).
# NOTE(review): the recorded output below reports 4 columns even though the
# column drop only happens in the following cell, and this cell's execution
# count (14) is out of sequence -- the output looks stale; re-run the notebook
# top to bottom to confirm.
stop_info.shape

(403051, 4)

In [18]:
# Drop every column that is not needed downstream.
# errors='ignore' keeps this cell re-runnable (a second run no longer raises
# KeyError because the columns are already gone) and tolerates feeds that omit
# some of these columns; rebinding instead of inplace=True keeps the step explicit.
stop_info = stop_info.drop(
    columns=[
        'stop_code', 'stop_name', 'arrival_time', 'stop_desc', 'zone_id',
        'stop_url', 'location_type', 'parent_station', 'stop_timezone',
        'wheelchair_boarding', 'trip_id', 'stop_sequence', 'stop_headsign',
        'pickup_type', 'drop_off_type', 'shape_dist_traveled', 'timepoint',
    ],
    errors='ignore',
)

In [19]:
# Build the point geometries in one vectorised call (x = longitude,
# y = latitude) instead of constructing one shapely Point per row in a
# Python loop, which is slow for a table of this size.
geometry = gpd.points_from_xy(stop_info['stop_lon'], stop_info['stop_lat'])

# Declare the coordinate reference system at construction time.
# EPSG 4326 is WGS 84, commonly used for GPS coordinates.
geo_stop_info = gpd.GeoDataFrame(stop_info, geometry=geometry, crs="EPSG:4326")

# Show the resulting GeoDataFrame as the cell output
geo_stop_info

Unnamed: 0,stop_id,stop_lat,stop_lon,departure_time,geometry
128717,14120,40.751739,-111.865235,00:00:00,POINT (-111.86523 40.75174)
141764,14300,40.686958,-111.856951,00:00:00,POINT (-111.85695 40.68696)
355220,22861,40.724166,-111.897075,00:00:00,POINT (-111.89708 40.72417)
213672,16533,40.696508,-111.937859,00:00:00,POINT (-111.93786 40.69651)
196320,16109,40.681191,-111.939051,00:00:00,POINT (-111.93905 40.68119)
...,...,...,...,...,...
56207,11653,41.165033,-111.958329,23:59:57,POINT (-111.95833 41.16503)
440629,25401,40.775592,-111.916961,23:59:57,POINT (-111.91696 40.77559)
283923,18518,40.746677,-111.888041,23:59:58,POINT (-111.88804 40.74668)
190783,15929,40.697187,-111.934721,23:59:59,POINT (-111.93472 40.69719)


In [20]:
# The coordinates now live in the 'geometry' column, so the raw lat/lon
# columns are removed from the GeoDataFrame.
geo_stop_info = geo_stop_info.drop(columns=['stop_lat','stop_lon'])

In [21]:
# Write the stop/departure points out as GeoJSON.
geo_stop_info.to_file(
    "/Users/abeburton/Desktop/web_mapping/WMfinal/GTFS/stop_info.geojson",
    driver="GeoJSON",
)

In [14]:
# Write the tabular (non-geo) version of the data as CSV. The DataFrame index
# is written as the first, unnamed column (pandas' default, made explicit here).
# NOTE(review): this cell's execution count (14) is out of sequence with the
# cells around it -- confirm which state of stop_info was actually exported.
stop_info.to_csv(
    "/Users/abeburton/Desktop/web_mapping/WMfinal/GTFS/stop_info.csv",
    index=True,
)

In [10]:
# Put the transit cost info in the same projection as the other data
import geopandas as gpd

# Load the GeoJSON file and reproject it to WGS 84 (EPSG:4326) in one step
gdf = gpd.read_file(
    '/Users/abeburton/Desktop/web_mapping/WMfinal/Low_Transportation_Cost_Index_-6469997285854372953.geojson'
).to_crs(epsg=4326)

# Display (rows, columns) as the cell output
gdf.shape

(72240, 12)

In [11]:
# Keep only rows for the state of Utah whose COUNTY code is in the target set.
# NOTE(review): presumably these are 3-digit county FIPS codes stored as
# strings -- confirm that '001', '035' and '049' are the intended counties.
in_utah = gdf['STATE_NAME'] == 'Utah'
in_target_county = gdf['COUNTY'].isin(['001','035','049'])
gdf = gdf.loc[in_utah & in_target_county, :]

In [13]:
# Export the filtered, reprojected data as GeoJSON.
# NOTE(review): this path is the same file the data was read from earlier in
# the notebook, so the export overwrites the original source file rather than
# creating a new one; re-running the notebook afterwards would start from the
# already-filtered data. Consider writing to a different filename.
gdf.to_file(
    '/Users/abeburton/Desktop/web_mapping/WMfinal/Low_Transportation_Cost_Index_-6469997285854372953.geojson',
    driver='GeoJSON',
)