In [41]:
import geopandas as gpd
import pandas as pd

In [42]:
# Load the stops table (stops.txt) from the raw transit feed: one row per stop,
# including its stop_lat / stop_lon coordinates.
stops_path = '../data/raw-data/google_daily_transit/stops.txt'
stops = pd.read_csv(stops_path)

In [43]:
# Preview the loaded stops table to check its columns and row count.
stops

Unnamed: 0,stop_id,stop_code,stop_name,stop_desc,stop_lat,stop_lon,zone_id,stop_url,location_type,parent_station,stop_timezone,wheelchair_boarding
0,10005,10005,40th Ave NE & NE 51st St,,47.665886,-122.284897,1,,0,,America/Los_Angeles,1
1,10010,10010,NE 55th St & 39th Ave NE,,47.668579,-122.285667,1,,0,,America/Los_Angeles,1
2,10020,10020,NE 55th St & 37th Ave NE,,47.668579,-122.288300,1,,0,,America/Los_Angeles,1
3,10030,10030,NE 55th St & 35th Ave NE,,47.668579,-122.290512,1,,0,,America/Los_Angeles,1
4,10040,10040,NE 55th St & 33rd Ave NE,,47.668583,-122.293015,1,,0,,America/Los_Angeles,1
...,...,...,...,...,...,...,...,...,...,...,...,...
6514,9978,9978,Sand Point Way NE & 40th Ave NE,,47.662743,-122.285202,1,,0,,America/Los_Angeles,1
6515,9980,9980,NE 55th St & Princeton Ave NE,,47.669342,-122.276146,1,,0,,America/Los_Angeles,1
6516,9990,9990,NE 55th St & 45th Ave NE,,47.669025,-122.279846,1,,0,,America/Los_Angeles,1
6517,99908,99908,Vashon Passenger Ferry & Vashon Ferry Dock,,47.510941,-122.464722,39,,0,,America/Los_Angeles,1


In [44]:
# Load the trips table (trips.txt) from the raw transit feed: one row per trip_id.
trips_path = '../data/raw-data/google_daily_transit/trips.txt'
trips = pd.read_csv(trips_path)

In [45]:
# Preview the loaded trips table to check its columns and row count.
trips

Unnamed: 0,route_id,service_id,trip_id,trip_headsign,trip_short_name,direction_id,block_id,shape_id,peak_flag,fare_id,wheelchair_accessible,bikes_allowed
0,100162,69014,347461006,University District,LOCAL,1,7252890,30271008,0,101,1,1
1,100162,69014,347463286,University District Bellevue,LOCAL,1,7252892,30271003,0,101,1,1
2,100162,69014,347463296,University District Bellevue,LOCAL,1,7252901,30271003,0,101,1,1
3,100137,69014,347464336,Eastgate P&R,LOCAL,1,7252851,20246003,0,101,1,1
4,100136,69014,347465656,Factoria Crossroads,LOCAL,1,7252887,20245007,0,101,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
31010,102650,366,691083056,Capitol Hill Station,LOCAL,1,7252756,30636001,0,101,1,1
31011,102650,366,691083066,Capitol Hill Station,LOCAL,1,7252757,30636001,0,101,1,1
31012,102650,366,691083076,Capitol Hill Station,LOCAL,1,7252757,30636001,0,101,1,1
31013,102650,366,691083086,Capitol Hill Station,LOCAL,1,7252745,30636001,0,101,1,1


In [46]:
# Load the stop_times table (stop_times.txt) from the raw transit feed: one row
# per (trip_id, stop_id) visit with its arrival and departure times.
stop_times_path = '../data/raw-data/google_daily_transit/stop_times.txt'
stop_times = pd.read_csv(stop_times_path)

In [47]:
# Preview the loaded stop_times table to check its columns and row count.
stop_times

Unnamed: 0,trip_id,arrival_time,departure_time,stop_id,stop_sequence,stop_headsign,pickup_type,drop_off_type,shape_dist_traveled,timepoint
0,347461006,05:39:00,05:39:00,67655,1,,0,0,0.0,1
1,347461006,05:39:35,05:39:35,67610,10,,0,0,1446.7,0
2,347461006,05:40:18,05:40:18,70608,17,,0,0,3266.3,0
3,347461006,05:40:34,05:40:34,70619,21,,0,0,3933.1,0
4,347461006,05:40:50,05:40:50,72932,25,,0,0,4585.6,0
...,...,...,...,...,...,...,...,...,...,...
1069672,691083096,19:07:07,19:07:07,13291,1164,,0,0,180868.4,0
1069673,691083096,19:08:24,19:08:24,13292,1168,,0,0,181509.8,0
1069674,691083096,19:10:42,19:10:42,41965,1174,,0,0,182669.3,0
1069675,691083096,19:12:21,19:12:21,41970,1177,,0,0,183497.6,0


In [48]:
# Group stop_times by stop_id (not trip_id) and count the trip_id entries per stop,
# i.e. how many scheduled trip visits each stop receives. Only trip_id is counted,
# so the remaining stop_times columns are not aggregated needlessly.
# The result has two columns: stop_id and trip_id (the count).
trips_grouped = (
    stop_times
    .groupby('stop_id')['trip_id']
    .count()
    .reset_index()
)

In [49]:
# Keep only the stop identifier and its visit count, with the count column named
# daily_trips_count. The rename happens BEFORE the column selection so the cell is
# safe to re-run: on a second run 'trip_id' no longer exists, the rename is a
# no-op, and the selection still succeeds instead of raising KeyError.
# NOTE(review): the count covers every row of stop_times; whether that represents
# a single service day depends on the feed — confirm before reading it as "daily".
trips_grouped = (
    trips_grouped
    .rename(columns={'trip_id': 'daily_trips_count'})
    .loc[:, ['stop_id', 'daily_trips_count']]
)

In [50]:
# Preview the per-stop counts (columns: stop_id, daily_trips_count).
trips_grouped

Unnamed: 0,stop_id,daily_trips_count
0,250,36
1,260,15
2,280,126
3,300,23
4,320,319
...,...,...
6514,99755,14
6515,99756,143
6516,99760,143
6517,99908,36


In [51]:
# Attach the per-stop trip counts to the stops table.
# - Any daily_trips_count column left over from a previous run of this cell is
#   dropped first, so re-running does not produce daily_trips_count_x / _y.
# - how='left' keeps every stop; stops without a match in trips_grouped get NaN.
# - validate='many_to_one' makes pandas raise if trips_grouped ever contains a
#   stop_id more than once, which would otherwise duplicate stop rows.
stops = (
    stops
    .drop(columns='daily_trips_count', errors='ignore')
    .merge(trips_grouped, on='stop_id', how='left', validate='many_to_one')
)

In [66]:
# Turn the stops table into a GeoDataFrame of points, using stop_lon as x and
# stop_lat as y. No CRS is assigned at this step.
stop_points = gpd.points_from_xy(x=stops['stop_lon'], y=stops['stop_lat'])
stops_geo = gpd.GeoDataFrame(stops, geometry=stop_points)

In [67]:
# Declare the point coordinates as EPSG:4326 (lon/lat). set_crs only labels the
# data; it does not transform any coordinates.
stops_geo = stops_geo.set_crs(epsg=4326, allow_override=False)

In [68]:
# Interactive map of all stops, coloured by daily_trips_count in natural-breaks classes.
stops_geo.explore(
    column='daily_trips_count',
    scheme='natural_breaks',
    cmap='inferno',
    legend=True,
    tiles='cartodb positron',
)

In [69]:
# Load the cleaned pedestrian network shapefile.
pedestrian_network_path = '../data/cleaned-pednetwork/cleaned-pednetwork-full/cleaned-pednetwork-full.shp'
pedestrian_network = gpd.read_file(pedestrian_network_path)

In [70]:
# Get the bounding box of the pedestrian network as an array ordered
# (minx, miny, maxx, maxy), in the network's own coordinate units.
# It is used further down to clip the stops to the network's extent.
ped_net_bounds = pedestrian_network.total_bounds
ped_net_bounds

array([1252240.71691886,  214353.37310228, 1284576.93875745,
        245043.05274282])

In [71]:
# Reproject the stops to EPSG:2285 so their coordinates can be compared with the
# pedestrian-network bounding box.
# NOTE(review): this assumes the pedestrian network is stored in EPSG:2285 —
# confirm against pedestrian_network.crs; a mismatch would make the bounding-box
# filter select the wrong stops.
stops_geo = stops_geo.to_crs(epsg=2285)

In [72]:
# Keep only the stops that fall inside the pedestrian-network bounding box.
# total_bounds is ordered (minx, miny, maxx, maxy); .cx slices by x first, then y.
min_x, min_y, max_x, max_y = ped_net_bounds
stops_geo = stops_geo.cx[min_x:max_x, min_y:max_y]

In [73]:
# Interactive map of the stops remaining after the bounding-box filter,
# coloured by daily_trips_count in natural-breaks classes.
stops_geo.explore(
    column='daily_trips_count',
    scheme='natural_breaks',
    cmap='inferno',
    legend=True,
    tiles='cartodb positron',
)

In [74]:
# Save the filtered stops to an ESRI Shapefile.
# The Shapefile format limits field names to 10 characters, so the longer columns
# are renamed explicitly here rather than left to silent truncation at write time.
# Each new name is the first 10 characters of the original, which is what the
# driver's automatic truncation is expected to produce, so the written schema
# should be unchanged for downstream readers.
# stops_geo itself keeps its full column names; only the written copy is renamed.
SHAPEFILE_FIELD_NAMES = {
    'location_type': 'location_t',
    'parent_station': 'parent_sta',
    'stop_timezone': 'stop_timez',
    'wheelchair_boarding': 'wheelchair',
    'daily_trips_count': 'daily_trip',
}
stops_geo.rename(columns=SHAPEFILE_FIELD_NAMES).to_file(
    '../data/processed-data/transit-stops/transit-stops.shp'
)

  stops_geo.to_file('../data/processed-data/transit-stops/transit-stops.shp')
