In [13]:
pip install tqdm

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.2.2 -> 23.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [14]:
from sqlalchemy import create_engine, text
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
import folium
from tqdm.notebook import tqdm, trange
import time

In [2]:
database_name = 'scooters'

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [3]:
engine = create_engine(connection_string)

In [4]:
trips_query = '''
SELECT sumdid, startdate, starttime, enddate, endtime, companyname, tripduration, tripdistance,
       startlatitude, startlongitude, endlatitude, endlongitude
FROM trips;
'''

with engine.connect() as connection:    
    trips = pd.read_sql(text(trips_query), con = connection)

In [5]:
trips.loc[trips.companyname == 'Bolt Mobility', 'tripduration'] = trips.tripduration / 60

In [6]:
drop_entries1 = trips[(trips['tripduration'] < 1) | (trips['tripduration'] >= (24 * 60))].index
trips.drop(drop_entries1, inplace = True)

In [7]:
drop_entries1 = trips[(trips['tripduration'] < 1) | (trips['tripduration'] >= (24 * 60))].index
trips.drop(drop_entries1, inplace = True)

In [41]:
bus_stops = pd.read_csv('../../data/nash_bus_stops.csv')

In [24]:
bus_stops.shape

(88, 8)

In [42]:
bus_stops = bus_stops.drop_duplicates()

In [63]:
zipcodes = gpd.read_file('../../data/zipcodes.geojson')

In [43]:
split_data = bus_stops['Mapped Location'].str.strip(')').str.strip('(').str.split(', ')
bus_stops['lat'] = split_data.apply(lambda x: x[0])
bus_stops['lng'] = split_data.apply(lambda x: x[1])

In [64]:
bus_stops['geometry'] = bus_stops.apply(lambda x: Point((float(x.lng), 
                                                         float(x.lat))), 
                                        axis=1)
bus_geo = gpd.GeoDataFrame(bus_stops, 
                           crs = zipcodes.crs, 
                           geometry = bus_stops['geometry'])

In [65]:
trips['start_geometry'] = trips.apply(lambda x: Point((float(x.startlongitude), 
                                                         float(x.startlatitude))), 
                                        axis=1)
trips_gdf = gpd.GeoDataFrame(trips, 
                           crs = zipcodes.crs, 
                           geometry = trips['start_geometry'])

In [74]:
trip_busstops = gpd.sjoin_nearest(trips_gdf, bus_geo)
trip_busstops.head()

AttributeError: module 'geopandas' has no attribute 'sjoin_nearest'

In [60]:
import geopy.distance

In [66]:
trips_sample = trips.head(10)

In [71]:
trips_sample['bus_distance'] = 0
trips_sample['bus_stop_id'] = 0


for indext, rowt in tqdm(trips_sample.iterrows()):
    x1 = rowt.startlatitude
    y1 = rowt.startlongitude
    for indexb, rowb in bus_stops.iterrows():
        x2 = rowb.lat
        y2 = rowb.lng
        dist = geopy.distance.geodesic((x1,y1), (x2,y2))
        if dist.meters < 5:
            trips_sample.loc[indext, 'bus_distance'] = dist.meters
            trips_sample.loc[indext, 'bus_stop_id'] = rowb['Stop ID Number']
        else:
            trips_sample.loc[indext, 'bus_distance'] = -1
            trips_sample.loc[indext, 'bus_stop_id'] = -1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trips_sample['bus_distance'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trips_sample['bus_stop_id'] = 0


0it [00:00, ?it/s]

In [72]:
trips_sample

Unnamed: 0,sumdid,startdate,starttime,enddate,endtime,companyname,tripduration,tripdistance,startlatitude,startlongitude,endlatitude,endlongitude,start_geometry,geometry,bus_distance,bus_stop_id
0,PoweredEASTYRQ3VKAGX,2019-05-04,06:43:29,2019-05-04,06:59:34,Lime,16.083333,660.8064,36.163692,-86.777121,36.164559,-86.768777,POINT (-86.77712099999999 36.163692),POINT (-86.77712 36.16369),-1,-1
1,PoweredD3QYJQ6MLZ5JL,2019-05-04,06:30:44,2019-05-04,07:01:30,Lime,30.766667,0.0,36.152949,-86.789994,36.155713,-86.7737,POINT (-86.78999399999999 36.152949),POINT (-86.78999 36.15295),-1,-1
2,Powered25UE3EUVBN6RU,2019-05-04,06:57:24,2019-05-04,07:00:59,Lime,3.583333,218.8464,36.160301,-86.778443,36.160329,-86.778553,POINT (-86.778443 36.160301),POINT (-86.77844 36.16030),-1,-1
3,PoweredUKXD3TNEM3NCN,2019-05-04,06:58:15,2019-05-04,07:02:26,Lime,4.183333,19.812,36.136907,-86.801883,36.136947,-86.80173,POINT (-86.801883 36.136907),POINT (-86.80188 36.13691),-1,-1
4,PoweredZVTEPTDZIUK5L,2019-05-04,06:47:15,2019-05-04,07:01:23,Lime,14.133333,544.068,36.149881,-86.796905,36.160372,-86.778379,POINT (-86.796905 36.149881),POINT (-86.79690 36.14988),-1,-1
5,Powered26AH2TKSXSOIE,2019-05-04,06:20:39,2019-05-04,07:09:43,Lime,49.066667,822.96,36.19228,-86.788583,36.192692,-86.790043,POINT (-86.788583 36.19228),POINT (-86.78858 36.19228),-1,-1
6,PoweredYRQLJ5TIJG2TF,2019-05-04,06:16:26,2019-05-04,07:03:59,Lime,47.55,754.0752,36.155414,-86.775036,36.138253,-86.765191,POINT (-86.775036 36.155414),POINT (-86.77504 36.15541),-1,-1
7,PoweredHAQRQEW6FPKV5,2019-05-04,06:49:57,2019-05-04,07:03:09,Lime,13.2,1206.7032,36.146114,-86.814444,36.146233,-86.814455,POINT (-86.81444399999999 36.146114),POINT (-86.81444 36.14611),-1,-1
8,PoweredDUBBDHXJH2D7X,2019-05-04,05:43:42,2019-05-04,07:03:59,Lime,80.283333,700.7352,36.147591,-86.800073,36.147714,-86.805921,POINT (-86.800073 36.147591),POINT (-86.80007 36.14759),-1,-1
9,Powered5KCCMXGL35OXV,2019-05-04,06:09:25,2019-05-04,07:06:28,Lime,57.05,485.5464,36.159912,-86.779686,36.152679,-86.789676,POINT (-86.779686 36.159912),POINT (-86.77969 36.15991),-1,-1


In [47]:
bus_stops[bus_stops['Stop ID Number'] == 114]

Unnamed: 0,Stop ID Number,Stop Abbreviation,Stop Name,Bench,Shelter,Line Number,Line Name,Mapped Location,lat,lng,geometry
75,114,20AWESNN,20TH AVE S & WEST END AVE NB,False,False,91,FRANKLIN EXPRESS,"(36.151061, -86.799021)",36.151061,-86.799021,POINT (-86.79902 36.15106)
