# 5. SUMDs can provide alternative transportation and provide "last mile" access to public transit. How often are trips starting near public transit hubs? You can download a dataset of bus stop locations from https://data.nashville.gov/Transportation/Regional-Transportation-Authority-Bus-Stops/p886-fnbd.

In [43]:
from sqlalchemy import create_engine, text
import pandas as pd
from shapely.geometry import Point
import geopandas as gpd
import folium
import geopy.distance
from tqdm.notebook import tqdm, trange
import time

## Bringing in the data I want to work with

#### Setting up engine to use SQL

In [2]:
database_name = 'scooters'

connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [3]:
engine = create_engine(connection_string)

#### Bringing in the essential columns and filtering out non-compliiant entries (trips dataframe)

In [4]:
#SQL querry filters on compliance rules 2 and 3. (see question 2)

trips_query = '''
SELECT sumdid, startdate, starttime, enddate, endtime, companyname, tripduration, tripdistance,
       startlatitude, startlongitude, endlatitude, endlongitude
FROM trips;
'''

with engine.connect() as connection:    
    trips = pd.read_sql(text(trips_query), con = connection)
    
trips.head(10)

Unnamed: 0,sumdid,startdate,starttime,enddate,endtime,companyname,tripduration,tripdistance,startlatitude,startlongitude,endlatitude,endlongitude
0,PoweredEASTYRQ3VKAGX,2019-05-04,06:43:29,2019-05-04,06:59:34,Lime,16.083333,660.8064,36.163692,-86.777121,36.164559,-86.768777
1,PoweredD3QYJQ6MLZ5JL,2019-05-04,06:30:44,2019-05-04,07:01:30,Lime,30.766667,0.0,36.152949,-86.789994,36.155713,-86.7737
2,Powered25UE3EUVBN6RU,2019-05-04,06:57:24,2019-05-04,07:00:59,Lime,3.583333,218.8464,36.160301,-86.778443,36.160329,-86.778553
3,PoweredUKXD3TNEM3NCN,2019-05-04,06:58:15,2019-05-04,07:02:26,Lime,4.183333,19.812,36.136907,-86.801883,36.136947,-86.80173
4,PoweredZVTEPTDZIUK5L,2019-05-04,06:47:15,2019-05-04,07:01:23,Lime,14.133333,544.068,36.149881,-86.796905,36.160372,-86.778379
5,Powered26AH2TKSXSOIE,2019-05-04,06:20:39,2019-05-04,07:09:43,Lime,49.066667,822.96,36.19228,-86.788583,36.192692,-86.790043
6,PoweredYRQLJ5TIJG2TF,2019-05-04,06:16:26,2019-05-04,07:03:59,Lime,47.55,754.0752,36.155414,-86.775036,36.138253,-86.765191
7,PoweredHAQRQEW6FPKV5,2019-05-04,06:49:57,2019-05-04,07:03:09,Lime,13.2,1206.7032,36.146114,-86.814444,36.146233,-86.814455
8,PoweredDUBBDHXJH2D7X,2019-05-04,05:43:42,2019-05-04,07:03:59,Lime,80.283333,700.7352,36.147591,-86.800073,36.147714,-86.805921
9,Powered5KCCMXGL35OXV,2019-05-04,06:09:25,2019-05-04,07:06:28,Lime,57.05,485.5464,36.159912,-86.779686,36.152679,-86.789676


In [5]:
trips.loc[trips.companyname == 'Bolt Mobility', 'tripduration'] = trips.tripduration / 60

In [6]:
drop_entries1 = trips[(trips['tripduration'] < 1) | (trips['tripduration'] >= (24 * 60))].index
trips.drop(drop_entries1, inplace = True)

In [7]:
drop_entries1 = trips[(trips['tripduration'] < 1) | (trips['tripduration'] >= (24 * 60))].index
trips.drop(drop_entries1, inplace = True)

In [8]:
trips.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 556037 entries, 0 to 565521
Data columns (total 12 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   sumdid          556037 non-null  object 
 1   startdate       556037 non-null  object 
 2   starttime       556037 non-null  object 
 3   enddate         556037 non-null  object 
 4   endtime         556037 non-null  object 
 5   companyname     556037 non-null  object 
 6   tripduration    556037 non-null  float64
 7   tripdistance    556037 non-null  float64
 8   startlatitude   556037 non-null  float64
 9   startlongitude  556037 non-null  float64
 10  endlatitude     556037 non-null  float64
 11  endlongitude    556037 non-null  float64
dtypes: float64(6), object(6)
memory usage: 55.1+ MB


#### Importing bus stop locations from .csv

In [9]:
bus_stops = pd.read_csv('../../data/nash_bus_stops.csv')

In [10]:
bus_stops.head()

Unnamed: 0,Stop ID Number,Stop Abbreviation,Stop Name,Bench,Shelter,Line Number,Line Name,Mapped Location
0,4418,MCC4_20,MUSIC CITY CENTRAL 4TH - BAY 20,False,True,94,CLARKSVILLE EXPRESS,"(36.166545, -86.781895)"
1,4422,MCC5_6,MUSIC CITY CENTRAL 5TH - BAY 6,True,True,94,CLARKSVILLE EXPRESS,"(36.166501, -86.781233)"
2,4249,21WE,21ST AVE PAST WEST END AVE SB,False,False,87,GALLATIN EXPRESS,"(36.149489, -86.800523)"
3,4184,MCSMJ,MUSIC CITY STAR MT. JULIET STATION,True,True,90,MUSIC CITY STAR,"(36.199912, -86.517904)"
4,4425,MCC5_8,MUSIC CITY CENTRAL 5TH - BAY 8,False,True,92,HENDERSONVILLE EXPRESS,"(36.166768, -86.781424)"


In [11]:
bus_stops.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Stop ID Number     88 non-null     int64 
 1   Stop Abbreviation  88 non-null     object
 2   Stop Name          88 non-null     object
 3   Bench              88 non-null     bool  
 4   Shelter            88 non-null     bool  
 5   Line Number        88 non-null     int64 
 6   Line Name          88 non-null     object
 7   Mapped Location    88 non-null     object
dtypes: bool(2), int64(2), object(4)
memory usage: 4.4+ KB


#### Importing Davidson county zipcodes

In [12]:
zipcodes = gpd.read_file('../../data/zipcodes.geojson')

In [13]:
zipcodes.head()

Unnamed: 0,zip,objectid,po_name,shape_stlength,shape_starea,geometry
0,37115,1,MADISON,178783.0248888682,596553400.5788574,"MULTIPOLYGON (((-86.68725 36.31821, -86.68722 ..."
1,37216,3,NASHVILLE,75820.99782140006,188884682.28344727,"MULTIPOLYGON (((-86.73451 36.23774, -86.73425 ..."
2,37204,9,NASHVILLE,93180.2922504256,200664795.51708984,"MULTIPOLYGON (((-86.77914 36.13424, -86.77923 ..."
3,37027,11,BRENTWOOD,159760.6942933173,174978422.04101562,"MULTIPOLYGON (((-86.81258 36.06319, -86.81263 ..."
4,37064,18,FRANKLIN,28995.828320601937,46969608.005737305,"MULTIPOLYGON (((-87.02197 36.01200, -87.02140 ..."


In [14]:
print(zipcodes.crs)
zipcodes.info()

epsg:4326
<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype   
---  ------          --------------  -----   
 0   zip             56 non-null     object  
 1   objectid        56 non-null     object  
 2   po_name         56 non-null     object  
 3   shape_stlength  56 non-null     object  
 4   shape_starea    56 non-null     object  
 5   geometry        56 non-null     geometry
dtypes: geometry(1), object(5)
memory usage: 2.8+ KB


## Converting dataframes to geodataframes

#### bus_stops to gdf

In [15]:
# pulling out latitude and longitude position from 'Mapped Location' column
split_data = bus_stops['Mapped Location'].str.strip(')').str.strip('(').str.split(', ')
bus_stops['lat'] = split_data.apply(lambda x: x[0])
bus_stops['lng'] = split_data.apply(lambda x: x[1])

bus_stops.head()

Unnamed: 0,Stop ID Number,Stop Abbreviation,Stop Name,Bench,Shelter,Line Number,Line Name,Mapped Location,lat,lng
0,4418,MCC4_20,MUSIC CITY CENTRAL 4TH - BAY 20,False,True,94,CLARKSVILLE EXPRESS,"(36.166545, -86.781895)",36.166545,-86.781895
1,4422,MCC5_6,MUSIC CITY CENTRAL 5TH - BAY 6,True,True,94,CLARKSVILLE EXPRESS,"(36.166501, -86.781233)",36.166501,-86.781233
2,4249,21WE,21ST AVE PAST WEST END AVE SB,False,False,87,GALLATIN EXPRESS,"(36.149489, -86.800523)",36.149489,-86.800523
3,4184,MCSMJ,MUSIC CITY STAR MT. JULIET STATION,True,True,90,MUSIC CITY STAR,"(36.199912, -86.517904)",36.199912,-86.517904
4,4425,MCC5_8,MUSIC CITY CENTRAL 5TH - BAY 8,False,True,92,HENDERSONVILLE EXPRESS,"(36.166768, -86.781424)",36.166768,-86.781424


In [33]:
# bus_stops to geodata frame (Point dtype column 'geometry')
bus_stops['geometry'] = bus_stops.apply(lambda x: Point((float(x.lng), 
                                                         float(x.lat))), 
                                        axis=1)
# matching crs to zipcodes
bus_geo = gpd.GeoDataFrame(bus_stops, 
                           crs = zipcodes.crs, 
                           geometry = bus_stops['geometry'])

In [34]:
print(bus_stops.info())
bus_stops.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 88 entries, 0 to 87
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   Stop ID Number     88 non-null     int64   
 1   Stop Abbreviation  88 non-null     object  
 2   Stop Name          88 non-null     object  
 3   Bench              88 non-null     bool    
 4   Shelter            88 non-null     bool    
 5   Line Number        88 non-null     int64   
 6   Line Name          88 non-null     object  
 7   Mapped Location    88 non-null     object  
 8   lat                88 non-null     object  
 9   lng                88 non-null     object  
 10  geometry           88 non-null     geometry
dtypes: bool(2), geometry(1), int64(2), object(6)
memory usage: 6.5+ KB
None


Unnamed: 0,Stop ID Number,Stop Abbreviation,Stop Name,Bench,Shelter,Line Number,Line Name,Mapped Location,lat,lng,geometry
0,4418,MCC4_20,MUSIC CITY CENTRAL 4TH - BAY 20,False,True,94,CLARKSVILLE EXPRESS,"(36.166545, -86.781895)",36.166545,-86.781895,POINT (-86.78190 36.16654)
1,4422,MCC5_6,MUSIC CITY CENTRAL 5TH - BAY 6,True,True,94,CLARKSVILLE EXPRESS,"(36.166501, -86.781233)",36.166501,-86.781233,POINT (-86.78123 36.16650)
2,4249,21WE,21ST AVE PAST WEST END AVE SB,False,False,87,GALLATIN EXPRESS,"(36.149489, -86.800523)",36.149489,-86.800523,POINT (-86.80052 36.14949)
3,4184,MCSMJ,MUSIC CITY STAR MT. JULIET STATION,True,True,90,MUSIC CITY STAR,"(36.199912, -86.517904)",36.199912,-86.517904,POINT (-86.51790 36.19991)
4,4425,MCC5_8,MUSIC CITY CENTRAL 5TH - BAY 8,False,True,92,HENDERSONVILLE EXPRESS,"(36.166768, -86.781424)",36.166768,-86.781424,POINT (-86.78142 36.16677)


#### trips to gdf

In [35]:
#trips to geodata frame (Point dtype column 'start_geometry')
trips['start_geometry'] = trips.apply(lambda x: Point((float(x.startlongitude), 
                                                         float(x.startlatitude))), 
                                        axis=1)
#matching crs to zipcodes
trips_gdf = gpd.GeoDataFrame(trips, 
                           crs = zipcodes.crs, 
                           geometry = trips['start_geometry'])


In [20]:
print(trips.info())
trips.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 556037 entries, 0 to 565521
Data columns (total 14 columns):
 #   Column          Non-Null Count   Dtype   
---  ------          --------------   -----   
 0   sumdid          556037 non-null  object  
 1   startdate       556037 non-null  object  
 2   starttime       556037 non-null  object  
 3   enddate         556037 non-null  object  
 4   endtime         556037 non-null  object  
 5   companyname     556037 non-null  object  
 6   tripduration    556037 non-null  float64 
 7   tripdistance    556037 non-null  float64 
 8   startlatitude   556037 non-null  float64 
 9   startlongitude  556037 non-null  float64 
 10  endlatitude     556037 non-null  float64 
 11  endlongitude    556037 non-null  float64 
 12  start_geometry  556037 non-null  object  
 13  geometry        556037 non-null  geometry
dtypes: float64(6), geometry(1), object(7)
memory usage: 63.6+ MB
None


Unnamed: 0,sumdid,startdate,starttime,enddate,endtime,companyname,tripduration,tripdistance,startlatitude,startlongitude,endlatitude,endlongitude,start_geometry,geometry
0,PoweredEASTYRQ3VKAGX,2019-05-04,06:43:29,2019-05-04,06:59:34,Lime,16.083333,660.8064,36.163692,-86.777121,36.164559,-86.768777,POINT (-86.77712099999999 36.163692),POINT (-86.77712 36.16369)
1,PoweredD3QYJQ6MLZ5JL,2019-05-04,06:30:44,2019-05-04,07:01:30,Lime,30.766667,0.0,36.152949,-86.789994,36.155713,-86.7737,POINT (-86.78999399999999 36.152949),POINT (-86.78999 36.15295)
2,Powered25UE3EUVBN6RU,2019-05-04,06:57:24,2019-05-04,07:00:59,Lime,3.583333,218.8464,36.160301,-86.778443,36.160329,-86.778553,POINT (-86.778443 36.160301),POINT (-86.77844 36.16030)
3,PoweredUKXD3TNEM3NCN,2019-05-04,06:58:15,2019-05-04,07:02:26,Lime,4.183333,19.812,36.136907,-86.801883,36.136947,-86.80173,POINT (-86.801883 36.136907),POINT (-86.80188 36.13691)
4,PoweredZVTEPTDZIUK5L,2019-05-04,06:47:15,2019-05-04,07:01:23,Lime,14.133333,544.068,36.149881,-86.796905,36.160372,-86.778379,POINT (-86.796905 36.149881),POINT (-86.79690 36.14988)


## Starting analysis

#### I want to find all trips where the distance between the trips['start_geometry'] and the closest bus_stops['geometry'] is less than 0.1 mile 

In [36]:
trips_month5_weeks12 = trips[trips['start_date'] < '2019-05-]
trips_sample

Unnamed: 0,sumdid,startdate,starttime,enddate,endtime,companyname,tripduration,tripdistance,startlatitude,startlongitude,endlatitude,endlongitude,start_geometry,geometry
0,PoweredEASTYRQ3VKAGX,2019-05-04,06:43:29,2019-05-04,06:59:34,Lime,16.083333,660.8064,36.163692,-86.777121,36.164559,-86.768777,POINT (-86.77712099999999 36.163692),POINT (-86.77712 36.16369)
1,PoweredD3QYJQ6MLZ5JL,2019-05-04,06:30:44,2019-05-04,07:01:30,Lime,30.766667,0.0,36.152949,-86.789994,36.155713,-86.7737,POINT (-86.78999399999999 36.152949),POINT (-86.78999 36.15295)


In [44]:
trips['bus_distance'] = 0
trips['bus_stop_id'] = 0

for indext, rowt in tqdm(trips.iterrows()):
    x1 = rowt.startlatitude
    y1 = rowt.startlongitude
    for indexb, rowb in bus_stops.iterrows():
        x2 = rowb.lat
        y2 = rowb.lng
        dist = geopy.distance.geodesic((x1,y1), (x2,y2))
        if dist.meters < 5:
            trips_sample.loc[indext, 'bus_distance'] = dist.meters
            trips_sample.loc[indext, 'bus_stop_id'] = rowb['Stop ID Number']
        else:
            trips_sample.loc[indext, 'bus_distance'] = -1
            trips_sample.loc[indext, 'bus_stop_id'] = -1
        

0it [00:00, ?it/s]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trips_sample.loc[indext, 'bus_distance'] = -1


KeyboardInterrupt: 

## Note:
#### crs epsg:4326 has units of degrees, but when using the '.distance()' method it will return values in meters. This was checked by running the same code above with a sample containing a crs of epsg:5234 (has units of meters instead of degrees)

In [32]:
trips_sample

Unnamed: 0,sumdid,startdate,starttime,enddate,endtime,companyname,tripduration,tripdistance,startlatitude,startlongitude,endlatitude,endlongitude,start_geometry,geometry,bus_distance,bus_stop_id
0,PoweredEASTYRQ3VKAGX,2019-05-04,06:43:29,2019-05-04,06:59:34,Lime,16.083333,660.8064,36.163692,-86.777121,36.164559,-86.768777,POINT (-86.77712099999999 36.163692),POINT (-86.77712 36.16369),0.005671,4431
1,PoweredD3QYJQ6MLZ5JL,2019-05-04,06:30:44,2019-05-04,07:01:30,Lime,30.766667,0.0,36.152949,-86.789994,36.155713,-86.7737,POINT (-86.78999399999999 36.152949),POINT (-86.78999 36.15295),0.015813,4431


In [38]:
bus_stops.sort_values('Stop ID Number')

Unnamed: 0,Stop ID Number,Stop Abbreviation,Stop Name,Bench,Shelter,Line Number,Line Name,Mapped Location,lat,lng,geometry
75,114,20AWESNN,20TH AVE S & WEST END AVE NB,False,False,91,FRANKLIN EXPRESS,"(36.151061, -86.799021)",36.151061,-86.799021,POINT (-86.79902 36.15106)
34,120,21ACHINN,21ST AVE S & CHILDRENS WAY NB,True,False,87,GALLATIN EXPRESS,"(36.138372, -86.800622)",36.138372,-86.800622,POINT (-86.80062 36.13837)
25,120,21ACHINN,21ST AVE S & CHILDRENS WAY NB,True,False,87,GALLATIN EXPRESS,"(36.138372, -86.800622)",36.138372,-86.800622,POINT (-86.80062 36.13837)
49,120,21ACHINN,21ST AVE S & CHILDRENS WAY NB,True,False,89,SPRINGFIELD-JOELTON EXPRESS,"(36.138372, -86.800622)",36.138372,-86.800622,POINT (-86.80062 36.13837)
78,120,21ACHINN,21ST AVE S & CHILDRENS WAY NB,True,False,89,SPRINGFIELD-JOELTON EXPRESS,"(36.138372, -86.800622)",36.138372,-86.800622,POINT (-86.80062 36.13837)
...,...,...,...,...,...,...,...,...,...,...,...
19,5342,CLK E11,CLARKSVILLE EXIT 11 P&R,False,False,94,CLARKSVILLE EXPRESS,"(36.525005, -87.217205)",36.525005,-87.217205,POINT (-87.21721 36.52501)
71,5414,AGEXPO,WILLIAMSON CO AG EXPO,False,False,91,FRANKLIN EXPRESS,"(35.861752, -86.82586)",35.861752,-86.82586,POINT (-86.82586 35.86175)
9,5414,AGEXPO,WILLIAMSON CO AG EXPO,False,False,91,FRANKLIN EXPRESS,"(35.861752, -86.82586)",35.861752,-86.82586,POINT (-86.82586 35.86175)
58,5474,MCSHAM,MUSIC CITY STAR HAMILTON SPRINGS ST,False,False,90,MUSIC CITY STAR,"(36.234224, -86.368417)",36.234224,-86.368417,POINT (-86.36842 36.23422)
