In [None]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from sqlalchemy import create_engine, text

In [None]:
# Name of the PostgreSQL database that holds the scooter tables.
database_name = 'scooters'    # Fill this in with your database name

# Interpolate `database_name` so the setting above is the single place where
# the database name is configured.
# NOTE(review): the user name and password are embedded in this URL; consider
# reading them from the environment before sharing the notebook.
connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [None]:
pip install psycopg2-binary

In [None]:
# Create the SQLAlchemy engine used by every query cell below.
engine = create_engine(connection_string)

In [None]:
# Sample of 1000 rows from the scooters table, used to inspect its columns.
query = """
SELECT *
FROM scooters
LIMIT 1000;
"""

with engine.connect() as conn:
    scooters = pd.read_sql(sql=text(query), con=conn)

In [None]:
# Run the current `query` string and load the result into `scooters`.
# NOTE(review): this repeats the read performed in the preceding cell; one of
# the two cells is redundant.
with engine.connect() as connection:    
   scooters = pd.read_sql(text(query), con = connection)

In [None]:
# First 100 rows of the sample.
scooters.head(100)

In [None]:
# Inspect a single pubdatetime value (row label 0) to check its type.
scooters.at[0, 'pubdatetime']

In [None]:
# Add the month number of each record's pubdatetime as a new column.
scooters['month'] = scooters.pubdatetime.dt.month
scooters.head(5)

In [None]:
# Add the calendar year of each record's pubdatetime as a new column.
scooters['year'] = scooters.pubdatetime.dt.year
scooters.head(5)

In [None]:
# Distinct powered devices per company for each year-month of pubdatetime.
# NOTE(review): despite the variable name below, the query covers every
# company, not only Bird.
query = """
SELECT companyname, COUNT(DISTINCT(sumdid)), TO_CHAR(pubdatetime, 'YYYY-MM') AS mo_yr
FROM scooters
WHERE sumdtype = 'Powered'
GROUP BY companyname, mo_yr
ORDER BY mo_yr, companyname;
"""

with engine.connect() as conn:
    scooters_bird_dates = pd.read_sql(sql=text(query), con=conn)

In [None]:
# Display the per-company, per-month distinct device counts.
scooters_bird_dates

In [None]:
# Check what kind of object the scooters sample is.
type(scooters)

In [None]:
# Column names, dtypes and non-null counts for the scooters sample.
scooters.info()

In [None]:
# Sample of 1000 rows from the trips table.
query = '''
SELECT *
FROM trips
LIMIT 1000;
'''

# Read the rows into a DataFrame while the connection is open. A raw result
# from connection.execute() is meant to be consumed inside the `with` block
# and is not a DataFrame, so it is of no use to later cells.
with engine.connect() as connection:
    trips = pd.read_sql(text(query), con=connection)

In [None]:
# Load the result of the current `query` string into the `trips` DataFrame.
with engine.connect() as conn:
    trips = pd.read_sql(sql=text(query), con=conn)

In [None]:
# First rows of the trips sample.
trips.head()

In [None]:
# Column names, dtypes and non-null counts for the trips sample.
trips.info()

1. During this period, seven companies offered scooters. How many scooters did each company have in this time frame? Did the number for each company change over time? Did scooter usage vary by company?

In [None]:
# Distinct company names present in the scooters table.
query = '''
SELECT DISTINCT(companyname)
FROM scooters;
'''

# Store the company list in `result` as a DataFrame. Assigning it to `trips`
# would overwrite the trips sample with an unrelated result.
with engine.connect() as connection:
    result = pd.read_sql(text(query), con=connection)

In [None]:
# Load the result of the current `query` string into `result`.
with engine.connect() as conn:
    result = pd.read_sql(sql=text(query), con=conn)

In [None]:
# Display the query result loaded into `result`.
result

In [None]:
# Number of distinct devices per company, device group and device type.
# The grouping columns are selected individually: wrapping them in
# DISTINCT(...) makes PostgreSQL return one composite "row" column, and
# GROUP BY already yields one row per combination.
query1 = '''
SELECT companyname, sumdgroup, sumdtype, COUNT(DISTINCT sumdid) AS num_scooters
FROM scooters
GROUP BY companyname, sumdgroup, sumdtype;
'''

with engine.connect() as connection:
    result1 = pd.read_sql(text(query1), con=connection)

In [None]:
# Show the per-company device counts using the notebook's rich table display.
result1

In [None]:
# Sample of 100 rows from the trips table.
query2 = '''
SELECT *
FROM trips
LIMIT 100;
'''

# Read into a DataFrame while the connection is open, rather than keeping a
# raw result from connection.execute() after the `with` block has ended.
with engine.connect() as connection:
    result2 = pd.read_sql(text(query2), con=connection)

In [None]:
# Load the result of `query2` into the `result2` DataFrame.
with engine.connect() as conn:
    result2 = pd.read_sql(sql=text(query2), con=conn)

In [None]:
# Last five rows of the trips sample (five is the default for tail()).
result2.tail()

In [None]:
# Every distinct pubdatetime in the scooters table; the min/max of this
# column are inspected in the following cells.
query3 = """
SELECT DISTINCT(pubdatetime)
FROM scooters;
"""

with engine.connect() as conn:
    result3 = pd.read_sql(sql=text(query3), con=conn)

In [None]:
# Earliest pubdatetime returned by query3.
result3.min()

In [None]:
# Latest pubdatetime returned by query3.
result3.max()

In [None]:
# Distinct (company, group, type, device id) combinations for 'Standard' devices.
# The columns are listed without surrounding parentheses so that each one comes
# back as its own column; DISTINCT(a, b, ...) would instead return a single
# composite "row" column in PostgreSQL.
query4 = '''
SELECT DISTINCT companyname, sumdgroup, sumdtype, sumdid
FROM scooters
WHERE sumdtype = 'Standard';
'''

with engine.connect() as connection:
    result4 = pd.read_sql(text(query4), con=connection)

In [None]:
# Display the 'Standard' device rows returned by query4.
result4

In [None]:
# Distinct (pubdatetime, device id) pairs for 'Powered' devices.
# Columns are selected individually so they arrive as two separate columns
# instead of one composite "row" column; DISTINCT already removes duplicates,
# so no GROUP BY is needed.
# NOTE(review): there is no LIMIT here, so the result may be very large.
query5 = '''
SELECT DISTINCT pubdatetime, sumdid
FROM scooters
WHERE sumdtype = 'Powered';
'''

with engine.connect() as connection:
    result5 = pd.read_sql(text(query5), con=connection)

In [None]:
# First rows of the powered-device result from query5.
result5.head()

2. According to Second Substitute Bill BL2018-1202 (as amended) (https://web.archive.org/web/20181019234657/https://www.nashville.gov/Metro-Clerk/Legislative/Ordinances/Details/7d2cf076-b12c-4645-a118-b530577c5ee8/2015-2019/BL2018-1202.aspx), all permitted operators will first clean data before providing or reporting data to Metro. Data processing and cleaning shall include:  
* Removal of staff servicing and test trips - NOT NEEDED
* Removal of trips below one minute 
* Trip lengths are capped at 24 hours  
Are the scooter companies in compliance with the second and third parts of this rule?

In [None]:
# Every distinct pubtimestamp in the trips table; the min/max of this column
# are inspected in the following cells.
query6 = """
SELECT DISTINCT(pubtimestamp)
FROM trips;
"""

with engine.connect() as conn:
    result6 = pd.read_sql(sql=text(query6), con=conn)

In [None]:
# Earliest pubtimestamp returned by query6.
result6.min()

In [None]:
# Latest pubtimestamp returned by query6.
result6.max()

In [None]:
# First ten rows of the trips sample held in result2.
result2.head(10)

4. What is the highest count of scooters being used at the same time? When did it occur? Does this vary by zip code or other geographic region?

In [None]:
# Sample of 100 trips rows used to inspect column types for question 4.
query7 = """
SELECT *
FROM trips
LIMIT 100
"""

with engine.connect() as conn:
    result7 = pd.read_sql(sql=text(query7), con=conn)

In [None]:
# Column names, dtypes and non-null counts for the 100-row trips sample.
result7.info()

In [None]:
# Summary of just the starttime column.
result7['starttime'].info()

In [None]:
# First rows of the 100-row trips sample.
result7.head()

In [None]:
# Trip starts per start date and start hour (first two characters of the
# starttime text), keeping only trips lasting between 1 and 1440 minutes,
# busiest hours first.
query8 = """
SELECT startdate, LEFT(starttime::text, 2) AS start_hr, COUNT(triprecordnum) AS trip_starts
FROM trips
WHERE tripduration >= 1.0 AND tripduration <= 1440
GROUP BY startdate, start_hr
ORDER BY trip_starts DESC
;
"""

with engine.connect() as conn:
    result8 = pd.read_sql(sql=text(query8), con=conn)

In [None]:
# Display the trip starts per date and hour from query8.
result8

In [None]:
# Per start date and start hour: trip starts, distinct devices, and the
# average/maximum trip distance and duration, for trips lasting between 1 and
# 1440 minutes. Sorted by trip starts, then by distinct devices.
query9 = """
SELECT startdate, 
    LEFT(starttime::text, 2) AS start_hr, 
    COUNT(triprecordnum) AS trip_starts, 
    COUNT(DISTINCT(sumdid)) AS num_scooters, 
    ROUND(AVG(tripdistance),0) AS avg_distance,
    ROUND(MAX(tripdistance),0) AS max_distance,
    ROUND(AVG(tripduration),0) AS avg_trip_duration, 
    ROUND(MAX(tripduration),0) AS max_trip_duration
FROM trips
WHERE tripduration >= 1.0 AND tripduration <= 1440
GROUP BY startdate, start_hr
ORDER BY trip_starts DESC, num_scooters DESC
;
"""

with engine.connect() as conn:
    result9 = pd.read_sql(sql=text(query9), con=conn)

In [None]:
# Ten busiest date/hour combinations from query9.
result9.head(10)

In [None]:
# Number of date/hour rows and columns returned by query9.
result9.shape

In [None]:
# Overall average/maximum trip distance and duration for trips lasting
# between 1 and 1440 minutes.
query10 = """
SELECT ROUND(AVG(tripdistance),0) AS avg_distance,
       ROUND(MAX(tripdistance),0) AS max_distance,
       ROUND(AVG(tripduration),0) AS avg_trip_duration, 
       ROUND(MAX(tripduration),0) AS max_trip_duration
FROM trips
WHERE tripduration >= 1.0 AND tripduration <= 1440
;
"""

with engine.connect() as conn:
    result10 = pd.read_sql(sql=text(query10), con=conn)

In [None]:
# Display the overall distance/duration summary from query10.
result10

In [None]:
# Ten trips whose company name contains 'Bolt'.
query11 = """
SELECT *
FROM trips
WHERE companyname LIKE '%Bolt%'
LIMIT 10;
"""

with engine.connect() as conn:
    result11 = pd.read_sql(sql=text(query11), con=conn)

In [None]:
# Display the sample of trips matching '%Bolt%'.
result11

# TRIPS_CLEANED ANALYSIS BELOW

In [None]:
# cleaned scooters trips data from 3/21/23 William
trips_cleaned = pd.read_csv('../data/tripscleaned.csv')
print(trips_cleaned.shape)
trips_cleaned.head(n=3)

In [None]:
# Summary statistics for the numeric columns of the cleaned trips data.
trips_cleaned.describe()

In [None]:
# Columns describing where and when each trip started.
# .copy() gives an independent frame, so adding a geometry column to it later
# does not trigger pandas' SettingWithCopyWarning.
trips_cleaned_starts = trips_cleaned[['companyname', 'triprecordnum', 'sumdid', 'tripduration', 'tripdistance', 'startdate', 'starttime', 'startlatitude', 'startlongitude']].copy()
trips_cleaned_starts.head()

In [None]:
# Columns describing where and when each trip ended.
# .copy() gives an independent frame, so adding a geometry column to it later
# does not trigger pandas' SettingWithCopyWarning.
trips_cleaned_ends = trips_cleaned[['companyname', 'triprecordnum', 'sumdid', 'tripduration', 'tripdistance', 'enddate', 'endtime', 'endlatitude', 'endlongitude']].copy()
trips_cleaned_ends.head()

In [None]:
# Build one point per trip start from (longitude, latitude).
# points_from_xy is vectorized, which avoids constructing a Point in Python
# for every row via DataFrame.apply.
trips_cleaned_starts['geometry'] = gpd.points_from_xy(
    trips_cleaned_starts.startlongitude,
    trips_cleaned_starts.startlatitude,
)
trips_cleaned_starts.head(3)

In [None]:
# Promote the trip starts to a GeoDataFrame with point geometry built from
# the start longitude/latitude columns.
start_points = gpd.points_from_xy(
    trips_cleaned_starts.startlongitude,
    trips_cleaned_starts.startlatitude,
)
trips_cleaned_starts = gpd.GeoDataFrame(trips_cleaned_starts, geometry=start_points)
trips_cleaned_starts.head()

In [None]:
# Declare the trip-start coordinates as EPSG:4326 and confirm the setting.
trips_cleaned_starts = trips_cleaned_starts.set_crs(epsg=4326)
print(trips_cleaned_starts.crs)

In [None]:
# to_crs returns a new object rather than modifying in place, so the result
# has to be assigned back for the reprojection to take effect.
trips_cleaned_starts = trips_cleaned_starts.to_crs('EPSG:4326')
print(trips_cleaned_starts.crs)

In [None]:
# Build one point per trip end from (longitude, latitude).
# points_from_xy is vectorized, which avoids constructing a Point in Python
# for every row via DataFrame.apply.
trips_cleaned_ends['geometry'] = gpd.points_from_xy(
    trips_cleaned_ends.endlongitude,
    trips_cleaned_ends.endlatitude,
)
trips_cleaned_ends.head(3)

In [None]:
# Promote the trip ends to a GeoDataFrame with point geometry built from
# the end longitude/latitude columns.
end_points = gpd.points_from_xy(
    trips_cleaned_ends.endlongitude,
    trips_cleaned_ends.endlatitude,
)
trips_cleaned_ends = gpd.GeoDataFrame(trips_cleaned_ends, geometry=end_points)
trips_cleaned_ends.head()

In [None]:
# Declare the trip-end coordinates as EPSG:4326 and confirm the setting.
trips_cleaned_ends = trips_cleaned_ends.set_crs(epsg=4326)
print(trips_cleaned_ends.crs)

In [None]:
# to_crs returns a new object rather than modifying in place, so the result
# has to be assigned back for the reprojection to take effect.
trips_cleaned_ends = trips_cleaned_ends.to_crs('EPSG:4326')
print(trips_cleaned_ends.crs)

In [None]:
# Zip code polygons, from earlier geospatial exercise.
zipcodes = gpd.read_file('../data/zipcodes.geojson')
print(zipcodes.crs)
zipcodes.head()

In [None]:
# Quick plot of the zip code polygons; the trailing semicolon hides the returned object.
zipcodes.plot();

In [None]:
# Legend placed outside the upper-right corner of the axes, in two columns.
leg_kwds = dict(
    title='Zipcodes',
    loc='upper left',
    bbox_to_anchor=(1, 1.03),
    ncol=2,
)

# Colour each polygon by its zip value.
zipcodes.plot(
    column='zip',
    figsize=(10, 10),
    edgecolor='black',
    legend=True,
    legend_kwds=leg_kwds,
    cmap='BuGn',
)
plt.show()

In [None]:
# Cleaned bus stop locations.
# NOTE(review): the original note on this line was cut off ("from ea");
# presumably the file comes from the earlier geospatial exercise — confirm.
bus_stops = pd.read_csv('../data/busstops_cleaned.csv')
print(bus_stops.shape)
bus_stops.head(n=3)

In [None]:
# Build one point per bus stop from (lng, lat).
# points_from_xy is vectorized, which avoids constructing a Point in Python
# for every row via DataFrame.apply.
bus_stops['geometry'] = gpd.points_from_xy(bus_stops.lng, bus_stops.lat)
bus_stops.head(3)

In [None]:
# Bus stops as a GeoDataFrame that shares the zip code layer's CRS.
bus_geo = gpd.GeoDataFrame(
    bus_stops,
    geometry=bus_stops['geometry'],
    crs=zipcodes.crs,
)

In [None]:
# Keep only the columns needed for the spatial joins.
zipcodes = zipcodes.loc[:, ['zip', 'po_name', 'geometry']]
zipcodes

In [None]:
# Attach to each bus stop the zip code polygon it falls within.
# `predicate` is the current name of this argument; the older `op` spelling is
# deprecated and rejected by recent GeoPandas releases.
stops_by_zip = gpd.sjoin(bus_geo, zipcodes, predicate = 'within')

In [None]:
# Attach to each trip start the zip code polygon it falls within.
# `predicate` is the current name of this argument; the older `op` spelling is
# deprecated and rejected by recent GeoPandas releases.
trip_starts_by_zip = gpd.sjoin(trips_cleaned_starts, zipcodes, predicate = 'within')

In [None]:
# Attach to each trip end the zip code polygon it falls within.
# `predicate` is the current name of this argument; the older `op` spelling is
# deprecated and rejected by recent GeoPandas releases.
trip_ends_by_zip = gpd.sjoin(trips_cleaned_ends, zipcodes, predicate = 'within')

In [None]:
# Number of trip starts in each zip code, most frequent first.
trip_starts_by_zip['zip'].value_counts()

In [None]:
# Number of trip ends in each zip code, most frequent first.
trip_ends_by_zip['zip'].value_counts()

In [None]:
# Bus stops located in zip code 37207.
in_37207 = stops_by_zip['zip'] == '37207'
stops_in_37207 = stops_by_zip[in_37207]
stops_in_37207.shape

In [None]:
# Trip starts located in zip code 37207.
starts_in_37207 = trip_starts_by_zip['zip'] == '37207'
trip_starts_in_37207 = trip_starts_by_zip[starts_in_37207]
trip_starts_in_37207.shape

In [None]:
# Trip ends located in zip code 37207.
ends_in_37207 = trip_ends_by_zip['zip'] == '37207'
trip_ends_in_37207 = trip_ends_by_zip[ends_in_37207]
trip_ends_in_37207.shape

In [None]:
# Re-wrap the 37207 trip starts as a GeoDataFrame using the zip code layer's CRS.
trip_starts_in_37207 = gpd.GeoDataFrame(
    trip_starts_in_37207,
    geometry=trip_starts_in_37207['geometry'],
    crs=zipcodes.crs,
)

In [None]:
# Re-wrap the 37207 trip ends as a GeoDataFrame using the zip code layer's CRS.
trip_ends_in_37207 = gpd.GeoDataFrame(
    trip_ends_in_37207,
    geometry=trip_ends_in_37207['geometry'],
    crs=zipcodes.crs,
)

In [None]:
# Polygon row(s) for zip code 37207.
is_37207 = zipcodes['zip'] == '37207'
polygon37207 = zipcodes[is_37207]
polygon37207.shape

In [None]:
# Draw the 37207 polygon, then overlay the bus stops (coloured by route) and
# the trip starts (coloured by trip record number) on the same axes.
ax = polygon37207.plot(color='lightgreen', figsize=(8, 10))
stops_in_37207.plot(column='route', ax=ax)
trip_starts_in_37207.plot(column='triprecordnum', ax=ax)
plt.show()

In [None]:
# Centroid of each 37207 polygon row, shown with its index label.
polygon37207.geometry.centroid

In [None]:
#find the center of the larger (more southern) polygon and look at it
# 42 is the index label of that polygon's row.
centroids_37207 = polygon37207.geometry.centroid
center = centroids_37207.loc[42]
print(center)

In [None]:
#find the center of the larger (more southern) polygon and look at it
# NOTE(review): this cell repeats the preceding one; one of the two is redundant.
# The centroid is looked up by index label 42.
center = polygon37207.geometry.centroid[42]
print(center)

In [None]:
# folium expects [latitude, longitude], i.e. the point's y before its x
latitude, longitude = center.y, center.x
area_center = [latitude, longitude]

# confirm the order of the location that will be passed to folium
print(area_center)

In [None]:
# Base folium map centred on Nashville zip code 37207; the last line displays it.
map_37207 = folium.Map(
    location=area_center,
    zoom_start=12,
)
map_37207

In [None]:
# Print the first three bus-stop rows to see what iterrows() yields.
# Limiting with head(3) avoids walking the whole frame just to show three rows.
for row_index, row_values in stops_in_37207.head(3).iterrows():
    print('index is', row_index)
    print('values are:')
    print(' ')
    print(row_values)
    print('------------------------- ')

In [None]:
# Print the first three trip-start rows to see what iterrows() yields.
# Limiting with head(3) avoids walking the whole frame just to show three rows.
for row_index, row_values in trip_starts_in_37207.head(3).iterrows():
    print('index is', row_index)
    print('values are:')
    print(' ')
    print(row_values)
    print('------------------------- ')

In [None]:
#draw our zip code area: 37207
map_37207 = folium.Map(location =  area_center, zoom_start = 12)

folium.GeoJson(polygon37207).add_to(map_37207)

#iterate through stops_in_37207 to create locations and markers 
#for each bus stop
#remember for Folium locations, lat is listed first!!

for row_index, row_values in stops_in_37207.iterrows():
    loc = [row_values['lat'], row_values['lng']]
    pop = str(row_values['route'])
    icon = folium.Icon(color="red", icon="bus", prefix='fa')

    marker = folium.Marker(
        location = loc, 
        popup = pop, icon = icon) 

    # A marker only appears once it is attached to the map, so every bus-stop
    # marker must be added here inside the loop.
    marker.add_to(map_37207)

#same again for each trip start, using a green scooter icon and the
#trip record number as the popup text
for row_index, row_values in trip_starts_in_37207.iterrows():
    loc = [row_values['startlatitude'], row_values['startlongitude']]
    pop = str(row_values['triprecordnum'])
    icon = folium.Icon(color="green", icon="scooter", prefix='fa')

    marker = folium.Marker(
        location = loc, 
        popup = pop, icon = icon) 

    marker.add_to(map_37207)

#save an interactive HTML copy of the map
#NOTE(review): this assumes the ../maps directory already exists.
map_37207.save('../maps/map37207.html')

#display our map
map_37207

In [None]:
cluster_map_37207 = folium.Map(location=area_center, zoom_start=12)

# a single cluster layer receives every marker created below
marker_cluster = MarkerCluster().add_to(cluster_map_37207)

folium.GeoJson(polygon37207).add_to(cluster_map_37207)

# (frame, latitude column, longitude column, popup column, icon colour, icon name)
# bus stops are added first, then trip starts
marker_layers = [
    (stops_in_37207, 'lat', 'lng', 'route', "blue", "bus"),
    (trip_starts_in_37207, 'startlatitude', 'startlongitude', 'triprecordnum', "green", "scooter"),
]

for frame, lat_col, lng_col, popup_col, colour, glyph in marker_layers:
    for _, record in frame.iterrows():
        # folium locations list latitude first
        folium.Marker(
            location=[record[lat_col], record[lng_col]],
            popup=str(record[popup_col]),
            icon=folium.Icon(color=colour, icon=glyph, prefix='fa'),
        ).add_to(marker_cluster)

# write the interactive map out as HTML
cluster_map_37207.save('../maps/trip_starts_cluster37207.html')

cluster_map_37207

3. The goal of Metro Nashville is to have each scooter used a minimum of 3 times per day. Based on the data, what is the average number of trips per scooter per day? Make sure to consider the days that a scooter was available. How does this vary by company?

4. What is the highest count of scooters being used at the same time? When did it occur? Does this vary by zip code or other geographic region?

5. SUMDs can provide alternative transportation and provide "last mile" access to public transit. How often are trips starting near public transit hubs? You can download a dataset of bus stop locations from https://data.nashville.gov/Transportation/Regional-Transportation-Authority-Bus-Stops/p886-fnbd.

Deliverables:
At the conclusion of this project, your group should deliver a presentation which addresses the following points:
* Are scooter companies in compliance with the required data cleaning?
* What are typical usage patterns for scooters in terms of time, location, and trip duration?
* Does it appear that scooters are used as "last mile" transportation from public transit hubs to work or school?
* What are your recommendations for total number of scooters for the city overall and density of scooters by zip code?