# Maximum total trip duration per day, by zip code

In [11]:
import os
from urllib.parse import quote_plus

from sqlalchemy import create_engine, text
import pandas as pd
import matplotlib.pyplot as plt
from shapely.geometry import Point
import geopandas as gpd
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

In [2]:
# Connection settings for the PostgreSQL database that holds the scooter data.
# User, password, host and port can be overridden through the standard libpq
# environment variables (PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real
# credentials never have to be typed into the notebook. When a variable is not
# set, the fallback reproduces the original local-development value.
database_name = 'scooters'

db_user = os.environ.get('PGUSER', 'postgres')
db_password = os.environ.get('PGPASSWORD', 'postgres')
db_host = os.environ.get('PGHOST', 'localhost')
db_port = os.environ.get('PGPORT', '5432')

# User and password are URL-escaped because characters such as '@' or '/'
# would otherwise corrupt the connection URL.
connection_string = (
    f"postgresql://{quote_plus(db_user)}:{quote_plus(db_password)}"
    f"@{db_host}:{db_port}/{database_name}"
)

In [3]:
# Build the SQLAlchemy engine from the connection string defined above; the
# query cell below opens its database connection through this engine.
engine = create_engine(connection_string)

In [4]:
# The SQL query filters on compliance rules 2 and 3 (see question 2):
# only trips lasting between 1 minute and 24 hours (24 * 60 minutes) are loaded.

trips_query = '''
SELECT sumdid, startdate, starttime, enddate, endtime, tripduration, tripdistance, startlatitude,
        startlongitude, endlatitude, endlongitude
FROM trips
WHERE tripduration >= 1.0
	AND tripduration <= (24 * 60);
'''

with engine.connect() as connection:
    trips = pd.read_sql(text(trips_query), con = connection)

# Discard trips that report no distance travelled even though they lasted
# five minutes or longer.
no_distance = trips['tripdistance'] <= 0
five_minutes_or_more = trips['tripduration'] >= 5
drop_entries = trips[no_distance & five_minutes_or_more].index
trips = trips.drop(index = drop_entries)
trips.head(10)

Unnamed: 0,sumdid,startdate,starttime,enddate,endtime,tripduration,tripdistance,startlatitude,startlongitude,endlatitude,endlongitude
0,PoweredEASTYRQ3VKAGX,2019-05-04,06:43:29,2019-05-04,06:59:34,16.083333,660.8064,36.163692,-86.777121,36.164559,-86.768777
2,Powered25UE3EUVBN6RU,2019-05-04,06:57:24,2019-05-04,07:00:59,3.583333,218.8464,36.160301,-86.778443,36.160329,-86.778553
3,PoweredUKXD3TNEM3NCN,2019-05-04,06:58:15,2019-05-04,07:02:26,4.183333,19.812,36.136907,-86.801883,36.136947,-86.80173
4,PoweredZVTEPTDZIUK5L,2019-05-04,06:47:15,2019-05-04,07:01:23,14.133333,544.068,36.149881,-86.796905,36.160372,-86.778379
5,Powered26AH2TKSXSOIE,2019-05-04,06:20:39,2019-05-04,07:09:43,49.066667,822.96,36.19228,-86.788583,36.192692,-86.790043
6,PoweredYRQLJ5TIJG2TF,2019-05-04,06:16:26,2019-05-04,07:03:59,47.55,754.0752,36.155414,-86.775036,36.138253,-86.765191
7,PoweredHAQRQEW6FPKV5,2019-05-04,06:49:57,2019-05-04,07:03:09,13.2,1206.7032,36.146114,-86.814444,36.146233,-86.814455
8,PoweredDUBBDHXJH2D7X,2019-05-04,05:43:42,2019-05-04,07:03:59,80.283333,700.7352,36.147591,-86.800073,36.147714,-86.805921
9,Powered5KCCMXGL35OXV,2019-05-04,06:09:25,2019-05-04,07:06:28,57.05,485.5464,36.159912,-86.779686,36.152679,-86.789676
10,PoweredTM4WQQLA7MFNL,2019-05-04,08:01:43,2019-05-04,08:12:18,10.583333,215.4936,36.133833,-86.782791,36.131633,-86.781696


In [5]:
# Parse both date columns into pandas datetimes so their year / month / day
# parts can be combined with the time-of-day columns later on.
trips = trips.assign(
    startdate = pd.to_datetime(trips['startdate']),
    enddate = pd.to_datetime(trips['enddate']),
)

In [6]:
def fulltimejunc(date, time):
    """Combine a calendar date and a time of day into a single ``pd.Timestamp``.

    Parameters
    ----------
    date : object exposing ``year``, ``month`` and ``day``
        Supplies the calendar part; any time-of-day it may carry is ignored.
    time : object exposing ``hour``, ``minute``, ``second`` and ``microsecond``
        Supplies the time-of-day part.

    Returns
    -------
    pd.Timestamp
        Timestamp built from the date fields of ``date`` and the time fields
        of ``time``.
    """
    calendar_part = {'year': date.year, 'month': date.month, 'day': date.day}
    clock_part = {'hour': time.hour,
                  'minute': time.minute,
                  'second': time.second,
                  'microsecond': time.microsecond}
    return pd.Timestamp(**calendar_part, **clock_part)

In [7]:
# Combine each trip's date and time-of-day columns into full timestamps.
# Every column is built in a single pass and assigned once, rather than
# writing into `trips` cell by cell with `.loc` while iterating over it with
# `iterrows()`: that pattern modifies the frame being iterated and is
# extremely slow on a table of this size.
trips['start_datetime'] = [
    fulltimejunc(date, time)
    for date, time in zip(trips['startdate'], trips['starttime'])
]
trips['end_datetime'] = [
    fulltimejunc(date, time)
    for date, time in zip(trips['enddate'], trips['endtime'])
]

## Looking into total trip duration for each day

In [8]:
# Sum of `tripduration` per calendar day, bucketed by each trip's start timestamp.
daily_start_buckets = pd.Grouper(key = 'start_datetime', freq = 'D')
trip_duration_sum = trips.groupby(daily_start_buckets)['tripduration'].sum().to_frame()

In [9]:
# Show the daily totals with the busiest days first.
trip_duration_sum.sort_values(by = 'tripduration', ascending = False)

Unnamed: 0_level_0,tripduration
start_datetime,Unnamed: 1_level_1
2019-05-26,841170.754883
2019-05-25,692909.992617
2019-06-01,646967.625933
2019-05-27,445520.800817
2019-06-08,415942.715050
...,...
2019-05-14,71209.677217
2019-05-09,58619.608833
2019-07-30,57702.974820
2019-05-01,54769.893367


#### The day with the most scooter use was 2019-05-26 with 841,171 total minutes
#### The day with the second highest scooter use was 2019-05-25 with 692,910 total minutes
#### *Beyond MVP*: we could also group by company, or find the most profitable day using scooters['costpermin']

In [12]:
# One shapely Point per trip start, in (longitude, latitude) order.
# The coordinates are passed through float() before the Point is built.
trips['start_geometry'] = [
    Point((float(longitude), float(latitude)))
    for longitude, latitude in zip(trips['startlongitude'], trips['startlatitude'])
]

In [13]:
# One shapely Point per trip end, in (longitude, latitude) order.
# The coordinates are passed through float() before the Point is built.
trips['end_geometry'] = [
    Point((float(longitude), float(latitude)))
    for longitude, latitude in zip(trips['endlongitude'], trips['endlatitude'])
]

In [14]:
# Load the zip code polygons and print their coordinate reference system,
# which is reused when the trips GeoDataFrame is built.
zipcodes = gpd.read_file('../../data/zipcodes.geojson')
print(zipcodes.crs)
zipcodes.head(5)

epsg:4326


Unnamed: 0,zip,objectid,po_name,shape_stlength,shape_starea,geometry
0,37115,1,MADISON,178783.0248888682,596553400.5788574,"MULTIPOLYGON (((-86.68725 36.31821, -86.68722 ..."
1,37216,3,NASHVILLE,75820.99782140006,188884682.28344727,"MULTIPOLYGON (((-86.73451 36.23774, -86.73425 ..."
2,37204,9,NASHVILLE,93180.2922504256,200664795.51708984,"MULTIPOLYGON (((-86.77914 36.13424, -86.77923 ..."
3,37027,11,BRENTWOOD,159760.6942933173,174978422.04101562,"MULTIPOLYGON (((-86.81258 36.06319, -86.81263 ..."
4,37064,18,FRANKLIN,28995.828320601937,46969608.005737305,"MULTIPOLYGON (((-87.02197 36.01200, -87.02140 ..."


In [15]:
# Wrap the trips in a GeoDataFrame whose geometry is the trip start point,
# using the same CRS as the zip code polygons so the two layers can be joined.
trips_gdf = gpd.GeoDataFrame(data = trips,
                             geometry = trips['start_geometry'],
                             crs = zipcodes.crs)

In [16]:
# Attach to every trip the zip code polygon that contains its start point.
# geopandas 0.10 renamed the `op` keyword of sjoin to `predicate`, and 1.0
# rejects `op` outright, so the current keyword is tried first.
try:
    trips_by_zip = gpd.sjoin(trips_gdf, zipcodes, predicate = 'within')
except TypeError:
    # geopandas older than 0.10 only understands the `op` keyword.
    trips_by_zip = gpd.sjoin(trips_gdf, zipcodes, op = 'within')

In [17]:
# Sum of `tripduration` for every (zip code, start day) pair. Despite the
# variable name these are daily sums; the per-zip maximum is taken later.
trips_by_zip_maxduration = (
    trips_by_zip
    .groupby(by = ['zip', pd.Grouper(key = 'start_datetime', freq = 'D')])['tripduration']
    .sum()
    .to_frame()
)

In [18]:
# Turn the group keys ('zip' and the daily 'start_datetime' bucket) back into
# ordinary columns so they can serve as merge keys and appear in the result.
# NOTE(review): re-running this cell on its own adds a stray 'index' column;
# re-run the groupby cell before it.
trips_by_zip_maxduration = trips_by_zip_maxduration.reset_index()

In [19]:
# For each zip code, keep the largest of its daily duration totals.
maxduration_by_zip = (
    trips_by_zip_maxduration
    .groupby(by = ['zip'])['tripduration']
    .max()
    .to_frame()
)

In [21]:
# Recover the day on which each zip code reached its maximum daily total by
# joining the per-zip maxima back onto the daily totals on (zip, tripduration).
df23 = maxduration_by_zip.merge(
    trips_by_zip_maxduration,
    on = ['zip', 'tripduration'],
    how = 'inner',
)

# Busiest zip codes first.
df23.sort_values(by = 'tripduration', ascending = False)

Unnamed: 0,zip,tripduration,start_datetime
5,37203,400511.89375,2019-06-01
4,37201,214405.339683,2019-05-26
15,37213,62480.196533,2019-05-26
21,37219,38850.101067,2019-05-26
12,37210,32625.577367,2019-05-26
6,37204,29854.954633,2019-05-18
8,37206,29065.551917,2019-06-08
10,37208,29061.482017,2019-06-08
14,37212,26544.680267,2019-05-26
9,37207,13773.217483,2019-05-26
