## 4. What is the highest count of scooters being used at the same time? When did it occur? Does this vary by zip code or other geographic region?

In [None]:
from sqlalchemy import create_engine, text
import pandas as pd
import matplotlib.pyplot as plt

#### Setting up engine to use SQL

In [None]:
database_name = 'scooters'    # Name of the PostgreSQL database holding the `trips` table

# NOTE(review): user, password, host and port are hard-coded in this URL.
connection_string = f"postgresql://postgres:postgres@localhost:5432/{database_name}"

In [None]:
# SQLAlchemy engine built from the connection string; used below to open connections.
engine = create_engine(connection_string)

#### Bringing in the essential columns and filtering out non-compliant entries

In [None]:
# Pull only the scooter id and the start/end date and time columns, keeping
# trips whose duration lies between 1.0 and 24 * 60 (presumably minutes).
# NOTE(review): in SQL, AND binds tighter than OR, so the filter is evaluated as
#   (tripduration >= 1.0 AND tripduration <= 1440)
#   OR (tripdistance >= 0 AND tripduration >= 5.0 AND tripduration <= 1440).
# Every row that satisfies the OR branch already satisfies the first branch,
# so the tripdistance condition currently has no effect -- confirm the intent.
trips_query = '''
SELECT sumdid, startdate, starttime, enddate, endtime
FROM trips
WHERE tripduration >= 1.0
	AND tripduration <= (24 * 60)
	OR (tripdistance >= 0 AND tripduration >= 5.0 AND tripduration <=1440);
'''

# Run the query and load the result set into a DataFrame.
with engine.connect() as connection:    
    trips = pd.read_sql(text(trips_query), con = connection)

In [None]:
# Inspect the column dtypes and non-null counts of the raw query result.
trips.info()

In [None]:
# Convert both date columns to pandas datetimes, then re-check the dtypes.
for date_col in ('startdate', 'enddate'):
    trips[date_col] = pd.to_datetime(trips[date_col])
trips.info()

### Creating start_datetime and end_datetime columns

In [None]:
def fulltimejunc(date, time):
    """Merge a calendar date and a clock time into one ``pd.Timestamp``.

    ``date`` supplies the year, month and day; ``time`` supplies the hour,
    minute, second and microsecond. Any time-of-day carried by ``date`` is
    not read.
    """
    date_parts = {'year': date.year, 'month': date.month, 'day': date.day}
    time_parts = {
        'hour': time.hour,
        'minute': time.minute,
        'second': time.second,
        'microsecond': time.microsecond,
    }
    return pd.Timestamp(**date_parts, **time_parts)

In [None]:
# Build each combined datetime column in a single pass over its date/time
# pair. This replaces a row-by-row iterrows() loop that wrote individual cells
# with .loc, which is very slow on large frames and modifies the frame while
# it is being iterated.
# pd.to_datetime keeps the column a datetime dtype even if `trips` is empty,
# so the later `.dt` accessors still work.
trips['start_datetime'] = pd.to_datetime(
    [fulltimejunc(d, t) for d, t in zip(trips['startdate'], trips['starttime'])]
)
trips['end_datetime'] = pd.to_datetime(
    [fulltimejunc(d, t) for d, t in zip(trips['enddate'], trips['endtime'])]
)

trips

In [None]:
# Display the trips ordered by start_datetime. The sorted frame is not assigned,
# so `trips` itself keeps its original row order.
trips.sort_values(by=['start_datetime'])

In [None]:
# Break the trip start into month / day / hour columns for the groupings below.
for part in ('month', 'day', 'hour'):
    trips[part] = getattr(trips['start_datetime'].dt, part)
trips

### Grouping by month, day, and hour, then counting the scooters used in those groups. (All years are 2019)

In [None]:
# Number of trips started in each (month, day, hour) bucket.
scooter_usage = (
    trips.groupby(['month', 'day', 'hour'])['sumdid']
    .count()
    .to_frame('scooters_started')
)
# Show the bucket(s) holding the maximum count.
scooter_usage.loc[scooter_usage['scooters_started'].eq(scooter_usage['scooters_started'].max())]

### The datetime with the most scooters started was month: 5 , day: 16 , hour: 23 (2801 scooters started)
#### -This is almost twice as many scooters started as in the datetime with the second-highest count
##### -Nashville hosted 'The Who' at the Bridgestone Arena on this day

In [None]:
# The ten (month, day, hour) buckets with the highest number of trips started
scooter_usage.sort_values(by='scooters_started', ascending=False).iloc[:10]

#### Creating visuals for the amount of scooters started based on month, day, and time

In [None]:
# Trips started per calendar month.
trips.groupby('month').sumdid.count()

In [None]:
# Trips started per day-of-month (all months pooled together).
trips.groupby(by=['day']).sumdid.count()

In [None]:
# Trips started per hour of the day (all dates pooled together).
trips.groupby('hour').sumdid.count()

In [None]:
# Bar chart of trips started per month.
scooter_usage_month = (
    trips.groupby('month')['sumdid'].count().to_frame('scooters_started')
)
scooter_usage_month.plot(kind='bar')

In [None]:
# Bar chart of trips started per day-of-month.
scooter_usage_day = (
    trips.groupby('day')['sumdid'].count().to_frame('scooters_started')
)
scooter_usage_day.plot(kind='bar')

In [None]:
# Bar chart of trips started per hour of the day.
scooter_usage_hour = (
    trips.groupby('hour')['sumdid'].count().to_frame('scooters_started')
)
scooter_usage_hour.plot(kind='bar')

### Finding when the first and last uses of a scooter were

In [None]:
# Row(s) with the earliest trip start. Uses `trips`: at this point in the
# notebook `df` has not been assigned yet (it is only created in a later
# cell), so referencing it here fails when the notebook is run top to bottom.
trips[trips['start_datetime'] == trips['start_datetime'].min()]

In [None]:
# Row(s) with the latest trip end. Uses `trips`: at this point in the
# notebook `df` has not been assigned yet (it is only created in a later
# cell), so referencing it here fails when the notebook is run top to bottom.
trips[trips['end_datetime'] == trips['end_datetime'].max()]

#### min date: 2019-05-01 00:00:08.34
#### max date: 2019-08-01 02:14:09

### Creating a range of dates that increases by 5 minute intervals

In [None]:
# Five-minute grid of timestamps between the two fixed endpoints (inclusive).
date_list = pd.date_range(
    '2019-05-01 00:00:00.00',
    '2019-08-01 02:15:00.00',
    freq='5min',
)

In [None]:
# Wrap the timestamp grid in a one-column frame named 'date_time'.
datelist_df = pd.DataFrame({'date_time': date_list})
datelist_df

### Seeing how many scooters were in use during individual times of the date_list created above

In [None]:

def date_compare(subdf):
    """Count, for every trip in ``subdf``, how many trips in ``subdf`` overlap it.

    Two trips overlap when each one starts no later than the other ends
    (boundaries inclusive), so a trip whose start is not after its own end
    also counts itself.

    Parameters
    ----------
    subdf : pandas.DataFrame
        Must provide ``start_datetime`` and ``end_datetime`` columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``subdf`` with an added integer ``scooters_in_use`` column.
        The input frame is left unmodified, because pandas does not support
        functions that mutate the object handed to ``groupby(...).apply``.
    """
    starts = subdf['start_datetime'].to_numpy()
    ends = subdf['end_datetime'].to_numpy()
    # overlaps[i, j] is True when trip i and trip j share at least one instant.
    # Broadcasting builds the whole pairwise matrix at once instead of running
    # a Python-level lambda for every row.
    overlaps = (starts[:, None] <= ends[None, :]) & (ends[:, None] >= starts[None, :])
    result = subdf.copy()
    result['scooters_in_use'] = overlaps.sum(axis=0)
    return result

# Run date_compare separately on each group of trips that share a `sumdid`, so
# every row's 'scooters_in_use' is computed only against rows with the same
# sumdid.
# NOTE(review): if the goal is the number of different scooters in use at the
# same moment, comparing trips only within one sumdid will not capture it, and
# the 5-minute `date_list` grid built above is not used here -- confirm intent.
df = trips.groupby('sumdid').apply(date_compare)
print (df)


In [None]:
# Row(s) carrying the largest 'scooters_in_use' value.
df.loc[df['scooters_in_use'].eq(df['scooters_in_use'].max())]