In [None]:
#importing necessary modules

import pandas as pd
import datetime as dt
import re

In [None]:
#loading the cleaned trips data written out by the earlier cleaning notebook.

trips = pd.read_csv(filepath_or_buffer='../data/tripscleaned.csv')

In [None]:
#cleaning and resetting prior index.
#'Unnamed: 0' is presumably the old index that was saved along with the cleaned .csv.
#errors='ignore' keeps this cell re-runnable: once the column has been dropped, running the
#cell a second time no longer raises a KeyError.

trips = trips.reset_index(drop=True)
trips = trips.drop(columns='Unnamed: 0', errors='ignore')

In [None]:
#converting the start and end timestamps to the 'datetime' datatype in one step.
#info() is printed afterwards so the new dtypes of both columns can be verified.

trips = trips.assign(
    startdatetime=pd.to_datetime(trips['startdatetime']),
    enddatetime=pd.to_datetime(trips['enddatetime']),
)
trips.info()

## 4. What is the highest count of scooters being used at the same time? When did it occur? Does this vary by zip code or other geographic region?
The data used is the cleaned trips data from the initial EDA and Q2; notes on the cleaning can be found in the notebook 'scooters_initial_eda_and_q2'. A trip 'occurs' on the day that the ride starts ('startdatetime'), regardless of whether the trip extends into a different day/week/month.

In [None]:
#creating columns for future analysis of scooter use during specific times. I plan to group 'startdatetime'
#by different portions of the timestamp to find the most scooters being used within a specific timeframe.
#'minute' holds the 'tens of minutes' bucket as a two-character string ('00', '10', ... '50').
#It is derived from 'startdatetime' (the same source as 'day_of_year' and 'hour') instead of slicing the
#'starttime' text, so it does not depend on the exact 'HH:MM' layout of that string, and a missing
#timestamp stays missing rather than becoming the bogus bucket 'n0' (from the text 'nan').

trips['day_of_year'] = trips['startdatetime'].dt.dayofyear
trips['hour'] = trips['startdatetime'].dt.hour
#strftime('%M') yields zero-padded minutes (e.g. '07', '42'); keep the tens digit and append a '0'.
trips['minute'] = trips['startdatetime'].dt.strftime('%M').str[0] + '0'

In [None]:
#counting the trips started on each day of the year, sorted with the largest count first
#(the value_counts defaults).

tripsbyday = trips['day_of_year'].value_counts()

In [None]:
#counting the trips started in each hour of each day, counts sorted descending
#(the value_counts defaults).

tripsbydayhour = trips.groupby('day_of_year')['hour'].value_counts()

In [None]:
#counting the trips started in each ten-minute bucket of each day and hour, counts sorted
#descending (the value_counts defaults).

tripsbydayhourminute = trips.groupby(['day_of_year', 'hour'])['minute'].value_counts()

In [None]:
#exporting as .csv for possible use in building a presentation in excel or tableau.

#tripsbyday.to_csv('../data/tripsbyday.csv')
#tripsbydayhour.to_csv('../data/tripsbydayhour.csv')
#tripsbydayhourminute.to_csv('../data/tripsbydayhourminute.csv')

In [None]:
#turning the day/hour/minute counts into a one-column table with a descriptive column name,
#then showing the 25 busiest ten-minute windows.

dayhourminute = tripsbydayhourminute.to_frame(name='number_of_trips')
dayhourminute.sort_values(by='number_of_trips', ascending=False).head(25)

In [None]:
#turning the day/hour counts into a one-column table with a descriptive column name and sorting it
#to find the busiest hours, for comparison against the more detailed ten-minute view above.

dayhour = tripsbydayhour.to_frame(name='number_of_trips')
dayhour.sort_values(by='number_of_trips', ascending=False)

In [None]:
#Roy and I compared notes and noticed our numbers were different, so I investigated further.
#For my max number of trips in a given time frame (day 136, hour 23) there were only 249 scooters
#responsible for ~2800 trips, i.e. about 11.25 trips > 1 minute for every scooter represented in
#that hour. The breakdown below shows which company recorded those trips: Lime had the highest
#number of recorded trips for the given time period. The cleaned trips file created in the initial
#notebook was updated to remove duplicates, and all duplicates are stored in a separate .csv for
#possible future analysis.

trips13623 = trips.loc[trips['day_of_year'].eq(136) & trips['hour'].eq(23)]
trips13623['companyname'].value_counts()

In [None]:
#creating a .csv of all Lime rows to explore more easily in excel.

#tripslime = trips[(trips['companyname'] == 'Lime')]

In [None]:
#returning to initial data cleaning to find and remove offending rows.

In [None]:
#trips13623.to_csv('../data/trips13623.csv')

In [None]:
#exporting records of Lime's trips to investigate more easily in excel.

#tripslime.to_csv('../data/tripslime.csv')

# Q4 Answers Pt.1
To determine the highest count of scooters being used at one time, I extracted different measures of time from 'startdatetime' to group by and count records within the table. 

I counted the number of trips grouped by day of year, by day of year + hour, and by day of year + hour + tens of minutes. With this approach, the most accurate representation of "being used at the same time" is the smallest reasonable time period the data can be distilled to. Strictly speaking, this answers the question "What is the maximum number of trips taken via scooter in a given timeframe?", where the timeframe is a small enough window that the trips can be considered to happen "at the same time".

An alternative approach would be to compare the actual timeframe of each trip to see where they overlap with each other. While it would be the most accurate, that approach is outside the scope of my current knowledge and time.

Using this method, I found that 05-25-2019 at 18:00 and 05-26-2019 at 18:50 had the most trips recorded for a given 10 minute time period, with 258 trips within their respective time intervals. The most trips recorded for a single hour long period was 05-25-2019 during the 18:00 hour with a recorded 1344 trips.

In [None]:
#saving the trips table, including the new time columns, so the Q4 analysis can continue in a
#GeoSpatial environment. The row index is written too (index=True is the to_csv default), so it
#comes back as an 'Unnamed: 0' column when the file is read again.

trips.to_csv('../data/tripscleanedwithtime.csv', index=True)