In [3]:
import time
import pandas as pd
import numpy as np
from scipy import stats

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }
accepted_months = ['january','february','march','april','may','june']
accepted_days = ['sunday','monday','tuesday','wednesday','thursday','friday','saturday']

In [27]:
while True:
    city = input('\nPlease enter the city whose bikeshare data you would like to analyze: ')
    city = city.lower()
    if city == 'chicago' or city == 'new york city' or city == 'washington':
        break
    else:
        print("\nThat is not a valid city! \nSupported cities are: 'Chicago', 'New York City', and 'Washington'.")
print('You selected {}!'.format(city.title()))

# TO DO: get user input for month (all, january, february, ... , june)
while True:
    month = input('\nPlease enter the month for which you would like to analyze data: ')
    month = month.lower()
    if (month == 'all') or (month in accepted_months):
        break
    else:
        print("\nThat is not a valid month! \nSupported options are: {}.".format(accepted_months))
print("You selected {}!".format(month.title()))

# TO DO: get user input for day of week (all, monday, tuesday, ... sunday)
while True:
    day = input('\nPlease enter the day of the week for which you would like to analyze data: ')
    day = day.lower()
    if (day == 'all') or (day in accepted_days):
        break
    else:
        print("\nThat is not a valid day! \nSupported options are: {}.".format(accepted_days))
print("You selected {}!".format(day.title()))

print('-'*40)
print('Initiating analysis with selections as city: {}, month: {}, day: {}'.format(city.title(),month.title(),day.title()))


Please enter the city whose bikeshare data you would like to analyze: new york city
You selected New York City!

Please enter the month for which you would like to analyze data: all
You selected All!

Please enter the day of the week for which you would like to analyze data: all
You selected All!
----------------------------------------
Initiating analysis with selections as city: New York City, month: All, day: All


In [28]:
"""
Loads data for the specified city and filters by month and day if applicable.

Args:
    (str) city - name of the city to analyze
    (str) month - name of the month to filter by, or "all" to apply no month filter
    (str) day - name of the day of week to filter by, or "all" to apply no day filter
Returns:
    df - Pandas DataFrame containing city data filtered by month and day
"""
df = pd.read_csv(CITY_DATA[city])

# Convert 'Start Time' to DateTime
df['Start Time'] = pd.to_datetime(df['Start Time'])
df['Month'] = df['Start Time'].dt.month
df['Day of Week'] = df['Start Time'].dt.weekday
df['Start Hour'] = df['Start Time'].dt.hour

# Filter by month selection
if month != 'all':
    month = accepted_months.index(month) + 1
    df = df[df['Month'] == month]
# Filter by day of week selection
if day != 'all':
    day = accepted_days.index(day)
    df = df[df['Day of Week'] == day]

In [63]:
"""Displays statistics on the most frequent times of travel."""

print('\nCalculating The Most Frequent Times of Travel...\n')
start_time = time.time()

# TO DO: display the most common month
x = df['Month'].mode()
print('The most common month for rides in your selection is {}.'.format(accepted_months[x[0] - 1].title()))

# TO DO: display the most common day of week
y = df['Day of Week'].mode()
print('The most common day of week for rides in your selection is {}.'.format(accepted_days[y[0]].title()))

# TO DO: display the most common start hour
z = df['Start Hour'].mode()
print('The most common hour for rides in your selection is at {}:00.'.format(z[0]))

print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)


Calculating The Most Frequent Times of Travel...

The most common month for rides in your selection is June.
The most common day of week for rides in your selection is Tuesday.
The most common hour for rides in your selection is at 17:00.

This took 0.010971546173095703 seconds.
----------------------------------------


In [54]:
"""Displays statistics on the most popular stations and trip."""

print('\nCalculating The Most Popular Stations and Trip...\n')
start_time = time.time()

# TO DO: display most commonly used start station
stat_station = df['Start Station'].mode()
print('The most commonly used start station is {}.'.format(start_station[0]))

# TO DO: display most commonly used end station
end_station = df['End Station'].mode()
print('The most commonly used end station is {}.'.format(start_station[0]))

# TO DO: display most frequent combination of start station and end station trip
df['Journey'] = df['Start Station'].str.cat(df['End Station'], sep =', to ')
popular_journey = df['Journey'].mode()
print('The most popular journey is from {}.'.format(popular_journey[0]))

print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)


Calculating The Most Popular Stations and Trip...

The most commonly used start station is Pershing Square North.
The most commonly used end station is Pershing Square North.
The most popular journey is from E 7 St & Avenue A, to Cooper Square & E 7 St.

This took 0.154585599899292 seconds.
----------------------------------------


In [60]:
"""Displays statistics on the total and average trip duration."""

print('\nCalculating Trip Duration...\n')
start_time = time.time()

# TO DO: display total travel time
x = df['Trip Duration'].sum()
print('The total trip duration for all rides is {} seconds.'.format(x))

# TO DO: display mean travel time
y = df['Trip Duration'].mean()
print('The mean trip duration for all rides is {} seconds.'.format(y))

print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)


Calculating Trip Duration...

The total trip duration for all rides is 269905248 seconds.
The mean trip duration for all rides is 899.68416 seconds.

This took 0.000997781753540039 seconds.
----------------------------------------


In [113]:
print(df.head(20))
"""Displays statistics on bikeshare users."""

print('\nCalculating User Stats...\n')
start_time = time.time()

# Display counts of user types
x = df.groupby('User Type').count()
print('Riders that are Customers: {} \nRiders that are Subscribers: {}'.format(x['Start Time']['Customer'],x['Start Time']['Subscriber']))

if city == 'chicago' or city == 'new york city':
    # Display counts of gender
    y = df.groupby('Gender').count()
    print('\nRiders that are Female: {} \nRiders that are Male: {}'.format(y['Start Time']['Female'],y['Start Time']['Male']))

    # Display earliest, most recent, and most common year of birth
    z_mode = df['Birth Year'].mode()
    print('\nThe most common birth year is ',int(z_mode[0]))
    z_min = df['Birth Year'].min()
    print('The earliest is ',int(z_min))
    z_max = df['Birth Year'].max()
    print('The most recent is ',int(z_max))

print("\nThis took %s seconds." % (time.time() - start_time))
print('-'*40)

    Unnamed: 0          Start Time             End Time  Trip Duration  \
0      5688089 2017-06-11 14:55:05  2017-06-11 15:08:21            795   
1      4096714 2017-05-11 15:30:11  2017-05-11 15:41:43            692   
2      2173887 2017-03-29 13:26:26  2017-03-29 13:48:31           1325   
3      3945638 2017-05-08 19:47:18  2017-05-08 19:59:01            703   
4      6208972 2017-06-21 07:49:16  2017-06-21 07:54:46            329   
5      1285652 2017-02-22 18:55:24  2017-02-22 19:12:03            998   
6      1675753 2017-03-06 16:22:53  2017-03-06 16:30:51            478   
7      1692245 2017-03-07 07:42:24  2017-03-07 08:49:42           4038   
8      2271331 2017-04-02 08:02:36  2017-04-02 09:28:08           5132   
9      1558339 2017-03-01 23:01:31  2017-03-01 23:06:41            309   
10     2287178 2017-04-02 14:37:20  2017-04-02 14:56:12           1131   
11     2744874 2017-04-13 13:40:39  2017-04-13 13:45:59            319   
12     3398180 2017-04-27 23:27:31  20