In [1]:
import time
import pandas as pd
import numpy as np

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

months = ['january' , 'february' , 'march' , 'april' , 'may' , 'june']

days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday','Sunday']

In [2]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')
    
    # TO DO: get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs

    while True:
        city = input('Would you like to see data from Chicago, New York City, or Washington? ')
        if city.lower() not in CITY_DATA:
            print('Not a valid city. Please check spelling and input again.')
        else:
            break
    
    # TO DO: get user input for month (all, january, february, ... , june)
    
    while True:
        month = input('Which month? January, February, March, April, May, June, or all? ')
        if month.lower() not in ('january' , 'february' , 'march' , 'april' , 'may' , 'june' , 'all'):
            print('Not a valid month. Please check spelling and input again.')
        else:
            break

    # TO DO: get user input for day of week (all, monday, tuesday, ... sunday)

    while True:
        day = input('Which day? Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all? ')
        if day.lower() not in ('monday' , 'tuesday' , 'wednesday' , 'thursday' , 'friday' , 'saturday' , 'sunday' , 'all'):
            print('Not a valid month. Please check spelling and input again.')
        else:
           break

    print('-'*40)
    return city, month, day

In [3]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.
    """
    df = pd.read_csv(CITY_DATA[city])
    """
    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """

    # convert the Start Time column to datetime
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    
    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day'] = df['Start Time'].dt.weekday
    df['hour'] = df['Start Time'].dt.hour
    
    # filter by month if applicable
    if month != 'all':
        # use the index of the months list to get the corresponding int
        df = df[df['month'] == (months.index(month) + 1)]

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df['day'] == days.index(day.title())]
    
    return df

In [4]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # TO DO: display the most common month
    
    common_month = df['month'].mode()[0]

    print('Most Common Month: ' , common_month)

    # TO DO: display the most common day of week
    
    common_day = df['day'].mode()[0]
    print('Most Common Day: ' , common_day)

    # TO DO: display the most common start hour
    
    common_hour = df['hour'].mode()[0]
    print('Most Common Hour: ' , common_hour)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [5]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # TO DO: display most commonly used start station
    
    popular_start_station = df['Start Station'].mode()

    print('popular start station:', popular_start_station)

    # TO DO: display most commonly used end station
    
    popular_end_station = df['End Station'].mode()

    print("popular end station:", popular_end_station)

    # TO DO: display most frequent combination of start station and end station trip
    
    frequent_combination = df['Start Station'] + "_" + df['End Station']
    trip_counts = frequent_combination.value_counts().idxmax()
    print('Frequent Combination: ' , trip_counts)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [6]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # TO DO: display total travel time
    
    total_travel_time = df['Trip Duration'].sum()

    print('Total Travel Time:', total_travel_time)

    # TO DO: display mean travel time
    
    avg_travel_time = df['Trip Duration'].mean()

    print('Average Travel Times:', avg_travel_time)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [7]:
def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # TO DO: Display counts of user types
    
    user_types = df['User Type'].value_counts()

    print('Number of Users:', user_types)

    # TO DO: Display counts of gender
    
    if "Gender" in df.columns:
        gender = df['Gender'].value_counts()
        print('gender count:', gender)

    else:
        print('Gender does not exist in data')

    # TO DO: Display earliest, most recent, and most common year of birth
    
    if "Birth_Year" in df.columns:
        birth_year = df['Birth Year']

        print('Earliest Birth Year: ', birth_year.min())

        print('Most Recent Birth Year: ', birth_year.max())

        print('Most Common Birth Year: ', birth_year.mode())

    else:
        print('Birth Year does not exist in the data')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


In [None]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
Would you like to see data from Chicago, New York City, or Washington? chicago
Which month? January, February, March, April, May, June, or all? january
Which day? Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all? Monday
----------------------------------------

Calculating The Most Frequent Times of Travel...

Most Common Month:  1
Most Common Day:  0
Most Common Hour:  17

This took 0.0 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

popular start station: 0    Clinton St & Washington Blvd
dtype: object
popular end station: 0    Clinton St & Washington Blvd
dtype: object
Frequent Combination:  Michigan Ave & Washington St_Canal St & Madison St

This took 0.015625715255737305 seconds.
----------------------------------------

Calculating Trip Duration...

Total Travel Time: 2247180
Average Travel Times: 639.8576309794988

This took 0.0 seconds.
----------------------