In [1]:
import time
import pandas as pd
import numpy as np

# Supported cities (lower-case) mapped to the CSV file holding their trip data.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Month names accepted as a filter; 'all' means "do not filter by month".
MONTHS = [
    'january', 'february', 'march', 'april', 'may', 'june',
    'all',
]

# Weekday names accepted as a filter; 'all' means "do not filter by day".
DAYS = [
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
    'all',
]

# City names in the order they are declared in CITY_DATA.
CITIES = list(CITY_DATA)


def _prompt_for_choice(prompt, valid_choices, retry_message):
    """
    Asks the user `prompt` repeatedly until the reply is one of `valid_choices`.

    The reply is stripped of surrounding whitespace and lower-cased before it is
    compared, so input such as " Chicago " is accepted.

    Args:
        (str) prompt - text passed to input()
        (container of str) valid_choices - accepted replies, in lower case
        (str) retry_message - text printed after every rejected reply
    Returns:
        (str) choice - the normalized reply that was accepted
    """
    while True:
        choice = input(prompt).strip().lower()
        if choice in valid_choices:
            print('You selected', choice.title())
            return choice
        print(retry_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until the answer is a key of CITY_DATA, an entry
    of MONTHS, or an entry of DAYS respectively (case and surrounding
    whitespace are ignored).

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Greetings! I heard you wanted to know about bikeshare data! Let\'s explore some data from bikeshares in the US!')

    # gets user input for city (chicago, new york city, washington)
    city = _prompt_for_choice(
        'Please enter a city you would like to research; Chicago, New York City, or Washington',
        CITY_DATA,
        'I\'m sorry, please select one of the cities listed!',
    )

    # gets user input for month (all, january, february, ... , june)
    month = _prompt_for_choice(
        'Please enter a month you would like to research, or if you\'d like to see all, type "all"',
        MONTHS,
        'I\'m sorry, please select one of the months of the year, or type "all"!',
    )

    # gets user input for day of week (all, monday, tuesday, ... sunday)
    day = _prompt_for_choice(
        'Please enter a day you would like to research, Monday through Sunday, or if you\'d like to see all, type "all"',
        DAYS,
        'I\'m sorry, please select one of the days of the week, or type "all"!',
    )

    print('-'*40)
    return city, month, day


In [2]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Besides the columns read from the CSV file, the returned frame carries three
    columns derived from 'Start Time': 'month' (month number), 'Day of Week'
    (day name) and 'hour'.

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor an entry of MONTHS
    """
    # Compare filters case-insensitively so "All" or "June" behave like "all" / "june".
    month = month.lower()
    day = day.lower()

    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # Derived columns used for filtering below and by the statistics functions.
    df['month'] = df['Start Time'].dt.month
    df['Day of Week'] = df['Start Time'].dt.day_name()
    df['hour'] = df['Start Time'].dt.hour

    if month != 'all':
        # MONTHS lists the month names in calendar order starting with january,
        # so list position + 1 is the month number stored in df['month'].
        month_number = MONTHS.index(month) + 1
        df = df[df['month'] == month_number]

    if day != 'all':
        # dt.day_name() yields capitalized names such as 'Monday'.
        df = df[df['Day of Week'] == day.title()]

    return df

In [None]:
def time_stats(df):
    """Prints the most common month, day of week and start hour found in df.

    Reads the 'month', 'Day of Week' and 'hour' columns of df, prints one line
    per statistic followed by the elapsed time, and returns None.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began_at = time.time()

    # month is reported first, then day of week, then start hour
    busiest_month = df['month'].mode()[0]
    print('The most common month is', busiest_month)

    busiest_weekday = df['Day of Week'].mode()[0]
    print('The most common day of the week is', busiest_weekday)

    busiest_hour = df['hour'].mode()[0]
    print('The most common start hour is', busiest_hour)

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [4]:
def station_stats(df):
    """Prints the most frequent start station, end station and start/end pair.

    Reads the 'Start Station' and 'End Station' columns of df, prints one line
    per statistic followed by the elapsed time, and returns None.
    """

    def first_mode(values):
        """Returns the first entry of the Series' mode()."""
        return values.mode()[0]

    print('\nCalculating The Most Popular Stations and Trip...\n')
    began_at = time.time()

    # most commonly used start station
    print('The most frequent start station is: ', first_mode(df['Start Station']))

    # most commonly used end station
    print('The most frequent end station is: ', first_mode(df['End Station']))

    # a trip is identified by "<start>,<end>"; the most frequent such string wins
    trips = df['Start Station'] + ',' + df['End Station']
    print('The most frequent combination of start and end station is: ', first_mode(trips))

    elapsed = time.time() - began_at
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)

In [5]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # displays total travel time
    total_time = df['Trip Duration'].sum()
    print('The total travel time is', total_time)

    # displays mean travel time
    mean_time = df['Trip Duration'].mean()
    print('The average travel time is', mean_time)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def user_stats(df, city):
    """Displays statistics on bikeshare users.

    Always prints the counts per 'User Type'. Unless city is 'washington', also
    prints the counts per 'Gender' and the earliest, latest and most common
    'Birth Year'.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, for cities other
             than 'washington', 'Gender' and 'Birth Year' columns
        (str) city - name of the city the data belongs to
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Displays counts of user types
    user_counts = df['User Type'].value_counts()
    print('These are the users, broken down by type:', user_counts)

    # Gender and birth year are skipped for washington
    # (presumably that data set lacks these columns - confirm against the CSV).
    if city != 'washington':
        # Displays counts of gender
        count_gender = df['Gender'].value_counts()
        print('These are the number of male and females that use the bikeshare:', count_gender)

        # Displays earliest, most recent, and most common year of birth
        birth_years = df['Birth Year']
        print('The earliest year of birth is ', birth_years.min())
        print('The latest year of birth is ', birth_years.max())

        # mode() returns a Series; print its first entry so a single year is
        # shown instead of the Series repr (index, value and dtype).
        common_years = birth_years.mode()
        if common_years.empty:
            # No non-missing birth years at all: nothing to pick from.
            print('The most common year of birth is ', 'not available')
        else:
            print('The most common year of birth is ', common_years.iloc[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [None]:
def raw_data(df):
    """Shows the rows of df five at a time for as long as the user answers yes.

    Nothing is printed unless the first answer is "yes". After each batch the
    user is asked whether to continue; the function stops on any answer other
    than "yes" or as soon as every row has been shown. Answers are compared
    case-insensitively and ignoring surrounding whitespace.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """
    batch_size = 5
    total_rows = len(df)

    wants_rows = input('\nWould you like to see 5 lines of data? Enter yes or no.\n').strip().lower() == 'yes'
    first_row = 0
    while wants_rows and first_row < total_rows:
        print(df.iloc[first_row:first_row + batch_size, :])
        first_row += batch_size
        # Only ask again when there is something left to show.
        if first_row < total_rows:
            wants_rows = input('Would you like to display the next 5 rows of data?').strip().lower() == 'yes'

In [None]:
def main():
    """Runs the interactive session: ask for filters, load, report, repeat.

    After each round the user is asked whether to restart; any answer other
    than "yes" (case-insensitive) ends the session.
    """
    keep_running = True
    while keep_running:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # reports, in the order they are shown to the user
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df, city)
        raw_data(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_running = answer.lower() == 'yes'


if __name__ == "__main__":
    main()

Greetings! I heard you wanted to know about bikeshare data! Let's explore some data from bikeshares in the US!
