In [1]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower-case) to the CSV file that is read
# for that city.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def _prompt_for_choice(prompt, choices, label):
    """
    Keeps asking the user a question until the reply is one of the allowed choices.

    Args:
        (str) prompt - text shown to the user on every attempt
        (list) choices - the accepted replies, all lower-case
        (str) label - noun used in the rejection message, e.g. "city"
    Returns:
        (str) the accepted reply, stripped of surrounding whitespace and lower-cased
    """
    while True:
        # Normalise before comparing so "  Chicago " is accepted as "chicago".
        answer = input(prompt).strip().lower()
        if answer in choices:
            return answer
        print('"{}" is not a valid {}.'.format(answer, label))


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until a valid answer is given. Answers are
    case-insensitive and surrounding whitespace is ignored.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    city = _prompt_for_choice(
        "Which city would you like to see data for? (chicago, new york city, washington)",
        ['chicago', 'new york city', 'washington'],
        'city',
    )

    month = _prompt_for_choice(
        "Which month would you like to see data for? (all, january, february, march, april, may, june)",
        ['all', 'january', 'february', 'march', 'april', 'may', 'june'],
        'month',
    )

    day = _prompt_for_choice(
        "Which day would you like to see data for? (all, monday, tuesday, wednesday, thursday, friday, saturday, sunday)",
        ['all', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday'],
        'day',
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads the trip file for one city and narrows it to the requested month and day.

    Besides the columns found in the file, the returned frame carries four
    derived columns: 'month', 'hour', 'day_of_week' and 'travel_time'.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Both timestamp columns must be real datetimes before anything is derived.
    for column in ('Start Time', 'End Time'):
        trips[column] = pd.to_datetime(trips[column])

    # Derived columns, all based on when the trip started.
    started = trips['Start Time']
    trips['month'] = started.dt.month
    trips['hour'] = started.dt.hour
    trips['day_of_week'] = started.dt.day_name()
    trips['travel_time'] = trips['End Time'] - started

    if month != 'all':
        # Position in this list (plus one) is the calendar month number.
        month_names = ['january', 'february', 'march', 'april', 'may', 'june']
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names such as "Monday".
        wanted_day = day.title()
        trips = trips[trips['day_of_week'] == wanted_day]

    return trips
    

def time_stats(df):
    """
    Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour. When the frame
    has no rows there is nothing to rank, so a short notice is printed instead.

    Args:
        df - pandas DataFrame with 'month' (calendar month number, 1 = January),
             'day_of_week' and 'hour' columns
    """
    # All twelve names, so a month number past June does not raise IndexError.
    month_names = ('January', 'February', 'March', 'April', 'May', 'June',
                   'July', 'August', 'September', 'October', 'November', 'December')

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so indexing it would fail.
        print('No trips match the selected filters.')
    else:
        popular_month = int(df['month'].mode()[0])
        print('{} is the most common month.'.format(month_names[popular_month - 1]))

        popular_day = df['day_of_week'].mode()[0]
        print('{} is the most common day of week.'.format(popular_day))

        popular_hour = df['hour'].mode()[0]
        print('{} is the most common hour.'.format(popular_hour))

    print("\nThis took %s seconds." % round((time.time() - start_time), 4))
    print('-'*40)


def station_stats(df):
    """
    Displays statistics on the most popular stations and trip.

    Prints the most frequent start station, the most frequent end station and
    the most frequent start/end pairing, followed by the elapsed time.

    Args:
        df - pandas DataFrame with 'Start Station' and 'End Station' columns
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # Most frequent departure point.
    top_start = df['Start Station'].mode()[0]
    print('{} is the most common Start Station.'.format(top_start))

    # Most frequent arrival point.
    top_end = df['End Station'].mode()[0]
    print('{} is the most common End Station.'.format(top_end))

    # Join both names into one label per trip so whole routes can be ranked.
    route_labels = df['Start Station'] + ' / ' + df['End Station']
    top_route = route_labels.mode()[0]
    print('{} is the most common Station Combination.'.format(top_route))

    elapsed = round(time.time() - began, 4)
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def trip_duration_stats(df):
    """
    Displays statistics on the total and average trip duration.

    Both figures are printed in seconds, followed by the elapsed time.

    Args:
        df - pandas DataFrame with a 'travel_time' column of time differences
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['travel_time']

    # Sum of all trip durations, expressed in seconds.
    total_seconds = durations.sum().total_seconds()
    print('{} seconds is the total travel time.'.format(total_seconds))

    # Average trip duration, expressed in seconds.
    average_seconds = durations.mean().total_seconds()
    print('{} seconds is the mean travel time.'.format(average_seconds))

    elapsed = round(time.time() - began, 4)
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def user_stats(df):
    """
    Displays statistics on bikeshare users.

    Always prints the counts per user type. The gender counts and the
    birth-year summary are printed only when the frame has a 'Gender' /
    'Birth Year' column; a missing column is skipped rather than treated as
    an error.

    Args:
        df - pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    user_types = df['User Type'].value_counts()
    print('User Type Counts:')
    print(user_types, '\n')

    # Explicit column checks replace a bare "except: pass", so real errors
    # are no longer hidden along with the expected missing-column case.
    if 'Gender' in df.columns:
        genders = df['Gender'].value_counts()
        print('Gender Counts:')
        print(genders, '\n')

    if 'Birth Year' in df.columns:
        # Unparseable or missing entries become NaN and are dropped, so one
        # bad row cannot suppress the whole summary.
        birth_years = pd.to_numeric(df['Birth Year'], errors='coerce').dropna()
        if not birth_years.empty:
            min_birthyear = int(birth_years.min())
            # "Most recent" means the latest year, i.e. the maximum - not
            # whatever value happens to be in the last row.
            last_birthyear = int(birth_years.max())
            popular_birthyear = int(birth_years.mode()[0])
            print('The earliest Birth Year is {}.'.format(min_birthyear))
            print('The most recent Birth Year is {}.'.format(last_birthyear))
            print('The most common Birth Year is {}.'.format(popular_birthyear))

    print("\nThis took %s seconds." % round((time.time() - start_time), 4))
    print('-'*40)


def main():
    """
    Runs the interactive exploration loop.

    Each pass asks for the filters, loads the matching data, prints the four
    statistics reports and then offers the raw rows five at a time. The loop
    repeats until the user answers anything other than "yes" to the restart
    question. Answers are case-insensitive and surrounding whitespace is ignored.
    """
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        # Page through the raw rows. The loop condition stops as soon as every
        # row has been shown, so the user is never offered an empty page (also
        # when the row count is an exact multiple of five).
        row_count = df.shape[0]
        current_row = 0
        while current_row < row_count:
            answer = input('Would you like to see five lines of the raw data?')
            if answer.strip().lower() != 'yes':
                break
            print(df[current_row:current_row + 5])
            current_row += 5

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.strip().lower() != 'yes':
            break

# Start the interactive session only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()



Hello! Let's explore some US bikeshare data!
Which city would you like to see data for? (chicago, new york city, washington)chicago
Which month would you like to see data for? (all, january, february, march, april, may, june)all
Which day would you like to see data for? (all, monday, tuesday, wednesday, thursday, friday, saturday, sunday)all
----------------------------------------

Calculating The Most Frequent Times of Travel...

June is the most common month.
Tuesday is the most common day of week.
17 is the most common hour.

This took 0.024 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

Streeter Dr & Grand Ave is the most common Start Station.
Streeter Dr & Grand Ave is the most common End Station.
Lake Shore Dr & Monroe St / Streeter Dr & Grand Ave is the most common Station Combination.

This took 0.109 seconds.
----------------------------------------

Calculating Trip Duration...

280878987.0 seconds is the total travel ti