In [1]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (as typed by the user, lower-case) to the
# CSV file that load_data() reads for it.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is stripped of surrounding whitespace and lower-cased, and
    each prompt is repeated until the answer is one of the accepted values.

    Returns:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # City: keep asking until the answer is one of the CITY_DATA keys.
    city = input('would you like to see data for chicago, new york city or washington\n\n?: ').strip().lower()
    while city not in CITY_DATA:
        print('city is out of range')
        city = input('would you like to see data for chicago, new york city or washington').strip().lower()

    # Month: january..june or "all".
    months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
    while True:
        month = input('which month? all, january, february, march, april, may, or june?').strip().lower()
        if month in months:
            break
        print('month is out of range')

    # Day of week: the spelling must be the real English day name, because
    # load_data() title-cases this value and compares it with the names
    # produced by pandas' dt.day_name().
    days = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'all']
    while True:
        day = input('which day? all, sunday, monday, tuesday, wednesday, thursday, friday or saturday?').strip().lower()
        if day in days:
            break
        print('please enter a valid day')

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads the CSV registered for `city` and narrows it to the requested month/day.

    Besides the columns found in the file, the returned frame carries three
    columns derived from 'Start Time': 'month' (integer), 'day_of_week'
    (day name) and 'hour' (integer).

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    trips = pd.read_csv(CITY_DATA[city])

    # Parse the start timestamps once, then derive the helper columns from them.
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])
    start_times = trips['Start Time']
    trips['month'] = start_times.dt.month
    trips['day_of_week'] = start_times.dt.day_name()
    trips['hour'] = start_times.dt.hour

    if month != 'all':
        # Position in this tuple + 1 is the calendar month number.
        month_names = ('january', 'february', 'march', 'april', 'may', 'june')
        month_number = month_names.index(month) + 1
        trips = trips.loc[trips['month'] == month_number]

    if day != 'all':
        # dt.day_name() yields capitalised names, so title-case the request.
        wanted_day = day.title()
        trips = trips.loc[trips['day_of_week'] == wanted_day]

    return trips


def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common value of the 'month', 'day_of_week' and 'hour'
    columns, followed by the time the calculation took. When the DataFrame
    has no rows (e.g. the chosen filters matched nothing) a notice is printed
    instead of the statistics.

    Args:
        df - Pandas DataFrame with 'month', 'day_of_week' and 'hour' columns
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no element 0, so there is nothing to report.
        print('No trips match the selected filters.')
    else:
        # mode() may return several values on a tie; [0] takes the first one.
        print("The most common month is ", df['month'].mode()[0], "\n")
        print("The most common day of week  is ", df['day_of_week'].mode()[0], "\n")
        print("The most common start hour is ", df['hour'].mode()[0], "\n")

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and
    the most common start/end pair, followed by the time the calculation
    took. The DataFrame passed in is left unmodified. When it has no rows a
    notice is printed instead of the statistics.

    Args:
        df - Pandas DataFrame with 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no element 0, so there is nothing to report.
        print('No trips match the selected filters.')
    else:
        print("The most commonly used start station is ", df['Start Station'].mode()[0], "\n")
        print("The most commonly used end station is ", df['End Station'].mode()[0], "\n")

        # Build the start/end pairs in a local Series instead of a new column,
        # so the caller's DataFrame (which is displayed later) is not altered.
        start_end_pairs = df['Start Station'] + " " + df['End Station']
        print("The most frequent combination of start station and end station trip is: ", start_end_pairs.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Prints the sum and the mean of the 'Trip Duration' column.

    The values are printed in whatever unit the column holds
    (presumably seconds - confirm against the data files), followed by the
    time the calculation took.

    Args:
        df - Pandas DataFrame with a 'Trip Duration' column
    """
    print('\nCalculating Trip Duration...\n')
    began_at = time.time()

    durations = df['Trip Duration']

    # Total of all trip durations.
    total_duration = durations.sum()
    print('total travel time :', total_duration)

    # Average trip duration.
    average_duration = durations.mean()
    print('mean travel time :', average_duration)

    elapsed = time.time() - began_at
    print("\nThis took {} seconds.".format(elapsed))
    print('-' * 40)


def user_stats(df, city):
    """Displays statistics on bikeshare users.

    Always prints the counts per 'User Type'. For every city other than
    'washington' it additionally prints the counts per 'Gender' and the most
    common, earliest and most recent 'Birth Year' - each only when the column
    is present, and the birth-year figures only when at least one birth year
    is known.

    Args:
        df - Pandas DataFrame with a 'User Type' column and, optionally,
             'Gender' and 'Birth Year' columns
        (str) city - name of the city the data belongs to
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Counts of user types
    print(df['User Type'].value_counts())

    # Gender and birth-year figures are skipped for washington.
    if city != 'washington':
        if 'Gender' in df.columns:
            print(df['Gender'].value_counts())

        if 'Birth Year' in df.columns:
            # Drop missing values first: with no known birth year, mode() is
            # empty and int(nan) would raise.
            birth_years = df['Birth Year'].dropna()
            if birth_years.empty:
                print('No birth year data available.')
            else:
                print('The most common year of birth is:', int(birth_years.mode()[0]))
                print('The earliest year of birth is :', int(birth_years.min()))
                print('the most recent birth year is:', int(birth_years.max()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def display_raw_data(df):
    """Display the rows of `df` five at a time, as per user request.

    The first five rows are printed immediately. While rows remain, the user
    is asked whether to continue; any answer other than "yes" (case
    insensitive) stops the display. The function returns without prompting
    again once every row has been shown.

    Args:
        df - Pandas DataFrame to display
    """
    rows_per_page = 5
    total_rows = len(df)

    print(df.head())
    start_loc = rows_per_page
    # Only prompt while there is something left to show; otherwise the loop
    # would keep printing empty frames.
    while start_loc < total_rows:
        view_data = input("Do you wish to continue?: ")
        if view_data.lower() != 'yes':
            return
        print(df.iloc[start_loc:start_loc + rows_per_page])
        start_loc += rows_per_page
        
          

def main():
    """Run the interactive session: ask for filters, print every statistic,
    optionally show the raw rows, and repeat for as long as the user answers
    "yes" to the restart question."""
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # Statistics are printed in a fixed order.
        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df, city)

        # Raw rows are shown only on an explicit "yes".
        wants_rows = input("Would you like to view 5 rows of individual trip data? Enter yes or no?")
        if wants_rows.lower() == 'yes':
            display_raw_data(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this file is executed as a script.
if __name__ == '__main__':
    main()

Hello! Let's explore some US bikeshare data!
would you like to see data for chicago, new york city or washington

?: chicago
which month? all, january, february, march, april, may, or june?all
which day? all, sunday, monday, tuesday, wednesday, thrusday, friday or saturday?all
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is  6 

The most common day of week  is  Tuesday 

The most common start hour is  17 


This took 0.03127861022949219 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most commonly used start station is  Streeter Dr & Grand Ave 

The most commonly used end station is  Streeter Dr & Grand Ave 

The most frequent combination of start station and end station trip is:  Lake Shore Dr & Monroe St Streeter Dr & Grand Ave

This took 0.30013251304626465 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time : 2