In [1]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lowercase) to the CSV file with its trips.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Month choices offered to the user, lowercase. The list position of a month
# plus one is its calendar number; the final entry 'all' means "no filter".
MONTHS = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'all',
]


# Day-of-week choices offered to the user, title-cased. The final entry
# 'All' means "no filter".
DAYS = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
    'Saturday',
    'Sunday',
    'All',
]

def _prompt_choice(prompt, valid_choices, normalize, retry_message):
    """Ask with `prompt` until the normalized answer is in `valid_choices`; return it."""
    while True:
        # Collapse stray/duplicated whitespace so e.g. " new  york city " is accepted.
        answer = normalize(' '.join(input(prompt).split()))
        if answer in valid_choices:
            return answer
        print(retry_message)


def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Each question is repeated until the answer matches one of the entries in
    CITY_DATA, MONTHS or DAYS respectively. Answers are case-insensitive and
    surrounding whitespace is ignored.

    Returns:
        (str) city - lowercase name of the city to analyze (a key of CITY_DATA)
        (str) month - lowercase name of the month to filter by, or "all" to apply no month filter
        (str) day - title-cased name of the day of week to filter by, or "All" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    city = _prompt_choice(
        "Please choose a city >>> (Chicago, New York City or Washington): ",
        CITY_DATA,
        str.lower,
        "Please try again, name not recognized",
    )

    month = _prompt_choice(
        "Please choose a month >>> (January, February, March, April, May, June or All): ",
        MONTHS,
        str.lower,
        "Please try again, month not recognized",
    )

    # DAYS holds title-cased names, so the answer is title-cased before lookup.
    day = _prompt_choice(
        "Please choose a Day >>> (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday or All): ",
        DAYS,
        str.title,
        "Please try again, day not recognized",
    )

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    The CSV named by CITY_DATA[city] is read, missing values are forward-filled,
    'Start Time' and 'End Time' are parsed as datetimes, and two helper columns
    are added: 'Month' (calendar month number) and 'Day' (weekday name), both
    derived from 'Start Time'.

    Args:
        (str) city - name of the city to analyze (a key of CITY_DATA)
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
                    (matched case-insensitively, so "All"/"friday" also work)
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "all" nor an entry of MONTHS
    """
    df = pd.read_csv(CITY_DATA[city])

    # 'Unnamed: 0' is presumably a leftover index column from the CSV export;
    # errors='ignore' keeps files without that column loadable.
    df = df.drop(columns='Unnamed: 0', errors='ignore')

    # Forward-fill gaps. DataFrame.ffill() replaces the deprecated
    # fillna(method='ffill') spelling and behaves the same.
    df = df.ffill()

    df['Start Time'] = pd.to_datetime(df['Start Time'])
    df['End Time'] = pd.to_datetime(df['End Time'])

    # Helper columns used for filtering here and for the statistics later.
    df['Month'] = df['Start Time'].dt.month
    df['Day'] = df['Start Time'].dt.day_name()

    month_key = month.lower()
    if month_key != 'all':
        # MONTHS is ordered january..june, so list position + 1 is the month number.
        df = df[df['Month'] == MONTHS.index(month_key) + 1]

    if day.lower() != 'all':
        # day_name() yields title-cased names such as 'Friday'.
        df = df[df['Day'] == day.title()]

    return df


def time_stats(df, month, day):
    """Displays statistics on the most frequent times of travel.

    Prints the most common month, day of week and start hour, followed by the
    time the calculation took. The input DataFrame is not modified.

    Args:
        df - DataFrame with a datetime 'Start Time' column plus 'Month'
             (month number) and 'Day' (weekday name) columns
        (str) month - the month filter that was applied, or "all" for none
        (str) day - the day filter that was applied, or "All" for none
    """

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element; report instead of crashing.
        print('No trips match the selected filters.')
    else:
        # With a month filter active every row shares that month, so the
        # filter value itself is the answer.
        if month.lower() == 'all':
            common_month = MONTHS[df['Month'].mode()[0] - 1].title()
        else:
            common_month = month.title()
        print('The most common month is: ', common_month)

        # Same reasoning for the day filter.
        if day.lower() == 'all':
            common_day = df['Day'].mode()[0]
        else:
            common_day = day
        print('The most common day is: ', common_day)

        # Keep the hours in a local Series rather than adding a column to the
        # caller's (possibly filtered) frame.
        start_hours = df['Start Time'].dt.hour
        print('The Most Common Start Hour is: ', start_hours.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common start station, the most common end station and the
    most common start/end pair, followed by the time the calculation took.
    The input DataFrame is not modified.

    Args:
        df - DataFrame with string 'Start Station' and 'End Station' columns
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column has no first element; report instead of crashing.
        print('No trips match the selected filters.')
    else:
        print('The Most Commonly used Start Station is: ', df['Start Station'].mode()[0])

        print('The Most Commonly used End Station is: ', df['End Station'].mode()[0])

        # Build the "start to end" labels in a local Series rather than adding
        # a column to the caller's (possibly filtered) frame.
        journeys = df['Start Station'] + " to " + df['End Station']
        print('The Most Frequent Trip from: ', journeys.mode()[0])

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def trip_duration_stats(df):
    """Print the sum and the (truncated) mean of the 'Trip Duration' column.

    Also reports how long the calculation took.

    Args:
        df - DataFrame with a numeric 'Trip Duration' column
    """

    print('\nCalculating Trip Duration...\n')
    began_at = time.time()

    durations = df['Trip Duration']

    # Total travel time is the sum over all trips.
    total_duration = durations.sum()
    print(f"The Total Travel Time is:  {total_duration} 'sec")

    # Average travel time, truncated to a whole number by int().
    mean_duration = int(durations.mean())
    print(f"The Mean Travel Time is:  {mean_duration} 'sec")

    elapsed = time.time() - began_at
    print(f"\nThis took {elapsed} seconds.")
    print(40 * '-')


def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the counts per 'User Type'. When the frame has a 'Gender' column
    its counts are printed too, and when it has usable 'Birth Year' values the
    earliest, most recent and most common year are printed. A short notice is
    printed in place of any section whose column is missing.

    Args:
        df - DataFrame with a 'User Type' column and, optionally, 'Gender'
             and 'Birth Year' columns
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    user_type = df['User Type'].value_counts()

    print(f"The types of users by number are given below:\n\n{user_type}")

    # Not every file has gender / birth year columns. Check for the column
    # explicitly rather than catching every exception, so unrelated errors
    # are not silently reported as "no data".
    if 'Gender' in df.columns:
        gender = df['Gender'].value_counts()
        print(f"\nThe types of users by gender is available:\n\n{gender}")
    else:
        print("\nThere is no 'Gender' data in this file.")

    # earliest = min, most recent = max, most common = mode
    birth_years = df['Birth Year'].dropna() if 'Birth Year' in df.columns else None
    if birth_years is None or birth_years.empty:
        print("There are no birth year data in this file.")
    else:
        earliest = int(birth_years.min())
        recent = int(birth_years.max())
        common_year = int(birth_years.mode()[0])
        print(f"\nThe earliest year of birth: {earliest}\n\nThe most recent year of birth: {recent}\n\nThe most common year of birth: {common_year}")

    # "\n" here: the original "\T" was an invalid escape that printed a backslash.
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

    
def raw_data(df):
    """Displays 5 lines of raw data at a time for as long as the user answers yes.

    Paging starts at the first row; each "yes" reveals the next 5 rows. The
    loop ends on any other answer, or once every row has been shown.

    Args:
        df - DataFrame whose rows are displayed
    """
    page_size = 5
    # Positional offset of the next row to show; 0 so the first row is included.
    offset = 0
    while True:
        if offset >= len(df):
            # Nothing left: stop instead of printing empty frames forever.
            print('\nNo more raw data to display.')
            break

        rawdata = input('\nWould you like to see 5 more lines of raw data? Enter yes or no.\n')
        if rawdata.strip().lower() != 'yes':
            break

        # iloc makes the positional slicing explicit regardless of the index labels.
        print(df.iloc[offset:offset + page_size])
        offset += page_size
    
def main():
    """Run the interactive session: ask for filters, load the data, print every
    report, offer the raw rows, and repeat while the user asks to restart."""
    keep_running = True
    while keep_running:
        # Collect the filters and load the matching trips.
        city, month, day = get_filters()
        df = load_data(city, month, day)

        # Reports that depend on the chosen filters.
        time_stats(df, month, day)

        # Reports that only need the data itself.
        for report in (station_stats, trip_duration_stats, user_stats, raw_data):
            report(df)

        restart = input('\nWould you like to restart? Enter yes to continue: ')
        # Only 'yes' / 'y' (any letter case) starts another round.
        keep_running = restart.lower() in ('yes', 'y')


if __name__ == "__main__":
    main()

Hello! Let's explore some US bikeshare data!
Please choose a city >>> (Chicago, New York City or Washington): CHICAGO
Please choose a month >>> (January, February, March, April, May, June or All): june
Please choose a Day >>> (Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday or All): friday
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is:  June
The most common day is:  Friday
The Most Common Start Hour is:  17

This took 0.0019948482513427734 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The Most Commonly used Start Station is:  Streeter Dr & Grand Ave
The Most Commonly used End Station is:  Streeter Dr & Grand Ave
The Most Frequent Trip from:  Lake Shore Dr & Monroe St to Streeter Dr & Grand Ave

This took 0.006981372833251953 seconds.
----------------------------------------

Calculating Trip Duration...

The Total Travel Time is:  16904624 'sec
T

KeyboardInterrupt: Interrupted by user