# In[10]:
import pandas as pd 
import numpy as np
import time


# In[2]:
# Maps each supported city name (in the normalised form produced by
# get_filters: title-cased, spaces replaced by underscores) to its CSV file.
CITY_DATA = { 'Chicago': 'chicago.csv',
              'New_York': 'new_york_city.csv',
              'Washington': 'washington.csv' }

# Valid city choices, derived from CITY_DATA so the two can never drift apart.
cities = list(CITY_DATA)

# Month names accepted as a filter (only January through June are offered).
months = ['January', 'February', 'March', 'April', 'May', 'June']

# Day names accepted as a filter. All seven days are listed; 'Sunday' was
# previously missing, which made Sunday impossible to select.
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Accepted (title-cased) answers to yes/no prompts.
valid = ['Yes', 'No']


# In[3]:

# The get_filters function prompts the user for the city, month and day to use as search criteria
def get_filters():
    """Interactively ask the user which city and time filters to apply.

    The city answer is normalised (surrounding whitespace stripped, inner
    spaces replaced by underscores, title-cased) and must be one of the
    module-level ``cities``. The user then chooses to filter by 'month',
    'both' (month and day) or 'none'. Month and day answers are title-cased
    and validated against the module-level ``months`` and ``days`` lists;
    any invalid answer prints a message and the question is asked again.

    Returns:
        tuple: ``(city, month, day)`` where ``city`` is an entry of
        ``cities``, ``month`` is an entry of ``months`` or ``'all'``, and
        ``day`` is an entry of ``days`` or ``'all'``.
    """
    print('Welcome to Bikeshare Report!')
    print ('-'*30)

    # Keep asking until a supported city is entered.
    while True:
        print ()
        print('This program provides detailed analysis of Bikeshare  in Chicago, New York and Washington')
        print ()
        answer = input('Which city report would you like to see?\n Kindly select from: Chicago, New York, Washington')
        # "new york" -> "New_York", matching the keys used for the data files.
        city = answer.strip().replace(' ', '_').title()

        if city in cities:
            print ()
            print(f'You have entered {city} as your choice')
            print ()
            break
        print('sorry this is not a valid input')

    # Keep asking until a complete, valid filter combination is entered.
    while True:
        choice = input('Would you like to filter the data by month, both or none?\n').strip().title()

        if choice == 'None':
            month, day = 'all', 'all'
            break

        if choice == 'Month':
            month = input('Enter month to filter:\n').strip().title()
            # Reject unknown months here; otherwise the later filter would
            # silently produce an empty data set.
            if month in months:
                day = 'all'
                break
        elif choice == 'Both':
            print ()
            month = input('Enter month of the year:\n').strip().title()
            if month in months:
                print ()
                day = input('Enter day of the week:\n').strip().title()
                if day in days:
                    print ()
                    break

        # Reached for an unknown choice, an unknown month or an unknown day.
        print('Sorry this is not a valid input')

    print('-'*40)
    return city, month, day


# In[4]:

#The load_data function uses the output of get_filters to load the relevant csv file as a dataframe for analysis
def load_data():
    """Load the chosen city's trips and apply the user's month/day filters.

    Calls ``get_filters()`` to obtain the city, month and day, reads the CSV
    file that ``CITY_DATA`` maps the city to, and adds these derived columns:
    ``start_month``, ``end_month``, ``start_day``, ``end_day``, ``hour``,
    ``common_time`` and ``station_combination``.

    Returns:
        pandas.DataFrame: the trips with the derived columns, restricted to
        rows whose ``start_month`` / ``start_day`` match the filters (a
        filter value of ``'all'`` means no restriction). The frame may be
        empty when no trip matches.
    """
    # Ask the user what to report on.
    city, month, day = get_filters()

    # Read the file registered for the chosen city.
    df = pd.read_csv(CITY_DATA[city])

    # Parse each timestamp column once and reuse the result, instead of
    # re-parsing the same strings for every derived column.
    start_times = pd.to_datetime(df['Start Time'])
    end_times = pd.to_datetime(df['End Time'])

    df['start_month'] = start_times.dt.month_name()
    df['end_month'] = end_times.dt.month_name()
    df['start_day'] = start_times.dt.day_name()
    df['end_day'] = end_times.dt.day_name()
    df['hour'] = start_times.dt.hour
    df['common_time'] = start_times.dt.time
    df['station_combination'] = df['Start Station'] + ' to ' + df['End Station']

    # Narrow the frame to the requested month and/or day of the week.
    if month != 'all':
        df = df[df['start_month'] == month]

    if day != 'all':
        df = df[df['start_day'] == day]

    return df


# In[5]:

#time_stats function calculates the key metrics relevant for our analysis
def time_stats(df):
    """Displays statistics on the most frequent times of travel.

    Prints the most common value of the ``start_month``, ``start_day`` and
    ``hour`` columns, followed by the time the calculation took.

    Args:
        df (pandas.DataFrame): trips with ``start_month``, ``start_day`` and
            ``hour`` columns.

    Returns:
        None. Results are printed.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so indexing [0] would raise.
        print('No trips match the selected filters.')
    else:
        #Display the most common month
        most_common_month = df['start_month'].mode()[0]
        print(f'The most common month is:\n {most_common_month}')

        #Display the most common day of week
        most_common_day = df['start_day'].mode()[0]
        print(f'The most common day is:\n {most_common_day}')

        #Display the most common start hour
        most_common_start_hour = df['hour'].mode()[0]
        print(f'The most common start hour is:\n {most_common_start_hour}')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


# In[6]:

#station_stats function calculates the key metrics relevant for our analysis
def station_stats(df):
    """Displays statistics on the most popular stations and trip.

    Prints the most common value of the ``Start Station``, ``End Station``
    and ``station_combination`` columns, followed by the time the
    calculation took.

    Args:
        df (pandas.DataFrame): trips with ``Start Station``, ``End Station``
            and ``station_combination`` columns.

    Returns:
        None. Results are printed.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # mode() of an empty column is empty, so indexing [0] would raise.
        print('No trips match the selected filters.')
    else:
        #Display most commonly used start station
        popular_start_station = df['Start Station'].mode()[0]
        print(f'The most commonly used start station is:\n {popular_start_station}')

        #Display most commonly used end station
        popular_end_station = df['End Station'].mode()[0]
        print(f'The most commonly used end station is:\n {popular_end_station}')

        #Display most frequent combination of start station and end station trip
        most_common_trip = df['station_combination'].mode()[0]
        print(f'The most commonly trip combination is:\n {most_common_trip}')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


# In[7]:

#trip_duration_stats function calculates the total and average trip duration for our analysis
def trip_duration_stats(df):
    """Print the summed and mean values of the 'Trip Duration' column.

    Args:
        df (pandas.DataFrame): trips with a ``Trip Duration`` column.

    Returns:
        None. The totals and the elapsed calculation time are printed.
    """
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # Total travel time across all trips in the frame
    print(f'The total travel time is:\n {durations.sum()}')

    # Mean travel time per trip
    print(f'The average travel time is:\n {durations.mean()}')

    print(f"\nThis took {time.time() - began} seconds.")
    print('-' * 40)


# In[8]:

#user_stats function calculates the key metrics relevant for our analysis
def user_stats(df):
    """Displays statistics on bikeshare users.

    Prints the counts per ``User Type``. When the frame has a ``Gender``
    column its counts are printed as well, and when it has a ``Birth Year``
    column the earliest, most recent and most common birth years are
    printed. A blank line is printed in place of a section whose column is
    absent from the frame.

    Args:
        df (pandas.DataFrame): trips with a ``User Type`` column and,
            optionally, ``Gender`` and ``Birth Year`` columns.

    Returns:
        None. Results are printed.
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    if df.empty:
        print('No trips match the selected filters.')
    else:
        #Display counts of user types
        count_user_types = df['User Type'].value_counts()
        print(f'The categories of users are:\n {count_user_types}')

        #Display counts of gender (column is optional)
        if 'Gender' in df:
            gender_count = df['Gender'].value_counts()
            print(f'The gender count is:\n {gender_count}')
        else:
            print()

        #Display earliest, most recent, and most common year of birth (column is optional)
        if 'Birth Year' in df:
            # Drop missing values first: int(nan) raises, and mode() of an
            # all-missing column is empty so indexing [0] would raise too.
            birth_years = df['Birth Year'].dropna()
            if birth_years.empty:
                print('No birth year data available for this selection.')
            else:
                earliest_birth_year = int(birth_years.min())
                latest_birth_year = int(birth_years.max())
                popular_birth_year = int(birth_years.mode()[0])

                print(f'The earliest birth year is:\n {earliest_birth_year}')
                print(f'The most recent birth year is:\n {latest_birth_year}')
                print(f'The most common birth year is:\n {popular_birth_year}')
        else:
            print()

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


# In[11]:

#function to control program loop
def main():
    """Run the interactive report until the user declines to restart.

    Each pass loads a filtered frame via ``load_data()``, prints every
    statistics section, optionally prints the first and last five rows,
    and then asks whether to start over.
    """
    while True:
        # Load the (filtered) data for this pass
        df = load_data()

        # Print each statistics section in turn
        for show_section in (time_stats, station_stats, trip_duration_stats, user_stats):
            show_section(df)

        # Ask whether to show the data until an accepted answer is given
        while True:
            answer = input('\nWould you like to see the data? Enter yes or no: \n').title()

            if answer not in valid:
                print('sorry this is not a valid input')
                continue

            if answer == 'Yes':
                print (df.head(5))
                print (df.tail(5))
                break
            if answer == 'No':
                break

        # Anything other than "yes" (case-insensitive) ends the program
        print('-'*40)
        restart = input('\nWould you like to restart? Enter yes or no: \n')
        if restart.lower() != 'yes':
            return

# Entry point: start the interactive report only when this file is executed
# directly (or run as a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()


# In[ ]:






Welcome to Bikeshare Report!
------------------------------

This program provides detailed analysis of Bikeshare  in Chicago, New York and Washington


You have entered Chicago as your choice

----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month is:
 June
The most common day is:
 Tuesday
The most common start hour is:
 17

This took 0.06311988830566406 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most commonly used start station is:
 Streeter Dr & Grand Ave
The most commonly used end station is:
 Streeter Dr & Grand Ave
The most commonly trip combination is:
 Lake Shore Dr & Monroe St to Streeter Dr & Grand Ave

This took 0.13287615776062012 seconds.
----------------------------------------

Calculating Trip Duration...

The total travel time is:
 280871787
The average travel time is:
 936.23929

This took 0.0010991096496582031 seconds.
----------------------------