In [2]:
import numpy as np
import pandas as pd
import time

#with open(directory, 'r') as f:
    #file_data = f.read()
    #text = file_data.split() #--NotebookApp.iopub_data_rate_limit needs to be increased

def get_filter(user_inp):
    """
    Prompts the user to pick one option from a fixed menu and keeps asking until a valid option is typed.
    The menus cover the city, the filter type, the month, the day of week, and a plain yes/no question.

    INPUT/ARGS:
        (str) user_inp - name of the menu to show: 'City', 'Month', 'Day', 'Filter' or 'Response'.
    BODY: Looks up the matching dictionary of {shortcut: full name}, builds a prompt listing every shortcut,
    and loops on input() until the typed text (case-insensitive, surrounding whitespace ignored) is a shortcut.
    OUTPUT/RETURNS:
        (str) the full name stored for the chosen shortcut, e.g. 'chicago', 'January', 'Monday', 'Both', 'Yes'.
    RAISES:
        KeyError - if user_inp is not one of the menu names listed above.
    """
    cities = {'c': 'chicago', 'n': 'new_york_city', 'w': 'washington'}
    months = {'jan': 'January', 'feb': 'February', 'mar': 'March', 'apr': 'April', 'may': 'May', 'jun': 'June'}
    days = {'mon': 'Monday', 'tue': 'Tuesday', 'wed': 'Wednesday', 'thur': 'Thursday', 'fri': 'Friday',
            'sat': 'Saturday', 'sun': 'Sunday'}
    fltr = {'mon': 'Month', 'dy': 'Day', 'bt': 'Both', 'no': 'None'}
    resp = {'y': 'Yes', 'n': 'No'}
    dict_set = {'City': cities, 'Month': months, 'Day': days, 'Filter': fltr, 'Response': resp}
    options = dict_set[user_inp]

    # Produces the same wording as before, e.g. "Enter (y) for Yes,  (n) for No: ".
    # The doubled space after each comma is kept so the prompt text is unchanged.
    choices = ', '.join(' (' + key + ') for ' + name for key, name in options.items())
    input_str = 'Enter' + choices + ': '

    while True:
        # strip() so an accidental leading/trailing space does not reject a valid answer.
        user_input = str(input(input_str)).strip().lower()
        if user_input in options:
            return options[user_input]
        print('Please follow the stated instruction, to allow the code run properly')
    
def filter_data(city, data_filter, Month, Day):
    """
    Loads data for the specified city and filters by month and/or day if applicable.

    INPUT/ARGS:
        (str) city - name of the city to analyze; the file '<city>.csv' is read relative to the working directory.
        (str) data_filter - type of filter to apply to raw data: 'Month', 'Day', 'Both' or 'None'.
              Any other value leaves the data unfiltered.
        (str) Month - name of the month to filter by (e.g. 'January'); only used when the filter is 'Month' or 'Both'.
        (str) Day - name of the day of week to filter by (e.g. 'Monday'); only used when the filter is 'Day' or 'Both'.
    BODY: Reads data from the .csv file, converts the 'Start Time' column to datetime, derives the
          'Months', 'Days' and 'Hours' columns from it, and keeps only the rows matching the filter.
    OUTPUT/RETURNS:
        df - Pandas DataFrame containing city data filtered by month and/or day, or none.
    """
    df = pd.read_csv(city + '.csv')
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # No locale argument: pandas returns English names by default, whereas
    # locale='English' is a Windows-only locale name and raises locale.Error
    # on other platforms.
    df['Months'] = df['Start Time'].dt.month_name()
    df['Days'] = df['Start Time'].dt.day_name()
    df['Hours'] = df['Start Time'].dt.hour

    # 'Both' simply applies the month filter and then the day filter.
    if data_filter in ('Month', 'Both'):
        df = df[df['Months'] == Month]
    if data_filter in ('Day', 'Both'):
        df = df[df['Days'] == Day]
    return df

def stats(df):
    """
    Section 1: Displays statistics on the most frequent times of travel.
    Section 2: Displays statistics on the most popular stations and trip.
    Section 3: Displays statistics on the total and average trip duration.
    Section 4: Displays statistics on bikeshare users.

    INPUT/ARGS:
        df - Pandas DataFrame with the columns 'Months', 'Days', 'Hours', 'Start Station', 'End Station',
             'Trip Duration' and 'User Type'; 'Gender' and 'Birth Year' are used only when both are present.
             The frame is only read: no columns are added to it.
    OUTPUT/RETURN: section_1, section_2, section_3, section_4 - one-row Pandas DataFrames (index 1)
                   showing the various statistics.
    RAISES:
        ValueError - if df has no rows, since no "most common" value exists.
    """
    if df.empty:
        raise ValueError('Cannot compute statistics: the selected data contains no trips.')

    def most_common(column):
        # Value with the highest count in the given Series.
        return column.value_counts().idxmax()

    section_1 = pd.DataFrame(
        {'Most Common Month': [most_common(df['Months'])],
         'Most Common Day': [most_common(df['Days'])],
         'Most Common Hour': [most_common(df['Hours'])]},
        index=[1])

    # Built as a local Series so the caller's DataFrame (often a filtered
    # slice) is not modified and no SettingWithCopyWarning is triggered.
    routes = df['Start Station'] + ' - ' + df['End Station']
    section_2 = pd.DataFrame(
        {'Most Common Start Station': [most_common(df['Start Station'])],
         'Most Common End Station': [most_common(df['End Station'])],
         'Most Common Route': [most_common(routes)]},
        index=[1])

    # 'Trip Duration' is divided by 60 to report minutes.
    minutes = df['Trip Duration'] / 60
    section_3 = pd.DataFrame(
        {'Total Travel Time (Mins)': [minutes.sum()],
         'Average Travel Time (Mins)': [minutes.mean()]},
        index=[1])

    # Gender and birth-year figures are reported only when both columns exist.
    has_user_details = 'Gender' in df and 'Birth Year' in df
    counts = dict(df['User Type'].value_counts())
    if has_user_details:
        counts.update(dict(df['Gender'].value_counts()))
    section_4 = pd.DataFrame(counts, index=[1])
    if has_user_details:
        birth_years = df['Birth Year']
        section_4['Earliest Birth Year'] = birth_years.min()
        section_4['Most Recent Birth Year'] = birth_years.max()
        section_4['Most Common Birth Year'] = most_common(birth_years)

    return (section_1, section_2, section_3, section_4)

#Interactive Section: Takes user input, to decide whether to compute the statistics or not.
# Each pass of the loop asks for a city and a filter, loads and filters the
# data, prints the four statistics tables, and then asks whether to repeat
# with a different selection.
print('Hello! Would you like to see Statistical Breakdown of Bike Share Data?')
user_input = get_filter('Response')
if user_input == 'No':
    print('\nIf you want to view the Statistic at a later time, please restart the Script.')

while user_input == 'Yes':
    print('Would you like to see data for Chicago, New York, or Washington?')
    cty = get_filter('City')

    print('Would you like to filter the data by Month, Day, Both, or not at all?')
    data_filter = get_filter('Filter')

    # Month and day stay empty unless the chosen filter needs them; for
    # 'Both' the month is asked first, then the day.
    mnth, dy = '', ''
    if data_filter in ('Month', 'Both'):
        print('Which month - January, February, March, April, May, or June?')
        mnth = get_filter('Month')
    if data_filter in ('Day', 'Both'):
        print('Which day - Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday?')
        dy = get_filter('Day')
    print('\nCity: ({}), Filter: ({}), Month: ({}), Day: ({})'.format(cty, data_filter, mnth, dy))

    start_time = time.time()
    data_frame = filter_data(cty, data_filter, mnth, dy)
    print("\nIt took {} second(s) to load and filter the raw data of {} size.".format(
        time.time() - start_time, data_frame.shape))

    start_time = time.time()
    section_1, section_2, section_3, section_4 = stats(data_frame)
    print("\nIt took {} second(s) to calculate the needed Statistical Data.".format(time.time() - start_time))

    # Heading spelling corrected from 'Secion' to 'Section'.
    for number, table in enumerate((section_1, section_2, section_3, section_4), start=1):
        print('\nSection {}:\n{}'.format(number, table))

    print('\nWould you like to review the Statistics filtering a different set of data?')
    resp = get_filter('Response')
    if resp == 'No':
        print('\nIf you want to review the Statistics filtering a different set of data, please restart the Script.')
        break
        
#Interactive Section: Takes user input, to decide whether to display raw data or not.
# Shows the chosen city's raw rows five at a time, asking before each further
# page, and stops on its own once every row has been shown.
print('\nWould you like to see the Bike Share raw Data?')
user_input = get_filter('Response')
if user_input == 'Yes':
    print('\nWould you like to see data for Chicago, New York, or Washington?')
    cty = get_filter('City')
    # Read '<city>.csv' from the working directory, the same location
    # filter_data() loads from, instead of an absolute path that only
    # exists on one machine.
    filename = cty + '.csv'
    df = pd.read_csv(filename)
    page_size = 5
    total_rows = len(df)
    for first_row in range(0, total_rows, page_size):
        print('\n', df[first_row:first_row + page_size])
        if first_row + page_size >= total_rows:
            # Nothing left to page through; do not keep printing empty frames.
            print('\nThere is no more Bike Share raw Data to display.')
            break
        print('\nWould you like to see some more Bike Share raw Data?')
        if get_filter('Response') == 'No':
            break
# Printed whether or not raw data was shown.
print('\nIf you want to view the Data at a later time, please restart the Script.')

Hello! Would you like to see Statistical Breakdown of Bike Share Data?
Enter (y) for Yes,  (n) for No: y
Would you like to see data for Chicago, New York, or Washington?
Enter (c) for chicago,  (n) for new_york_city,  (w) for washington: c
Would you like to filter the data by Month, Day, Both, or not at all?
Enter (mon) for Month,  (dy) for Day,  (bt) for Both,  (no) for None: no

City: (chicago), Filter: (None), Month: (), Day: ()

It took 1.4599645137786865 second(s) to load and filter the raw data of (300000, 12) size.

It took 0.46530890464782715 second(s) to calculate the needed Statistical Data.

Secion 1:
  Most Common Month Most Common Day  Most Common Hour
1              June         Tuesday                17

Secion 2:
  Most Common Start Station  Most Common End Station  \
1   Streeter Dr & Grand Ave  Streeter Dr & Grand Ave   

                                   Most Common Route  
1  Lake Shore Dr & Monroe St - Streeter Dr & Gran...  

Secion 3:
   Total Travel Time (Mins)