In [47]:
import time
import pandas as pd
import numpy as np

# Data Structure

In [48]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all']
cities = list(CITY_DATA.keys())

# Helper function to ask user input

In [49]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')   
    city = None
    month = None
    day = None
    
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs

    while True:
        city = input("\nWhat city would your like to explore? [Chicago, New York City, or Washington?]\n").lower()
        if city in cities:
            print('Awesome!')
            break
        else:
            print('Please choose one of the three cities given')
            
    # get user input for month (all, january, february, ... , june)        
            
    while True:
        month = input("\nNow please choose one of the following: [January, February, March, April, May, June, or all]\n").lower()
        if month in months:
            print('Great!')
            break
        else:
            print('Please choose one of the given options!')
    
  # get user input for day of week (all, monday, tuesday, ... sunday)  
    
    while True:
        day = input("\nChoose a day: [Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all]\n").lower()
        if day in days:
            print('Nice!')
            break
        else:
            print('Please choose one of the days of the week or all!')
            
    



    print('-'*40)
    return city, month, day




In [50]:
city, month, day = get_filters()

Hello! Let's explore some US bikeshare data!

What city would your like to explore? [Chicago, New York City, or Washington?]
chicago
Awesome!

Now please choose one of the following: [January, February, March, April, May, June, or all]
january
Great!

Choose a day: [Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all]
monday
Nice!
----------------------------------------


In [51]:
city, month, day

('chicago', 'january', 'monday')

# Load Data

In [52]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    #TO DO: Read data from CSV
    
    df = pd.read_csv(CITY_DATA[city])
    
    #Converted into Start Time column to datetime
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    
#TO DO: filter to correct month and day (if not all)

    #Extracted month and day to creat new columns
    
    df['month'] = df['Start Time'].dt.month
    
    df['day_of_week'] = df['Start Time'].dt.strftime('%A')
    
    #filter for month
    
    if month != 'all':
        
        months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
        
        month = months.index(month) + 1
        
        #Filtering by month to create new dataframe
        
        df = df[df['month'] == month]
   
   #filter for day
    
    if day != 'all':
        
        days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all']
        
       
        
        df = df[df['day_of_week'] == day.title()]
    

    return df


In [53]:
df = load_data(city, month, day)

In [54]:
df.head(20)

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
121,42280,2017-01-16 14:54:38,2017-01-16 15:09:03,865,Field Museum,Millennium Park,Subscriber,Male,1992.0,1,Monday
167,77795,2017-01-23 18:50:16,2017-01-23 18:56:43,387,Indiana Ave & Roosevelt Rd,Michigan Ave & Washington St,Subscriber,Male,1990.0,1,Monday
236,42046,2017-01-16 11:29:52,2017-01-16 11:43:56,844,Pine Grove Ave & Waveland Ave,Ravenswood Ave & Montrose Ave (*),Subscriber,Female,1990.0,1,Monday
253,73287,2017-01-23 07:21:14,2017-01-23 07:32:39,685,Artesian Ave & Hubbard St,Racine Ave (May St) & Fulton St,Subscriber,Male,1972.0,1,Monday
256,3680,2017-01-02 23:40:20,2017-01-02 23:45:25,305,Lincoln Ave & Diversey Pkwy,Lincoln Ave & Belmont Ave,Subscriber,Male,1987.0,1,Monday
496,2337,2017-01-02 11:28:54,2017-01-02 11:46:50,1076,Desplaines St & Kinzie St,Desplaines St & Kinzie St,Subscriber,Male,1985.0,1,Monday
579,76688,2017-01-23 17:10:46,2017-01-23 17:17:47,421,Financial Pl & Congress Pkwy,Wabash Ave & Roosevelt Rd,Subscriber,Male,1991.0,1,Monday
634,106343,2017-01-30 19:11:40,2017-01-30 19:19:15,455,Michigan Ave & Washington St,Canal St & Madison St,Subscriber,Male,1986.0,1,Monday
683,21541,2017-01-09 14:58:30,2017-01-09 15:06:32,482,Wells St & Huron St,Clinton St & Jackson Blvd,Subscriber,Male,1976.0,1,Monday
894,21264,2017-01-09 12:57:39,2017-01-09 13:00:54,195,Broadway & Waveland Ave,Pine Grove Ave & Irving Park Rd,Subscriber,Male,1986.0,1,Monday


# Statistics Section

## Time Stats

In [55]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # display the most common month
    
    df['month'] = df['Start Time'].dt.month_name()
    
    common_month = df['month'].mode()[0]
    
    print('The most common month: ', common_month)


    # display the most common day of week
    
    df['day_of_week'] = df['Start Time'].dt.day_name()
    
    common_day_of_week = df['day_of_week'].mode()[0]
    
    print('The most common week: ', common_day_of_week)


    # display the most common start hour
    
    df['hour'] = df['Start Time'].dt.hour
    
    common_hour = df['hour'].mode()[0]
    
    print('The most common hour: ', common_hour)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [56]:
time_stats(df)


Calculating The Most Frequent Times of Travel...

The most common month:  January
The most common week:  Monday
The most common hour:  17

This took 0.019850969314575195 seconds.
----------------------------------------


# Station Stats

In [57]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # display most commonly used start station
    
    df['common_start_station'] = df['Start Station'].value_counts().idxmax()
    
    common_start_station = df['common_start_station'].mode()[0]
    
    print('The most common start station: ', common_start_station)

    # display most commonly used end station
    
    df['common_end_station'] = df['End Station'].value_counts().idxmax()
    
    common_end_station = df['common_end_station'].mode()[0]
    
    print('The most common end station: ', common_end_station)


    # display most frequent combination of start station and end station trip
    # referenced both start and end stations in order to be able to concatenate strings
    
    start = df['Start Station']
    
    end = df['End Station']
    
    frequent_combination = (start + end).mode().loc[0]
    
    print("\nThe most frequent combination of start station and end station trip is:\n", frequent_combination)
    
    

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



In [58]:
station_stats(df)


Calculating The Most Popular Stations and Trip...

The most common start station:  Clinton St & Washington Blvd
The most common end station:  Clinton St & Washington Blvd

The most frequent combination of start station and end station trip is:
 Michigan Ave & Lake StCanal St & Madison St

This took 0.013569831848144531 seconds.
----------------------------------------


## Trip Duration Stats

In [59]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    
    trip_duration_sum = df['Trip Duration'].sum()
    
    print("Total travel time: {} (s)".format(trip_duration_sum))


    # display mean travel time
    
    trip_duration_mean = df["Trip Duration"].mean()
    
    print("Total travel time is: {} (s)".format(trip_duration_mean))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [60]:
trip_duration_stats(df)


Calculating Trip Duration...

Total travel time: 2247180 (s)
Total travel time is: 639.8576309794988 (s)

This took 0.008212804794311523 seconds.
----------------------------------------


## User Stats

In [61]:
# Remeber chicago doesn't have gender and birth year

def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    
    user_type_count = df['User Type'].value_counts()
    
    print("\nThe user type count is: ", user_type_count)


    # Display counts of gender
    
    try:
        gender_count = df['Gender'].value.counts()
        print("\nThe gender counts are: ", gender_count)
    except:
        print("There isn't a Gender column for this file")


    # Display earliest, most recent, and most common year of birth
    
    try:
        earliest_birth = int(df['Birth Year'].min())
        most_recent_birth = int(df['Birth Year'].max())
        most_common_birth = int(df['Birth Year'].mode()[0])
        print("\nThe earliest birth year: {}.\n\nThe most recent birth year: {}.\n\nThe most common year of birth: {}.".format(earliest_birth, most_recent_birth, most_common_birth))
    except:
        print("There is no information about birth year in this file")
        
    


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)




In [62]:
user_stats(df)


Calculating User Stats...


The user type count is:  Subscriber    3408
Customer       104
Name: User Type, dtype: int64
There isn't a Gender column for this file

The earliest birth year: 1934.

The most recent birth year: 1999.

The most common year of birth: 1989.

This took 0.010119915008544922 seconds.
----------------------------------------


## Promp User Function

In [63]:
def show_raw_data(df):
    """displays raw data by user request. Ask if they want 5 additional rows to be shown"""
    print(df.head())
    data_amt = 0
    while True:
        raw_data = input("\nWould you like to see the next five rows of raw data? Enter Yes or No.\n")
        if raw_data.casefold() != 'yes':
            return
        data_amt = data_amt + 5
        print(df.iloc[data_amt:data_amt+5])

In [64]:
show_raw_data(df)

     Unnamed: 0          Start Time             End Time  Trip Duration  \
121       42280 2017-01-16 14:54:38  2017-01-16 15:09:03            865   
167       77795 2017-01-23 18:50:16  2017-01-23 18:56:43            387   
236       42046 2017-01-16 11:29:52  2017-01-16 11:43:56            844   
253       73287 2017-01-23 07:21:14  2017-01-23 07:32:39            685   
256        3680 2017-01-02 23:40:20  2017-01-02 23:45:25            305   

                     Start Station                        End Station  \
121                   Field Museum                    Millennium Park   
167     Indiana Ave & Roosevelt Rd       Michigan Ave & Washington St   
236  Pine Grove Ave & Waveland Ave  Ravenswood Ave & Montrose Ave (*)   
253      Artesian Ave & Hubbard St    Racine Ave (May St) & Fulton St   
256    Lincoln Ave & Diversey Pkwy          Lincoln Ave & Belmont Ave   

      User Type  Gender  Birth Year    month day_of_week  hour  \
121  Subscriber    Male      1992.0  January

## Main Function

In [65]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        """asks user if they want to see 5 rows of data"""
        while True:
            raw_data = input("\nWould you like to see five rows of raw data? Enter Yes or No.\n")
            if  raw_data.lower() != 'yes':
                break
            show_raw_data(df)
            break
        

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()

# in a script, uncomment this:
# if __name__ --"

Hello! Let's explore some US bikeshare data!

What city would your like to explore? [Chicago, New York City, or Washington?]
new york city
Awesome!

Now please choose one of the following: [January, February, March, April, May, June, or all]
may
Great!

Choose a day: [Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all]
sunday
Nice!
----------------------------------------

Calculating The Most Frequent Times of Travel...

The most common month:  May
The most common week:  Sunday
The most common hour:  13

This took 0.019751787185668945 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

The most common start station:  Central Park S & 6 Ave
The most common end station:  Central Park S & 6 Ave

The most frequent combination of start station and end station trip is:
 Central Park S & 6 AveCentral Park S & 6 Ave

This took 0.014663219451904297 seconds.
----------------------------------------

Calculating Trip Duration.

In [None]:
main()

Hello! Let's explore some US bikeshare data!
