In [1]:
import time
import pandas as pd
import numpy as np

# Data Structure

In [2]:
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all']
cities = list(CITY_DATA.keys())

# Helper function to ask user input

In [3]:
def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')   
    city = None
    month = None
    day = None
    
    # get user input for city (chicago, new york city, washington). HINT: Use a while loop to handle invalid inputs

    while True:
        city = input("\nWhat city would your like to explore? [Chicago, New York City, or Washington?]\n").lower()
        if city in cities:
            print('Awesome!')
            break
        else:
            print('Please choose one of the three cities given')
            
    # get user input for month (all, january, february, ... , june)        
            
    while True:
        month = input("\nNow please choose one of the following: [January, February, March, April, May, June, or all]\n").lower()
        if month in months:
            print('Great!')
            break
        else:
            print('Please choose one of the given options!')
    
  # get user input for day of week (all, monday, tuesday, ... sunday)  
    
    while True:
        day = input("\nChoose a day: [Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all]\n").lower()
        if day in days:
            print('Nice!')
            break
        else:
            print('Please choose one of the days of the week or all!')
            
    



    print('-'*40)
    return city, month, day




In [5]:
city, month, day = get_filters()

Hello! Let's explore some US bikeshare data!

What city would your like to explore? [Chicago, New York City, or Washington?]
Chicago
Awesome!

Now please choose one of the following: [January, February, March, April, May, June, or all]
all
Great!

Choose a day: [Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday, or all]
ALL
Nice!
----------------------------------------


In [6]:
city, month, day

('chicago', 'all', 'all')

# Load Data

In [7]:
def load_data(city, month, day):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    #TO DO: Read data from CSV
    
    df = pd.read_csv(CITY_DATA[city])
    
    #Converted into Start Time column to datetime
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])
    
#TO DO: filter to correct month and day (if not all)

    #Extracted month and day to creat new columns
    
    df['month'] = df['Start Time'].dt.month
    
    df['day_of_week'] = df['Start Time'].dt.strftime('%A')
    
    #filter for month
    
    if month != 'all':
        
        months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
        
        month = months.index(month) + 1
        
        #Filtering by month to create new dataframe
        
        df = df[df['month'] == month]
   
   #filter for day
    
    if day != 'all':
        
        days = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday', 'all']
        
       
        
        df = df[df['day_of_week'] == day.title()]
    

    return df


In [8]:
df = load_data(city, month, day)

In [9]:
df.head(20)

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year,month,day_of_week
0,1423854,2017-06-23 15:09:32,2017-06-23 15:14:53,321,Wood St & Hubbard St,Damen Ave & Chicago Ave,Subscriber,Male,1992.0,6,Friday
1,955915,2017-05-25 18:19:03,2017-05-25 18:45:53,1610,Theater on the Lake,Sheffield Ave & Waveland Ave,Subscriber,Female,1992.0,5,Thursday
2,9031,2017-01-04 08:27:49,2017-01-04 08:34:45,416,May St & Taylor St,Wood St & Taylor St,Subscriber,Male,1981.0,1,Wednesday
3,304487,2017-03-06 13:49:38,2017-03-06 13:55:28,350,Christiana Ave & Lawrence Ave,St. Louis Ave & Balmoral Ave,Subscriber,Male,1986.0,3,Monday
4,45207,2017-01-17 14:53:07,2017-01-17 15:02:01,534,Clark St & Randolph St,Desplaines St & Jackson Blvd,Subscriber,Male,1975.0,1,Tuesday
5,1473887,2017-06-26 09:01:20,2017-06-26 09:11:06,586,Clinton St & Washington Blvd,Canal St & Taylor St,Subscriber,Male,1990.0,6,Monday
6,961916,2017-05-26 09:41:44,2017-05-26 09:46:25,281,Ashland Ave & Lake St,Wood St & Hubbard St,Subscriber,Female,1983.0,5,Friday
7,65924,2017-01-21 14:28:38,2017-01-21 14:40:41,723,Larrabee St & Kingsbury St,Larrabee St & Armitage Ave,Customer,,,1,Saturday
8,606841,2017-04-20 16:08:51,2017-04-20 16:20:20,689,Sedgwick St & Huron St,Halsted St & Blackhawk St (*),Subscriber,Male,1984.0,4,Thursday
9,135470,2017-02-06 18:00:47,2017-02-06 18:09:00,493,Stetson Ave & South Water St,Clinton St & Washington Blvd,Subscriber,Male,1979.0,2,Monday


# Statistics Section

## Time Stats

In [10]:
def time_stats(df):
    """Displays statistics on the most frequent times of travel."""

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # display the most common month
    
    df['month'] = df['Start Time'].dt.month_name()
    
    common_month = df['month'].mode()[0]
    
    print('The most common month: ', common_month)


    # display the most common day of week
    
    df['day_of_week'] = df['Start Time'].dt.day_name()
    
    common_day_of_week = df['day_of_week'].mode()[0]
    
    print('The most common week: ', common_day_of_week)


    # display the most common start hour
    
    df['hour'] = df['Start Time'].dt.hour
    
    common_hour = df['hour'].mode()[0]
    
    print('The most common hour: ', common_hour)


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [11]:
time_stats(df)


Calculating The Most Frequent Times of Travel...

The most common month:  June
The most common week:  Tuesday
The most common hour:  17

This took 0.26693010330200195 seconds.
----------------------------------------


# Station Stats

In [12]:
def station_stats(df):
    """Displays statistics on the most popular stations and trip."""

    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()
    
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # display most commonly used start station
    
    df['common_start_station'] = df['Start Station'].value_counts().idxmax()
    
    common_start_station = df['common_start_station'].mode()[0]
    
    print('The most common start station: ', common_start_station)

    # display most commonly used end station
    
    df['common_end_station'] = df['End Station'].value_counts().idxmax()
    
    common_end_station = df['common_end_station'].mode()[0]
    
    print('The most common end station: ', common_end_station)


    # display most frequent combination of start station and end station trip
    # referenced both start and end stations in order to be able to concatenate strings
    
    start = df['Start Station']
    
    end = df['End Station']
    
    frequent_combination = (start + end).mode().loc[0]
    
    print("\nThe most frequent combination of start station and end station trip is:\n", frequent_combination)
    
    

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)



In [13]:
station_stats(df)


Calculating The Most Popular Stations and Trip...

The most common start station:  Streeter Dr & Grand Ave
The most common end station:  Streeter Dr & Grand Ave

The most frequent combination of start station and end station trip is:
 Lake Shore Dr & Monroe StStreeter Dr & Grand Ave

This took 0.2510542869567871 seconds.
----------------------------------------


## Trip Duration Stats

In [14]:
def trip_duration_stats(df):
    """Displays statistics on the total and average trip duration."""

    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    # display total travel time
    
    trip_duration_sum = df['Trip Duration'].sum()
    
    print("Total travel time: {} (s)".format(trip_duration_sum))


    # display mean travel time
    
    trip_duration_mean = df["Trip Duration"].mean()
    
    print("Total travel time is: {} (s)".format(trip_duration_mean))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

In [15]:
trip_duration_stats(df)


Calculating Trip Duration...

Total travel time: 280871787 (s)
Total travel time is: 936.23929 (s)

This took 0.007169961929321289 seconds.
----------------------------------------


## User Stats

In [16]:
# Remeber chicago doesn't have gender and birth year

def user_stats(df):
    """Displays statistics on bikeshare users."""

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    
    user_type_count = df['User Type'].value_counts()
    
    print("\nThe user type count is: ", user_type_count)


    # Display counts of gender
    
    try:
        gender_count = df['Gender'].value.counts()
        print("\nThe gender counts are: ", gender_count)
    except:
        print("There isn't a Gender column for this file")


    # Display earliest, most recent, and most common year of birth
    
    try:
        earliest_birth = int(df['Birth Year'].min())
        most_recent_birth = int(df['Birth Year'].max())
        most_common_birth = int(df['Birth Year'].mode()[0])
        print("\nThe earliest birth year: {}.\n\nThe most recent birth year: {}.\n\nThe most common year of birth: {}.".format(earliest_birth, most_recent_birth, most_common_birth))
    except:
        print("There is no information about birth year in this file")
        
    


    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)




In [17]:
user_stats(df)


Calculating User Stats...


The user type count is:  Subscriber    238889
Customer       61110
Dependent          1
Name: User Type, dtype: int64
There isn't a Gender column for this file

The earliest birth year: 1899.

The most recent birth year: 2016.

The most common year of birth: 1989.

This took 0.034304141998291016 seconds.
----------------------------------------


## Promp User Function

In [18]:
def show_raw_data(df):
    """displays raw data by user request. Ask if they want 5 additional rows to be shown"""
    print(df.head())
    data_amt = 0
    while True:
        raw_data = input("\nWould you like to see the next five rows of raw data? Enter Yes or No.\n")
        if raw_data.casefold() != 'yes':
            return
        data_amt = data_amt + 5
        print(df.iloc[data_amt:data_amt+5])

In [19]:
show_raw_data(df)

   Unnamed: 0          Start Time             End Time  Trip Duration  \
0     1423854 2017-06-23 15:09:32  2017-06-23 15:14:53            321   
1      955915 2017-05-25 18:19:03  2017-05-25 18:45:53           1610   
2        9031 2017-01-04 08:27:49  2017-01-04 08:34:45            416   
3      304487 2017-03-06 13:49:38  2017-03-06 13:55:28            350   
4       45207 2017-01-17 14:53:07  2017-01-17 15:02:01            534   

                   Start Station                   End Station   User Type  \
0           Wood St & Hubbard St       Damen Ave & Chicago Ave  Subscriber   
1            Theater on the Lake  Sheffield Ave & Waveland Ave  Subscriber   
2             May St & Taylor St           Wood St & Taylor St  Subscriber   
3  Christiana Ave & Lawrence Ave  St. Louis Ave & Balmoral Ave  Subscriber   
4         Clark St & Randolph St  Desplaines St & Jackson Blvd  Subscriber   

   Gender  Birth Year    month day_of_week  hour     common_start_station  \
0    Male      

## Main Function

In [None]:
def main():
    while True:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)
        """asks user if they want to see 5 rows of data"""
        while True:
            raw_data = input("\nWould you like to see five rows of raw data? Enter Yes or No.\n")
            if  raw_data.lower() != 'yes':
                break
            show_raw_data(df)
            break
        

        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break


if __name__ == "__main__":
	main()

# in a script, uncomment this:
# if __name__ --"

Hello! Let's explore some US bikeshare data!
