In [None]:
import time
import pandas as pd
import numpy as np

# Maps each supported city name (lower-case, as compared against the user's
# lower-cased answer in get_filters) to the CSV file load_data reads for it.
CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

def get_filters():
    """
    Asks user to specify a city, month, and day to analyze.

    Every answer is lower-cased and stripped of surrounding whitespace, and
    each question is repeated until one of the accepted values is entered.

    Returns:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    """
    print('Hello! Let\'s explore some US bikeshare data!')

    # get user input for city (chicago, new york city, washington);
    # keep asking until the answer is a key of CITY_DATA
    print("chicago,washington,new york city ")
    city = input("please choose a city \n ").strip().lower()
    while city not in CITY_DATA:
        print("please enter a valid city and try again ")
        city = input("please choose a city \n ").strip().lower()

    # get user input for month (all, january, february, ... , june)
    months = ['january', 'february', 'march', 'april', 'may', 'june', 'all']
    while True:
        print("january,february,march,april,may,june,all")
        month = input("choose a month \n ").strip().lower()
        if month in months:
            break
        print("please enter a valid month and try again ")

    # get user input for day of week (all, monday, tuesday, ... sunday).
    # The names must match pandas' day_name() output (after .title()), so
    # "tuesday" has to be spelled correctly or the day filter matches no rows.
    days = ['sunday', 'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'all']
    while True:
        day = input("choose a day (sunday,monday,tuesday,wednesday,thursday,friday,saturday,all) \n ").strip().lower()
        if day in days:
            break
        print('please enter a valid day and try again ')

    print('-'*40)
    return city, month, day


def load_data(city, month, day):
    """
    Reads the CSV registered for ``city`` and narrows it to the requested period.

    Three helper columns derived from 'Start Time' are added before filtering:
    'month' (month number), 'week_day' (day name) and 'start hour'.

    Args:
        (str) city - name of the city to analyze
        (str) month - name of the month to filter by, or "all" to apply no month filter
        (str) day - name of the day of week to filter by, or "all" to apply no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    """
    month_names = ('january', 'february', 'march', 'april', 'may', 'june')

    trips = pd.read_csv(CITY_DATA[city])
    trips['Start Time'] = pd.to_datetime(trips['Start Time'])

    # Derive the calendar fields used by the filters and by the statistics.
    started = trips['Start Time'].dt
    trips['month'] = started.month
    trips['week_day'] = started.day_name()
    trips['start hour'] = started.hour

    if month != 'all':
        # Month names are listed in calendar order, so position + 1 is the month number.
        month_number = month_names.index(month) + 1
        trips = trips[trips['month'] == month_number]

    if day != 'all':
        # day_name() yields capitalised names such as "Monday".
        trips = trips[trips['week_day'] == day.title()]

    return trips


def time_stats(df):
    """Prints the most common month, weekday and start hour found in ``df``.

    Reads the 'month', 'week_day' and 'start hour' columns and reports the
    first mode of each, followed by how long the calculation took.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    began = time.time()

    # (label, column) pairs, reported in this order: month, weekday, start hour.
    reports = (
        ("most common month is ", 'month'),
        ("most common day of week is ", 'week_day'),
        ("most common start hour is ", 'start hour'),
    )
    for label, column in reports:
        print(label, df[column].mode()[0])

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def station_stats(df):
    """Prints the most used start station, end station and start/end pairing.

    Reads the 'Start Station' and 'End Station' columns of ``df``; a trip is
    identified by joining the two names with " -- ".
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    began = time.time()

    # most commonly used start station
    origins = df['Start Station']
    print("most commonly used start station is", origins.mode()[0])

    # most commonly used end station
    destinations = df['End Station']
    print("most commonly used end station is", destinations.mode()[0])

    # most frequent start/end combination
    routes = origins + " -- " + destinations
    print("most frequent combination of start station and end station trip is \n", routes.mode()[0])

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def trip_duration_stats(df):
    """Prints the sum and the mean of the 'Trip Duration' column of ``df``."""
    print('\nCalculating Trip Duration...\n')
    began = time.time()

    durations = df['Trip Duration']

    # total travel time
    print("total travel time", durations.sum())

    # mean travel time
    print("mean travel time ", durations.mean())

    elapsed = time.time() - began
    print("\nThis took %s seconds." % elapsed)
    print('-'*40)


def user_stats(df,city):
    """Displays statistics on bikeshare users.

    Args:
        df - Pandas DataFrame of trips; the 'User Type' column is always read,
             'Gender' and 'Birth Year' are read only when both are present.
        (str) city - name of the city the data belongs to

    Gender counts and birth-year statistics are printed only when the city is
    not 'washington' and both demographic columns exist; otherwise a notice is
    printed instead of failing with a KeyError on the missing column.
    """

    print('\nCalculating User Stats...\n')
    start_time = time.time()

    # Display counts of user types
    user_type = df['User Type'].value_counts()
    print("counts of user types\n", user_type)

    # Check the columns themselves rather than relying only on the city name,
    # so any data set without demographics is handled the same way.
    has_demographics = 'Gender' in df.columns and 'Birth Year' in df.columns
    if city == 'washington' or not has_demographics:
        print("this city doesnt have the gender or age data")
    else:
        # Display counts of gender
        gender = df['Gender'].value_counts()
        print("counts of gender\n", gender)

        # Display earliest, most recent, and most common year of birth.
        # Missing values are dropped first: with no values left, mode() is
        # empty and indexing it with [0] would fail.
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            print("no birth year data available for this selection")
        else:
            earliest_age = birth_years.min()
            recent_age = birth_years.max()
            common_age = birth_years.mode()[0]
            print("earliest, most recent, and most common year of birth \n",
                  earliest_age, "---", recent_age, "-----", common_age)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def display_data(df):
    """Optionally prints the rows of ``df`` five at a time.

    The user is first asked whether to show any rows, then asked again after
    every batch. Answers are lower-cased and must be 'yes' or 'no'; anything
    else is rejected and the question repeated. Paging stops when the user
    answers 'no' or when every row has been shown, so the user is never
    prompted to view rows that do not exist.

    Args:
        df - Pandas DataFrame whose rows are displayed
    """
    # ask the user whether they would like to display 5 rows from the data
    index = 0
    print("you can see 5 rows if you want")
    user_choice = input("type YES if you want to see the 5 rows type NO if you dont\n").lower()
    while user_choice not in ['yes', 'no']:
        print("please enter a valid choice and try again")
        user_choice = input("type YES if you want to see the 5 rows type NO if you dont\n").lower()

    if user_choice != 'yes':
        return

    total_rows = len(df)
    while index < total_rows:
        print(df.iloc[index: index+5])
        index += 5
        if index >= total_rows:
            # Nothing left to show: do not offer another (empty) batch.
            break
        next_rows = input("do you want to see the next 5 rows?\n").lower()
        while next_rows not in ['yes', 'no']:
            print("please enter a valid choice and try again")
            next_rows = input("do you want to see the next 5 rows? \n").lower()
        if next_rows != 'yes':
            return

    print("there are no more rows to display")
                    
            
def main():
    """Runs the interactive session: ask for filters, load, report, repeat.

    One pass collects the filters, loads the matching data, prints the four
    statistics sections and offers the raw rows. The pass is repeated for as
    long as the user answers 'yes' (any letter case) to the restart question.
    """
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        trips = load_data(city, month, day)

        time_stats(trips)
        station_stats(trips)
        trip_duration_stats(trips)
        user_stats(trips, city)
        display_data(trips)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'


# Start the interactive session only when this module is the entry point
# (__name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
	main()


Hello! Let's explore some US bikeshare data!
chicago,washington,new york city 
please choose a city 
 chicago
january,february,march,april,may,june,all
choose a month 
 all
choose a day (sunday,monday,tueday,wednesday,thursday,friday,saturday,all) 
 all
----------------------------------------

Calculating The Most Frequent Times of Travel...

most common month is  6
most common day of week is  Tuesday
most common start hour is  17

This took 0.030376434326171875 seconds.
----------------------------------------

Calculating The Most Popular Stations and Trip...

most commonly used start station is Streeter Dr & Grand Ave
most commonly used end station is Streeter Dr & Grand Ave
most frequent combination of start station and end station trip is 
 Lake Shore Dr & Monroe St -- Streeter Dr & Grand Ave

This took 0.176527738571167 seconds.
----------------------------------------

Calculating Trip Duration...

total travel time 280871787
mean travel time  936.23929

This took 0.00099778175

In [3]:
# Read the New York City CSV directly (no filtering) and preview its first rows.
df=pd.read_csv('new_york_city.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Start Time,End Time,Trip Duration,Start Station,End Station,User Type,Gender,Birth Year
0,5688089,2017-06-11 14:55:05,2017-06-11 15:08:21,795,Suffolk St & Stanton St,W Broadway & Spring St,Subscriber,Male,1998.0
1,4096714,2017-05-11 15:30:11,2017-05-11 15:41:43,692,Lexington Ave & E 63 St,1 Ave & E 78 St,Subscriber,Male,1981.0
2,2173887,2017-03-29 13:26:26,2017-03-29 13:48:31,1325,1 Pl & Clinton St,Henry St & Degraw St,Subscriber,Male,1987.0
3,3945638,2017-05-08 19:47:18,2017-05-08 19:59:01,703,Barrow St & Hudson St,W 20 St & 8 Ave,Subscriber,Female,1986.0
4,6208972,2017-06-21 07:49:16,2017-06-21 07:54:46,329,1 Ave & E 44 St,E 53 St & 3 Ave,Subscriber,Male,1992.0
