# In [None]:
import time
import pandas as pd
import numpy as np
import re

# Maps each supported city (lowercase, as typed by the user) to its CSV file.
CITY_DATA = {
    'chicago': 'chicago.csv',
    'new york city': 'new_york_city.csv',
    'washington': 'washington.csv',
}

# Accepted month answers; the trailing 'all' means "do not filter by month".
months = [
    'january',
    'february',
    'march',
    'april',
    'may',
    'june',
    'july',
    'august',
    'september',
    'october',
    'november',
    'december',
    'all',
]

# Accepted weekday answers; the trailing 'all' means "do not filter by day".
days = [
    'sunday',
    'monday',
    'tuesday',
    'wednesday',
    'thursday',
    'friday',
    'saturday',
    'all',
]

def _prompt_for_choice(prompt, valid_choices, invalid_message):
    """Keep asking *prompt* until the reply is one of *valid_choices*.

    The reply is stripped and lower-cased before it is checked, so the
    entries of *valid_choices* are expected to be lowercase.

    Args:
        prompt: Text passed to ``input``.
        valid_choices: Container supporting ``in`` (a dict checks its keys).
        invalid_message: ``str.format`` template with one ``{}`` placeholder
            that receives the rejected reply, title-cased.

    Returns:
        The accepted reply as a lowercase string.
    """
    while True:
        choice = input(prompt).strip().lower()
        if choice in valid_choices:
            return choice
        print(invalid_message.format(choice.title()))


def get_filters():
    """Ask the user which city, month and weekday to analyse.

    Each question is repeated until the answer is valid. The city must be a
    key of ``CITY_DATA``; the month and day must appear in ``months`` and
    ``days`` respectively (both lists include ``'all'``).

    Returns:
        A ``(city, month, day)`` tuple of lowercase strings.
    """
    # Membership is tested against the dict keys. Testing against the file
    # name (a substring check) would reject 'new york city', because its
    # file name uses underscores instead of spaces.
    city = _prompt_for_choice(
        "Type in the desired city you wanna look at, choose from: Chicago, New York City, or Washington",
        CITY_DATA,
        'Sorry, {} is not a valid input. Try again',
    )
    print('Loading data from {}'.format(city.title()))

    month = _prompt_for_choice(
        "Now enter your desired month to lookup, or type 'all' if you want data for all the months",
        months,
        "Sorry, {} is not a valid input. Try again",
    )
    print("{} was selected".format(month.title()))

    day = _prompt_for_choice(
        "Specify a day of the week you want to look up (Sunday, Monday, etc), otherwise, type in 'all' to see everyday's data ",
        days,
        "Sorry, {} is not a valid day. Try again",
    )
    print("Thanks. Getting your data ready!")

    print('-'*40)

    return city, month, day


def load_data(city, month, day):
    """Load one city's trips and apply the optional month/weekday filters.

    Args:
        city: Key of ``CITY_DATA`` naming the CSV file to read.
        month: English month name (any case), or ``'all'`` for no filter.
        day: English weekday name (any case), or ``'all'`` for no filter.

    Returns:
        A DataFrame of the matching rows. ``'Start Time'`` is converted to
        datetimes and three columns are added from it: ``'month'`` (month
        number), ``'weekday'`` (weekday name) and ``'hour'``. The result may
        be empty when no trip matches the filters.
    """
    df = pd.read_csv(CITY_DATA[city])
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    start = df['Start Time']
    df['month'] = start.dt.month
    # day_name is a method; without the call the column would hold the
    # bound method object instead of the weekday names.
    df['weekday'] = start.dt.day_name()
    df['hour'] = start.dt.hour

    month = month.lower()
    day = day.lower()

    # The 'month' column is numeric, so the filter compares month *names*
    # derived from the timestamps. The filtered frame is assigned back;
    # a bare df[mask] expression would be discarded.
    if month != 'all':
        df = df[start.dt.month_name().str.lower() == month]

    if day != 'all':
        df = df[df['weekday'].str.lower() == day]

    return df

def time_stats(df):
    """Print the most frequent month, weekday and start hour of the trips.

    Args:
        df: DataFrame with ``'month'``, ``'weekday'`` and ``'hour'`` columns.

    Returns:
        None. Results and the elapsed time are printed.
    """
    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    if df.empty:
        # idxmax() raises ValueError on an empty Series, so report the
        # empty selection instead of crashing.
        print("No trips match the selected filters, so there are no travel times to report")
    else:
        reports = (
            ('month', "The month with the most rides is: {}"),
            ('weekday', "The most common day of the week is: {}"),
            ('hour', "And the most common hour in the day is: {}"),
        )
        for column, message in reports:
            print(message.format(df[column].value_counts().idxmax()))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)

def station_stats(df):
    """Print the most popular start station, end station and route.

    Args:
        df: DataFrame with ``'Start Station'`` and ``'End Station'`` columns.

    Returns:
        None. Results and the elapsed time are printed.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    if df.empty:
        # idxmax() raises ValueError on an empty Series, so report the
        # empty selection instead of crashing.
        print("No trips match the selected filters, so there are no station statistics to report")
    else:
        most_pop_start_station = df['Start Station'].value_counts().idxmax()
        print("The most popular start station is {}".format(most_pop_start_station))

        most_pop_end_station = df['End Station'].value_counts().idxmax()
        print("The most popular end station is {}".format(most_pop_end_station))

        # idxmax() already finds the largest group; sorting first is
        # unnecessary work. The label is a (start, end) tuple.
        route_counts = df.groupby(['Start Station', 'End Station']).size()
        route_start, route_end = route_counts.idxmax()
        print("The most popular trip route is from {} to {}".format(route_start, route_end))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
    

def trip_duration_stats(df):
    """Print the total and the average trip duration.

    Args:
        df: DataFrame with a numeric ``'Trip Duration'`` column; the printed
            messages treat the values as seconds.

    Returns:
        None. Results and the elapsed time are printed.
    """
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()

    durations = df['Trip Duration'].dropna()

    if durations.empty:
        print("No trip durations are available for the selected filters")
    else:
        # Round each trip to whole seconds, then add them up in one
        # vectorized pass and split the total with integer arithmetic.
        # This avoids a per-row timedelta conversion and does not depend
        # on the text layout of str(Timedelta).
        total_seconds = int(durations.round().sum())
        total_minutes, seconds = divmod(total_seconds, 60)
        total_hours, minutes = divmod(total_minutes, 60)
        days, hours = divmod(total_hours, 24)

        print("Total travel time is {} days, {} hours, {} minutes and {} seconds".format(days, hours, minutes, seconds))

        averagetime = durations.mean()
        print("The average time for a trip is {} seconds".format(averagetime))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)


def user_stats(df):
    """Print counts per user type and, when available, gender and birth year.

    Args:
        df: DataFrame with a ``'User Type'`` column. ``'Gender'`` and
            ``'Birth Year'`` are optional; a notice is printed for each one
            that is missing.

    Returns:
        None. Results and the elapsed time are printed.
    """
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    for user, count in df['User Type'].value_counts().items():
        print("There are {} {} user".format(count, user))

    if 'Gender' not in df.columns:
        print("Sorry, no gender data available in this city's database")
    else:
        for gender, count in df['Gender'].value_counts().items():
            print("There is {} {} in our user base".format(count, gender))

    if 'Birth Year' not in df.columns:
        print("Sorry, no birth date data available in this city's database")
    else:
        # Drop missing values first: with no usable value idxmax() raises
        # ValueError and int(nan) raises as well.
        birth_years = df['Birth Year'].dropna()
        if birth_years.empty:
            print("Sorry, no birth years are recorded for the selected trips")
        else:
            most_common_year = int(birth_years.value_counts().idxmax())
            print("The most common birth year is {}".format(most_common_year))

            most_recent_year = int(birth_years.max())
            print("The most recent birth year is {}".format(most_recent_year))

            earliest_year = int(birth_years.min())
            print("The earliest year is {}".format(earliest_year))

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-'*40)
        
        
def main():
    """Run the interactive session until the user declines to restart.

    Each round asks for the filters, loads the matching data, prints the
    four statistics reports in order, and then asks whether to go again.
    Any answer other than "yes" (case-insensitive) ends the session.
    """
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        for report in (time_stats, station_stats, trip_duration_stats, user_stats):
            report(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'

            
# Start the interactive session only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()