# The Bike Sharing Project
## From Udacity
### Importing Libraries


In [12]:
import time
import pandas as pd
import numpy as np
import datetime as dt

CITY_DATA = { 'chicago': 'chicago.csv',
              'new york city': 'new_york_city.csv',
              'washington': 'washington.csv' }

### Filters
Lets the user set filter: which city, month and day.
Asks user to specify a city, month, and day to analyze.


In [13]:
def get_filters():

    print('Hello! Let\'s explore some US bikeshare data!')
    
    city = input("Which city?: ")
    city = city.lower()
    while city not in ['chicago', 'washington', 'new york city']:
        city = input("Which city: ").lower()

    # get user input for month (all, january, february, ... , june)
    month = input("Which month: ")
    month = month.lower()
    while month not in ['january', 'february', 'march', 'april', 'may', 'june', 'all']:
        month = input("Which month: ").lower()

    # get user input for day of week (all, monday, tuesday, ... sunday)
    day = input("Which day?")
    day= day.lower();
    while day not in ["monday", "tuesday", "wednesday", "thursday", "friday", "saturday", "sunday", 'all']:
        day = input("Which day: ").lower()
    
    print('Filters ' + '-'*80 + ' Filters')
    return city, month, day

### Load Data from .csv files
After the user selected city, month and day, we load that selected data into a DataFrame

In [14]:
def load_data(city, month, day):

    # load data file into a dataframe
    df = pd.read_csv("C:\\Users\\lucab\\jupiter_notebook_files\\project_bikes\\" + CITY_DATA[city])
    df_2 = df 
    # When I only create a new column in the df DataFrame, 
    # it will also be filtered and I cannot find out what the busiest months and days are. (The copy is linked by pointers)
    # Therefore, a new dataframe

    # convert the Start Time column to datetime
    df['Start Time'] = pd.to_datetime(df['Start Time'])

    # extract month and day of week from Start Time to create new columns
    df['month'] = df['Start Time'].dt.month
    df['day_of_week'] = df['Start Time'].dt.day_name() #converts the date (Bsp: 22.02.1995 to 22 = Mittwoch) and adds it to new column day_of_week
    # for some reason, the weekday_name method does not work here. Only in the udacity ide. But day_name() seems to work
    
    # filter by month if applicable. --> get the index of the month of interest
    if month != 'all':
        # use the index of the months list to get the corresponding int
        months = ['january', 'february', 'march', 'april', 'may', 'june']
        month = months.index(month) + 1
    
        # filter by month to create the new dataframe
        df = df[df['month'] == month] # --> get only the Start Times in that month

    # filter by day of week if applicable
    if day != 'all':
        # filter by day of week to create the new dataframe
        df = df[df['day_of_week'] == day.title()] # We compare the 
    print('Load Data ' + '-'*80 + ' Load Data')

    return df, df_2

### Time Stats
Displays statistics on the most frequent times of travel.

In [15]:
def time_stats(df, df_2):

    print('\nCalculating The Most Frequent Times of Travel...\n')
    start_time = time.time()

    # display the most common month
    most_common_month = pd.to_datetime(df_2['Start Time']).dt.month.value_counts()
    print('The most common month is:', most_common_month.index[0])
    
    # display the most common day of week
    busyest_day = df_2['Start Time'].dt.day.value_counts() 
    print("Most common day: ", busyest_day.index[0])

    # display the most common start hour
    busiest_hour = df['Start Time'].dt.hour.value_counts() # use datetime module. and count values -> Index 0 is the highest occurence
    print('Busiest hour is: ', busiest_hour.index[0])
    #print(type(busiest_hour))
    
    # display most frequent combination of start station and end station trip
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('Time Stats ' + '-'*80 + ' Time Stats')

### Finding the busiest bike station
Displays statistics on the most popular stations and trip.

In [16]:
def station_stats(df):
    print('\nCalculating The Most Popular Stations and Trip...\n')
    start_time = time.time()

    # display most commonly used start station
    busiest_start_station = df['Start Station'].value_counts() # value_counts counts the occuerences and puts them in an asending order. We are interested in the index. Most occurence = index 0
    print('Busiest Start Station is: ', busiest_start_station.index[0])

    # display most commonly used end station
    busiest_end_station = df['End Station'].value_counts()
    print('Busiest End Station is: ', busiest_end_station.index[0])

    # display most frequent combination of start station and end station trip
    busiest_combi = df[['Start Station', 'End Station']].value_counts()

    #df.groupby(['Start Station','End Station']).size()
    if busiest_combi.values[0] != busiest_combi.values[1]:
        print('The most frequent combi of start and end is: ', busiest_combi.index[0])
    else:
        print('The most frequent combi of start and end is: ', busiest_combi.index[0], 'and ', busiest_combi.index[1])

    print(busiest_combi)
    
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('Station Stats ' + '-'*80 + ' Station Stats')

### Calculating the total travel time
We can find the total time travelled by subtracting the end time by the start time. To do so, we add a new column to the dataframe with the time difference and then add that column up.

In [17]:
def trip_duration_stats(df):
    print('\nCalculating Trip Duration...\n')
    start_time = time.time()
    
    # display total travel time
    #travel_time = pd.to_datetime(df['End Time']).dt.time - pd.to_datetime(df['Start Time']).dt.time
    travel_end = pd.to_datetime(df['End Time'])
    travel_start = pd.to_datetime(df['Start Time'])
    df['Differenz'] = travel_end - travel_start
    total_travel_time = df['Differenz'].sum()
    print('The total travel time is:', total_travel_time)
    
    # display mean travel time
    mean_travel_time = df['Differenz'].mean()
    print('The mean travel time is: ', mean_travel_time)

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('Trip Dur Stats ' + '-'*80 + ' Trip Dur Stats')

### User Stats
Displays statistics on bikeshare users.

In [18]:
def user_stats(df, city):
    print('\nCalculating User Stats...\n')
    start_time = time.time()

    #print(df)
    # Display counts of user types
    user_type = df['User Type'].value_counts()
    print('Amount of Subscribers: ', user_type['Subscriber'])
    print('Amount of Customers: ', user_type['Customer'])
    
    # Display counts of gender
    if city != 'washington':
        gender_count = df['Gender'].value_counts()
        print('The amount of men is: ', gender_count['Male'])
        print('The amount of woman is: ', gender_count['Female'])

        # Display earliest, most recent, and most common year of birth
        #print(df['Birth Year'])
        most_recent_birthday = df['Birth Year'].max()
        most_common_birth_year = df['Birth Year'].value_counts()
        print('The youngest traveller was born: ', most_recent_birthday)
        print('Most travellers were born in: ', most_common_birth_year.index[0])
    else:
        print('There exists no data about that for Washington')
    print("\nThis took %s seconds." % (time.time() - start_time))
    print('User Stats ' + '-'*80 + ' User Stats')

### Main Function
calling all funcs

In [19]:
def main():
    while True:
        # city, month, day = 'chicago', 'march', 'wednesday'
        city, month, day = get_filters() #commment, so I dont have to type it in each time. uncomment when everything else done
        df, df_2 = load_data(city, month, day)
        time_stats(df, df_2)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df, city)
        
        n = 5
        while True:
            show_data = input('Would you like to see 5 (more) rows of data? Type yes or no.\n').lower()
            if show_data == 'yes':
                print(df.head(n))
                n += 5
            else:
                break
            
        restart = input('\nWould you like to restart? Enter yes or no.\n')
        if restart.lower() != 'yes':
            break

if __name__ == "__main__":
	main()

Hello! Let's explore some US bikeshare data!
Which city?: chicago
Which month: march
Which day?monday
Filters -------------------------------------------------------------------------------- Filters
Load Data -------------------------------------------------------------------------------- Load Data

Calculating The Most Frequent Times of Travel...

The most common month is: 6
Most common day:  18
Busiest hour is:  17

This took 0.04681682586669922 seconds.
Time Stats -------------------------------------------------------------------------------- Time Stats

Calculating The Most Popular Stations and Trip...

Busiest Start Station is:  Clinton St & Washington Blvd
Busiest End Station is:  Clinton St & Washington Blvd
The most frequent combi of start and end is:  ('Calumet Ave & 33rd St', 'State St & 33rd St')
Start Station                End Station                  
Calumet Ave & 33rd St        State St & 33rd St               10
Columbus Dr & Randolph St    Clinton St & Washington Blv