#### 改进内容：
1. 自定义函数，用来处理用户错误输入。
2. 增加数据处理自定义函数的文档内容。
3. 改进数据处理自定义函数的错误细节问题。
4. 增加 try except 语句处理 KeyError。

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
# Maps the title-cased city name a user may enter to the CSV file holding its data.
CITY_DATA = {
    'Chicago': 'chicago.csv',
    'New York City': 'new_york_city.csv',
    'Washington': 'washington.csv',
}
# Valid month answers. "All" sits at index 0, so each month's position in the
# list equals its calendar month number (January is index 1).
MONTHS = ["All", "January", "February", "March", "April", "May", "June"]
# Valid day-of-week answers; "All" means no day filter.
WEEK_DAY = ["All", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

- 创建自定义函数，处理用户的输入。

In [None]:
def get_user_input(input_bulletin,error_notice,valid_container):
    """
    Keep prompting until the user types an accepted value.

    The raw answer is title-cased before it is checked, so the entries of
    valid_container have to be title-cased for a match to be possible.

    Args:
        (str) input_bulletin - prompt shown every time an answer is requested
        (str) error_notice - message printed after each rejected answer
        (dict or list) valid_container - collection of accepted answers

    Returns:
        (str) answer - the first title-cased answer found in valid_container
    """
    answer = input(input_bulletin).title()
    while answer not in valid_container:
        # Rejected: tell the user, then ask again with the same prompt.
        print(error_notice)
        answer = input(input_bulletin).title()
    return answer
        
        

In [None]:
def get_filters():
    """
    Greet the user and collect the three filters for the analysis.

    Each answer is read through get_user_input and validated against the keys
    of CITY_DATA, the MONTHS list and the WEEK_DAY list, in that order.

    Returns:
        (tuple) city, month, day - the validated, title-cased answers
    """
    retry_message = "Invalid input! Please reenter"
    print("Hello! Let's explore some US bikeshare data!")

    city = get_user_input(
        "Please select the city:Chicago,New York City or Washington? ",
        retry_message,
        CITY_DATA.keys(),
    )
    month = get_user_input(
        "Please select month you'd like to explore,ALL or January, February, March, April, May, June? ",
        retry_message,
        MONTHS,
    )
    day = get_user_input(
        "Please select the day of week you'd like to explore,ALL or Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, Sunday? ",
        retry_message,
        WEEK_DAY,
    )
    return city, month, day

- 将df[df["month"]== month_num]改为df=df[df["month"]== month_num]，实现筛选功能
- 将条件句中的条件"all"改为"All"，因为在处理用户的输入时候，进行了title()处理

In [None]:
def load_data(city, month= None, day= None):
    """
    Loads data for the specified city and filters by month and day if applicable.

    Besides filtering, the frame gains the helper columns "year", "month",
    "weekday_name" and "hour" (all derived from "Start Time") and "Trip"
    (start station name concatenated with end station name).

    Args:
        (str) city - name of the city to analyze; must be a key of CITY_DATA
        (str) month - name of the month to filter by; "All" or None applies no month filter
        (str) day - name of the day of week to filter by; "All" or None applies no day filter
    Returns:
        df - Pandas DataFrame containing city data filtered by month and day
    Raises:
        KeyError - if city is not a key of CITY_DATA
        ValueError - if month is neither "All"/None nor an entry of MONTHS
    """
    df = pd.read_csv(CITY_DATA[city])
    df["Start Time"] = pd.to_datetime(df["Start Time"])

    start = df["Start Time"].dt
    df["year"] = start.year
    df["month"] = start.month
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
    # replacement. Fall back to the old attribute only on very old pandas.
    if hasattr(start, "day_name"):
        df["weekday_name"] = start.day_name()
    else:
        df["weekday_name"] = start.weekday_name
    df["hour"] = start.hour
    df["Trip"] = df["Start Station"] + df["End Station"]

    # None (the declared default) is treated like "All": no filtering.
    if month is not None and month != "All":
        # "All" occupies index 0 of MONTHS, so a month's list index already
        # equals its calendar number (January -> 1); adding 1 would select
        # the following month.
        month_num = MONTHS.index(month)
        df = df[df["month"] == month_num]
    if day is not None and day != "All":
        df = df[df["weekday_name"] == day.title()]
    return df

- 分析the most popular hour的时候，.tolist没有调用，改为.tolist()

In [None]:
def time_stats(df):
    """
    Print the most frequent month, weekday and hour found in the data.

    Args:
        (DataFrame) df - must provide the "month", "weekday_name" and "hour" columns
    """
    progress = "Initialize Processing... "

    print(progress)
    month_modes = df["month"].mode().tolist()
    print(f"the most common month is {month_modes[0]}")

    print(progress)
    # value_counts() sorts by descending frequency, so the first label wins.
    weekday_counts = df["weekday_name"].value_counts()
    print(f"the most common weekday is {weekday_counts.index[0]}")

    print(progress)
    hour_modes = df["hour"].mode().tolist()
    print(f"the most popular hour is {hour_modes[0]}")

In [None]:
def station_stats(df):
    """
    Print the most frequent start station, end station and trip.

    Args:
        (DataFrame) df - must provide the "Start Station", "End Station" and "Trip" columns
    """
    def most_frequent(column):
        # value_counts() orders labels by descending frequency.
        return df[column].value_counts().index[0]

    progress = "Initialize Processing... "

    print(progress)
    top_start = most_frequent("Start Station")
    top_end = most_frequent("End Station")
    print(f"the most popular start station is {top_start}")
    print(f"the most popular end station is {top_end}")

    print(progress)
    top_trip = most_frequent("Trip")
    print(f"the most popular trip is {top_trip}")

In [None]:
def trip_duration_stats(df):
    """
    Print the sum and the mean of the "Trip Duration" column.

    Args:
        (DataFrame) df - must provide the "Trip Duration" column
    """
    progress = "Initialize Processing... "

    print(progress)
    durations = df["Trip Duration"]
    total = durations.sum()
    print(f"the total trip duration is {total} minutes ")

    print(progress)
    average = durations.mean()
    print(f"the average of trip duration is {average} minutes ")

- Washington 的 csv 文件中没有 "Gender" 和 "Birth Year" 这两列，需要使用 try except 语句进行处理。

In [None]:
def user_stats(df):
    """
    Print user-type counts, gender counts and birth-year figures.

    The "Gender" and "Birth Year" sections are each guarded by a KeyError
    handler, so a frame lacking either column gets an apology line for that
    section instead of an exception.

    Args:
        (DataFrame) df - must provide the "User Type" column; "Gender" and
                         "Birth Year" are optional
    """
    progress = "Initialize Processing... "
    unavailable = "Sorry!There is no relevant statistical information"

    print(progress)
    type_counts = df["User Type"].value_counts().to_dict()
    for user_type, count in type_counts.items():
        print(f"the type of '{user_type}' have {count} persons ")

    print(progress)
    try:
        gender_counts = df["Gender"].value_counts().to_dict()
        for gender, count in gender_counts.items():
            print(f"the gender of '{gender}' have {count} persons")
    except KeyError:
        print(unavailable)

    print(progress)
    try:
        birth_years = df["Birth Year"]
        earliest = birth_years.min()
        latest = birth_years.max()
        most_common = birth_years.mode().tolist()[0]
        print(f"the earliest birth year is {earliest}")
        print(f"the latest birth year is {latest} ")
        print(f"the most common birth year is {most_common}")
    except KeyError:
        print(unavailable)

In [None]:
def main():
    """
    Run the interactive session: ask for filters, load the data, print every
    statistics report, then offer to start over.

    The loop repeats only when the restart answer, lower-cased, is exactly 'yes'.
    """
    keep_going = True
    while keep_going:
        city, month, day = get_filters()
        df = load_data(city, month, day)

        time_stats(df)
        station_stats(df)
        trip_duration_stats(df)
        user_stats(df)

        answer = input('\nWould you like to restart? Enter yes or no.\n')
        keep_going = answer.lower() == 'yes'

In [None]:
# Start the interactive session only when this file is executed directly.
if __name__ == "__main__":
    main()