# Data Analysis with Pandas
## Cycle Share Data Set
### Author: Grace Choi, 1/20/2021

In [1]:
import pandas as pd

In [2]:
# Load the three Cycle Share CSV files from the working directory.
df_station = pd.read_csv('./station.csv')
# trip.csv contains at least one row with the wrong number of fields; skip such
# rows and report them instead of aborting the load. `on_bad_lines` replaces
# `error_bad_lines=False`, which was deprecated in pandas 1.3 and removed in 2.0.
df_trip = pd.read_csv('./trip.csv', on_bad_lines='warn')
# Parse `Date` as datetime so the `.dt` accessors used in later cells work.
df_weather = pd.read_csv('./weather.csv', parse_dates=['Date'])

b'Skipping line 50794: expected 12 fields, saw 20\n'


### 1. What is the average trip duration for a borrowed bicycle?

In [3]:
# Mean of the `tripduration` column over every loaded trip row.
avg_trip = df_trip.loc[:, 'tripduration'].mean()
avg_trip

1178.2956753399776

### 2. What's the most common age of a bicycle-sharer?

In [4]:
# `mode()` returns a Series holding every value tied for most frequent,
# sorted ascending.
year = df_trip['birthyear'].mode()
# Take the first mode explicitly: `Series.item()` raises ValueError as soon as
# two birth years tie. The scalar `.item()` keeps the result a plain Python number.
# Age is measured relative to 2020, as in the original analysis.
age = (2020 - year.iloc[0]).item()
age

33.0

### 3. Given all the weather data here, find the average precipitation per month and the median precipitation per month.

In [5]:
# Split `Date` into calendar parts so rows can be grouped by month.
# (The former `df_weather.reset_index(drop=True)` call discarded its result and
# therefore did nothing; it has been removed.)
df_weather['Month'] = df_weather['Date'].dt.month
df_weather['Year'] = df_weather['Date'].dt.year
df_weather['Day'] = df_weather['Date'].dt.day
# Mean of `Precipitation_In` for each calendar month, pooled across all years.
avg_precipitation = df_weather.groupby(['Month'])['Precipitation_In'].mean()
avg_precipitation

Month
1     0.143548
2     0.168421
3     0.156935
4     0.051333
5     0.012419
6     0.030500
7     0.012097
8     0.018226
9     0.041000
10    0.189000
11    0.187833
12    0.236290
Name: Precipitation_In, dtype: float64

In [6]:
# Median of `Precipitation_In` for each calendar month, using the `Month`
# column added to `df_weather` in the previous cell.
median_precipitation = (
    df_weather
    .groupby('Month')['Precipitation_In']
    .median()
)
median_precipitation

Month
1     0.020
2     0.040
3     0.025
4     0.000
5     0.000
6     0.000
7     0.000
8     0.000
9     0.000
10    0.040
11    0.035
12    0.100
Name: Precipitation_In, dtype: float64

### 4. What's the average number of bikes at a given bike station?

In [7]:
# Mean of `current_dockcount` across all station rows.
avg_bikes = df_station.loc[:, 'current_dockcount'].mean()
avg_bikes

16.517241379310345

### 5. When a bike station is modified, is it more likely that it'll lose bikes or gain bikes? How do you know?
> More likely to lose bikes, because on average the current dockcount is about 1 lower than the install dockcount (mean difference ≈ -1.07).

In [8]:
# Per-station change in dock count: current minus install.
# A negative value means the station now has fewer docks than when installed.
df_station['count_difference'] = df_station['current_dockcount'].sub(
    df_station['install_dockcount']
)
# Mean change across all stations.
avg_difference = df_station['count_difference'].mean()
avg_difference

-1.0689655172413792

### 6a. What station has the most bikes currently docked?

In [9]:
# Sort stations by `current_dockcount`, largest first, and take the id of the
# top row. `.iloc[0]` extracts that single value; the previous `.any()` is a
# truthiness reduction, not an element accessor, and depending on the pandas
# version it yields `True` instead of the station id.
busiest_station = (
    df_station[['station_id', 'current_dockcount']]
    .sort_values('current_dockcount', ascending=False)
    .head(1)['station_id']
    .iloc[0]
)
busiest_station

'CH-08'

### 6b. How many cyclists are non-binary?

In [10]:
# Number of trip rows whose `gender` value is 'Other'.
non_binary_cyclers = df_trip['gender'].value_counts().loc['Other']
non_binary_cyclers

3431

### 6c. How many bike users are Members?

In [11]:
# Number of trip rows whose `usertype` value is 'Member'.
member_count = df_trip['usertype'].value_counts().loc['Member']
member_count

181557

### 6d. Find the average temp per month

In [12]:
# The `Month` column was already derived from `Date` in the monthly
# precipitation cell (In [5]), so it is reused here rather than recomputed.
# The no-op `df_weather.reset_index(drop=True)` (result discarded) and the
# duplicated Month/Year/Day assignments have been removed.
# Mean of `Mean_Temperature_F` for each calendar month, pooled across all years.
avg_temp_per_month = df_weather.groupby(['Month'])['Mean_Temperature_F'].mean()
avg_temp_per_month

Month
1     45.225806
2     49.267857
3     51.048387
4     55.116667
5     60.822581
6     66.616667
7     70.177419
8     69.596774
9     62.566667
10    58.840000
11    46.800000
12    45.709677
Name: Mean_Temperature_F, dtype: float64

## Tests

In [13]:
def test():

    def assert_equal(actual,expected):
        assert actual == expected, f"Expected {expected} but got {actual}"

    assert_equal(avg_trip, 1178.2956753399776)
    assert_equal(age, 33.0)
    assert_equal(avg_precipitation.iloc[0], 0.14354838709677417)
    assert_equal(median_precipitation.iloc[0], 0.020)
    assert_equal(avg_bikes, 16.517241379310345)
    assert_equal(avg_difference, -1.0689655172413792)
    assert_equal(busiest_station, 'CH-08')
    assert_equal(non_binary_cyclers, 3431)
    assert_equal(member_count, 181557)
    assert_equal(avg_temp_per_month.iloc[0], 45.225806451612904)

    print("Success!!!")

# Run the checks: raises AssertionError on the first mismatch, otherwise prints "Success!!!".
test()

Success!!!
