In [1]:
# imports for the required packages; these aliases (pd, np) are the common convention
import pandas as pd
import numpy as np
from datetime import date as dt

In [2]:
# Load the station table from station.csv in the working directory.
station = pd.read_csv('./station.csv')
# Preview the first rows to check that the columns parsed as expected.
station.head()

Unnamed: 0,station_id,name,lat,long,install_date,install_dockcount,modification_date,current_dockcount,decommission_date
0,BT-01,3rd Ave & Broad St,47.618418,-122.350964,10/13/2014,18,,18,
1,BT-03,2nd Ave & Vine St,47.615829,-122.348564,10/13/2014,16,,16,
2,BT-04,6th Ave & Blanchard St,47.616094,-122.341102,10/13/2014,16,,16,
3,BT-05,2nd Ave & Blanchard St,47.61311,-122.344208,10/13/2014,14,,14,
4,CBD-03,7th Ave & Union St,47.610731,-122.332447,10/13/2014,20,,20,


In [3]:
# Load the trip table directly from the zipped CSV, then drop rows that are
# exact duplicates of an earlier row.
# NOTE(review): skiprows=[50793] skips one specific line of the file
# (0-based line index) -- presumably a malformed record; confirm and record
# the reason it is excluded.
trip = pd.read_csv('./trip.csv.zip', skiprows=[50793]).drop_duplicates()
# Preview the first rows.
trip.head()

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_name,to_station_name,from_station_id,to_station_id,usertype,gender,birthyear
0,431,10/13/2014 10:31,10/13/2014 10:48,SEA00298,985.935,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1960.0
1,432,10/13/2014 10:32,10/13/2014 10:48,SEA00195,926.375,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1970.0
2,433,10/13/2014 10:33,10/13/2014 10:48,SEA00486,883.831,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1988.0
3,434,10/13/2014 10:34,10/13/2014 10:48,SEA00333,865.937,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1977.0
4,435,10/13/2014 10:34,10/13/2014 10:49,SEA00202,923.923,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1971.0


In [4]:
# Load the daily weather table from weather.csv in the working directory.
weather = pd.read_csv('./weather.csv')
# Preview the first rows.
weather.head()

Unnamed: 0,Date,Max_Temperature_F,Mean_Temperature_F,Min_TemperatureF,Max_Dew_Point_F,MeanDew_Point_F,Min_Dewpoint_F,Max_Humidity,Mean_Humidity,Min_Humidity,...,Mean_Sea_Level_Pressure_In,Min_Sea_Level_Pressure_In,Max_Visibility_Miles,Mean_Visibility_Miles,Min_Visibility_Miles,Max_Wind_Speed_MPH,Mean_Wind_Speed_MPH,Max_Gust_Speed_MPH,Precipitation_In,Events
0,10/13/2014,71,62.0,54,55,51,46,87,68,46,...,29.79,29.65,10,10,4,13,4,21,0.0,Rain
1,10/14/2014,63,59.0,55,52,51,50,88,78,63,...,29.75,29.54,10,9,3,10,5,17,0.11,Rain
2,10/15/2014,62,58.0,54,53,50,46,87,77,67,...,29.71,29.51,10,9,3,18,7,25,0.45,Rain
3,10/16/2014,71,61.0,52,49,46,42,83,61,36,...,29.95,29.81,10,10,10,9,4,-,0.0,Rain
4,10/17/2014,64,60.0,57,55,51,41,87,72,46,...,29.78,29.73,10,10,6,8,3,-,0.14,Rain



When a bike station is modified, is it more likely that it’ll lose bikes or gain bikes? How do you know?
Come up with 3 more questions that can be answered with this data set.

## Q1: What is the average trip duration for a borrowed bicycle?

### Answer: About 20 minutes (mean `tripduration` of 1202.61 seconds / 60)

In [5]:
# Mean of the tripduration column, returned as a one-entry Series keyed by
# the column name (double brackets keep the selection as a DataFrame).
trip[['tripduration']].mean()

tripduration    1202.61221
dtype: float64

## Q2: What's the most common age of a bicycle-sharer?

### Answer: 28 years old

In [6]:
# Calendar date of each trip (start timestamp truncated to midnight).
trip['trip_date'] = pd.to_datetime(trip['starttime']).dt.normalize()

# Rider age at the time of the trip: trip year minus birth year.
trip['age'] = trip['trip_date'].dt.year - trip['birthyear']

# Number of trips per age, most frequent first; show the top five as a table.
(
    trip.groupby('age')['age']
    .count()
    .sort_values(ascending=False)
    .head()
    .to_frame()
)

Unnamed: 0_level_0,age
age,Unnamed: 1_level_1
28.0,12258
30.0,8552
27.0,7651
29.0,7504
32.0,7388


## Q3: Given all the weather data here, find the average precipitation per month, and the median precipitation.

### Answer: See printouts

In [7]:
# Month number (1-12) of each weather observation.
weather['Month'] = pd.to_datetime(weather['Date']).dt.month
# Mean daily precipitation for each month number, as a one-column table.
# Observations from different years that share a month are pooled together.
weather.groupby('Month')[['Precipitation_In']].mean()

Unnamed: 0_level_0,Precipitation_In
Month,Unnamed: 1_level_1
1,0.143548
2,0.168421
3,0.156935
4,0.051333
5,0.012419
6,0.0305
7,0.012097
8,0.018226
9,0.041
10,0.189


In [8]:
# Median daily precipitation for each month number, as a one-column table.
weather.groupby('Month')[['Precipitation_In']].median()

Unnamed: 0_level_0,Precipitation_In
Month,Unnamed: 1_level_1
1,0.02
2,0.04
3,0.025
4,0.0
5,0.0
6,0.0
7,0.0
8,0.0
9,0.0
10,0.04


## Q4: What’s the average number of bikes at a given bike station?

### Answer: 16-17 docks per station (mean current dock count = 16.52), using dock count as a proxy for the number of bikes

In [9]:
# Summary statistics (count, mean, std, quartiles, min, max) of the current
# number of docks per station.
# NOTE(review): the saved output reports a minimum of 0 -- presumably a
# decommissioned station; confirm whether such rows should be excluded.
station['current_dockcount'].describe()

count    58.000000
mean     16.517241
std       5.117021
min       0.000000
25%      16.000000
50%      18.000000
75%      18.000000
max      26.000000
Name: current_dockcount, dtype: float64

## Q5: When a bike station is modified, is it more likely that it’ll lose bikes or gain bikes? How do you know?

### Answer: A modified station is more likely to lose docks: the mean change is -3.76 docks and the median change is -2

In [10]:
# Keep only the stations that have a recorded modification date. The explicit
# .copy() makes station_mod an independent frame, so adding a column below
# does not raise SettingWithCopyWarning.
station_mod = station.dropna(subset=['modification_date']).copy()
# Net change in dock count since installation (negative means docks removed).
station_mod['mod_change'] = station_mod['current_dockcount'] - station_mod['install_dockcount']
# Summary statistics of the change across the modified stations.
station_mod['mod_change'].describe()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


count    17.000000
mean     -3.764706
std       8.392924
min     -20.000000
25%      -6.000000
50%      -2.000000
75%       2.000000
max       8.000000
Name: mod_change, dtype: float64

## Q6: How many total trips have been taken on rainy days?

### Answer: 80,961 rides

In [11]:
# Parse the weather dates into midnight-normalized timestamps so they can be
# matched against trip['trip_date'] when joining weather to trips.
weather['Date_New'] = pd.to_datetime(weather['Date']).dt.normalize()

In [12]:
# Attach each trip to the weather record for its calendar date (inner join).
rainy_stuff = weather.merge(trip, left_on='Date_New', right_on='trip_date')
# Discard trips on days with no recorded weather event.
drop_no_rain = rainy_stuff[rainy_stuff['Events'].notna()]
# Keep trips on days whose event text mentions "Rain".
rainy_rides = drop_no_rain.loc[drop_no_rain['Events'].str.contains("Rain")]
# Number of trips taken on those rainy days.
total_rain_rides = rainy_rides['Events'].notna().sum()
total_rain_rides



80961

## Q7: What is the average difference in number of trips on rainy days versus not?

### Answer: On average, about 148 more trips per day are taken on days with no rain

In [13]:
# Trips on days without rain: all weather-matched trips minus rainy-day trips.
all_matched_trips = rainy_stuff['trip_date'].notna().sum()
rainless_rides = all_matched_trips - rainy_rides['trip_date'].notna().sum()
rainless_rides

155104

In [14]:
# Weather days that have any recorded event.
rain_only = weather[weather['Events'].notna()]
# Of those, the days whose event text mentions "Rain".
rain_today = rain_only.loc[rain_only['Events'].str.contains("Rain")]
# Number of rainy days in the weather table.
rainy_days = rain_today['Events'].notna().sum()
rainy_days

310

In [15]:
# Days without rain: every dated weather record minus the rainy days.
rainless_days = weather['Date'].notna().sum() - rainy_days
rainless_days

379

In [16]:
# Average number of trips per rainy day.
rain_rides_per_day = total_rain_rides / rainy_days

In [17]:
# Average number of trips per day without rain.
good_weather_rides = rainless_rides / rainless_days

In [18]:
# Difference in average daily trips: rainless days minus rainy days
# (a positive value means more trips per day when it does not rain).
good_weather_rides - rain_rides_per_day

148.08086645671972

## Q8: What percentage of rides are taken by members?

### Answer: 61.92%

In [19]:
# Number of trips recorded for each user type.
trip.groupby('usertype')['usertype'].count()

usertype
Member                    146171
Short-Term Pass Holder     89894
Name: usertype, dtype: int64

In [20]:
# Per-column non-null counts of the rows whose usertype text contains
# "Member", divided by the number of non-null usertype values. The result is
# a Series with one entry per column; only the 'usertype' entry is the
# member share of trips (a fraction between 0 and 1, not a percentage).
# NOTE(review): str.contains is a substring match rather than an equality
# test -- confirm no other user type label contains "Member".
percent_member = (
    trip.loc[trip['usertype'].str.contains("Member")]
    .count()
    .div(trip['usertype'].count())
)
percent_member['usertype']


0.6191981022176095