In [18]:
import pandas as pd
from datetime import datetime

### Load Divvy and Weather Data

In [2]:
%time
divvy_data = pd.read_csv("~/dev/me/chipy-mentorship/divvy.csv")

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 8.11 µs


  interactivity=interactivity, compiler=compiler, result=result)


In [3]:
# (rows, columns) of the trip table as loaded from divvy.csv.
divvy_data.shape

(9527208, 20)

In [11]:
# A notebook cell renders only its final expression, so this cell is kept to
# the single expression whose output is wanted: the list of column names.
divvy_data.columns

Index(['Unnamed: 0', 'trip_id', 'starttime', 'stoptime', 'bikeid',
       'tripduration', 'from_station_id', 'from_station_name', 'to_station_id',
       'to_station_name', 'usertype', 'gender', 'birthyear', 'DATE', 'PRCP',
       'SNOW', 'SNWD', 'TAVG', 'TMAX', 'TMIN'],
      dtype='object')

### Drop Unnecessary and Empty Weather Data Columns

In [13]:
# Columns removed from the trip table: 'Unnamed: 0' plus the weather/date
# fields that the section heading describes as unnecessary or empty.
bad_columns = [
    'Unnamed: 0',
    'PRCP', 'SNOW', 'SNWD', 'TAVG', 'TMAX', 'TMIN',
    'DATE',
]

# drop() returns a new frame; divvy_data itself is left unchanged.
divvy_data_cleanned = divvy_data.drop(columns=bad_columns)
divvy_data_cleanned.head()

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,gender,birthyear
0,9379901,2016-04-30 23:59:00,2016-05-01 00:11:00,21,733,123,California Ave & Milwaukee Ave,374,Western Ave & Walton St,Subscriber,Male,1982.0
1,9379900,2016-04-30 23:58:00,2016-05-01 00:07:00,3583,556,349,Halsted St & Wrightwood Ave,165,Clark St & Grace St,Subscriber,Male,1991.0
2,9379899,2016-04-30 23:58:00,2016-05-01 00:02:00,4557,253,59,Wabash Ave & Roosevelt Rd,273,Michigan Ave & 18th St,Subscriber,Male,1984.0
3,9379898,2016-04-30 23:54:00,2016-05-01 00:08:00,2443,802,289,Wells St & Concord Ln,199,Wabash Ave & Grand Ave,Subscriber,Male,1978.0
4,9379897,2016-04-30 23:52:00,2016-05-01 00:11:00,50,1146,239,Western Ave & Leland Ave,227,Southport Ave & Waveland Ave,Customer,,


In [5]:
# Read the weather observations that are joined to the trips by date below.
weather_data = pd.read_csv(
    filepath_or_buffer="~/dev/me/chipy-mentorship/weather_data.csv",
)

In [6]:
# (rows, columns) of the weather table as loaded from weather_data.csv.
weather_data.shape

(1094, 10)

In [7]:
# Preview the first rows to see the column layout and how DATE is formatted.
weather_data.head()

Unnamed: 0,STATION,NAME,DATE,PRCP,SNOW,SNWD,TMAX,TMIN,TOBS,WDMV
0,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",1/1/2016,0.0,0.0,3.0,30.0,18.0,23.0,
1,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",1/2/2016,0.0,0.0,3.0,30.0,21.0,23.0,
2,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",1/3/2016,0.0,0.0,3.0,36.0,22.0,28.0,
3,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",1/4/2016,0.0,0.0,3.0,30.0,24.0,26.0,
4,USC00111497,"CHICAGO BOTANIC GARDEN, IL US",1/5/2016,0.0,0.0,3.0,32.0,19.0,22.0,


### Use the Same Date Format in Each DataFrame
This is necessary for us to be able to join the two DataFrames on date.

In [27]:
def dateparse(x):
    """Parse a 'YYYY-MM-DD' string into a `datetime`.

    Uses the standard-library `datetime` imported at the top of the notebook;
    the `pd.datetime` alias was deprecated in pandas 1.0 and removed in 2.0.
    """
    return datetime.strptime(x, '%Y-%m-%d')


# `starttime` values look like '2016-04-30 23:59:00', so the format has to
# include the time part: pandas >= 2.0 enforces the format string and raises
# "unconverted data remains" for a date-only format. `.dt.date` then reduces
# each timestamp to a calendar date, giving a day-level `start_date` column.
divvy_data_cleanned['start_date'] = pd.to_datetime(
    divvy_data_cleanned['starttime'],
    format='%Y-%m-%d %H:%M:%S',
).dt.date

In [29]:
# Show the original timestamp next to the derived date to eyeball the conversion.
divvy_data_cleanned[['starttime', 'start_date']].head()

Unnamed: 0,starttime,start_date
0,2016-04-30 23:59:00,2016-04-30
1,2016-04-30 23:58:00,2016-04-30
2,2016-04-30 23:58:00,2016-04-30
3,2016-04-30 23:54:00,2016-04-30
4,2016-04-30 23:52:00,2016-04-30


In [31]:
# DATE strings are month/day/year (e.g. '1/1/2016'). Parse them, then keep only
# the calendar date in a new `date` column.
_weather_timestamps = pd.to_datetime(weather_data['DATE'], format='%m/%d/%Y')
weather_data['date'] = _weather_timestamps.dt.date

# Show raw and parsed values side by side.
weather_data[['DATE', 'date']].head()

Unnamed: 0,DATE,date
0,1/1/2016,2016-01-01
1,1/2/2016,2016-01-02
2,1/3/2016,2016-01-03
3,1/4/2016,2016-01-04
4,1/5/2016,2016-01-05


### Join Weather and Divvy Data by Date

In [32]:
# Left join: every trip row is kept, and weather columns are filled in where a
# weather row exists for the trip's start_date (NaN otherwise).
divvy_plus_weather_data = divvy_data_cleanned.merge(
    weather_data,
    how='left',
    left_on='start_date',
    right_on='date',
)

# A left join emits one output row per matching right-hand row, so a date that
# occurs more than once in weather_data would silently duplicate trips. Fail
# loudly instead of carrying inflated counts into later analysis.
assert len(divvy_plus_weather_data) == len(divvy_data_cleanned), (
    "merge changed the number of trip rows; "
    "weather_data has more than one row for some date"
)

divvy_plus_weather_data.head()

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_id,from_station_name,to_station_id,to_station_name,usertype,...,NAME,DATE,PRCP,SNOW,SNWD,TMAX,TMIN,TOBS,WDMV,date
0,9379901,2016-04-30 23:59:00,2016-05-01 00:11:00,21,733,123,California Ave & Milwaukee Ave,374,Western Ave & Walton St,Subscriber,...,"CHICAGO BOTANIC GARDEN, IL US",4/30/2016,0.0,0.0,0.0,49.0,41.0,45.0,,2016-04-30
1,9379900,2016-04-30 23:58:00,2016-05-01 00:07:00,3583,556,349,Halsted St & Wrightwood Ave,165,Clark St & Grace St,Subscriber,...,"CHICAGO BOTANIC GARDEN, IL US",4/30/2016,0.0,0.0,0.0,49.0,41.0,45.0,,2016-04-30
2,9379899,2016-04-30 23:58:00,2016-05-01 00:02:00,4557,253,59,Wabash Ave & Roosevelt Rd,273,Michigan Ave & 18th St,Subscriber,...,"CHICAGO BOTANIC GARDEN, IL US",4/30/2016,0.0,0.0,0.0,49.0,41.0,45.0,,2016-04-30
3,9379898,2016-04-30 23:54:00,2016-05-01 00:08:00,2443,802,289,Wells St & Concord Ln,199,Wabash Ave & Grand Ave,Subscriber,...,"CHICAGO BOTANIC GARDEN, IL US",4/30/2016,0.0,0.0,0.0,49.0,41.0,45.0,,2016-04-30
4,9379897,2016-04-30 23:52:00,2016-05-01 00:11:00,50,1146,239,Western Ave & Leland Ave,227,Southport Ave & Waveland Ave,Customer,...,"CHICAGO BOTANIC GARDEN, IL US",4/30/2016,0.0,0.0,0.0,49.0,41.0,45.0,,2016-04-30


In [33]:
# (rows, columns) of the merged frame. With a left join the row count matches
# the trip table unless some start_date matches more than one weather row.
divvy_plus_weather_data.shape

(9527208, 24)

In [34]:
# Summary statistics for the weather measurements attached to each trip.
# `count` excludes NaN, so it shows how many trips received a value per column.
divvy_plus_weather_data[['PRCP', 'SNOW', 'SNWD', 'TMAX', 'TMIN', 'TOBS', 'WDMV']].describe()

Unnamed: 0,PRCP,SNOW,SNWD,TMAX,TMIN,TOBS,WDMV
count,9498366.0,8073564.0,8045113.0,9494047.0,9502661.0,9502661.0,6580197.0
mean,0.1187134,0.02456002,0.1388659,68.48671,49.32205,56.14396,21.67987
std,0.3442404,0.2572655,0.8252898,17.76356,15.67575,15.82938,171.672
min,0.0,0.0,0.0,5.0,-8.0,-6.0,0.0
25%,0.0,0.0,0.0,55.0,38.0,44.0,5.0
50%,0.0,0.0,0.0,73.0,53.0,61.0,9.9
75%,0.05,0.0,0.0,82.0,63.0,69.0,19.9
max,3.76,6.4,13.0,97.0,78.0,83.0,4460.8
