# Data Cleaning and Consistency Checks

## Table of Contents
### 01. Importing Libraries and Data
### 02. Data Cleaning and Consistency Checks
#### Missing Values
#### Duplicates
#### Mixed-type Data
#### Renaming Columns
#### Checking Data Types
### 03. Descriptive Statistics
### 04. Adding New Variables
### 05. Dropping Columns

## 01. Importing Libraries and Data

In [54]:
#importing libraries
import pandas as pd
import numpy as np
import os

In [56]:
#creating the path to the project folder
#(raw string, so the backslashes in the Windows path are kept literally)
path = r'C:\Users\canam\Achievement 6\Citibike Analysis'

In [58]:
#loading the original citibike file from the Original Data folder
#index_col=False tells pandas not to use the first column as the index
raw_data_file = os.path.join(path, '02 Data', 'Original Data', 'citibike.csv')
df = pd.read_csv(raw_data_file, index_col=False)

In [61]:
#checking shape of the data
df.shape

(50000, 18)

## 02. Data Cleaning and Consistency Checks

### Missing Values

In [64]:
#finding missing values
df.isnull().sum()

trip_id                       0
bike_id                       0
weekday                       0
start_hour                    0
start_time                    0
start_station_id              0
start_station_name            0
start_station_latitude        0
start_station_longitude       0
end_time                      0
end_station_id                0
end_station_name              0
end_station_latitude          0
end_station_longitude         0
trip_duration                 0
subscriber                    0
birth_year                 6979
gender                        0
dtype: int64

In [66]:
#There are 6979 rows with birth_year missing (about 14% of the 50,000 rows). Dropping them would remove far more than 5% of the data, so I will use mean imputation instead.

In [68]:
#filling the gaps in birth_year with the column mean
#the result goes into a new birth_year_mean column; birth_year itself is left untouched
birth_year_avg = df['birth_year'].mean()
df['birth_year_mean'] = df['birth_year'].fillna(birth_year_avg)

In [70]:
#checking to see that missing values were imputed 
df.isnull().sum()

trip_id                       0
bike_id                       0
weekday                       0
start_hour                    0
start_time                    0
start_station_id              0
start_station_name            0
start_station_latitude        0
start_station_longitude       0
end_time                      0
end_station_id                0
end_station_name              0
end_station_latitude          0
end_station_longitude         0
trip_duration                 0
subscriber                    0
birth_year                 6979
gender                        0
birth_year_mean               0
dtype: int64

In [74]:
#checking new column birth_year_mean
df.head(5)

Unnamed: 0,trip_id,bike_id,weekday,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,birth_year,gender,birth_year_mean
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,1968.0,2,1968.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1983.0,1,1983.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1989.0,1,1989.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,1988.0,2,1988.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,,0,1975.627786


In [76]:
#getting rid of the birth_year column with missing values 

In [78]:
#removing the original birth_year column, which still holds the missing values
df = df.drop('birth_year', axis=1)

In [80]:
#checking to see if birth_year column has been dropped
df.head(5)

Unnamed: 0,trip_id,bike_id,weekday,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year_mean
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786


In [82]:
#giving the imputed column the original birth_year name (changed in place)
df.rename({'birth_year_mean': 'birth_year'}, axis='columns', inplace=True)

In [86]:
#checking to see that the column was renamed
df.head()

Unnamed: 0,trip_id,bike_id,weekday,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786


### Duplicates

In [89]:
#collecting every row that is an exact repeat of an earlier row
is_repeat = df.duplicated()
df_dups = df.loc[is_repeat]

In [91]:
df_dups

Unnamed: 0,trip_id,bike_id,weekday,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year


In [93]:
##there are no duplicates

### Mixed-type Data

In [96]:
##checking for mixed-type data
##a column is printed when any of its values has a different Python type than its first value
for column_name in df.columns:
    observed_types = df[[column_name]].map(type)[column_name]
    differs_from_first = observed_types != observed_types.iloc[0]
    if differs_from_first.any():
        print (column_name)

In [98]:
##there are no columns with mixed data 

### Renaming Columns

In [101]:
#checking columns for things that need to be renamed 
df.head(5)

Unnamed: 0,trip_id,bike_id,weekday,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786


In [103]:
#renaming the weekday column to day_of_week (changed in place)
df.rename({'weekday': 'day_of_week'}, axis='columns', inplace=True)

In [105]:
#checking to see if it has been changed
df.head()

Unnamed: 0,trip_id,bike_id,day_of_week,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786


### Checking Data Types

In [107]:
df.dtypes

trip_id                     object
bike_id                      int64
day_of_week                 object
start_hour                   int64
start_time                  object
start_station_id             int64
start_station_name          object
start_station_latitude     float64
start_station_longitude    float64
end_time                    object
end_station_id               int64
end_station_name            object
end_station_latitude       float64
end_station_longitude      float64
trip_duration                int64
subscriber                  object
gender                       int64
birth_year                 float64
dtype: object

In [111]:
#Need to make the following changes:
#bike_id to string
#start_time into datetime
#start_station_id into string
#end_time into datetime
#end_station_id into string
#gender into string 

In [113]:
#bike_id is an identifier, not a quantity, so it is stored as a string
df = df.astype({'bike_id': 'str'})

In [115]:
#parsing the start_time text into pandas datetime values
parsed_start_time = pd.to_datetime(df['start_time'])
df['start_time'] = parsed_start_time

In [117]:
#start_station_id is an identifier, not a quantity, so it is stored as a string
df = df.astype({'start_station_id': 'str'})

In [119]:
#parsing the end_time text into pandas datetime values
parsed_end_time = pd.to_datetime(df['end_time'])
df['end_time'] = parsed_end_time

In [121]:
#end_station_id is an identifier, not a quantity, so it is stored as a string
df = df.astype({'end_station_id': 'str'})

In [123]:
#gender holds category codes rather than quantities, so it is stored as a string
df = df.astype({'gender': 'str'})

In [125]:
#checking to see that data types were changed 
df.dtypes

trip_id                            object
bike_id                            object
day_of_week                        object
start_hour                          int64
start_time                 datetime64[ns]
start_station_id                   object
start_station_name                 object
start_station_latitude            float64
start_station_longitude           float64
end_time                   datetime64[ns]
end_station_id                     object
end_station_name                   object
end_station_latitude              float64
end_station_longitude             float64
trip_duration                       int64
subscriber                         object
gender                             object
birth_year                        float64
dtype: object

## 03. Descriptive Statistics

In [133]:
#gathering descriptive statistics
df.describe()

Unnamed: 0,start_hour,start_time,start_station_latitude,start_station_longitude,end_time,end_station_latitude,end_station_longitude,trip_duration,birth_year
count,50000.0,50000,50000.0,50000.0,50000,50000.0,50000.0,50000.0,50000.0
mean,14.14524,2013-09-16 14:31:33.819680,40.73417,-73.991109,2013-09-16 14:46:46.003320064,40.733859,-73.991351,838.9829,1975.627786
min,0.0,2013-09-01 00:00:35,40.680342,-74.017134,2013-09-01 00:07:15,40.680342,-74.017134,60.0,1899.0
25%,10.0,2013-09-09 10:25:46.750000128,40.720196,-74.000271,2013-09-09 10:40:52.249999872,40.720196,-74.001547,417.0,1970.0
50%,15.0,2013-09-16 18:54:12.500000,40.735877,-73.990765,2013-09-16 19:11:10,40.735354,-73.991218,672.0,1975.627786
75%,18.0,2013-09-23 21:56:02.750000128,40.75002,-73.981923,2013-09-23 22:06:59,40.749013,-73.98205,1112.0,1983.0
max,23.0,2013-09-30 23:58:17,40.770513,-73.950048,2013-10-01 00:15:29,40.770513,-73.950048,2697.0,1997.0
std,4.860541,,0.019911,0.012555,,0.019885,0.012569,573.663997,10.28601


In [135]:
#the minimum birth year is 1899, which is highly unlikely.  

In [137]:
#imputing mean where birth_year < 1913 (making the cutoff age 100 and the data is from 2013)
#the mean is taken from the column itself instead of being typed in by hand, so the
#replacement value stays correct if the input data changes
#it is computed before the replacement, so it still includes the implausible years,
#which matches the 1975.627786 figure used previously
min_plausible_birth_year = 1913
birth_year_mean = df['birth_year'].mean()
df.loc[df['birth_year'] < min_plausible_birth_year, 'birth_year'] = birth_year_mean

In [139]:
#checking descriptive statistics again
df.describe()

Unnamed: 0,start_hour,start_time,start_station_latitude,start_station_longitude,end_time,end_station_latitude,end_station_longitude,trip_duration,birth_year
count,50000.0,50000,50000.0,50000.0,50000,50000.0,50000.0,50000.0,50000.0
mean,14.14524,2013-09-16 14:31:33.819680,40.73417,-73.991109,2013-09-16 14:46:46.003320064,40.733859,-73.991351,838.9829,1975.662435
min,0.0,2013-09-01 00:00:35,40.680342,-74.017134,2013-09-01 00:07:15,40.680342,-74.017134,60.0,1917.0
25%,10.0,2013-09-09 10:25:46.750000128,40.720196,-74.000271,2013-09-09 10:40:52.249999872,40.720196,-74.001547,417.0,1970.0
50%,15.0,2013-09-16 18:54:12.500000,40.735877,-73.990765,2013-09-16 19:11:10,40.735354,-73.991218,672.0,1975.627786
75%,18.0,2013-09-23 21:56:02.750000128,40.75002,-73.981923,2013-09-23 22:06:59,40.749013,-73.98205,1112.0,1983.0
max,23.0,2013-09-30 23:58:17,40.770513,-73.950048,2013-10-01 00:15:29,40.770513,-73.950048,2697.0,1997.0
std,4.860541,,0.019911,0.012555,,0.019885,0.012569,573.663997,10.158183


In [None]:
#now the minimum birth_year is 1917

In [141]:
df.head()

Unnamed: 0,trip_id,bike_id,day_of_week,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786


## 04. Adding New Variables

In [154]:
#deriving rider age as of 2013, the year the trips were recorded
data_year = 2013
df['Age'] = data_year - df['birth_year']

In [156]:
#checking to see if it was added
df.head()

Unnamed: 0,trip_id,bike_id,day_of_week,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year,Age
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0,45.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0,30.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0,24.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0,25.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786,37.372214


In [158]:
#lower-casing the Age column name so it matches the other column names (changed in place)
df.rename({'Age': 'age'}, axis='columns', inplace=True)

In [160]:
#checking to see that it was renamed
df.head()

Unnamed: 0,trip_id,bike_id,day_of_week,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year,age
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0,45.0
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0,30.0
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0,24.0
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0,25.0
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786,37.372214


In [166]:
#converting trip_duration from seconds to minutes in a new trip_duration_mins column
seconds_per_minute = 60
df['trip_duration_mins'] = df['trip_duration'] / seconds_per_minute

In [168]:
#checking to see if trip_duration_mins was added
df.head()

Unnamed: 0,trip_id,bike_id,day_of_week,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,trip_duration,subscriber,gender,birth_year,age,trip_duration_mins
0,LnQzQk,16013,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,993,Subscriber,2,1968.0,45.0,16.55
1,IL9boN,15230,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,581,Subscriber,1,1983.0,30.0,9.683333
2,46clGB,17942,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,361,Subscriber,1,1989.0,24.0,6.016667
3,v7vdFt,19683,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,561,Subscriber,2,1988.0,25.0,9.35
4,VGBsb5,18024,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,2296,Non-Subscriber,0,1975.627786,37.372214,38.266667


## 05. Dropping Columns

In [190]:
#removing the columns the analysis does not use
#trip_duration and birth_year are replaced by trip_duration_mins and age
unneeded_columns = ['trip_id', 'bike_id', 'trip_duration', 'birth_year']
df = df.drop(columns=unneeded_columns)

In [194]:
#checking to see that columns have been dropped
df.head()

Unnamed: 0,day_of_week,start_hour,start_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_time,end_station_id,end_station_name,end_station_latitude,end_station_longitude,subscriber,gender,age,trip_duration_mins
0,Mon,18,2013-09-09 18:18:55,523,W 38 St & 8 Ave,40.754666,-73.991382,2013-09-09 18:35:28,334,W 20 St & 7 Ave,40.742388,-73.997262,Subscriber,2,45.0,16.55
1,Thu,18,2013-09-12 18:38:53,257,Lispenard St & Broadway,40.719392,-74.002472,2013-09-12 18:48:34,236,St Marks Pl & 2 Ave,40.728419,-73.98714,Subscriber,1,30.0,9.683333
2,Wed,19,2013-09-18 19:44:04,479,9 Ave & W 45 St,40.760193,-73.991255,2013-09-18 19:50:05,513,W 56 St & 10 Ave,40.768254,-73.988639,Subscriber,1,24.0,6.016667
3,Sat,11,2013-09-28 11:54:37,527,E 33 St & 1 Ave,40.743156,-73.974347,2013-09-28 12:03:58,441,E 52 St & 2 Ave,40.756014,-73.967416,Subscriber,2,25.0,9.35
4,Sat,18,2013-09-07 18:08:22,521,8 Ave & W 31 St,40.75045,-73.994811,2013-09-07 18:46:38,476,E 31 St & 3 Ave,40.743943,-73.979661,Non-Subscriber,0,37.372214,38.266667


In [196]:
#exporting the cleaned dataframe to the Prepared Data folder
#NOTE(review): to_csv writes the row index as an extra unnamed first column by default;
#pass index=False if downstream notebooks do not expect that column - confirm before changing
df.to_csv(os.path.join(path, '02 Data','Prepared Data', 'citibike_cleaned.csv'))