In [1]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
df = pd.read_csv('bikedata.csv')
df.head(10)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,C809ED75D6160B2A,electric_bike,2021-05-30 11:58:15,2021-05-30 12:10:39,,,,,41.9,-87.63,41.89,-87.61,casual
1,DD59FDCE0ACACAF3,electric_bike,2021-05-30 11:29:14,2021-05-30 12:14:09,,,,,41.88,-87.62,41.79,-87.58,casual
2,0AB83CB88C43EFC2,electric_bike,2021-05-30 14:24:01,2021-05-30 14:25:13,,,,,41.92,-87.7,41.92,-87.7,casual
3,7881AC6D39110C60,electric_bike,2021-05-30 14:25:51,2021-05-30 14:41:04,,,,,41.92,-87.7,41.94,-87.69,casual
4,853FA701B4582BAF,electric_bike,2021-05-30 18:15:39,2021-05-30 18:22:32,,,,,41.94,-87.69,41.94,-87.7,casual
5,F5E63DFD96B2A737,electric_bike,2021-05-30 11:33:41,2021-05-30 11:57:17,,,,,41.88,-87.63,41.89,-87.62,casual
6,C884951E36656727,electric_bike,2021-05-30 10:51:37,2021-05-30 11:06:20,,,,,41.93,-87.7,41.9,-87.69,casual
7,48B60B250FE75AF9,electric_bike,2021-05-05 13:57:03,2021-05-05 14:14:58,,,,,41.78,-87.64,41.78,-87.64,casual
8,E3D0CC2FE1359880,electric_bike,2021-05-05 11:31:26,2021-05-05 11:34:03,,,,,41.78,-87.64,41.78,-87.64,casual
9,4382735758ABF2CE,electric_bike,2021-05-04 19:51:05,2021-05-04 20:17:26,,,,,41.78,-87.64,41.78,-87.64,casual


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531633 entries, 0 to 531632
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   ride_id             531633 non-null  object 
 1   rideable_type       531633 non-null  object 
 2   started_at          531633 non-null  object 
 3   ended_at            531633 non-null  object 
 4   start_station_name  477889 non-null  object 
 5   start_station_id    477889 non-null  object 
 6   end_station_name    473439 non-null  object 
 7   end_station_id      473439 non-null  object 
 8   start_lat           531633 non-null  float64
 9   start_lng           531633 non-null  float64
 10  end_lat             531181 non-null  float64
 11  end_lng             531181 non-null  float64
 12  member_casual       531633 non-null  object 
dtypes: float64(4), object(9)
memory usage: 52.7+ MB


In [4]:
#Combine latitude and longitude into new column
df['start_coord'] = df['start_lat'].astype(str) + ', ' + df['start_lng'].astype(str)
df['end_coord'] = df['end_lat'].astype(str) + ', ' + df['end_lng'].astype(str)

#Drop old latitude and longitude columns
columnstodrop = ['start_lat', 'start_lng', 'end_lat', 'end_lng']
df.drop(columnstodrop, axis=1, inplace=True)
df.head(5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,member_casual,start_coord,end_coord
0,C809ED75D6160B2A,electric_bike,2021-05-30 11:58:15,2021-05-30 12:10:39,,,,,casual,"41.9, -87.63","41.89, -87.61"
1,DD59FDCE0ACACAF3,electric_bike,2021-05-30 11:29:14,2021-05-30 12:14:09,,,,,casual,"41.88, -87.62","41.79, -87.58"
2,0AB83CB88C43EFC2,electric_bike,2021-05-30 14:24:01,2021-05-30 14:25:13,,,,,casual,"41.92, -87.7","41.92, -87.7"
3,7881AC6D39110C60,electric_bike,2021-05-30 14:25:51,2021-05-30 14:41:04,,,,,casual,"41.92, -87.7","41.94, -87.69"
4,853FA701B4582BAF,electric_bike,2021-05-30 18:15:39,2021-05-30 18:22:32,,,,,casual,"41.94, -87.69","41.94, -87.7"


In [5]:
#Change start and end time to datetime data type and create new column with duration. 
df['started_at'] = pd.to_datetime(df['started_at'])
df['ended_at'] = pd.to_datetime(df['ended_at'])
df['duration'] = df['ended_at'] - df['started_at']
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 531633 entries, 0 to 531632
Data columns (total 11 columns):
 #   Column              Non-Null Count   Dtype         
---  ------              --------------   -----         
 0   ride_id             531633 non-null  object        
 1   rideable_type       531633 non-null  object        
 2   started_at          531633 non-null  datetime64[ns]
 3   ended_at            531633 non-null  datetime64[ns]
 4   start_station_name  477889 non-null  object        
 5   start_station_id    477889 non-null  object        
 6   end_station_name    473439 non-null  object        
 7   end_station_id      473439 non-null  object        
 8   member_casual       531633 non-null  object        
 9   start_coord         531633 non-null  object        
 10  end_coord           531633 non-null  object        
dtypes: datetime64[ns](2), object(9)
memory usage: 44.6+ MB


In [6]:
df['duration'] = df['ended_at'] - df['started_at']
df.head(5)

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,member_casual,start_coord,end_coord,duration
0,C809ED75D6160B2A,electric_bike,2021-05-30 11:58:15,2021-05-30 12:10:39,,,,,casual,"41.9, -87.63","41.89, -87.61",0 days 00:12:24
1,DD59FDCE0ACACAF3,electric_bike,2021-05-30 11:29:14,2021-05-30 12:14:09,,,,,casual,"41.88, -87.62","41.79, -87.58",0 days 00:44:55
2,0AB83CB88C43EFC2,electric_bike,2021-05-30 14:24:01,2021-05-30 14:25:13,,,,,casual,"41.92, -87.7","41.92, -87.7",0 days 00:01:12
3,7881AC6D39110C60,electric_bike,2021-05-30 14:25:51,2021-05-30 14:41:04,,,,,casual,"41.92, -87.7","41.94, -87.69",0 days 00:15:13
4,853FA701B4582BAF,electric_bike,2021-05-30 18:15:39,2021-05-30 18:22:32,,,,,casual,"41.94, -87.69","41.94, -87.7",0 days 00:06:53


In [7]:
df2 = df[df['start_coord'] == df['end_coord']]
df2['start_coord'].value_counts()

41.892278, -87.612043                     1560
41.880958, -87.616743                     1114
41.8810317, -87.62408432                   522
41.963982, -87.638181                      521
41.926277, -87.630834                      490
                                          ... 
41.84198816666667, -87.61695566666667        1
41.786691, -87.655859                        1
41.924078, -87.635939                        1
41.831098833333336, -87.62695233333334       1
42.01, -87.72                                1
Name: start_coord, Length: 1261, dtype: int64

In [15]:
df2[~df2['start_station_name'].isnull()]

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,member_casual,start_coord,end_coord
25,0F3AE375DEC608D9,classic_bike,2021-05-02 08:21:19,2021-05-02 09:20:46,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,casual,"41.90096039, -87.62377664","41.90096039, -87.62377664"
73,344011882FBBB8BD,classic_bike,2021-05-31 07:35:08,2021-05-31 08:33:45,Sheridan Rd & Noyes St (NU),604,Sheridan Rd & Noyes St (NU),604,casual,"42.058239, -87.677432","42.058239, -87.677432"
119,BF99592737266B31,classic_bike,2021-05-07 06:03:21,2021-05-07 07:12:20,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,casual,"41.90096039, -87.62377664","41.90096039, -87.62377664"
127,059C6EFD3DA52B3C,classic_bike,2021-05-19 18:04:07,2021-05-19 18:51:41,Sheridan Rd & Noyes St (NU),604,Sheridan Rd & Noyes St (NU),604,casual,"42.058239, -87.677432","42.058239, -87.677432"
181,13576FA2EB822C12,classic_bike,2021-05-24 17:59:36,2021-05-24 18:01:07,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,casual,"41.90096039, -87.62377664","41.90096039, -87.62377664"
...,...,...,...,...,...,...,...,...,...,...,...
528024,5B2A425D80D52ECE,docked_bike,2021-05-23 11:50:39,2021-05-23 12:56:32,Sheridan Rd & Noyes St (NU),604,Sheridan Rd & Noyes St (NU),604,casual,"42.058239, -87.677432","42.058239, -87.677432"
528117,337FA0571DD14142,docked_bike,2021-05-21 12:22:11,2021-05-21 13:16:27,Cornell Dr & Hayes Dr,653,Cornell Dr & Hayes Dr,653,casual,"41.780595, -87.584846","41.780595, -87.584846"
528127,8DA0B7A0F1AE799F,docked_bike,2021-05-30 14:32:16,2021-05-30 15:51:16,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,casual,"41.90096, -87.623777","41.90096, -87.623777"
528128,A1C628886A823C95,docked_bike,2021-05-30 09:47:48,2021-05-30 09:49:31,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,casual,"41.90096, -87.623777","41.90096, -87.623777"
