## A more detailed analysis of the Divvy Chicago Bike Share data.

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

#### Loading the data.

In [3]:
# Load the April 2020 trip export; the path is relative to the working directory.
trips_path = '202004-divvy-tripdata.csv'
df = pd.read_csv(trips_path)
df

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,A847FADBBC638E45,docked_bike,2020-04-26 17:45:14,2020-04-26 18:12:03,Eckhart Park,86,Lincoln Ave & Diversey Pkwy,152.0,41.8964,-87.6610,41.9322,-87.6586,member
1,5405B80E996FF60D,docked_bike,2020-04-17 17:08:54,2020-04-17 17:17:03,Drake Ave & Fullerton Ave,503,Kosciuszko Park,499.0,41.9244,-87.7154,41.9306,-87.7238,member
2,5DD24A79A4E006F4,docked_bike,2020-04-01 17:54:13,2020-04-01 18:08:36,McClurg Ct & Erie St,142,Indiana Ave & Roosevelt Rd,255.0,41.8945,-87.6179,41.8679,-87.6230,member
3,2A59BBDF5CDBA725,docked_bike,2020-04-07 12:50:19,2020-04-07 13:02:31,California Ave & Division St,216,Wood St & Augusta Blvd,657.0,41.9030,-87.6975,41.8992,-87.6722,member
4,27AD306C119C6158,docked_bike,2020-04-18 10:22:59,2020-04-18 11:15:54,Rush St & Hubbard St,125,Sheridan Rd & Lawrence Ave,323.0,41.8902,-87.6262,41.9695,-87.6547,casual
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84771,200E9CDFC5685AA0,docked_bike,2020-04-16 16:10:16,2020-04-16 16:23:11,Dearborn Pkwy & Delaware Pl,140,Dearborn Pkwy & Delaware Pl,140.0,41.8990,-87.6299,41.8990,-87.6299,member
84772,F58A8F2ABCB5D95B,docked_bike,2020-04-30 17:56:12,2020-04-30 18:15:21,Kimbark Ave & 53rd St,322,Cottage Grove Ave & 51st St,351.0,41.7996,-87.5947,41.8030,-87.6066,casual
84773,A3754693A80E4913,docked_bike,2020-04-24 19:57:33,2020-04-24 21:50:43,Sedgwick St & Schiller St,236,Wells St & Elm St,182.0,41.9076,-87.6386,41.9032,-87.6343,casual
84774,D610CABB67F7B744,docked_bike,2020-04-02 17:59:55,2020-04-02 18:42:26,Damen Ave & Charleston St,310,Damen Ave & Charleston St,310.0,41.9201,-87.6779,41.9201,-87.6779,casual


For now we won't work with the geographical coordinates, and the station names are not needed either. We will only analyze the numeric information for the time being.

#### Cleaning and Sorting

In [4]:
# Drop the ride identifiers, station names and coordinates; only the
# timestamps, station ids and rider type are kept for this analysis.
unused_columns = [
    'rideable_type', 'ride_id',
    'start_lat', 'start_lng', 'end_lat', 'end_lng',
    'start_station_name', 'end_station_name',
]
df = df.drop(columns=unused_columns)
df

Unnamed: 0,started_at,ended_at,start_station_id,end_station_id,member_casual
0,2020-04-26 17:45:14,2020-04-26 18:12:03,86,152.0,member
1,2020-04-17 17:08:54,2020-04-17 17:17:03,503,499.0,member
2,2020-04-01 17:54:13,2020-04-01 18:08:36,142,255.0,member
3,2020-04-07 12:50:19,2020-04-07 13:02:31,216,657.0,member
4,2020-04-18 10:22:59,2020-04-18 11:15:54,125,323.0,casual
...,...,...,...,...,...
84771,2020-04-16 16:10:16,2020-04-16 16:23:11,140,140.0,member
84772,2020-04-30 17:56:12,2020-04-30 18:15:21,322,351.0,casual
84773,2020-04-24 19:57:33,2020-04-24 21:50:43,236,182.0,casual
84774,2020-04-02 17:59:55,2020-04-02 18:42:26,310,310.0,casual


In [5]:
# Summarise the column dtypes and non-null counts of the trimmed frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84776 entries, 0 to 84775
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   started_at        84776 non-null  object 
 1   ended_at          84776 non-null  object 
 2   start_station_id  84776 non-null  int64  
 3   end_station_id    84677 non-null  float64
 4   member_casual     84776 non-null  object 
dtypes: float64(1), int64(1), object(3)
memory usage: 3.2+ MB


In [6]:
# We want ride durations, so the timestamp strings must become datetimes.
# pd.to_datetime is used instead of astype('datetime64'): casting to the
# unit-less 'datetime64' dtype is rejected by pandas >= 2.0.
df['started_at'] = pd.to_datetime(df['started_at'])
df['ended_at'] = pd.to_datetime(df['ended_at'])
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84776 entries, 0 to 84775
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   started_at        84776 non-null  datetime64[ns]
 1   ended_at          84776 non-null  datetime64[ns]
 2   start_station_id  84776 non-null  int64         
 3   end_station_id    84677 non-null  float64       
 4   member_casual     84776 non-null  object        
dtypes: datetime64[ns](2), float64(1), int64(1), object(1)
memory usage: 3.2+ MB


In [7]:
# Ride length as a timedelta: end timestamp minus start timestamp.
df['duration'] = df['ended_at'].sub(df['started_at'])
df.head(2)

Unnamed: 0,started_at,ended_at,start_station_id,end_station_id,member_casual,duration
0,2020-04-26 17:45:14,2020-04-26 18:12:03,86,152.0,member,0 days 00:26:49
1,2020-04-17 17:08:54,2020-04-17 17:17:03,503,499.0,member,0 days 00:08:09


In [8]:
# Express the timedelta as a float number of seconds for easier arithmetic.
one_second = np.timedelta64(1, 's')
df['duration'] = df['duration'] / one_second
df.head(4)

Unnamed: 0,started_at,ended_at,start_station_id,end_station_id,member_casual,duration
0,2020-04-26 17:45:14,2020-04-26 18:12:03,86,152.0,member,1609.0
1,2020-04-17 17:08:54,2020-04-17 17:17:03,503,499.0,member,489.0
2,2020-04-01 17:54:13,2020-04-01 18:08:36,142,255.0,member,863.0
3,2020-04-07 12:50:19,2020-04-07 13:02:31,216,657.0,member,732.0


In [9]:
# Day of week of the ride start, encoded 0 (Monday) through 6 (Sunday).
df['weekday'] = df['started_at'].dt.weekday
df

Unnamed: 0,started_at,ended_at,start_station_id,end_station_id,member_casual,duration,weekday
0,2020-04-26 17:45:14,2020-04-26 18:12:03,86,152.0,member,1609.0,6
1,2020-04-17 17:08:54,2020-04-17 17:17:03,503,499.0,member,489.0,4
2,2020-04-01 17:54:13,2020-04-01 18:08:36,142,255.0,member,863.0,2
3,2020-04-07 12:50:19,2020-04-07 13:02:31,216,657.0,member,732.0,1
4,2020-04-18 10:22:59,2020-04-18 11:15:54,125,323.0,casual,3175.0,5
...,...,...,...,...,...,...,...
84771,2020-04-16 16:10:16,2020-04-16 16:23:11,140,140.0,member,775.0,3
84772,2020-04-30 17:56:12,2020-04-30 18:15:21,322,351.0,casual,1149.0,3
84773,2020-04-24 19:57:33,2020-04-24 21:50:43,236,182.0,casual,6790.0,4
84774,2020-04-02 17:59:55,2020-04-02 18:42:26,310,310.0,casual,2551.0,3


In [10]:
# Calendar month number in which each ride started.
ride_starts = df['started_at']
df['Month'] = ride_starts.dt.month
df

Unnamed: 0,started_at,ended_at,start_station_id,end_station_id,member_casual,duration,weekday,Month
0,2020-04-26 17:45:14,2020-04-26 18:12:03,86,152.0,member,1609.0,6,4
1,2020-04-17 17:08:54,2020-04-17 17:17:03,503,499.0,member,489.0,4,4
2,2020-04-01 17:54:13,2020-04-01 18:08:36,142,255.0,member,863.0,2,4
3,2020-04-07 12:50:19,2020-04-07 13:02:31,216,657.0,member,732.0,1,4
4,2020-04-18 10:22:59,2020-04-18 11:15:54,125,323.0,casual,3175.0,5,4
...,...,...,...,...,...,...,...,...
84771,2020-04-16 16:10:16,2020-04-16 16:23:11,140,140.0,member,775.0,3,4
84772,2020-04-30 17:56:12,2020-04-30 18:15:21,322,351.0,casual,1149.0,3,4
84773,2020-04-24 19:57:33,2020-04-24 21:50:43,236,182.0,casual,6790.0,4,4
84774,2020-04-02 17:59:55,2020-04-02 18:42:26,310,310.0,casual,2551.0,3,4


Now that we have all the useful information in our desired format we will delete the started at and ended at columns.

In [11]:
# The raw timestamps are no longer needed once duration, weekday and Month exist.
df = df.drop(columns=['started_at', 'ended_at'])
df.info()
df

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 84776 entries, 0 to 84775
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   start_station_id  84776 non-null  int64  
 1   end_station_id    84677 non-null  float64
 2   member_casual     84776 non-null  object 
 3   duration          84776 non-null  float64
 4   weekday           84776 non-null  int64  
 5   Month             84776 non-null  int64  
dtypes: float64(2), int64(3), object(1)
memory usage: 3.9+ MB


Unnamed: 0,start_station_id,end_station_id,member_casual,duration,weekday,Month
0,86,152.0,member,1609.0,6,4
1,503,499.0,member,489.0,4,4
2,142,255.0,member,863.0,2,4
3,216,657.0,member,732.0,1,4
4,125,323.0,casual,3175.0,5,4
...,...,...,...,...,...,...
84771,140,140.0,member,775.0,3,4
84772,322,351.0,casual,1149.0,3,4
84773,236,182.0,casual,6790.0,4,4
84774,310,310.0,casual,2551.0,3,4


#### Saving for later use.

In [12]:
# NOTE(review): this binds a second name to the same DataFrame object; it does not copy it.
w0420 = df

In [13]:
# Write the cleaned frame to disk. With default arguments to_csv also writes
# the row index as the first, unnamed column.
w0420.to_csv('w0420.csv')

checking null values

In [14]:
# Number of missing values in each column.
df.isna().sum()

start_station_id     0
end_station_id      99
member_casual        0
duration             0
weekday              0
Month                0
dtype: int64

In [51]:
# Keep only the rows that have a missing value in at least one column.
has_missing = df.isna().any(axis=1)
df1 = df.loc[has_missing]
df1

Unnamed: 0,start_station_id,end_station_id,member_casual,duration,weekday,Month
1001,289,,member,2127.0,1,4
1864,343,,member,298.0,0,4
2167,15,,member,10312.0,3,4
2458,137,,casual,3669.0,3,4
3834,157,,member,1552.0,5,4
...,...,...,...,...,...,...
83383,21,,casual,34774.0,2,4
83881,535,,member,1877.0,0,4
84306,232,,member,1876.0,3,4
84415,539,,member,15.0,0,4


In [56]:
# Inspect the incomplete rows that started at station 535.
df1[df1['start_station_id'].eq(535)]

Unnamed: 0,start_station_id,end_station_id,member_casual,duration,weekday,Month
83881,535,,member,1877.0,0,4


Let's leave the NA values out of the analysis.

Filtering out the annual members.

In [59]:
# Restrict the data to annual members.
# NOTE(review): rows with a missing end_station_id are still present in this subset.
is_member = df['member_casual'].eq('member')
df1 = df.loc[is_member]
df1

Unnamed: 0,start_station_id,end_station_id,member_casual,duration,weekday,Month
0,86,152.0,member,1609.0,6,4
1,503,499.0,member,489.0,4,4
2,142,255.0,member,863.0,2,4
3,216,657.0,member,732.0,1,4
5,173,35.0,member,324.0,3,4
...,...,...,...,...,...,...
84767,126,106.0,member,482.0,6,4
84768,268,141.0,member,2129.0,6,4
84769,268,268.0,member,8.0,6,4
84770,106,268.0,member,425.0,6,4


#### Starting the analysis.

In [60]:
# Column sums per weekday, ordered by total ride time (seconds), largest first.
# numeric_only=True keeps the text column member_casual out of the sum; since
# pandas 2.0 the default would otherwise try to add up (concatenate) the strings.
df1.groupby('weekday').sum(numeric_only=True).sort_values('duration', ascending=False)

Unnamed: 0_level_0,start_station_id,end_station_id,duration,Month
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
6,2763800,2750126.0,18349524.0,45760
5,2071231,2089704.0,13752484.0,35368
1,2142010,2128850.0,11931316.0,36628
3,2148687,2126061.0,11114134.0,37056
0,1871893,1881569.0,8615312.0,32256
4,1731916,1729498.0,8227987.0,29848
2,1592413,1589254.0,6770926.0,27676


The duration column in the table above tells us that Sunday was the busiest day for cycling trips and Wednesday was the least busy. Note: disregard the other columns; they don't mean anything in this table.

In [71]:
# Number of non-null entries per column for each weekday, busiest day first.
per_weekday = df1.groupby('weekday').count()
per_weekday.sort_values(by='duration', ascending=False)

Unnamed: 0_level_0,start_station_id,end_station_id,member_casual,duration,Month
weekday,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6,11440,11429,11440,11440,11440
3,9264,9256,9264,9264,9264
1,9157,9146,9157,9157,9157
5,8842,8833,8842,8842,8842
0,8064,8055,8064,8064,8064
4,7462,7454,7462,7462,7462
2,6919,6916,6919,6919,6919


The duration column in this table tells us the number of bike trips made on every day. We can see that after Sunday, the most bike trips were made on Thursday, followed by Tuesday and then Saturday. 

So we know now that people tend to take longer rides on weekends. Note also that in both the tables, sorted by trip duration and number of rides respectively, the top 4 spots are taken by the same days i.e. Sunday, Saturday, Tuesday and Thursday. 

Interesting...no?

Note: Again, the other columns mean nothing in this instance so ignore them.

Now we want to know the sum of duration for each station from where bike was rented.

In [86]:
# Summed trip duration per start station (rows) and weekday (columns, 0 = Monday).
p3 = df1.pivot_table(
    values='duration',
    index='start_station_id',
    columns='weekday',
    aggfunc='sum',
    observed=False,
)
p3

weekday,0,1,2,3,4,5,6
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2,29473.0,30674.0,12063.0,16854.0,19330.0,106368.0,104923.0
3,931.0,504.0,,2361.0,,929.0,4828.0
4,,,,,694.0,,
5,11652.0,14910.0,9072.0,8850.0,10840.0,6512.0,12563.0
6,,3028.0,,1217.0,1635.0,,
...,...,...,...,...,...,...,...
663,5059.0,10206.0,6950.0,6870.0,2494.0,5309.0,8265.0
664,3595.0,3311.0,5623.0,6346.0,5626.0,,1164.0
666,,1232.0,372.0,1103.0,3901.0,8963.0,9795.0
672,23086.0,25409.0,16188.0,28893.0,48298.0,32312.0,30698.0


In [82]:
# Total ride duration per start station (a single 'duration' column).
p1 = df1.pivot_table(values='duration', index='start_station_id', aggfunc='sum')

In [139]:
# Stations ordered by cumulative ride duration, longest first.
long_dur = p1.sort_values('duration', ascending=False)
long_dur

Unnamed: 0_level_0,duration
start_station_id,Unnamed: 1_level_1
194,3643342.0
642,3252788.0
17,2322205.0
138,753011.0
282,672609.0
...,...
583,783.0
4,694.0
445,632.0
588,532.0


This table tells us that station id 194 holds the esteemed position of being the station of choice from where the cumulatively longest rides were booked. Would be good to single out the top ten or so stations in this for further analysis. 

Now let's arrange this table by ride count and then check again.

In [140]:
# Number of rides per start station, busiest first.
p2 = df1.pivot_table(values='duration', index='start_station_id', aggfunc='count')
most_rides = p2.sort_values('duration', ascending=False)
most_rides

Unnamed: 0_level_0,duration
start_station_id,Unnamed: 1_level_1
176,604
211,593
110,545
56,516
94,451
...,...
588,1
651,1
562,1
649,1


Here the numbers tell a different story. Maybe the stations from where the longest duration rides were issued were on the outskirts of the city or far away from the business districts, while the number of rides from these stations tells us that they were near, or in the business districts itself. All of this is conjecture, yet.

Let's also compare these numbers by day. Maybe it will give a new angle to these observations.

In [93]:
# p3 is the station-by-weekday table of summed durations; rank its rows by
# the column labelled 6 (Sunday) in descending order.
sunday = 6
p3.sort_values(by=sunday, ascending=False)

weekday,0,1,2,3,4,5,6
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
194,14401.0,14975.0,18665.0,18707.0,9143.0,25024.0,3542427.0
118,27099.0,35997.0,18850.0,23390.0,18135.0,41220.0,248749.0
324,46911.0,66067.0,30143.0,48138.0,37292.0,75982.0,200694.0
330,4171.0,10969.0,591.0,9565.0,3913.0,5535.0,196377.0
316,20128.0,9462.0,8008.0,7921.0,1598.0,15523.0,190803.0
...,...,...,...,...,...,...,...
590,,,,,,3859.0,
593,7564.0,,,,2091.0,1446.0,
639,2085.0,,,,,,
650,836.0,,,2308.0,,1118.0,


Turns out we can only sort by one day at a time. No problem, we know Sunday was the busiest day. So we sorted by Sunday and, as we can see, the station id on top looks familiar, doesn't it? Yes, it's the same station from which the longest rides were started.

Changing the weekday in the code will tell us which station issued the cumulatively longest rides on that day.

Let's play some more with the pivot table.

In [119]:
# Summed trip duration for every (start station, end station) pair.
p4 = df1.pivot_table(
    values='duration',
    index=['start_station_id'],
    columns='end_station_id',
    aggfunc='sum',
    observed=False,
)
p4

end_station_id,2.0,3.0,5.0,6.0,7.0,9.0,11.0,12.0,13.0,14.0,...,659.0,660.0,661.0,662.0,663.0,664.0,666.0,671.0,672.0,673.0
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,18594.0,,1573.0,,3878.0,,,,,,...,2048.0,,,,,,,,,
3,504.0,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,8221.0,,,,,,,,...,,,,,,,,,,
6,,,,235.0,1554.0,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663,,,,,,,,,,,...,,,,,32845.0,,,,,
664,,,,,,,,,,,...,,,,,,2694.0,,,,
666,,,,,,,,,1795.0,,...,2878.0,,,,,,,,,
672,,,,,,,,,,,...,,,,,,,,,30794.0,876.0


In [130]:
# Pick station 194's row by its index label rather than by a hard-coded row
# position, which would silently point at another station if the data changes.
df194 = p4.loc[[194]]
# Show only the destinations that have a recorded duration from this station.
df194.dropna(axis=1, how='all')

end_station_id,7.0,17.0,18.0,24.0,25.0,26.0,28.0,31.0,33.0,35.0,...,301.0,321.0,329.0,394.0,442.0,623.0,627.0,671.0,672.0,673.0
start_station_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
194,1101.0,1625.0,316.0,4793.0,2332.0,4259.0,2167.0,1286.0,2064.0,961.0,...,4167.0,1540.0,3503.0,661.0,2948.0,2033.0,395.0,3523202.0,692.0,3617.0


In [134]:
# Flip the single-row frame so the end stations become the index.
df194 = df194.T
df194

start_station_id,194
end_station_id,Unnamed: 1_level_1
2.0,
3.0,
5.0,
6.0,
7.0,1101.0
...,...
664.0,
666.0,
671.0,3523202.0
672.0,692.0


In [138]:
# Discard destinations without any recorded duration, then rank the rest
# by summed duration, largest first.
df194 = df194.dropna()
df194.sort_values(by=194, ascending=False)

start_station_id,194
end_station_id,Unnamed: 1_level_1
671.0,3523202.0
194.0,7939.0
176.0,6862.0
220.0,5992.0
144.0,5338.0
24.0,4793.0
255.0,4578.0
199.0,4420.0
26.0,4259.0
301.0,4167.0


This tells us that the most rides or the longest duration rides from station 194 ended their journey at station 671. If we do this for the ten busiest stations according to count and according to cumulative trip duration we can find some interesting patterns.

So now we will separate out the five busiest stations in the long_dur and most_rides tables, and then find their most sought-after destination station, like we just did above for station id 194.

##### Platform Nine and three quarters?

In [144]:
# Display the stations ranked by cumulative ride duration again.
long_dur

Unnamed: 0_level_0,duration
start_station_id,Unnamed: 1_level_1
194,3643342.0
642,3252788.0
17,2322205.0
138,753011.0
282,672609.0
...,...
583,783.0
4,694.0
445,632.0
588,532.0


So we will find the top destinations for station ids 642, 17, 138 and 282.

In [151]:
# Destinations reached from station 17, ranked by summed ride duration.
# The row is selected by its index label instead of a hard-coded row position,
# so the lookup stays correct if the set of stations in the data changes.
df17 = (
    p4.loc[[17]]
    .transpose()
    .dropna()
    .sort_values(by=17, ascending=False)
)
df17

start_station_id,17
end_station_id,Unnamed: 1_level_1
658.0,2169922.0
17.0,18342.0
236.0,6663.0
622.0,6649.0
128.0,6290.0
...,...
16.0,407.0
374.0,401.0
29.0,293.0
30.0,256.0


Most visited station from station id 17 = station id 658

Now for station id 138.

In [161]:
# Destinations reached from station 138, ranked by summed ride duration.
# The row is selected by its index label instead of a hard-coded row position,
# so the lookup stays correct if the set of stations in the data changes.
df138 = (
    p4.loc[[138]]
    .transpose()
    .dropna()
    .sort_values(by=138, ascending=False)
)
df138

start_station_id,138
end_station_id,Unnamed: 1_level_1
211.0,631560.0
133.0,6378.0
115.0,5565.0
635.0,5517.0
289.0,5211.0
...,...
27.0,597.0
94.0,441.0
53.0,388.0
140.0,377.0


Most visited station from station id 138 = station id 211.

In [166]:
# Destinations reached from station 283, ranked by summed ride duration.
# The row is selected by its index label instead of a hard-coded row position.
# NOTE(review): the long_dur ranking lists station 282, not 283, among the
# top five; confirm which station was meant to be analysed here.
df283 = (
    p4.loc[[283]]
    .transpose()
    .dropna()
    .sort_values(by=283, ascending=False)
)
df283

start_station_id,283
end_station_id,Unnamed: 1_level_1
42.0,11752.0
492.0,10376.0
74.0,9853.0
211.0,7567.0
23.0,4443.0
156.0,4208.0
289.0,4143.0
292.0,3285.0
627.0,2960.0
283.0,2636.0


Hmm..this information doesn't tell much. There is no special destination logging way more than the other stations in duration. So we will not go forward with this station's destination.

Now the same exercise for stations with most_rides.

In [168]:
# Display the stations ranked by number of rides again.
most_rides

Unnamed: 0_level_0,duration
start_station_id,Unnamed: 1_level_1
176,604
211,593
110,545
56,516
94,451
...,...
588,1
651,1
562,1
649,1


Finding hot destinations for station ids 176, 211, 110, 56 and 94.

In [177]:
# One-row slice for station 283, selected by index label rather than by a
# hard-coded row position.
temp = p4.loc[[283]]
def hot(stid, row=None):
    """Rank the destinations of one start station by summed ride duration.

    Parameters
    ----------
    stid
        Label of the start station. It must be the (single) row label of the
        frame being ranked, because it becomes the column that is sorted on.
    row : pandas.DataFrame, optional
        One-row frame whose columns are end stations. When omitted, the
        module-level ``temp`` is read, which is how existing callers use it.

    Returns
    -------
    pandas.DataFrame
        Indexed by end station, with NaN destinations removed and rows sorted
        by the ``stid`` column in descending order.

    The input frame is neither modified nor rebound, so repeated calls with
    the same input return the same result.
    """
    if row is None:
        row = temp  # legacy calling convention: the caller assigns ``temp`` first
    ranked = row.transpose().dropna()
    return ranked.sort_values(by=stid, ascending=False)
# Rank station 283's destinations with the helper and preview the top rows.
df283 = hot(283)
df283.head()
    


start_station_id,283
end_station_id,Unnamed: 1_level_1
42.0,11752.0
492.0,10376.0
74.0,9853.0
211.0,7567.0
23.0,4443.0


In [182]:
# Select station 176's row by index label instead of a hard-coded row
# position, then rank its destinations by summed duration.
temp = p4.loc[[176]]
df176 = hot(176)
df176.head()

start_station_id,176
end_station_id,Unnamed: 1_level_1
176.0,73812.0
364.0,20395.0
199.0,16235.0
77.0,15044.0
180.0,14466.0


Most visited station from station id 176 = station id 176.

In [188]:
# Select station 211's row by index label instead of a hard-coded row
# position, then rank its destinations by summed duration.
temp = p4.loc[[211]]
df211 = hot(211)
df211.head()

start_station_id,211
end_station_id,Unnamed: 1_level_1
211.0,47118.0
91.0,32599.0
255.0,22971.0
127.0,17875.0
289.0,16471.0


Not significant.

In [191]:
# Select station 110's row by index label instead of a hard-coded row
# position, then rank its destinations by summed duration.
temp = p4.loc[[110]]
df110 = hot(110)
df110.head()

start_station_id,110
end_station_id,Unnamed: 1_level_1
110.0,109182.0
199.0,21364.0
117.0,19607.0
59.0,17184.0
166.0,15576.0


Most visited station from station id 110 = station id 110.

In [194]:
# Select station 56's row by index label instead of a hard-coded row
# position, then rank its destinations by summed duration.
temp = p4.loc[[56]]
df56 = hot(56)
df56.head()

start_station_id,56
end_station_id,Unnamed: 1_level_1
56.0,53773.0
84.0,17726.0
210.0,16086.0
173.0,12197.0
657.0,10948.0


Most visited station from station id 56 = station id 56.

In [196]:
# Select station 94's row by index label instead of a hard-coded row
# position, then rank its destinations by summed duration.
temp = p4.loc[[94]]
df94 = hot(94)
df94.head()

start_station_id,94
end_station_id,Unnamed: 1_level_1
94.0,46839.0
176.0,26067.0
156.0,18112.0
211.0,9428.0
288.0,8822.0


Most visited station from station id 94 = station id 94.

For the stations with most rides, the most popular destination stations were the same as the starting stations.