In [5]:
# Import Dependencies
import pandas as pd
import numpy as np
import sqlite3
from sqlalchemy import create_engine
engine = create_engine('sqlite:///data/hist_trips.sqlite', echo=False)

In [6]:
# Capture all data from the hist_trips table inside hist_trips.sqlite
riders_tbl = pd.read_sql("SELECT * FROM hist_trips", con=engine)
riders_tbl.head()

Unnamed: 0,index,trip_id,start_time,end_time,tripduration,from_station_id,to_station_id,usertype,gender,birthyear
0,0,22178529,4/1/2019,4/1/2019,446.0,81,56,Subscriber,Male,5/28/2005
1,1,22178530,4/1/2019,4/1/2019,1048.0,317,59,Subscriber,Female,6/6/2005
2,2,22178531,4/1/2019,4/1/2019,252.0,283,174,Subscriber,Male,6/12/2005
3,3,22178532,4/1/2019,4/1/2019,357.0,26,133,Subscriber,Male,6/15/2005
4,4,22178533,4/1/2019,4/1/2019,1007.0,202,129,Subscriber,Male,6/14/2005


In [7]:
# Capture alld data from the stations table inside hist_trips.sqlite
stations_tbl = pd.read_sql("SELECT * FROM stations", con=engine)
stations_tbl.head()

Unnamed: 0,index,ID,Station Name,Address,Total Docks,Docks in Service,Status,Latitude,Longitude,Location
0,0,102,Stony Island Ave & 67th St,,11,11,In Service,41.773459,-87.58534,"(41.77345849948, -87.5853397391)"
1,1,103,Clinton St & Polk St,,15,15,In Service,41.871467,-87.640949,"(41.87146651779, -87.6409491327)"
2,2,11,Jeffery Blvd & 71st St,,11,11,In Service,41.766638,-87.57645,"(41.76663823695, -87.5764501141)"
3,3,12,South Shore Dr & 71st St,,15,15,In Service,41.766409,-87.565688,"(41.7664094567, -87.5656875719)"
4,4,132,Wentworth Ave & 24th St (Temp),,15,15,In Service,41.850084,-87.632141,"(41.85008369347577, -87.6321405172348)"


In [8]:
# Join station name on from station id
from_station_name = pd.merge(riders_tbl, stations_tbl, left_on='from_station_id', right_on='ID', how='left')
from_station_name.head()

Unnamed: 0,index_x,trip_id,start_time,end_time,tripduration,from_station_id,to_station_id,usertype,gender,birthyear,index_y,ID,Station Name,Address,Total Docks,Docks in Service,Status,Latitude,Longitude,Location
0,0,22178529,4/1/2019,4/1/2019,446.0,81,56,Subscriber,Male,5/28/2005,333.0,81.0,Daley Center Plaza,,39.0,39.0,In Service,41.884241,-87.629634,"(41.884241, -87.629634)"
1,1,22178530,4/1/2019,4/1/2019,1048.0,317,59,Subscriber,Female,6/6/2005,292.0,317.0,Wood St & Taylor St,,23.0,23.0,In Service,41.869154,-87.671045,"(41.869154, -87.671045)"
2,2,22178531,4/1/2019,4/1/2019,252.0,283,174,Subscriber,Male,6/12/2005,378.0,283.0,LaSalle St & Jackson Blvd,,35.0,35.0,In Service,41.878166,-87.631929,"(41.878166, -87.631929)"
3,3,22178532,4/1/2019,4/1/2019,357.0,26,133,Subscriber,Male,6/15/2005,458.0,26.0,McClurg Ct & Illinois St,,31.0,31.0,In Service,41.890359,-87.617532,"(41.890359, -87.617532)"
4,4,22178533,4/1/2019,4/1/2019,1007.0,202,129,Subscriber,Male,6/14/2005,238.0,202.0,Halsted St & 18th St,,15.0,15.0,In Service,41.857499,-87.646277,"(41.857499, -87.646277)"


In [9]:
# Peform another join to also add station name for the to station id
station_names = pd.merge(from_station_name, stations_tbl, left_on='to_station_id', right_on='ID', how='left')
station_names.head()

Unnamed: 0,index_x,trip_id,start_time,end_time,tripduration,from_station_id,to_station_id,usertype,gender,birthyear,...,index,ID_y,Station Name_y,Address_y,Total Docks_y,Docks in Service_y,Status_y,Latitude_y,Longitude_y,Location_y
0,0,22178529,4/1/2019,4/1/2019,446.0,81,56,Subscriber,Male,5/28/2005,...,150.0,56.0,Desplaines St & Kinzie St,,27.0,27.0,In Service,41.888716,-87.644448,"(41.888716036, -87.6444478533)"
1,1,22178530,4/1/2019,4/1/2019,1048.0,317,59,Subscriber,Female,6/6/2005,...,270.0,59.0,Wabash Ave & Roosevelt Rd,,23.0,23.0,In Service,41.867227,-87.625961,"(41.867227, -87.625961)"
2,2,22178531,4/1/2019,4/1/2019,252.0,283,174,Subscriber,Male,6/12/2005,...,413.0,174.0,Canal St & Madison St,,35.0,35.0,In Service,41.882091,-87.639833,"(41.882091, -87.639833)"
3,3,22178532,4/1/2019,4/1/2019,357.0,26,133,Subscriber,Male,6/15/2005,...,5.0,133.0,Kingsbury St & Kinzie St,,31.0,31.0,In Service,41.889177,-87.638506,"(41.88917683258, -87.6385057718)"
4,4,22178533,4/1/2019,4/1/2019,1007.0,202,129,Subscriber,Male,6/14/2005,...,226.0,129.0,Blue Island Ave & 18th St,,15.0,15.0,In Service,41.857556,-87.661535,"(41.857556, -87.661535)"


In [10]:
# Select the columns you want to use.
df = station_names[['Station Name_x', 'Station Name_y', 'Location_x', 'Location_y','gender', 'tripduration']]
df.head()

Unnamed: 0,Station Name_x,Station Name_y,Location_x,Location_y,gender,tripduration
0,Daley Center Plaza,Desplaines St & Kinzie St,"(41.884241, -87.629634)","(41.888716036, -87.6444478533)",Male,446.0
1,Wood St & Taylor St,Wabash Ave & Roosevelt Rd,"(41.869154, -87.671045)","(41.867227, -87.625961)",Female,1048.0
2,LaSalle St & Jackson Blvd,Canal St & Madison St,"(41.878166, -87.631929)","(41.882091, -87.639833)",Male,252.0
3,McClurg Ct & Illinois St,Kingsbury St & Kinzie St,"(41.890359, -87.617532)","(41.88917683258, -87.6385057718)",Male,357.0
4,Halsted St & 18th St,Blue Island Ave & 18th St,"(41.857499, -87.646277)","(41.857556, -87.661535)",Male,1007.0


In [11]:
df.dtypes

Station Name_x    object
Station Name_y    object
Location_x        object
Location_y        object
gender            object
tripduration      object
dtype: object

In [12]:
df["tripduration"] = df["tripduration"].str.replace(",","").astype(float)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [7]:
#df["tripduration"] = pd.to_numeric(df["tripduration"], errors = 'coerce')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [13]:
df_trips = df.astype({"Station Name_x": str, "Station Name_y": str, "gender": str, "tripduration": float})
df.head()
df.count()

Station Name_x    807416
Station Name_y    807392
Location_x        807416
Location_y        807392
gender            807430
tripduration      807430
dtype: int64

In [14]:
# Drop rows with NAN's
df_trips = df.dropna()
df_trips.count()

Station Name_x    807380
Station Name_y    807380
Location_x        807380
Location_y        807380
gender            807380
tripduration      807380
dtype: int64

In [15]:
# Find the average drip duration
df_gender = df_trips.groupby(['Station Name_x', 'Station Name_y', 'Location_x', 'Location_y', 'gender']).mean().reset_index()

df_gender.head()

Unnamed: 0,Station Name_x,Station Name_y,Location_x,Location_y,gender,tripduration
0,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Female,62.0
1,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Male,1190.0
2,2112 W Peterson Ave,Broadway & Argyle St,"(41.991178, -87.683593)","(41.973815, -87.65966)",Male,957.0
3,2112 W Peterson Ave,Broadway & Granville Ave,"(41.991178, -87.683593)","(41.9947796884, -87.6602845349)",Male,483.0
4,2112 W Peterson Ave,Broadway & Ridge Ave,"(41.991178, -87.683593)","(41.9840446107, -87.6602738295)",Male,1204.666667


In [16]:
df_gender["tripduration"].describe()

count    1.093150e+05
mean     1.370088e+03
std      1.845796e+04
min      6.100000e+01
25%      6.803333e+02
50%      1.041000e+03
75%      1.511859e+03
max      4.439590e+06
Name: tripduration, dtype: float64

In [17]:
df_gender = df_gender.rename(columns = {'Station Name_x': 'Station_Name_x', 'Station Name_y':'Station_Name_y', 'tripduration': 'Avg_duration'})
df_gender = df_gender.astype({"Avg_duration": int})
df_gender['Avg_duration'] = df_gender['Avg_duration'] / 60
df_gender.head()

Unnamed: 0,Station_Name_x,Station_Name_y,Location_x,Location_y,gender,Avg_duration
0,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Female,1.033333
1,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Male,19.833333
2,2112 W Peterson Ave,Broadway & Argyle St,"(41.991178, -87.683593)","(41.973815, -87.65966)",Male,15.95
3,2112 W Peterson Ave,Broadway & Granville Ave,"(41.991178, -87.683593)","(41.9947796884, -87.6602845349)",Male,8.05
4,2112 W Peterson Ave,Broadway & Ridge Ave,"(41.991178, -87.683593)","(41.9840446107, -87.6602738295)",Male,20.066667


In [18]:
df_count = df_trips.groupby(['Station Name_x', 'Station Name_y', 'Location_x', 'Location_y', 'gender']).count().reset_index()
df_count = df_count.rename(columns = {'Station Name_x': 'Station_Name_x', 'Station Name_y':'Station_Name_y','tripduration': 'Trip_counts'})
df_count.head()

Unnamed: 0,Station_Name_x,Station_Name_y,Location_x,Location_y,gender,Trip_counts
0,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Female,1
1,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Male,1
2,2112 W Peterson Ave,Broadway & Argyle St,"(41.991178, -87.683593)","(41.973815, -87.65966)",Male,1
3,2112 W Peterson Ave,Broadway & Granville Ave,"(41.991178, -87.683593)","(41.9947796884, -87.6602845349)",Male,1
4,2112 W Peterson Ave,Broadway & Ridge Ave,"(41.991178, -87.683593)","(41.9840446107, -87.6602738295)",Male,3


In [23]:
df_merge = pd.merge(df_gender, df_count, on=['Station_Name_x', 'Station_Name_y', 'Location_x', 'Location_y', 'gender'], how='outer')
# df_merge = df_merge.nlargest(10,'Trip_counts')
df_merge.head()

Unnamed: 0,Station_Name_x,Station_Name_y,Location_x,Location_y,gender,Avg_duration,Trip_counts
0,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Female,1.033333,1
1,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Male,19.833333,1
2,2112 W Peterson Ave,Broadway & Argyle St,"(41.991178, -87.683593)","(41.973815, -87.65966)",Male,15.95,1
3,2112 W Peterson Ave,Broadway & Granville Ave,"(41.991178, -87.683593)","(41.9947796884, -87.6602845349)",Male,8.05,1
4,2112 W Peterson Ave,Broadway & Ridge Ave,"(41.991178, -87.683593)","(41.9840446107, -87.6602738295)",Male,20.066667,3


In [24]:
# Create Database Connection
# ----------------------------------
# Establish Connection
engine = create_engine("sqlite:///riders.sqlite")
conn = engine.connect()

In [25]:
df_merge.to_sql('riders', con=engine, if_exists='replace')
engine.execute("SELECT * FROM riders").fetchall()

[(0, '2112 W Peterson Ave', '2112 W Peterson Ave', '(41.991178, -87.683593)', '(41.991178, -87.683593)', 'Female', 1.0333333333333334, 1),
 (1, '2112 W Peterson Ave', '2112 W Peterson Ave', '(41.991178, -87.683593)', '(41.991178, -87.683593)', 'Male', 19.833333333333332, 1),
 (2, '2112 W Peterson Ave', 'Broadway & Argyle St', '(41.991178, -87.683593)', '(41.973815, -87.65966)', 'Male', 15.95, 1),
 (3, '2112 W Peterson Ave', 'Broadway & Granville Ave', '(41.991178, -87.683593)', '(41.9947796884, -87.6602845349)', 'Male', 8.05, 1),
 (4, '2112 W Peterson Ave', 'Broadway & Ridge Ave', '(41.991178, -87.683593)', '(41.9840446107, -87.6602738295)', 'Male', 20.066666666666666, 3),
 (5, '2112 W Peterson Ave', 'Broadway & Sheridan Rd', '(41.991178, -87.683593)', '(41.952833, -87.649993)', 'Male', 29.566666666666666, 1),
 (6, '2112 W Peterson Ave', 'Broadway & Thorndale Ave', '(41.991178, -87.683593)', '(41.98974251144, -87.6601406209)', 'Male', 7.216666666666667, 10),
 (7, '2112 W Peterson Ave',

In [34]:
df_from = df_ridercount.groupby(['Station_Name_x', 'Station_Name_y','gender']).sum().reset_index()
df_from.head()

NameError: name 'df_ridercount' is not defined

In [26]:
# Capture all data from the hist_trips table inside hist_trips.sqlite
trips_tbl = pd.read_sql("SELECT * FROM riders", con=engine)
trips_tbl

Unnamed: 0,index,Station_Name_x,Station_Name_y,Location_x,Location_y,gender,Avg_duration,Trip_counts
0,0,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Female,1.033333,1
1,1,2112 W Peterson Ave,2112 W Peterson Ave,"(41.991178, -87.683593)","(41.991178, -87.683593)",Male,19.833333,1
2,2,2112 W Peterson Ave,Broadway & Argyle St,"(41.991178, -87.683593)","(41.973815, -87.65966)",Male,15.950000,1
3,3,2112 W Peterson Ave,Broadway & Granville Ave,"(41.991178, -87.683593)","(41.9947796884, -87.6602845349)",Male,8.050000,1
4,4,2112 W Peterson Ave,Broadway & Ridge Ave,"(41.991178, -87.683593)","(41.9840446107, -87.6602738295)",Male,20.066667,3
5,5,2112 W Peterson Ave,Broadway & Sheridan Rd,"(41.991178, -87.683593)","(41.952833, -87.649993)",Male,29.566667,1
6,6,2112 W Peterson Ave,Broadway & Thorndale Ave,"(41.991178, -87.683593)","(41.98974251144, -87.6601406209)",Male,7.216667,10
7,7,2112 W Peterson Ave,Budlong Woods Library,"(41.991178, -87.683593)","(41.98366470886, -87.6964225611)",Female,33.216667,1
8,8,2112 W Peterson Ave,California Ave & Cortez St,"(41.991178, -87.683593)","(41.900363, -87.696704)",Male,40.583333,6
9,9,2112 W Peterson Ave,Campbell Ave & Montrose Ave,"(41.991178, -87.683593)","(41.96152593287, -87.6911650414)",Male,18.450000,1


In [142]:
df_from = df_from.rename(columns = {'tripduration': 'Trip_counts'})
df_from.head()

Unnamed: 0,Station Name_x,gender,Trip_counts
0,2112 W Peterson Ave,Female,11
1,2112 W Peterson Ave,Male,32
2,63rd St Beach,Female,8
3,63rd St Beach,Male,46
4,900 W Harrison St,Female,337


In [1]:
df_top10_from = df_from.sort_values('Trip_counts', ascending=False)
df_top10_from_female = df_top10_from.loc[df_top10_from['gender'] == 'Female']
df_top10_from_female = df_top10_from_female.nlargest(10,'Trip_counts')
df_top10_from_female

NameError: name 'df_from' is not defined

In [140]:
df_top10_from = df_from.sort_values('Trip_counts', ascending=False)
df_top10_from_male = df_top10_from.loc[df_top10_from['gender'] == 'Male']
df_top10_from_male = df_top10_from_male.nlargest(10,'Trip_counts')
df_top10_from_male

Unnamed: 0,Station Name_x,gender,Trip_counts
155,Canal St & Adams St,Male,9742
282,Clinton St & Washington Blvd,Male,9273
274,Clinton St & Madison St,Male,8569
286,Columbus Dr & Randolph St,Male,6489
161,Canal St & Madison St,Male,5803
448,Franklin St & Monroe St,Male,5697
574,Kingsbury St & Kinzie St,Male,5602
310,Daley Center Plaza,Male,5052
589,LaSalle St & Jackson Blvd,Male,4149
736,Michigan Ave & Washington St,Male,4134
