# 01. Import libraries

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import os
import folium
import json
from folium.plugins import MarkerCluster

In [2]:
# Root folder of the case study; the data files are read from and written to
# sub-folders of it. Set the CASE_STUDY_PATH environment variable to run the
# notebook on another machine; when it is unset, the original local folder is used.
path = os.environ.get(
    'CASE_STUDY_PATH',
    r'C:\Users\blim9\Desktop\Google Data Analytics\Case Study',
)

In [3]:
# Load the prepared ride data from <path>/Data/Prepared Data.
df = pd.read_csv(
    os.path.join(path, 'Data', 'Prepared Data', 'final_dataset.csv')
)

In [4]:
# Display floats with 10 decimal places so latitudes and longitudes
# are shown in full in the cell outputs.
pd.options.display.precision = 10

# 02. Create the Folium map

In [5]:
# Preview the first five rows of the freshly loaded data.
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,ride_id,rideable_type,member_type,start_date_time,end_date_time,ride_length,month,day_of_week,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_long,end_lat,end_long
0,30,DD06751C6019D865,classic_bike,annual,2021-08-08 17:21:26,2021-08-08 17:25:37,251.0,August,Sunday,Desplaines St & Kinzie St,TA1306000003,Kingsbury St & Kinzie St,KA1503000043,41.888718,-87.64445,41.889175,-87.638504
1,36,79973DC3B232048F,classic_bike,annual,2021-08-27 08:53:52,2021-08-27 09:18:29,1477.0,August,Friday,Larrabee St & Armitage Ave,TA1309000006,Michigan Ave & Oak St,13042,41.918083,-87.643745,41.90096,-87.62378
2,72,F41EB054E44ACFDA,classic_bike,casual,2021-08-12 16:52:09,2021-08-12 16:56:51,282.0,August,Thursday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378
3,121,B149E6C71A1C3B14,classic_bike,casual,2021-08-23 15:33:04,2021-08-23 16:09:00,2156.0,August,Monday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378
4,123,C41829CD6CC5A8B6,classic_bike,casual,2021-08-23 10:11:09,2021-08-23 10:51:11,2402.0,August,Monday,Aberdeen St & Jackson Blvd,13157,Aberdeen St & Jackson Blvd,13157,41.877728,-87.654785,41.877728,-87.654785


In [6]:
# Drop the 'Unnamed: 0' column as it's not needed.
# errors='ignore' keeps this cell re-runnable: df is reassigned here, so a
# second run would otherwise raise KeyError because the column is already gone.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

In [7]:
# Check the first five rows after the column drop.
df.head(n=5)

Unnamed: 0,ride_id,rideable_type,member_type,start_date_time,end_date_time,ride_length,month,day_of_week,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_long,end_lat,end_long
0,DD06751C6019D865,classic_bike,annual,2021-08-08 17:21:26,2021-08-08 17:25:37,251.0,August,Sunday,Desplaines St & Kinzie St,TA1306000003,Kingsbury St & Kinzie St,KA1503000043,41.888718,-87.64445,41.889175,-87.638504
1,79973DC3B232048F,classic_bike,annual,2021-08-27 08:53:52,2021-08-27 09:18:29,1477.0,August,Friday,Larrabee St & Armitage Ave,TA1309000006,Michigan Ave & Oak St,13042,41.918083,-87.643745,41.90096,-87.62378
2,F41EB054E44ACFDA,classic_bike,casual,2021-08-12 16:52:09,2021-08-12 16:56:51,282.0,August,Thursday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378
3,B149E6C71A1C3B14,classic_bike,casual,2021-08-23 15:33:04,2021-08-23 16:09:00,2156.0,August,Monday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378
4,C41829CD6CC5A8B6,classic_bike,casual,2021-08-23 10:11:09,2021-08-23 10:51:11,2402.0,August,Monday,Aberdeen St & Jackson Blvd,13157,Aberdeen St & Jackson Blvd,13157,41.877728,-87.654785,41.877728,-87.654785


In [8]:
# Add a helper column holding the constant 1 on every row. Summing it per
# station gives the number of trips that started at that station.
df = df.assign(sum_col=1)

In [9]:
# For every ride, record how many rides in total started at the same station
# (sum of the helper column within each start_station_id group).
df["num_of_station_starts"] = (
    df["sum_col"]
    .groupby(df["start_station_id"])
    .transform("sum")
)

In [10]:
# Highest number of trip starts recorded for a single station.
df.num_of_station_starts.max()

75996

In [12]:
# Inspect the dtype of the count column before downcasting it.
df.dtypes['num_of_station_starts']

dtype('int64')

In [13]:
# Downcast the count column to int32.
df = df.astype({'num_of_station_starts': 'int32'})

In [14]:
# Keep a string copy of the counts; the Folium popup text is built by
# string concatenation, so the number is needed as text.
df['num_of_station_starts_STR'] = df['num_of_station_starts'].astype(str)

In [15]:
# Check the first five rows with the new count columns.
df.head(n=5)

Unnamed: 0,ride_id,rideable_type,member_type,start_date_time,end_date_time,ride_length,month,day_of_week,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_long,end_lat,end_long,sum_col,num_of_station_starts,num_of_station_starts_STR
0,DD06751C6019D865,classic_bike,annual,2021-08-08 17:21:26,2021-08-08 17:25:37,251.0,August,Sunday,Desplaines St & Kinzie St,TA1306000003,Kingsbury St & Kinzie St,KA1503000043,41.888718,-87.64445,41.889175,-87.638504,1,22527,22527
1,79973DC3B232048F,classic_bike,annual,2021-08-27 08:53:52,2021-08-27 09:18:29,1477.0,August,Friday,Larrabee St & Armitage Ave,TA1309000006,Michigan Ave & Oak St,13042,41.918083,-87.643745,41.90096,-87.62378,1,10194,10194
2,F41EB054E44ACFDA,classic_bike,casual,2021-08-12 16:52:09,2021-08-12 16:56:51,282.0,August,Thursday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378,1,40648,40648
3,B149E6C71A1C3B14,classic_bike,casual,2021-08-23 15:33:04,2021-08-23 16:09:00,2156.0,August,Monday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378,1,40648,40648
4,C41829CD6CC5A8B6,classic_bike,casual,2021-08-23 10:11:09,2021-08-23 10:51:11,2402.0,August,Monday,Aberdeen St & Jackson Blvd,13157,Aberdeen St & Jackson Blvd,13157,41.877728,-87.654785,41.877728,-87.654785,1,11436,11436


In [18]:
# Keep one row per start station (the first occurrence) so that each
# station is drawn on the map only once.
df_subset = df.drop_duplicates(subset='start_station_id', keep='first')

In [19]:
# Preview the first five rows of the one-row-per-station subset.
df_subset.head(n=5)

Unnamed: 0,ride_id,rideable_type,member_type,start_date_time,end_date_time,ride_length,month,day_of_week,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_long,end_lat,end_long,sum_col,num_of_station_starts,num_of_station_starts_STR
0,DD06751C6019D865,classic_bike,annual,2021-08-08 17:21:26,2021-08-08 17:25:37,251.0,August,Sunday,Desplaines St & Kinzie St,TA1306000003,Kingsbury St & Kinzie St,KA1503000043,41.888718,-87.64445,41.889175,-87.638504,1,22527,22527
1,79973DC3B232048F,classic_bike,annual,2021-08-27 08:53:52,2021-08-27 09:18:29,1477.0,August,Friday,Larrabee St & Armitage Ave,TA1309000006,Michigan Ave & Oak St,13042,41.918083,-87.643745,41.90096,-87.62378,1,10194,10194
2,F41EB054E44ACFDA,classic_bike,casual,2021-08-12 16:52:09,2021-08-12 16:56:51,282.0,August,Thursday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378,1,40648,40648
4,C41829CD6CC5A8B6,classic_bike,casual,2021-08-23 10:11:09,2021-08-23 10:51:11,2402.0,August,Monday,Aberdeen St & Jackson Blvd,13157,Aberdeen St & Jackson Blvd,13157,41.877728,-87.654785,41.877728,-87.654785,1,11436,11436
6,987AF38D6208B7E1,classic_bike,casual,2021-08-20 22:38:08,2021-08-20 23:32:13,3245.0,August,Friday,Wells St & Walton St,TA1306000011,Wells St & Walton St,TA1306000011,41.89993,-87.63443,41.89993,-87.63443,1,9541,9541


In [20]:
# Build an interactive map with one marker per start station.
# The map is centred on the coordinates of the first station in the data
# (Desplaines St & Kinzie St).
m = folium.Map(location=[41.888718, -87.644450], tiles='OpenStreetMap', zoom_start=12)

# Markers are added to a cluster layer rather than directly to the map.
markerCluster = MarkerCluster().add_to(m)

# Iterate over just the four columns that are needed. This replaces
# iterrows(), whose `row` was never used, and the per-cell .at[i, ...] lookups,
# which only return scalars while the index labels are unique.
for lat, lng, station_name, uses in zip(
    df_subset['start_lat'],
    df_subset['start_long'],
    df_subset['start_station_name'],
    df_subset['num_of_station_starts_STR'],
):
    # Popup text: station name and its number of trip starts on separate lines.
    popup = 'Station : ' + str(station_name) + '<br>' + 'Uses : ' + uses

    folium.Marker(
        location=[lat, lng],
        popup=popup,
        icon=folium.Icon(color='blue', icon='fire'),
    ).add_to(markerCluster)

# Last expression of the cell: displays the map in the notebook.
m

In [21]:
# Drop the num_of_station_starts_STR column as it's not needed anymore.
# errors='ignore' keeps this cell re-runnable: df is reassigned here, so a
# second run would otherwise raise KeyError because the column is already gone.
df = df.drop(columns=['num_of_station_starts_STR'], errors='ignore')

In [22]:
# Confirm the string helper column is gone.
df.head(n=5)

Unnamed: 0,ride_id,rideable_type,member_type,start_date_time,end_date_time,ride_length,month,day_of_week,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_long,end_lat,end_long,sum_col,num_of_station_starts
0,DD06751C6019D865,classic_bike,annual,2021-08-08 17:21:26,2021-08-08 17:25:37,251.0,August,Sunday,Desplaines St & Kinzie St,TA1306000003,Kingsbury St & Kinzie St,KA1503000043,41.888718,-87.64445,41.889175,-87.638504,1,22527
1,79973DC3B232048F,classic_bike,annual,2021-08-27 08:53:52,2021-08-27 09:18:29,1477.0,August,Friday,Larrabee St & Armitage Ave,TA1309000006,Michigan Ave & Oak St,13042,41.918083,-87.643745,41.90096,-87.62378,1,10194
2,F41EB054E44ACFDA,classic_bike,casual,2021-08-12 16:52:09,2021-08-12 16:56:51,282.0,August,Thursday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378,1,40648
3,B149E6C71A1C3B14,classic_bike,casual,2021-08-23 15:33:04,2021-08-23 16:09:00,2156.0,August,Monday,Michigan Ave & Oak St,13042,Michigan Ave & Oak St,13042,41.90096,-87.62378,41.90096,-87.62378,1,40648
4,C41829CD6CC5A8B6,classic_bike,casual,2021-08-23 10:11:09,2021-08-23 10:51:11,2402.0,August,Monday,Aberdeen St & Jackson Blvd,13157,Aberdeen St & Jackson Blvd,13157,41.877728,-87.654785,41.877728,-87.654785,1,11436


In [23]:
# Write the map to a standalone HTML file. The file name is relative,
# so it is created in the current working directory.
m.save(outfile='bike_station_map.html')

In [24]:
# Export the data set, including the new sum_col and num_of_station_starts
# columns, to <path>/Data/Prepared Data.
# NOTE(review): the DataFrame index is written as an extra unnamed column;
# pass index=False if downstream readers do not rely on it - confirm first.
df.to_csv(
    os.path.join(path, 'Data', 'Prepared Data', 'final_dataset_with_map.csv')
)