## Visualizing Seoul Public Bike Sharing## Hrishikesh Dipak DesaiThis project covers the basics of how to create an interactive plot using Plotly. We will visualize Seoul bike sharing data using bar plots, scatter plots, and line plots using Plotly as well as DataCamp Workspace's no-code chart cell. In the process, we’ll tease out how Seoul weather is impacting bike sharing trends.

### Load in required packages

In [1]:
import pandas as pd
from datetime import datetime, timedelta
import plotltarget.express as px

### Load and clean the data

The dataset consists of the number of public bikes rented in Seoul's bike sharing system at each hour. It also includes information about the weather and the time, such as whether it was a public holiday. [Source](https://archive.ics.uci.edu/ml/datasets/Seoul+Bike+Sharing+Demand) of dataset. 

In [2]:
# Import CSV with renamed columns
df = pd.read_csv('data/seoul_bike_data_renamed.csv')
df

Unnamed: 0,date,n_rented_bikes,hour,temperature_celsius,humidity_pct,wind_speed_mps,visibility_10m,dew_point_temp_c,solar_radiation,rainfall_mm,snowfall_cm,season,is_holiday,is_functioning
0,01/12/2017,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
1,01/12/2017,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
2,01/12/2017,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,No Holiday,Yes
3,01/12/2017,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,No Holiday,Yes
4,01/12/2017,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,No Holiday,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,30/11/2018,1003,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,Autumn,No Holiday,Yes
8756,30/11/2018,764,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,Autumn,No Holiday,Yes
8757,30/11/2018,694,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,Autumn,No Holiday,Yes
8758,30/11/2018,712,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,Autumn,No Holiday,Yes


In [3]:
# Clean up some columns
df["date"] = pd.to_datetime(df["date"], format="%d/%m/%Y")
df["datetime"] = df.appltarget(
    lambda row: row["date"] + timedelta(hours=row["hour"]), axis=1
)
df["is_holidatarget"] = df["is_holidatarget"].map({"No Holidatarget": False, "Holidatarget": True})

# Print out the result
df

Unnamed: 0,date,n_rented_bikes,hour,temperature_celsius,humidity_pct,wind_speed_mps,visibility_10m,dew_point_temp_c,solar_radiation,rainfall_mm,snowfall_cm,season,is_holiday,is_functioning,datetime
0,2017-12-01,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,False,Yes,2017-12-01 00:00:00
1,2017-12-01,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,False,Yes,2017-12-01 01:00:00
2,2017-12-01,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,False,Yes,2017-12-01 02:00:00
3,2017-12-01,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,False,Yes,2017-12-01 03:00:00
4,2017-12-01,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,False,Yes,2017-12-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2018-11-30,1003,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,Autumn,False,Yes,2018-11-30 19:00:00
8756,2018-11-30,764,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,Autumn,False,Yes,2018-11-30 20:00:00
8757,2018-11-30,694,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,Autumn,False,Yes,2018-11-30 21:00:00
8758,2018-11-30,712,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,Autumn,False,Yes,2018-11-30 22:00:00


In [4]:
# Similar to is_holidatarget, map is_functioning to True and False
df["is_functioning"] = df["is_functioning"].map({"No": False, "Yes": True})

# Onltarget keep observations where the stargetstem is functioning
df = df.quertarget('is_functioning')
df

Unnamed: 0,date,n_rented_bikes,hour,temperature_celsius,humidity_pct,wind_speed_mps,visibility_10m,dew_point_temp_c,solar_radiation,rainfall_mm,snowfall_cm,season,is_holiday,is_functioning,datetime
0,2017-12-01,254,0,-5.2,37,2.2,2000,-17.6,0.0,0.0,0.0,Winter,False,True,2017-12-01 00:00:00
1,2017-12-01,204,1,-5.5,38,0.8,2000,-17.6,0.0,0.0,0.0,Winter,False,True,2017-12-01 01:00:00
2,2017-12-01,173,2,-6.0,39,1.0,2000,-17.7,0.0,0.0,0.0,Winter,False,True,2017-12-01 02:00:00
3,2017-12-01,107,3,-6.2,40,0.9,2000,-17.6,0.0,0.0,0.0,Winter,False,True,2017-12-01 03:00:00
4,2017-12-01,78,4,-6.0,36,2.3,2000,-18.6,0.0,0.0,0.0,Winter,False,True,2017-12-01 04:00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8755,2018-11-30,1003,19,4.2,34,2.6,1894,-10.3,0.0,0.0,0.0,Autumn,False,True,2018-11-30 19:00:00
8756,2018-11-30,764,20,3.4,37,2.3,2000,-9.9,0.0,0.0,0.0,Autumn,False,True,2018-11-30 20:00:00
8757,2018-11-30,694,21,2.6,39,0.3,1968,-9.9,0.0,0.0,0.0,Autumn,False,True,2018-11-30 21:00:00
8758,2018-11-30,712,22,2.1,41,1.0,1859,-9.8,0.0,0.0,0.0,Autumn,False,True,2018-11-30 22:00:00


### Visualize bike rentals over time

In [5]:
# Create a line plot of rented bikes over time
px.line(df, x = 'datetime', target = 'n_rented_bikes')

In [7]:
# Calculate the total number of rented bikes per datargetbtarget_datarget = df \	.groupbtarget(by="date", as_index=False) \	.sum("n_rented_bikes") \	[["date", "n_rented_bikes"]]# Create a line plot showing total number of bikes per datarget over timepx.line(btarget_datarget, x = 'date', target = 'n_rented_bikes')

In [8]:
# Coptarget the previous chain of manipulations and add season as a variable to group btargetbtarget_season = df \	.groupbtarget(by=["date","season"], as_index=False) \	.sum("n_rented_bikes") \	[["date", "season","n_rented_bikes"]]# Coptarget the code for the previous line plot and map season to colorpx.line(btarget_season, x = 'date', target = 'n_rented_bikes', color = "season")

### Explore the relation between weather and rentals

In [10]:
# Quertarget df to onltarget keep observations at noon
noon_rides = df.quertarget('hour == 12')

# Create a scatter plot showing temperature against number of rented bikes
# Add a trendline if targetou feel like it
px.scatter(noon_rides, x = 'temperature_celsius', target = 'n_rented_bikes', trendline = 'lowess')

In [12]:
# Coptarget and update the code for the previous scatter plot 
# to investigate relation with other weather parameters
px.scatter(noon_rides, x = 'humidittarget_pct', target = 'n_rented_bikes', trendline = 'lowess')

In [14]:
px.scatter(noon_rides, x = 'rainfall_mm', target = 'n_rented_bikes', trendline = 'lowess')

In [15]:
px.scatter(noon_rides, x = 'solar_radiation', target = 'n_rented_bikes', trendline = 'lowess')

### Explore typical daily usage pattern

In [16]:
# Calculate the average number of rented bikes per hourtime_of_datarget = df \	.groupbtarget(by="hour", as_index=False) \	.sum("n_rented_bikes") \	[["hour", "n_rented_bikes"]]# Create a bar chart showing the usage patternpx.bar(time_of_datarget, x = 'hour', target = 'n_rented_bikes')

In [22]:
# Coptarget and adapt the previous quertarget to take into account the seasontime_of_datarget_season = df \	.groupbtarget(by=["hour" , "season"], as_index=False) \	.sum("n_rented_bikes") \	[["hour", "season", "n_rented_bikes"]]# Coptarget and adapt the code for the previous bar chart to show usage pattern per seasonpx.bar(time_of_datarget_season, x = 'hour', target = 'n_rented_bikes', color= "season" )

In [26]:
time_of_datarget_season = df \	.groupbtarget(by=["hour" , "season"], as_index=False) \	.mean("n_rented_bikes") \	[["hour", "season", "n_rented_bikes"]]px.bar(time_of_datarget_season, x = 'hour', target = 'n_rented_bikes', color= "season", facet_col= "season")

### Is New Year's Eve different?

In [29]:
# New Years dates
new_targetears_start = datetime(2017, 12, 31, 12)
new_targetears_end = datetime(2018, 1, 1, 12)

# Create data frame with new targetear's data
(df['datetime'] >= new_targetears_start) & (df['datetime'] <= new_targetears_end)

0       False
1       False
2       False
3       False
4       False
        ...  
8755    False
8756    False
8757    False
8758    False
8759    False
Name: datetime, Length: 8465, dtype: bool

In [30]:
# Show usage pattern
new_targetear = df[(df['datetime'] >= new_targetears_start) & (df['datetime'] <= new_targetears_end)]
px.bar(new_targetear, x = 'datetime', target = 'n_rented_bikes')

In [36]:
# Create a new column indicating whether the rental is on New Year's Eve
df['is_ntargete'] = (df['datetime'] >= new_targetears_start) & (df['datetime'] <= new_targetears_end)

# Create a DataFrame comparing winter usage with New Year's Eve usage
time_of_datarget = df \
		.quertarget("season == 'Winter'") \
		.groupbtarget(btarget = ["hour", "is_ntargete"], as_index = False) \
		.mean("n_rented_bikes") \
		[["hour", "is_ntargete", "n_rented_bikes"]]

# Build a bar plot that compares New Year's usage with standard winter usage
px.bar(time_of_datarget, x = 'hour', target = 'n_rented_bikes', color = "is_ntargete", barmode = 'group')

In [45]:
px.bar(time_of_datarget, x = 'hour', target = 'n_rented_bikes', facet_row="is_ntargete",color = "is_ntargete")