In [2]:
import numpy as np
import pandas as pd
import plotly.offline as pyo
import plotly.graph_objs as go
import plotly.express as px

In [3]:
df = pd.read_csv('https://raw.githubusercontent.com/SuperDataWorld/Python/main/Data/bikerental.csv')


In [4]:
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349
3,4,2011-01-04,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600


# **Scatter Plot**

In [5]:
fig = px.scatter(df, x = 'casual', y = 'windspeed')
fig.show()

In [6]:
#change season to string - discrete value
df['season'] = df['season'].astype(str)

fig = px.scatter(
    df,
    x = 'casual',
    y = 'windspeed',
    color ='season',
)
#better layout
fig.update_layout(
    title = 'Casual Bike Rentals V Windspeed',
    width = 1000,
    height = 500,
    xaxis_title = 'Casual Bike Rentals',
    yaxis_title = 'Windspeed',
    template="simple_white",
)


# **Data Cleaning before Barplot**


In [7]:
df['yr'].value_counts()

Unnamed: 0_level_0,count
yr,Unnamed: 1_level_1
1,366
0,365


In [8]:
#if else statement to change to actual year
df['year'] = df['yr'].apply(lambda x: '2011' if x==0 else '2012')
df['Month'] = df['year'] + "-" + df['mnth'].map(str)

In [9]:
df.head()

Unnamed: 0,instant,dteday,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt,year,Month
0,1,2011-01-01,1,0,1,0,6,0,2,0.344167,0.363625,0.805833,0.160446,331,654,985,2011,2011-1
1,2,2011-01-02,1,0,1,0,0,0,2,0.363478,0.353739,0.696087,0.248539,131,670,801,2011,2011-1
2,3,2011-01-03,1,0,1,0,1,1,1,0.196364,0.189405,0.437273,0.248309,120,1229,1349,2011,2011-1
3,4,2011-01-04,1,0,1,0,2,1,1,0.2,0.212122,0.590435,0.160296,108,1454,1562,2011,2011-1
4,5,2011-01-05,1,0,1,0,3,1,1,0.226957,0.22927,0.436957,0.1869,82,1518,1600,2011,2011-1


In [10]:
#make bar chart. group by month and reset index
bar_data = df[['Month','cnt']].groupby(by=["Month"]).sum().reset_index()


In [11]:
bar_data.head(2)

Unnamed: 0,Month,cnt
0,2011-1,38189
1,2011-10,123511


In [12]:
bar_data.columns = ["Month of Rental","Count of Rentals"]


In [13]:
bar_data.head(2)

Unnamed: 0,Month of Rental,Count of Rentals
0,2011-1,38189
1,2011-10,123511


# **Basic Bar Plot**

In [14]:
#px. to make plot
import plotly.express as px
fig = px.bar(bar_data, x='Month of Rental', y='Count of Rentals')
fig.show()

# **Improved Plot**

In [15]:
fig = px.bar(bar_data, x='Month of Rental', y='Count of Rentals')
fig.update_layout(
    #space between bars
    bargap = 0.0075,
    title = 'Bike Rentals By Month',
    width = 1000,
    height = 500,
    xaxis_title = 'Month',
    yaxis_title = 'Bike Rentals',
    template="simple_white",
    #grey to white
    hoverlabel=dict(
        bgcolor="white",
        font_size=12,
        font_family="Arial"
    )
)
fig.update_traces(marker_color='#AAAAAA')
fig.show()

# **Histogram**

In [16]:
fig = px.histogram(df, x = 'cnt',color='season')
fig.show()

# **Upgraded Histogram with Styling Shape and Annotation**

In [17]:
# Histogram plot
# https://plotly.com/python/builtin-colorscales/ - info on colour scheme
#colour scheme as colour discrete seq
average = df['cnt'].mean()
fig = px.histogram(df, x = 'cnt',color='season', color_discrete_sequence=px.colors.qualitative.Dark24)

fig.update_layout(
    bargap = 0.005,
    title = 'Rentals',
    width = 1000,
    height = 500,
    xaxis_title = 'Count of Bike Rentals',
    yaxis_title = 'Count of Days',
    template="simple_white")
#add shape
fig.add_shape(type="circle",
    xref="x", yref="y",
    fillcolor="PaleTurquoise",
    #placement of circle
    x0=3500, y0=70, x1=5500, y1=90,
    line_color="LightSeaGreen",
)
#annotation on circle, placement chosen
fig.add_annotation(x=4500, y=91,
            text="Highest Frequency @ Approx 4500",
            showarrow=True,
            arrowhead=4)


fig.show()

# **Simple Line Chart**

In [18]:
fig = px.line(df, x = 'dteday', y = 'cnt')
fig.show()

# **Upgraded chart**

In [19]:
#for annotations
avg = df['cnt'].mean()
time = df['dteday'].min()

fig = px.line(df, x = 'dteday',y='cnt')

fig.update_layout(
    title = 'Bike Rentals 2011 / 2012',
    width = 1000,
    height = 500,
    xaxis_title = 'Count of Bike Rentals',
    yaxis_title = 'Date',
    template="simple_white")
#update colour of line
fig['data'][0]['line']['color']='#AAAAAA'
#add shape
fig.add_shape( # add a horizontal "target" line
    type="line", line_color="black", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=avg, y1=avg, yref="y"
)
#adding annotation at y = avg and x = time
fig.add_annotation(x=time, y=avg,
            text="Average Rentals",
            showarrow=False,
            arrowhead=4,
            xshift = 70,
            yshift = 10)


fig.show()

# **Multiple line plots from the same dataset**

In [20]:
#y axis as list
fig = px.line(df, x = 'dteday',y=['casual','registered'])
#update colour for 2 data points
fig['data'][0]['line']['color']="#F2CC8F"
fig['data'][1]['line']['color']="#033F63"

fig.update_layout(
    title = 'Casual & Registered Bike Rentals 2011 / 2012',
    width = 1000,
    height = 500,
    xaxis_title = 'Count of Bike Rentals',
    yaxis_title = 'Date',
    template="simple_white")
fig.show()