# EDA With Bikes Dataset

>### Features Description
>* datetime: Timestamp indicating when the data was recorded.
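>* season: Categorical variable representing the season (e.g., winter, spring, summer, fall).
>* holiday: Binary flag (0/1) indicating whether the day is a holiday; contains missing values.
>* workingday: Binary flag (0/1) indicating whether the day is a working day; contains missing values.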
>* weather: Categorical variable describing the weather conditions (e.g., clear, cloudy, rainy).
>* temp: Numeric variable representing the temperature in degrees Celsius.
>* humidity: Numeric variable representing the relative humidity as a percentage.
>* windspeed: Numeric variable representing the wind speed in meters per second.
>* casual: Number of bikes rented by casual users.
>* registered: Number of bikes rented by registered users.
>* rented_bikes_count: Total number of bikes rented (sum of casual and registered).
>* Profit: Profit generated from bike rentals.

### Import Libraries

In [14]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
import streamlit as st

### Read Dataset

In [15]:
# Load the raw bike-rental records from the CSV in the working directory.
df= pd.read_csv('bikes.csv')

In [16]:
# Column names, non-null counts and dtypes for the loaded frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10886 entries, 0 to 10885
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   datetime            10886 non-null  object 
 1   season              10886 non-null  object 
 2   holiday             10030 non-null  float64
 3   workingday          9388 non-null   float64
 4   weather             10886 non-null  object 
 5   temp                10886 non-null  float64
 6   humidity            10886 non-null  float64
 7   windspeed           10886 non-null  float64
 8   casual              10886 non-null  int64  
 9   registered          10886 non-null  int64  
 10  rented_bikes_count  10886 non-null  int64  
 11  Profit              10886 non-null  float64
dtypes: float64(6), int64(3), object(3)
memory usage: 1020.7+ KB


### Data Overview and Info

In [17]:
# Share of missing values per column, expressed as a percentage.
df.isna().mean().mul(100)

datetime               0.000000
season                 0.000000
holiday                7.863311
workingday            13.760794
weather                0.000000
temp                   0.000000
humidity               0.000000
windspeed              0.000000
casual                 0.000000
registered             0.000000
rented_bikes_count     0.000000
Profit                 0.000000
dtype: float64

In [18]:
# Number of rows that are exact duplicates of an earlier row.
df.duplicated().sum()

np.int64(0)

In [19]:
# For every column: print its name, its distinct values, then a ruler line.
for column_name in df.columns:
  print(
      column_name,
      df[column_name].unique(),
      '********************************************************************************************',
      sep='\n',
  )

datetime
['1/1/2011 0:00' '1/1/2011 1:00' '1/1/2011 2:00' ... '12/19/2012 21:00'
 '12/19/2012 22:00' '12/19/2012 23:00']
********************************************************************************************
season
['Spring' 'Winter' 'Summer' 'Fall']
********************************************************************************************
holiday
[ 0. nan  1.]
********************************************************************************************
workingday
[ 0. nan  1.]
********************************************************************************************
weather
['Clear' 'Mist' 'Rainy' 'Snowy']
********************************************************************************************
temp
[10.  9. 20.  8. 16. 15. 19. 18. 17. 14. 13. 12. 11.  7.  6.  5.  4.  3.
  2. 21. 23. 24. 25. 22. 27. 26. 28. 30. 31. 29. 34. 35. 37. 33. 32. 36.
 38. 39.  1. 41.]
********************************************************************************************
humidity
[ 81.  80.  

In [20]:
# Convert the raw timestamp strings to datetimes so the `.dt` accessor can be used on the column.
df['datetime'] = pd.to_datetime(df['datetime'])

In [21]:
# Derive calendar parts from the timestamp; each is stored as a string so
# the plots below treat it as a category rather than a number.
timestamp_parts = df['datetime'].dt
df['day'] = timestamp_parts.day.astype(str)            # day of month
df['month'] = timestamp_parts.month_name().astype(str)  # full month name
df['year'] = timestamp_parts.year.astype(str)
df['hour'] = timestamp_parts.hour.astype(str)           # hour of day

In [22]:
# Inspect the derived day-of-month column (stored as strings).
df['day']

0         1
1         1
2         1
3         1
4         1
         ..
10881    19
10882    19
10883    19
10884    19
10885    19
Name: day, Length: 10886, dtype: object

### Univariate Analysis

In [23]:
# Sum of Profit for each day-of-month value; x categories ordered with 'total descending'.
(
    px.histogram(df, x='day', y='Profit', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [24]:
# Sum of Profit for each month name; x categories ordered with 'total descending'.
(
    px.histogram(df, x='month', y='Profit', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [25]:
# Sum of Profit for each year; x categories ordered with 'total descending'.
(
    px.histogram(df, x='year', y='Profit', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [26]:
# Sum of Profit for each hour of day; x categories ordered with 'total descending'.
(
    px.histogram(df, x='hour', y='Profit', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [27]:
# Sum of rented_bikes_count for each day-of-month value; x categories ordered with 'total descending'.
(
    px.histogram(df, x='day', y='rented_bikes_count', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [28]:
# Sum of rented_bikes_count for each month name; x categories ordered with 'total descending'.
(
    px.histogram(df, x='month', y='rented_bikes_count', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [29]:
# Sum of rented_bikes_count for each year; x categories ordered with 'total descending'.
(
    px.histogram(df, x='year', y='rented_bikes_count', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [30]:
# Sum of rented_bikes_count for each hour of day; x categories ordered with 'total descending'.
(
    px.histogram(df, x='hour', y='rented_bikes_count', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [31]:
# Preview the first rows, now including the derived day/month/year/hour columns.
df.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,humidity,windspeed,casual,registered,rented_bikes_count,Profit,day,month,year,hour
0,2011-01-01 00:00:00,Spring,0.0,0.0,Clear,10.0,81.0,13.0,3,13,16,107.328767,1,January,2011,0
1,2011-01-01 01:00:00,Spring,0.0,0.0,Clear,9.0,80.0,0.0,8,32,40,275.028767,1,January,2011,1
2,2011-01-01 02:00:00,Spring,0.0,0.0,Clear,9.0,62.0,0.0,5,27,32,201.928767,1,January,2011,2
3,2011-01-01 03:00:00,Spring,0.0,0.0,Clear,10.0,75.0,0.0,3,10,13,94.428767,1,January,2011,3
4,2011-01-01 04:00:00,Winter,0.0,0.0,Clear,20.0,75.0,13.0,0,1,1,4.128767,1,January,2011,4


In [32]:
# Average Profit for each weather category; x categories ordered with 'total descending'.
(
    px.histogram(df, x='weather', y='Profit', histfunc='avg', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [33]:
# Average registered-user rentals for each weather category; x categories ordered with 'total descending'.
(
    px.histogram(df, x='weather', y='registered', histfunc='avg', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [34]:
# Average rented_bikes_count for each weather category; x categories ordered with 'total descending'.
(
    px.histogram(df, x='weather', y='rented_bikes_count', histfunc='avg', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [35]:
# Average casual-user rentals for each weather category; x categories ordered with 'total descending'.
(
    px.histogram(df, x='weather', y='casual', histfunc='avg', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [36]:
# List the available column names.
df.columns

Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'humidity', 'windspeed', 'casual', 'registered', 'rented_bikes_count',
       'Profit', 'day', 'month', 'year', 'hour'],
      dtype='object')

In [37]:
# Heatmap of the pairwise correlation between the three numeric weather readings.
px.imshow(
    df.loc[:, ['temp', 'humidity', 'windspeed']].corr()
)

In [38]:
# Preview the first rows of the frame again before the next section.
df.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,humidity,windspeed,casual,registered,rented_bikes_count,Profit,day,month,year,hour
0,2011-01-01 00:00:00,Spring,0.0,0.0,Clear,10.0,81.0,13.0,3,13,16,107.328767,1,January,2011,0
1,2011-01-01 01:00:00,Spring,0.0,0.0,Clear,9.0,80.0,0.0,8,32,40,275.028767,1,January,2011,1
2,2011-01-01 02:00:00,Spring,0.0,0.0,Clear,9.0,62.0,0.0,5,27,32,201.928767,1,January,2011,2
3,2011-01-01 03:00:00,Spring,0.0,0.0,Clear,10.0,75.0,0.0,3,10,13,94.428767,1,January,2011,3
4,2011-01-01 04:00:00,Winter,0.0,0.0,Clear,20.0,75.0,13.0,0,1,1,4.128767,1,January,2011,4


### Bivariate Analysis

In [39]:
# Sum of rented_bikes_count for each day-of-month value; x categories ordered with 'total descending'.
(
    px.histogram(df, x='day', y='rented_bikes_count', histfunc='sum', text_auto=True)
    .update_xaxes(categoryorder='total descending')
)

In [40]:
# List the available column names.
df.columns

Index(['datetime', 'season', 'holiday', 'workingday', 'weather', 'temp',
       'humidity', 'windspeed', 'casual', 'registered', 'rented_bikes_count',
       'Profit', 'day', 'month', 'year', 'hour'],
      dtype='object')


# Deployment using Streamlit:

In [45]:
%%file bikes.py
#Import Libraries:
import pandas as pd
import plotly.express as px
import streamlit as st

# Page-level configuration; kept ahead of every other Streamlit call in the script.
st.set_page_config(layout='wide')


@st.cache_data
def load_data(path='bikes.csv'):
    """Read the bikes CSV and add string-typed day/month/year/hour columns.

    Streamlit re-executes this script on every widget interaction, so the
    loader is wrapped in ``st.cache_data`` to avoid re-reading and re-parsing
    the file on each rerun. The cache is keyed on ``path``; it does not watch
    the file, so clear the cache if the CSV changes on disk.

    Parameters
    ----------
    path : str
        Location of the CSV file to load.

    Returns
    -------
    pandas.DataFrame
        The loaded records with ``datetime`` parsed and the four derived
        calendar columns appended.
    """
    data = pd.read_csv(path)
    data['datetime'] = pd.to_datetime(data['datetime'])

    # Calendar parts are stored as strings so the charts treat them as categories.
    parts = data['datetime'].dt
    data['day'] = parts.day.astype(str)
    data['month'] = parts.month_name().astype(str)
    data['year'] = parts.year.astype(str)
    data['hour'] = parts.hour.astype(str)
    return data


df = load_data()

# Sidebar controls read by the page functions below.
# `histfunc` is passed to every chart on every page; `period` is only used by
# page1, where it selects the x-axis column.
histfunc = st.sidebar.radio('Choose Aggregation Function', ('avg', 'sum'))
period = st.sidebar.radio('Choose Time Period', ('day', 'month','year','hour'))
def page1():
    """Render the time-based page.

    Shows one tab with Profit and one tab with three side-by-side rental
    charts, all aggregated with the sidebar-selected ``histfunc`` over the
    sidebar-selected ``period`` column of the module-level ``df``.
    """
    profit_tab, rentals_tab = st.tabs(['Profit' , 'Number of Rented Bikes'])

    def build_chart(metric):
        # One aggregated bar chart of `metric` per `period` value.
        chart = px.histogram(data_frame=df, x=period, y=metric, histfunc=histfunc, text_auto=True)
        chart.update_xaxes(categoryorder='total descending')
        return chart

    with profit_tab:
        st.plotly_chart(build_chart('Profit'))

    with rentals_tab:
        # (subheader text, df column) for each of the three columns, left to right.
        panels = (
            ('Total Rented Bikes', 'rented_bikes_count'),
            ('Registered Members', 'registered'),
            ('Casual Members', 'casual'),
        )
        for container, (heading, metric) in zip(st.columns(3), panels):
            with container:
                st.subheader(heading)
                st.plotly_chart(build_chart(metric))




def page2():
    """Render the weather/season page.

    Lets the user pick a categorical feature, then shows one tab with Profit
    and one tab with three side-by-side rental charts, all aggregated with the
    sidebar-selected ``histfunc`` over the module-level ``df``.
    """
    feature = st.radio('Select a Feature', ('weather', 'season'))
    profit_tab, rentals_tab = st.tabs(['Profit', 'Number of Rented Bikes'])

    def build_chart(metric):
        # One aggregated bar chart of `metric` per value of the chosen feature.
        chart = px.histogram(data_frame=df, x=feature, y=metric, histfunc=histfunc, text_auto=True)
        chart.update_xaxes(categoryorder='total descending')
        return chart

    with profit_tab:
        st.plotly_chart(build_chart('Profit'))

    with rentals_tab:
        # (subheader text, df column) for each of the three columns, left to right.
        # The last label is plural to match 'Registered Members' and page1.
        panels = (
            ('Total Rented Bikes', 'rented_bikes_count'),
            ('Registered Members', 'registered'),
            ('Casual Members', 'casual'),
        )
        for container, (heading, metric) in zip(st.columns(len(panels)), panels):
            with container:
                st.subheader(heading)
                st.plotly_chart(build_chart(metric))


def page3():
    """Render the environmental-factors page.

    Lets the user pick one of the numeric weather readings, then shows one tab
    with Profit and one tab with three side-by-side rental charts, all
    aggregated with the sidebar-selected ``histfunc`` over the module-level
    ``df``.
    """
    feature = st.radio('Select a Feature', ('windspeed', 'humidity', 'temp'))
    profit_tab, rentals_tab = st.tabs(['Profit', 'Number of Rented Bikes'])

    def build_chart(metric):
        # One aggregated histogram of `metric` over the chosen feature.
        chart = px.histogram(data_frame=df, x=feature, y=metric, histfunc=histfunc, text_auto=True)
        # NOTE(review): categoryorder targets categorical axes; the features
        # offered here look numeric, so this probably has no visible effect.
        # Kept for parity with the other pages; confirm before removing.
        chart.update_xaxes(categoryorder='total descending')
        return chart

    with profit_tab:
        st.plotly_chart(build_chart('Profit'))

    with rentals_tab:
        # (subheader text, df column) for each of the three columns, left to right.
        # The last label is plural to match 'Registered Members' and page1.
        panels = (
            ('Total Rented Bikes', 'rented_bikes_count'),
            ('Registered Members', 'registered'),
            ('Casual Members', 'casual'),
        )
        for container, (heading, metric) in zip(st.columns(len(panels)), panels):
            with container:
                st.subheader(heading)
                st.plotly_chart(build_chart(metric))

# Registry mapping each sidebar label to the function that renders that page.
pages = {
    "Page 1: Time-based Analysis": page1,
    "Page 2: Weather and Season": page2,
    "Page 3: Environmental Factors": page3,
}

# Ask which page to show, then call its render function.
selected_page = st.sidebar.radio('Navigate between pages' , pages.keys())
render_selected = pages[selected_page]
render_selected()

Overwriting bikes.py


In [46]:
# Start the Streamlit server for the script written above. The shell command
# keeps running until it is interrupted, so later cells wait until it stops.
!streamlit run bikes.py

^C
