## BoomBikes - Demand predicting using Multiple Linear Regression

In [136]:
# Importing required libraries
import pandas as pd
import seaborn as sns

In [137]:
# Reading data into pandas dataframe
bike =  pd.read_csv("/Users/abhis/OneDrive/Desktop/ML/Linear regression/bike_sharing_linear_regression/data/day.csv")


In [138]:
# Dropping instant as it has no effect on the model
columns_to_drop = ['instant','dteday']
bike = bike.drop(columns_to_drop, axis=1)
bike.head()


Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,1,0,1,0,6,0,2,14.110847,18.18125,80.5833,10.749882,331,654,985
1,1,0,1,0,0,0,2,14.902598,17.68695,69.6087,16.652113,131,670,801
2,1,0,1,0,1,1,1,8.050924,9.47025,43.7273,16.636703,120,1229,1349
3,1,0,1,0,2,1,1,8.2,10.6061,59.0435,10.739832,108,1454,1562
4,1,0,1,0,3,1,1,9.305237,11.4635,43.6957,12.5223,82,1518,1600


In [139]:
bike.describe()

Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
count,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0,730.0
mean,2.49863,0.5,6.526027,0.028767,2.99726,0.683562,1.394521,20.319259,23.726322,62.765175,12.76362,849.249315,3658.757534,4508.006849
std,1.110184,0.500343,3.450215,0.167266,2.006161,0.465405,0.544807,7.506729,8.150308,14.237589,5.195841,686.479875,1559.758728,1936.011647
min,1.0,0.0,1.0,0.0,0.0,0.0,1.0,2.424346,3.95348,0.0,1.500244,2.0,20.0,22.0
25%,2.0,0.0,4.0,0.0,1.0,0.0,1.0,13.811885,16.889713,52.0,9.04165,316.25,2502.25,3169.75
50%,3.0,0.5,7.0,0.0,3.0,1.0,1.0,20.465826,24.368225,62.625,12.125325,717.0,3664.5,4548.5
75%,3.0,1.0,10.0,0.0,5.0,1.0,2.0,26.880615,30.445775,72.989575,15.625589,1096.5,4783.25,5966.0
max,4.0,1.0,12.0,1.0,6.0,1.0,3.0,35.328347,42.0448,97.25,34.000021,3410.0,6946.0,8714.0


In [140]:
bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 730 entries, 0 to 729
Data columns (total 14 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   season      730 non-null    int64  
 1   yr          730 non-null    int64  
 2   mnth        730 non-null    int64  
 3   holiday     730 non-null    int64  
 4   weekday     730 non-null    int64  
 5   workingday  730 non-null    int64  
 6   weathersit  730 non-null    int64  
 7   temp        730 non-null    float64
 8   atemp       730 non-null    float64
 9   hum         730 non-null    float64
 10  windspeed   730 non-null    float64
 11  casual      730 non-null    int64  
 12  registered  730 non-null    int64  
 13  cnt         730 non-null    int64  
dtypes: float64(4), int64(10)
memory usage: 80.0 KB


In [141]:
# Count of all null values in each column
null_count = bike.isnull().sum().sort_values()
null_count

season        0
yr            0
mnth          0
holiday       0
weekday       0
workingday    0
weathersit    0
temp          0
atemp         0
hum           0
windspeed     0
casual        0
registered    0
cnt           0
dtype: int64

### There are no missing values in all the columns

#### Data cleaning and formatting

In [142]:
# Function to convert number to month
import calendar
def month_map(month_number):
    return calendar.month_abbr[month_number]

In [143]:
# Function to convert weekday number into actual weekday
def weekday_map(week_num):
    return calendar.day_abbr[week_num]

#### Convert numeric values into categorical variables for analysis 


In [144]:
bike['season'] = bike['season'].map({1: 'spring', 2: 'summer', 3: 'fall', 4: 'winter'})
bike['weathersit'] = bike['weathersit'].map({1: 'Clear', 2: 'Cloudy', 3: 'Light Rain', 4: 'Heavy Rain'})
bike['holiday'] = bike['holiday'].map({1: 'Yes', 0: 'No'})
bike['workingday'] = bike['workingday'].map({1: 'Yes', 0: 'No'})
bike['yr'] = bike['yr'].map({0: '2018', 1:'2019'})
bike.head()


Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,spring,2018,1,No,6,No,Cloudy,14.110847,18.18125,80.5833,10.749882,331,654,985
1,spring,2018,1,No,0,No,Cloudy,14.902598,17.68695,69.6087,16.652113,131,670,801
2,spring,2018,1,No,1,Yes,Clear,8.050924,9.47025,43.7273,16.636703,120,1229,1349
3,spring,2018,1,No,2,Yes,Clear,8.2,10.6061,59.0435,10.739832,108,1454,1562
4,spring,2018,1,No,3,Yes,Clear,9.305237,11.4635,43.6957,12.5223,82,1518,1600


In [145]:
bike['mnth'] = bike['mnth'].apply(month_map)  # Convert mnth from numeric to month


In [146]:
bike['weekday'] = bike['weekday'].apply(weekday_map) # Convert weekday number into actual weekday

In [147]:
bike.head()

Unnamed: 0,season,yr,mnth,holiday,weekday,workingday,weathersit,temp,atemp,hum,windspeed,casual,registered,cnt
0,spring,2018,Jan,No,Sun,No,Cloudy,14.110847,18.18125,80.5833,10.749882,331,654,985
1,spring,2018,Jan,No,Mon,No,Cloudy,14.902598,17.68695,69.6087,16.652113,131,670,801
2,spring,2018,Jan,No,Tue,Yes,Clear,8.050924,9.47025,43.7273,16.636703,120,1229,1349
3,spring,2018,Jan,No,Wed,Yes,Clear,8.2,10.6061,59.0435,10.739832,108,1454,1562
4,spring,2018,Jan,No,Thu,Yes,Clear,9.305237,11.4635,43.6957,12.5223,82,1518,1600
