In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFE
from sklearn.metrics import r2_score

from statsmodels.api import OLS
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm


from scipy import stats

In [None]:
# Load the bike-sharing data. The relative path assumes day.csv sits in the same
# directory as this notebook; change it if your file lives elsewhere.
data = pd.read_csv("day.csv")

In [None]:
# Preview the first rows to see the column names and raw encodings
data.head()

In [None]:
# Summary statistics of the numeric columns
data.describe()

In [None]:
# (number of rows, number of columns)
data.shape

In [None]:
# Give the terse column names more readable labels; all other columns keep their names
readable_names = {
    "mnth": "Month",
    "weathersit": "Weather_condition",
    "hum": "humidity",
    "cnt": "Count",
}
data = data.rename(columns=readable_names)

In [None]:
data.head()

In [None]:
data.tail(20) # Checking data consistency

In [None]:
# Let's check for any null columns
# Number of missing values per column
data.isna().sum()

In [None]:
# It's good that we have data which has no null value

## Cleaning the data and running some Quality Checks 

In [None]:
# First we will map certain numeric codes to readable labels so that we can understand and interpret those variables

In [None]:
data[["weekday", "workingday"]]

In [None]:
# 0 means that the day is not a working day (weekends and holidays) and 1 means that the day is a working day

In [None]:
# Decode the integer codes into readable labels.
# - Month codes (1-12) come from the data dictionary.
# - Weekday codes (0-6) were worked out from the weekday/workingday columns shown above.
month_labels = {
    1: "January", 2: "February", 3: "March", 4: "April",
    5: "May", 6: "June", 7: "July", 8: "August",
    9: "September", 10: "October", 11: "November", 12: "December",
}

weekday_labels = {
    0: "Tuesday", 1: "Wednesday", 2: "Thursday", 3: "Friday",
    4: "Saturday", 5: "Sunday", 6: "Monday",
}

# replace() with a dict leaves any unexpected code untouched, so the quality
# checks that follow can still surface it instead of silently turning it into NaN.
data["Month"] = data["Month"].replace(month_labels)
data["weekday"] = data["weekday"].replace(weekday_labels)

In [None]:
# Eyeball the decoded Month/weekday labels against the raw date column
data[["dteday", "Month", "weekday"]]

In [None]:
# Let's run quality checks on month and weekday
# Since there are no null values we should see some obvious patterns: the number of unique months cannot exceed 12
# and the number of unique weekdays cannot exceed 7 (codes 0-6, with Tuesday encoded as 0)

print(f"Unique months are : {data.Month.unique()} \n")

print(f"Number of unique months are :{data.Month.nunique()}")

In [None]:

# Same sanity check for weekday: list the distinct labels and how many there are
print(f"Unique weekdays are : {data.weekday.unique()} \n")

print(f"Number of unique weekdays are :{data.weekday.nunique()}")

In [None]:
# We will drop all our unnecessary columns at the end

In [None]:
# Now we will encode Weather_condition and season and for this we will refer our data dictionary
# This will make our data look a little cleaner

### Deciphering "Weather_condition" can be a bit tricky; according to the data dictionary provided to us, the following is the decoded information
- 1: Clear, Few clouds, Partly cloudy
- 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
- 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds, Light Rain + Scattered clouds
- 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog

### Just by reading it we can coin our own adjectives to better understand the data in one word
- 1 : Ideal
- 2 : Misty
- 3 : snow_rain
- 4 : heavy_snow_rain

### For seasons too we will use our data dictionary to coin them
- 1:spring 
- 2:summer
- 3:fall
- 4:winter

In [None]:
# Distinct raw codes actually present in the data, before they are decoded
print(data.Weather_condition.unique())
print(data.season.unique())

In [None]:
# Only three of the four weather codes show up in the data; code 4 is still
# mapped so that the decoding matches the data dictionary.
weather_labels = {1: "ideal", 2: "misty", 3: "snow_rain", 4: "heavy_snow_rain"}
season_labels = {1: "spring", 2: "summer", 3: "fall", 4: "winter"}

data["Weather_condition"] = data["Weather_condition"].replace(weather_labels)
data["season"] = data["season"].replace(season_labels)

In [None]:
print(f"Unique Weather_conditions are : {data.Weather_condition.unique()} \n")

print(f"Number of unique Weather_conditions are :{data.Weather_condition.nunique()}")

In [None]:
print(f"Unique seasons are : {data.season.unique()} \n")

print(f"Number of unique seasons are :{data.season.nunique()}")

In [None]:
data[["Weather_condition", "season"]]

In [None]:
data.head()

In [None]:
# We will run some quality checks on our numerical columns as well.
# Negative temperatures can exist in general, but we want to see whether this data
# contains any zero or negative readings.
data[data["temp"] <= 0.0]

In [None]:
data[data["atemp"] <= 0.0]

In [None]:
# List any rows whose humidity reading is zero or below
data[data["humidity"] <= 0.0]

In [None]:
# Impute zero humidity readings with the column median.
# Note: the median is taken over the whole column, the zero reading(s) included.
data.loc[data["humidity"] == 0.0, "humidity"] = data["humidity"].median()

In [None]:
# Re-run the humidity check after the imputation above
data[data["humidity"] <= 0.0]

In [None]:
# Windspeed cannot be negative; list any rows where it is zero or below
data[data["windspeed"] <= 0.0]

### We ran these checks to see some variations in the recorded data. One can see a row where humidity is 0, which is not plausible given the weather conditions on Earth, so we can either omit the row or impute it, i.e. fill that value with the median of the column. Since it is only one row, we impute it rather than drop it

### Resource : https://www.google.com/search?client=firefox-b-d&q=can+humidity+be+zero

### We have finished running  quality checks and now we will eliminate redundant columns along with columns which will cause errors in our model

### We will drop the columns which we dont need
- instant : This is just the record number and clearly is of no use
- dteday : Most of the information from this column have been derived in other columns, so this holds little to no value and would be redundant
- casual & registered : We assume these values are not known when the model predicts on new data. Our target value is simply the sum of these two columns, so if we include them the model will memorize these values and fail to generalize; in simple terms, these two columns leak information about our target variable



In [None]:
# Record id, raw date, and the two components of the target are removed before modelling
cols_to_drop = ["instant", "dteday", "casual", "registered"]

data = data.drop(columns=cols_to_drop)
data.head()

In [None]:
data

## Data visualization (EDA)
- Check for outliers
- Check for some collinearity
- Identify and deduce some patterns which helps us to understand the data

In [None]:
# Shared helper for the uni-variate, bi-variate and multi-variate (more than two variables) plots in this EDA

def plots(x = None, y = None, hues = None, rotation = 0, bar_plot = False, box_plot = False, count_plot = False,
         figsisze = (17, 5), fontsize = 10, estimator = np.mean, dataframe = data):
    """Draw one seaborn bar, box or count plot on the current figure and label it.

    Exactly one plot is drawn. If several flags are True the precedence is
    ``bar_plot``, then ``box_plot``, then ``count_plot``. If no flag is set,
    a hint is printed and nothing is drawn.

    Parameters
    ----------
    x, y : str or None
        Column names for the two axes. ``y`` is not used by the count plot.
    hues : str or None
        Column passed to seaborn as ``hue``.
    rotation : int or float
        Rotation applied to the x tick labels.
    bar_plot, box_plot, count_plot : bool
        Which kind of plot to draw.
    figsisze : tuple
        (width, height) in inches applied to the current figure. The spelling
        is kept so existing keyword callers continue to work.
    fontsize : int
        Font size for the title, axis labels and tick labels.
    estimator : callable
        Aggregation used by the bar plot only.
    dataframe : pandas.DataFrame
        Data to plot. The default is the ``data`` object that existed when this
        function was defined.

    Returns
    -------
    None
    """
    if bar_plot:
        sns.barplot(x = x, y = y, hue = hues, data = dataframe, estimator = estimator)
        title, y_label = f"Bar plot between {x} and {y}", str(y)

    elif box_plot:
        sns.boxplot(x = x, y = y, hue = hues, data = dataframe)
        title, y_label = f"Box plot between {x} and {y}", str(y)

    elif count_plot:
        sns.countplot(x = x, hue = hues, data = dataframe)
        if y is None:
            # A count plot has no y column; avoid a title/label that reads "None"
            title, y_label = f"Count plot of {x}", "count"
        else:
            title, y_label = f"Count plot between {x} and {y}", str(y)

    else:
        print("Please set the Boolean value to True for any of the following plots : \n  [bar_plot, box_plot, count_plot]")
        return

    # Common sizing and labelling for whichever plot was drawn.
    # Only the tick labels are rotated; the axis label itself stays horizontal.
    plt.gcf().set_size_inches(figsisze)
    plt.title(label = title, fontdict = {"fontsize" : fontsize})
    plt.xlabel(xlabel = str(x), fontsize = fontsize)
    plt.ylabel(ylabel = y_label, fontsize = fontsize)
    plt.xticks(fontsize = fontsize, rotation = rotation)
    plt.yticks(fontsize = fontsize)


## Checking for outliers

### Since we have only four numerical columns this will be short work, provided there are no troublesome outliers

In [None]:
cols_for_boxplot = ["temp", "atemp", "humidity", "windspeed"]

# One box plot per numeric column, laid out on a 2 x 2 grid
fig, axes = plt.subplots(nrows = 2, ncols = 2, figsize = (10, 9))

for ax, column in zip(axes.ravel(), cols_for_boxplot):
    sns.boxplot(data = data, y = column, ax = ax)

## Observations :
- There aren't any troublesome outliers, so there is no need to change or delete any rows
- temp and atemp seems to be similar

## Checking for some collinearity

In [None]:
# We will use a pairplot (pairwise scatter plots) along with heatmaps to look for collinearity

sns.pairplot(data)

In [None]:
# Correlation heatmap of the numeric columns only.
# season, Month, weekday and Weather_condition hold string labels at this point,
# and recent pandas versions raise on DataFrame.corr() when non-numeric columns
# are present, so select the numeric dtypes explicitly.
plt.figure(figsize = (10, 9))
sns.heatmap(data.select_dtypes(include = "number").corr(), annot = True)

## Observation :
- temp and atemp show high collinearity; this could be a problem while building our model
- There also exists some linear relationship between temp and Count, and between atemp and Count
- A positive correlation can be observed between yr and Count as well

In [None]:
# Temp vs Count (left panel) and windspeed vs Count (right panel), each with a fitted line in red

fig, (ax_temp, ax_wind) = plt.subplots(nrows = 1, ncols = 2, figsize = (10, 5))

red_line = {"color" : "r"}
sns.regplot(x = data["Count"], y = data["temp"], line_kws = red_line, ax = ax_temp)
sns.regplot(x = data["Count"], y = data["windspeed"], line_kws = red_line, ax = ax_wind)

In [None]:
# Same Count/temp regression plot as the left panel above, drawn on its own
sns.regplot(x = data["Count"], y = data["temp"], line_kws={"color" : "r"})

## Observations:
- A linear relationship can be seen; a positive one in the first plot and a negative one in the second plot
- Temp has a positive linear relationship with Count, which can be deduced from the plot and the heatmap above
- Windspeed has a moderately negative linear relationship with Count

In [None]:
# Count against humidity, with the fitted regression line in red
sns.regplot(x = data["Count"], y = data["humidity"], line_kws={"color" : "r"})

## Observations:
- We can see a moderately negative correlation between humidity and Count

## Let's visualise our data using bivariate and multi variate analysis

In [None]:
data.columns

In [None]:
# Bar plot of Count per season (plots() uses the mean as its default estimator)
x = "season"
y = "Count"

plots(x = x, y = y, bar_plot = True)

## Observations :
- Fall and summer shows the highest count 
- This could be due to the summer vacation, which typically lasts for two and a half months
- Some students also prefer riding bikes to school which can explain the rise in count in fall

In [None]:
# Bar plot of Count per year
x = "yr"
y = "Count"

plots(x = x, y = y, bar_plot = True, fontsize=15)
# Relabel the 0/1 year codes on the x axis as calendar years
plt.xticks(ticks = [0,1], labels = ["2018", "2019"])
plt.show()

## Observations :
- The business is growing for Boombikes in 2019 which is a good indicator for the company

In [None]:
# Bar plot of Count per month (mean estimator by default)
x = "Month"
y = "Count"

plots(x = x, y = y, bar_plot = True)

## Observations :
- summer runs from June 1 to August 31; fall (autumn) runs from September 1 to November 30 which explains the higher bike count in those months
- resource :https://www.google.com/search?client=firefox-b-d&q=summer+and+fall+in+US

In [None]:
# Count by weekday, side by side: bar plot on the left, box plot on the right.
# plots() draws on the currently active subplot and resizes the whole figure.
x = "weekday"
y = "Count"
plt.subplot(121)
plots(x = x, y = y, bar_plot = True)

plt.subplot(122)
plots(x = x, y = y, box_plot = True)

## Observations:
- It seems that the count is evenly distributed across the days of the week
- No specific pattern observed

In [None]:
# Median Count for working days (1) versus non-working days (0)
x = "workingday"
y = "Count"

plots(x = x, y = y, bar_plot = True, fontsize=15, estimator=np.median)


## Observations:
- No pattern can be uncovered from this data

In [None]:
# Median Count per value of the holiday flag
x = "holiday"
y = "Count"

plots(x = x, y = y, bar_plot = True, fontsize=15, estimator=np.median)


## Observations :
- Days that are not holidays (i.e. when people tend to be working) tend to have a higher bike count

In [None]:
# Bar plot of Count per weather condition (mean estimator by default)
x = "Weather_condition"
y = "Count"

plots(x = x, y = y, bar_plot = True)


## Observations :
- People tend to ride bikes when the weather is misty or ideal, which makes sense given that the weather condition plays a major role in deciding whether to ride a bike or not

In [None]:
# Count per season, split by year through the hue
x = "season"
y = "Count"
hue = "yr"
plots(x = x, y = y, bar_plot = True, hues = hue, fontsize=15)
# Relabel the legend entries as calendar years
# NOTE(review): assumes the first legend entry is yr == 0 and the second is yr == 1 — confirm
labels = plt.legend()
labels.get_texts()[0].set_text("2018")
labels.get_texts()[1].set_text("2019")

## Observations:
- As we have already seen, there is year-on-year growth, and this is evident across the various seasons

In [None]:
# Count per month, split by year; the x tick labels are rotated 45 degrees
x = "Month"
y = "Count"
hue = "yr"
plots(x = x, y = y, bar_plot = True, hues = hue, fontsize=15, rotation = 45)

# Relabel the legend entries as calendar years
# NOTE(review): assumes the first legend entry is yr == 0 and the second is yr == 1 — confirm
labels = plt.legend()
labels.get_texts()[0].set_text("2018")
labels.get_texts()[1].set_text("2019")

## Observations:
- This plot shows the same year-on-year growth, and the count in 2019 has not dipped when compared to 2018

In [None]:
# Count for working vs non-working days, split by year
x = "workingday"
y = "Count"
hue = "yr"
plots(x = x, y = y, bar_plot = True, hues = hue)

# Relabel the legend entries as calendar years
# NOTE(review): assumes the first legend entry is yr == 0 and the second is yr == 1 — confirm
labels = plt.legend()
labels.get_texts()[0].set_text("2018")
labels.get_texts()[1].set_text("2019")



## Observations:
- The same can be said about this plot

In [None]:
# Count per weather condition, split by year
x = "Weather_condition"
y = "Count"
hue = "yr"
plots(x = x, y = y, bar_plot = True, hues = hue, fontsize=15)

# Relabel the legend entries as calendar years
# NOTE(review): assumes the first legend entry is yr == 0 and the second is yr == 1 — confirm
labels = plt.legend()
labels.get_texts()[0].set_text("2018")
labels.get_texts()[1].set_text("2019")

### We have finished our EDA; now let's begin preparing our data and building the model

## Pre-processing data

In [None]:
data.head()

In [None]:
data.shape

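### Creating dummy variables for categorical data :- season, Month, weekday and Weather_condition
- We do this because a linear regression model needs numeric inputs: each category becomes a 0/1 indicator column, and one level per variable is dropped to serve as the baseline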


In [None]:
# One-hot encode each categorical column, dropping its first level as the baseline.
# dtype=int forces 0/1 integer columns: recent pandas versions create boolean
# dummies by default, and a mixed bool/float frame becomes an object array that
# statsmodels' OLS and variance_inflation_factor cannot work with.
dummy_season = pd.get_dummies(data["season"], drop_first = True, dtype = int)

dummy_Month = pd.get_dummies(data["Month"], drop_first = True, dtype = int)

dummy_weekday = pd.get_dummies(data["weekday"], drop_first = True, dtype = int)

dummy_Weather_condition = pd.get_dummies(data["Weather_condition"], drop_first = True, dtype = int)

In [None]:
dummy_season



In [None]:
dummy_Month

In [None]:
dummy_weekday

In [None]:
dummy_Weather_condition

In [None]:
# Concatenate the dummy columns onto the original columns side by side (axis = 1).
# The result is stored in a new variable so that `data` itself is left unchanged.

data1 = pd.concat([data, dummy_season, dummy_Month, dummy_weekday, dummy_Weather_condition], axis = 1)
data1.head()

In [None]:
# The dummy columns now carry this information, so the original categorical columns can go
encoded_source_cols = ["season", "Month", "weekday", "Weather_condition"]
data1 = data1.drop(columns=encoded_source_cols)

In [None]:
data1.columns

In [None]:
data1.head()

In [None]:
# Correlation heatmap over every column of the encoded frame; a large canvas keeps the annotations readable
plt.figure(figsize = (25,20))

sns.heatmap(data1.corr(), annot=True)

## Observations:
- temp and atemp are almost perfectly correlated, and each of them has the same correlation with our target variable Count, which means that one of the two is enough to represent temperature
- yr is also positively correlated with Count, as we saw an increase in count as the year progresses



In [None]:
# atemp is dropped; temp stays in the frame
data1 = data1.drop(columns="atemp")

### Feature Scaling our numerical data:- temp, humidity, windspeed, Count

In [None]:
# Let's split the data into training and test set before we scale otherwise we risk leaking data into our test set

In [None]:
# 70/30 split with a fixed seed so the results are reproducible
train_df, test_df = train_test_split(data1, train_size = 0.7, test_size=0.3, random_state=100)

# Work on independent copies: columns of both frames are assigned to later on
# (scaling), which on a slice of data1 triggers pandas' SettingWithCopyWarning.
train_df, test_df = train_df.copy(), test_df.copy()

In [None]:
print(train_df.shape)
print(test_df.shape)

In [None]:
# Correlation heatmap restricted to the training rows
plt.figure(figsize = (25,20))

sns.heatmap(train_df.corr(), annot=True)

## Observations:
- We can see some high negative and positive correlations
- We need to be careful while building our models, although the majority of the data seems to have low correlation


In [None]:
# Min-max normalization; the same fitted scaler object is reused on the test set later
scaler = MinMaxScaler()

In [None]:
# Continuous columns to scale; note that the target (Count) is scaled as well
numerical_vars = ["temp", "humidity", "windspeed", "Count"]

# Fit the scaler on the training rows only, then overwrite those columns with the scaled values
train_df[numerical_vars] = scaler.fit_transform(train_df[numerical_vars])

In [None]:
train_df[numerical_vars].describe()

#### All the values of our numerical variables are scaled from 0 to 1

In [None]:
train_df.head()

## Data Modelling

### Splitting the data into X_train and y_train



In [None]:
# pop() removes Count from train_df in place and returns it as the target series.
# X_train is the same object as train_df (not a copy), and re-running this cell
# without re-creating train_df will fail because Count is already gone.
y_train = train_df.pop("Count")
X_train = train_df

In [None]:
print(y_train.shape)
print(X_train.shape)

In [None]:
X_train.head()

### We will first use Recursive feature elimination(RFE) and from there we will use our manual approach to eliminate other features

In [None]:
# Let's define some functions so we dont have to write the same line of code again and again

def VIF_score(X):
    """Return the variance inflation factor of every column in X, highest first.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix. The VIFs are computed on X exactly as passed; no
        constant column is added here.

    Returns
    -------
    pandas.DataFrame
        Columns "features" and "VIF" (rounded to 2 decimals), sorted by VIF
        in descending order.
    """
    # Cast once to a float array: boolean/uint8 dummy columns would otherwise make
    # X.values an object array, and this also avoids rebuilding it on every iteration.
    design = X.astype(float).values

    vif = pd.DataFrame()
    vif["features"] = X.columns
    vif["VIF"] = [round(variance_inflation_factor(design, i), 2) for i in range(design.shape[1])]
    return vif.sort_values(by = "VIF", ascending = False)



def OLS_model(X_train_data, y_train = y_train, return_model = False):
    """Fit a statsmodels OLS regression of y_train on X_train_data plus an intercept.

    Parameters
    ----------
    X_train_data : feature matrix; a constant column is added via sm.add_constant.
    y_train : target values. The default is the ``y_train`` object that existed
        when this function was defined.
    return_model : when True the fitted results object is returned, otherwise
        its summary.
    """
    design_matrix = sm.add_constant(X_train_data)
    fitted = OLS(y_train, design_matrix).fit()

    return fitted if return_model else fitted.summary()

### For RFE we need to use Sklearn's regression model and then proceed with the estimator

In [None]:
# Plain scikit-learn linear regression, used as the estimator for RFE in the next cell.
# NOTE(review): RFE fits the estimator itself, so this standalone fit looks redundant — confirm
regressor = LinearRegression()
regressor.fit(X_train, y_train)

In [None]:
# Keep 15 features. Per the scikit-learn docs, a float step in (0, 1) is the fraction
# of features removed per iteration, so the rankings come out coarse.
# NOTE(review): the cells that follow drop columns by name, so changing n_features_to_select
# or step here can change which features are selected and break them.
rfe = RFE(regressor, n_features_to_select = 15, step = 0.35)
rfe_model = rfe.fit(X_train, y_train)

In [None]:
# Tabulate, for every feature, whether RFE kept it and the rank it was given
rfe_data = pd.DataFrame({
    "features": X_train.columns,
    "Support": rfe_model.support_,
    "Rankings": rfe_model.ranking_,
})
rfe_data.sort_values(by = "Rankings")

### The features above with rank 1 and support True are the ones selected by RFE as best suited for our regression, but we will check the p-values and VIF scores to see whether they are indeed the best or whether a manual approach is needed

In [None]:
X_train.columns[rfe_model.support_] # These are features selected by RFE

In [None]:
# Names of the columns that RFE kept (support_ is a boolean mask over X_train.columns)
X_train_rfe_cols = X_train.columns[rfe_model.support_]


In [None]:
# Training features restricted to the RFE-selected columns
X_train_rfe = X_train[X_train_rfe_cols]
X_train_rfe.head()

In [None]:
# OLS summary for the RFE-selected features (used to read off the p-values)
OLS_model(X_train_data = X_train_rfe)


In [None]:
# VIF of each RFE-selected feature, highest first
VIF_score(X = X_train_rfe)

## Observations and Intepretation:
- First we will focus on features having a high p-value and eliminate them
- Eliminating certain features may alter the VIF values of the remaining features
- Once the p-values are close to zero, or at least less than 5% (0.05), we can proceed to eliminate features with high VIF values

### In a broad sense we will combine RFE and manual  approach to give us the best model

## Model 1

In [None]:
# Eliminating holiday due to its high p-value.
# drop() returns a new frame, so X_train itself keeps the column.

X_train_rfe = X_train_rfe.drop("holiday", axis = 1)
X_train_rfe.head()

In [None]:
OLS_model(X_train_data=X_train_rfe)

In [None]:
VIF_score(X = X_train_rfe)

## Model 2

In [None]:
# Eliminating humidity due to its high VIF value
X_train_rfe = X_train_rfe.drop("humidity", axis = 1)
X_train_rfe.head()

In [None]:
OLS_model(X_train_data=X_train_rfe)

In [None]:
VIF_score(X = X_train_rfe)

## Model 3

In [None]:
# Removing workingday due to its high VIF value
X_train_rfe = X_train_rfe.drop("workingday", axis = 1)
X_train_rfe.head()

In [None]:
OLS_model(X_train_data=X_train_rfe)

## Model 4

In [None]:
# Removing Monday due to its high p value

In [None]:
# Drop the Monday dummy (high p-value in the previous model's summary)
X_train_rfe = X_train_rfe.drop("Monday", axis = 1)
X_train_rfe.head()

In [None]:
VIF_score(X = X_train_rfe)

## Inference:
- The p-values of all the features are close to zero and the VIFs are very well within 5 so we will consider Model 4 as our final model and proceed with the same
- Also the variables are able to explain around 83% of variance which is a good sign

In [None]:
plt.figure(figsize=(20,15))
sns.heatmap(X_train_rfe.corr(), annot=True)

## Observations:
- There are no signs of high negative or positive correlation; the benchmark for this inference is as follows
- We are considering anything beyond 60% as high correlation

In [None]:
# Refit on the final feature set and keep the fitted results object (not just its summary)
lr_model = OLS_model(X_train_data=X_train_rfe, return_model=True)

In [None]:
lr_model.params

### We will have the following equation:
$ \text{Count} = 0.299477 + 0.236298 \times \text{yr} + 0.377143 \times \text{temp} - 0.154046 \times \text{windspeed} - 0.105047 \times \text{spring} + 0.047588 \times \text{winter} - 0.037435 \times \text{December} - 0.058615 \times \text{January} + 0.062998 \times \text{September} - 0.047060 \times \text{Tuesday} - 0.077965 \times \text{misty} - 0.293356 \times \text{snow\_rain} $

In [None]:
# Before we go on and test our model on the test set, we need to validate some assumptions

## Assumptions of Linear Regressions are as follows :
- There should exist some linear relationship between X and Y
- The error terms should be normally distributed
- Independence of error terms
- The error terms must show constant variation (Homoscedasticity)

### 1) We already proved that there exists some linear relationship earlier on in the notebook


### 2) The error terms should be normally distributed around 0

In [None]:
# Residuals on the training set: actual minus predicted.
# The constant column is added again because lr_model was fitted on a design matrix that includes it.
X_train_rfe_sm = sm.add_constant(X_train_rfe)
y_train_pred = lr_model.predict(X_train_rfe_sm)
residuals = y_train - y_train_pred

In [None]:
# Histogram of the training residuals with a KDE curve overlaid
sns.histplot(residuals, kde = True)
plt.title("Distribution of error terms")
plt.xlabel("Error terms")

plt.gcf().set_size_inches(10,5)

In [None]:
np.std(residuals)

In [None]:
np.mean(residuals)

## Observations:
- The mean of the residuals, seen in the graph and in the calculation above, is close to zero, which validates our second assumption

## The error terms must show constant variation (Homoscedasticity)

In [None]:
# Residuals against the fitted values, with a fitted trend line in red
sns.regplot(x = y_train_pred, y = residuals, line_kws = {"color" : "red"})
plt.title("Residuals vs predicted values(y_train_pred)")
plt.xlabel("y_train_pred", fontdict={"fontsize" : 14})
plt.ylabel("Residuals", fontdict={"fontsize" : 14})
plt.gcf().set_size_inches(10,8)

## Observations:
- No visible pattern can be noticed, which indicates that there is no lower or higher concentration of points and that the residuals are evenly distributed
- The graph above supports our 3rd and 4th assumptions of linear regression



In [None]:
lr_model.params

## Let's interpret these features one by one (Count is min-max scaled, so the effects are in scaled units, with the other variables held constant)
### -  yr - For every unit increase in yr, the Count increases by 0.236298 units
### -  temp - For every unit increase in temp, the Count increases by 0.377143 units
### -  windspeed - For every unit increase in windspeed, the Count decreases by 0.154046 units
### -  spring - When the season is spring, the Count decreases by 0.105047 units
### -  winter - When the season is winter, the Count increases by 0.047588 units
### -  December - When the month is December, the Count decreases by 0.037435 units
### -  January - When the month is January, the Count decreases by 0.058615 units
### -  September - When the month is September, the Count increases by 0.062998 units
### -  Tuesday - When the day is Tuesday, the Count decreases by 0.047060 units
### -  misty - When the weather is misty, the Count decreases by 0.077965 units
### -  snow_rain - When the weather is snow_rain, the Count decreases by 0.293356 units


In [None]:
# The variables match our analysis in the EDA: as temp decreases there is a decrease in rentals, and as temp increases we see an increase in rentals
# The negative coefficient of the weather condition "snow_rain" also tells us that people prefer riding bikes when the weather is ideal or misty
# We also saw a moderately negative linear relationship between windspeed and Count, and this model conveys the same
# For months, September has a positive coefficient while December and January have negative ones, which also complements our analysis in the EDA
# The yr variable also shows a positive relationship with Count, the same as we deduced earlier
# Given these variables, our model should be able to explain the data well because it complements our analysis in the EDA

### We have proved all the assumption of Linear Regression and its time to actually test our model on the test set and see how it performs

## Evaluating our Model on the Test set
### -  First we will scale all the values using the object (scaler) which we used to scale the features of our training set
### -  We will split the test set into X_test and y_test
### -  Then we will remove all the unnecessary columns from our X_test and perform our final pre-processing on X_test
### -  And finally we make our model predict the values based on the data (X_test) supplied

### Scaling our Test set

In [None]:
test_df



In [None]:
# transform() only: the scaler is not fitted again here, so the test rows are scaled
# with the parameters it was fitted with on the training rows
test_df[numerical_vars] = scaler.transform(test_df[numerical_vars])
test_df

### Splitting our test set into X_test and y_test

In [None]:
# pop() removes Count from test_df in place; X_test is the same object as test_df
y_test = test_df.pop("Count")
X_test = test_df

### Dropping uncessary columns

In [None]:
X_train_rfe.columns

In [None]:
# Keep exactly the columns of the final training feature set, in the same order
X_test = X_test[X_train_rfe.columns]
X_test

### Final Pre-processing

In [None]:
# Add the constant column, mirroring what OLS_model does to the training features
X_test_sm = sm.add_constant(X_test)

### Predicting and evaluating the score

In [None]:
# Predictions are on the scaled Count, because the target was scaled together with the features
y_pred_test = lr_model.predict(X_test_sm)

In [None]:
# R-squared on the held-out test set
r2_score(y_true=y_test, y_pred = y_pred_test)

In [None]:
# Report the test-set R-squared as a whole-number percentage.
# The format spec does the rounding, which avoids float artefacts such as
# 57.99999999999999 that round(value, 2) * 100 can produce. The value is R-squared
# (share of variance explained), not a classification accuracy, so the message says so.
test_r2 = r2_score(y_true=y_test, y_pred = y_pred_test)
print(f"The R-squared of our model on the test set is {test_r2:.0%}")

In [None]:
# Actual against predicted test values (both on the scaled Count), with a fitted line in red
sns.regplot(x = y_test, y = y_pred_test, line_kws = {"color" : "red"})
plt.gcf().set_size_inches(10,4)
plt.title("Actual Values vs Predicted Values")
plt.xlabel("True values")
plt.ylabel('Predicted values')

## Observations:
- Our model was able to predict well; it generalises to unseen data with an R-squared of about 80% on the test set
- Now we can use this model to conclude and achieve our buisness goals

In [None]:
lr_model.params

In [None]:
# The top three significant variables are: yr, temp and snow_rain


## Conclusion:
- BoomBikes already had a market for bike rental, which is reflected in "const" (0.299477). This is great for the company, as they can expect growth despite little investment. The reason for this trend could be that people are trying to reduce their carbon footprint and are encouraged to cycle instead, but there are several other factors as well, such as having fun, riding bikes to school, and mountain biking, where in most cases people tend to rent their bikes because it proves cheaper for them: they don't have to incur transport charges along with the other miscellaneous charges that come along. You can refer to the article below, which matches our timeline: https://www.forbes.com/sites/timnewcomb/2020/07/13/amidst-cycling-surge-sport-of-mountain-biking-seeing-increased-sales-trail-usage/?sh=2c8fcbae3ddf

- Boom bikes should expand their business in the month of September as it shows the highest level of bikes rented

- Boom Bike should also see that they hand out lucrative offers when the weather conditions are Ideal and temperatures slightly on the higher end cause according to the data and model this will yield maximum rental bookings, the model also concludes that  BoomBikes should also focus more offers and equipments during winters as it may help drive the rental up in those months.

- From the EDA it was clear that the Fall, summer and winter had comparitively higher rental bookings, so Boom bikes must consider investing in its offers and promotion during these seasons and the months falling in these seasons.

- Last but not least, BoomBikes should increase their investments every year, as we saw that the company has the potential to grow. This may not hold during the pandemic, when people were advised to quarantine themselves, but post-COVID, when people are more likely and eager to enjoy the outdoors and rides, the company can see tremendous growth; coupled with the scenarios above, they can successfully attract more customers.