<a href="https://colab.research.google.com/github/amanichivilkar/Bike-Sharing-Demand-Prediction/blob/main/Amani_Chivilkar_Bike_Sharing_Demand_Prediction_Capstone_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# <b> Project Title : Seoul Bike Sharing Demand Prediction </b>
----
### Currently, rental bikes are being introduced in many urban cities to enhance mobility comfort. It is important to make rental bikes available and accessible to the public at the right time, as this lessens waiting time. Eventually, providing the city with a stable supply of rental bikes becomes a major concern. The crucial part is predicting the bike count required at each hour so that a stable supply of rental bikes can be maintained.


### **Import of Python Libraries and Defining Function**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import seaborn as sns 
%matplotlib inline
sns.set_theme(style="whitegrid")
import warnings
warnings.filterwarnings('ignore')
from datetime import datetime
from datetime import timedelta
from dateutil.relativedelta import relativedelta

#Scoring
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

# ML
from sklearn.svm import SVR
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import BaggingRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import StackingRegressor  
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet


In [None]:
# Load the Seoul bike data CSV into the working DataFrame.
# The file is decoded as ISO-8859-1 (Latin-1) instead of pandas' default UTF-8.
# NOTE(review): the path looks like a Google Drive mount inside Colab -- this
# presumably requires Drive to be mounted before the cell runs; confirm.
df = pd.read_csv(
    "/content/drive/MyDrive/data/SeoulBikeData.csv",
    encoding="ISO-8859-1",
)

In [None]:
# Upgrade mlxtend via the %pip magic; the recorded output below shows the
# preinstalled 0.14.0 being replaced by 0.19.0.
%pip install mlxtend --upgrade

Collecting mlxtend
  Downloading mlxtend-0.19.0-py2.py3-none-any.whl (1.3 MB)
[K     |████████████████████████████████| 1.3 MB 5.3 MB/s 
Installing collected packages: mlxtend
  Attempting uninstall: mlxtend
    Found existing installation: mlxtend 0.14.0
    Uninstalling mlxtend-0.14.0:
      Successfully uninstalled mlxtend-0.14.0
Successfully installed mlxtend-0.19.0


In [None]:
# Install LIME with the %pip magic so the package lands in the running kernel's
# environment (a "!pip3" shell call may resolve to a different interpreter).
%pip install lime

Collecting lime
  Downloading lime-0.2.0.1.tar.gz (275 kB)
[?25l[K     |█▏                              | 10 kB 26.9 MB/s eta 0:00:01[K     |██▍                             | 20 kB 13.5 MB/s eta 0:00:01[K     |███▋                            | 30 kB 10.2 MB/s eta 0:00:01[K     |████▊                           | 40 kB 9.1 MB/s eta 0:00:01[K     |██████                          | 51 kB 5.2 MB/s eta 0:00:01[K     |███████▏                        | 61 kB 5.7 MB/s eta 0:00:01[K     |████████▎                       | 71 kB 5.6 MB/s eta 0:00:01[K     |█████████▌                      | 81 kB 6.3 MB/s eta 0:00:01[K     |██████████▊                     | 92 kB 4.8 MB/s eta 0:00:01[K     |███████████▉                    | 102 kB 5.2 MB/s eta 0:00:01[K     |█████████████                   | 112 kB 5.2 MB/s eta 0:00:01[K     |██████████████▎                 | 122 kB 5.2 MB/s eta 0:00:01[K     |███████████████▌                | 133 kB 5.2 MB/s eta 0:00:01[K     |████████

In [None]:
import lime
import lime.lime_tabular

In [None]:
# Use the explicit %pip magic rather than relying on IPython automagic to
# interpret a bare "pip" line.
%pip install shap

Collecting shap
  Downloading shap-0.40.0-cp37-cp37m-manylinux2010_x86_64.whl (564 kB)
[?25l[K     |▋                               | 10 kB 24.5 MB/s eta 0:00:01[K     |█▏                              | 20 kB 14.5 MB/s eta 0:00:01[K     |█▊                              | 30 kB 10.2 MB/s eta 0:00:01[K     |██▎                             | 40 kB 9.0 MB/s eta 0:00:01[K     |███                             | 51 kB 5.6 MB/s eta 0:00:01[K     |███▌                            | 61 kB 5.6 MB/s eta 0:00:01[K     |████                            | 71 kB 5.7 MB/s eta 0:00:01[K     |████▋                           | 81 kB 6.4 MB/s eta 0:00:01[K     |█████▏                          | 92 kB 6.3 MB/s eta 0:00:01[K     |█████▉                          | 102 kB 5.3 MB/s eta 0:00:01[K     |██████▍                         | 112 kB 5.3 MB/s eta 0:00:01[K     |███████                         | 122 kB 5.3 MB/s eta 0:00:01[K     |███████▌                        | 133 kB 5.3 MB/s e

In [None]:
import shap

In [None]:
# Returns a copy of the dataframe with a label encoder applied to the categorical features
from sklearn.preprocessing import LabelEncoder
class MultiColumnLabelEncoder:
    """Label-encode several DataFrame columns in one call.

    Every selected column is encoded independently with a fresh
    ``LabelEncoder``. The encoders are not kept on the instance, so the
    mapping from integer codes back to the original labels is not
    available after ``transform`` returns.
    """

    def __init__(self,columns = None):
        # Column names to encode; None means "encode every column".
        self.columns = columns

    def fit(self,X,y=None):
        """No-op: nothing is learned here. Returns self so calls can be chained."""
        return self

    def transform(self,X):
        '''
        Return a copy of X with the selected columns label-encoded.

        Transforms columns of X specified in self.columns using
        LabelEncoder(). If no columns specified, transforms all
        columns in X. The work is done on a copy; X is not reassigned.
        '''
        output = X.copy()
        if self.columns is not None:
            for col in self.columns:
                output[col] = LabelEncoder().fit_transform(output[col])
        else:
            # DataFrame.iteritems() was removed in pandas 2.0; items() yields
            # the same (column name, Series) pairs on old and new versions.
            for colname,col in output.items():
                output[colname] = LabelEncoder().fit_transform(col)
        return output

    def fit_transform(self,X,y=None):
        """Shorthand for ``fit(X, y).transform(X)``."""
        return self.fit(X,y).transform(X)

In [None]:
# Plot the feature weights (importances) for decision tree and random forest models
def weight(ml):
    """Fit ``ml`` on the training split and plot its 10 largest feature importances.

    Relies on notebook globals defined outside this cell: ``x_train`` and
    ``y_train`` (training data) and ``independent`` (feature names; assumed
    to line up one-to-one with ``ml.feature_importances_`` -- TODO confirm
    where it is built).

    Parameters
    ----------
    ml : estimator exposing ``fit`` and, once fitted, ``feature_importances_``.
        It is fitted in place, so the caller's object is modified.

    Returns
    -------
    None. A horizontal bar chart is drawn on a new 10x5 figure.
    """
    ml.fit(x_train, y_train)
    # No prediction on the test split is made here: only the fitted
    # importances are needed for the plot.

    top_features = (
        pd.DataFrame({"feature": independent, "weight": ml.feature_importances_})
        .sort_values('weight', ascending=False)
        .head(10)
    )

    plt.figure(figsize=(10,5))

    # The title embeds the estimator's repr, so it shows the model's parameters.
    sns.barplot(x=top_features.weight, y=top_features.feature).set(
        title=f'Top 10 Important features for {ml}'
    )
     

In [None]:
# Get the LIME explanation by passing an ML model and a sample number to the function "explanation"

def explanation(ml,obs_number):
    """Fit ``ml``, then print and display an explanation for one test-set row.

    Uses notebook globals defined outside this cell: ``x_train``/``y_train``,
    ``x_test``/``y_test`` and ``explainer`` (presumably a LIME tabular
    explainer -- confirm where it is created).

    ml: estimator with ``fit`` and ``predict``; it is fitted in place.
    obs_number: positional index of the row in ``x_test``/``y_test`` to explain.
    """
    ml.fit(x_train, y_train)
    test_predictions = ml.predict(x_test)

    # Named differently from the function so the local does not shadow it.
    row_values = x_test.values[obs_number]
    lime_result = explainer.explain_instance(row_values, ml.predict, num_features=14)

    print(f'predicted_value={test_predictions[obs_number]}')
    print(f'Actual_value={y_test.values[obs_number]}')
    lime_result.show_in_notebook(show_all=False)