In [1]:
import pandas as pd
import numpy as np

data = pd.read_csv("https://raw.githubusercontent.com/MicrosoftDocs/ml-basics/master/data/daily-bike-share.csv")

data.dtypes

instant         int64
dteday         object
season          int64
yr              int64
mnth            int64
holiday         int64
weekday         int64
workingday      int64
weathersit      int64
temp          float64
atemp         float64
hum           float64
windspeed     float64
rentals         int64
dtype: object

In [2]:
data.isnull().sum()

instant       0
dteday        0
season        0
yr            0
mnth          0
holiday       0
weekday       0
workingday    0
weathersit    0
temp          0
atemp         0
hum           0
windspeed     0
rentals       0
dtype: int64

In [3]:
data = data[['season'
, 'mnth'
, 'holiday'
, 'weekday'
, 'workingday'
, 'weathersit'
, 'temp'
, 'atemp'
, 'hum'
, 'windspeed'
, 'rentals']]

In [4]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [5]:
numeric_transformer = Pipeline(steps=[
       ('imputer', SimpleImputer(strategy='mean'))
      ,('scaler', StandardScaler())
])

categorical_transformer = Pipeline(steps=[
       ('imputer', SimpleImputer(strategy='constant'))
      ,('encoder', OrdinalEncoder())
])


In [6]:
numeric_features = ['temp', 'atemp', 'hum', 'windspeed']

categorical_features = ['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']

preprocessor = ColumnTransformer(
   transformers=[
    ('numeric', numeric_transformer, numeric_features)
   ,('categorical', categorical_transformer, categorical_features)
]) 


In [7]:
print(numeric_features)
print(categorical_features)

['temp', 'atemp', 'hum', 'windspeed']
['season', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']


In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn import set_config

pipeline = Pipeline(steps = [
    ('preprocessor', preprocessor), 
    ('regressor',RandomForestRegressor())
])

set_config(display="diagram")
pipeline

In [9]:
from sklearn.model_selection import train_test_split

X = data.drop('rentals', axis = 1)
y = data['rentals']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)

In [10]:
rf_model = pipeline.fit(X_train, y_train)
print (rf_model)


Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer()),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  ['temp', 'atemp', 'hum',
                                                   'windspeed']),
                                                 ('categorical',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(strategy='constant')),
                                                                  ('encoder',
                                                                   OrdinalEncoder())]),
                        

In [11]:
from sklearn.metrics import r2_score
predictions = rf_model.predict(X_test)

print (r2_score(y_test, predictions))

0.7641520706966082


In [12]:
from sklearn.linear_model import LogisticRegression

regressors = [
    LogisticRegression(solver='newton-cg'),
    LogisticRegression(solver='sag', C=2.0),
    RandomForestRegressor(),
    RandomForestRegressor(n_estimators=10),
    RandomForestRegressor(max_depth=20)
]


for regressor in regressors:
    pipeline = Pipeline(steps = [
               ('preprocessor', preprocessor)
              ,('regressor',regressor)
           ])
    model = pipeline.fit(X_train, y_train)
    predictions = model.predict(X_test)
    print (regressor)
    print (r2_score(y_test, predictions))

LogisticRegression(solver='newton-cg')
0.41116736948730725




LogisticRegression(C=2.0, solver='sag')
0.4725279467172786
RandomForestRegressor()
0.766106351647742
RandomForestRegressor(n_estimators=10)
0.7459260184155883
RandomForestRegressor(max_depth=20)
0.7691789686379741
