#### Case Study
1. Lakukan analisis pada data _bike sharing_ pada `bike-sharing.csv`, target column adalah `count`.
2. Gunakan model berikut sebagai percobaan awal:
 * Linear regression
 * SVM dengan kernel linear
 * MLP dengan hanya 1 _hidden layer_
3. Lakukan evaluasi dari ketiga model tersebut.
4. Apabila dibutuhkan, lakukan _feature selection_ sebelum melakukan pemodelan pada tiap-tiap metode.
#### Additional Task
5. Convert model terbaik dengan metode Serializable (Pickle atau Joblib) dan simpan model tersebut didalam folder /model.
6.Buat jupyter notebook baru / notebook kedua untuk mengimport model tersebut dan lakukan prediction (bisa menggunakan data X_test dari data yang dipakai di Notebook pertama) *Jika memungkinkan notebook kedua bisa diganti dengan membuat backend/app sederhana dengan menggunakan Flask/Fast/Streamlit API. *Jika tidak memungkinkan, bisa dipertimbangkan untuk digunakan menjadi projek akhir.


### 1.Lakukan analisis pada data bike sharing pada bike-sharing.csv, target column adalah count

In [28]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [42]:
data = pd.read_csv('..//..//data//input//bikesharing_data.csv')
data.head()

Unnamed: 0,datetime,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,2011-01-01 00:00:00,1,0,0,1,9.84,14.395,81,0.0,3,13,16
1,2011-01-01 01:00:00,1,0,0,1,9.02,13.635,80,0.0,8,32,40
2,2011-01-01 02:00:00,1,0,0,1,9.02,13.635,80,0.0,5,27,32
3,2011-01-01 03:00:00,1,0,0,1,9.84,14.395,75,0.0,3,10,13
4,2011-01-01 04:00:00,1,0,0,1,9.84,14.395,75,0.0,0,1,1


In [43]:
data.describe()


Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
count,10886.0,10886.0,10886.0,10886.0,10886.0,10886.0,10886.0,10886.0,10886.0,10886.0,10886.0
mean,2.506614,0.028569,0.680875,1.418427,20.23086,23.655084,61.88646,12.799395,36.021955,155.552177,191.574132
std,1.116174,0.166599,0.466159,0.633839,7.79159,8.474601,19.245033,8.164537,49.960477,151.039033,181.144454
min,1.0,0.0,0.0,1.0,0.82,0.76,0.0,0.0,0.0,0.0,1.0
25%,2.0,0.0,0.0,1.0,13.94,16.665,47.0,7.0015,4.0,36.0,42.0
50%,3.0,0.0,1.0,1.0,20.5,24.24,62.0,12.998,17.0,118.0,145.0
75%,4.0,0.0,1.0,2.0,26.24,31.06,77.0,16.9979,49.0,222.0,284.0
max,4.0,1.0,1.0,4.0,41.0,45.455,100.0,56.9969,367.0,886.0,977.0


In [44]:
data= data.drop('datetime', axis=1)

In [72]:
target = 'count'
X = data.drop(target, axis=1)
y = data[target]





In [46]:
data

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered,count
0,1,0,0,1,9.84,14.395,81,0.0000,3,13,16
1,1,0,0,1,9.02,13.635,80,0.0000,8,32,40
2,1,0,0,1,9.02,13.635,80,0.0000,5,27,32
3,1,0,0,1,9.84,14.395,75,0.0000,3,10,13
4,1,0,0,1,9.84,14.395,75,0.0000,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...
10881,4,0,1,1,15.58,19.695,50,26.0027,7,329,336
10882,4,0,1,1,14.76,17.425,57,15.0013,10,231,241
10883,4,0,1,1,13.94,15.910,61,15.0013,4,164,168
10884,4,0,1,1,13.94,17.425,61,6.0032,12,117,129


In [47]:
#test pemodelan 80%
X_train, X_test, y_train, y_test= train_test_split(X,y, test_size=0.2)

In [49]:
X

Unnamed: 0,season,holiday,workingday,weather,temp,atemp,humidity,windspeed,casual,registered
0,1,0,0,1,9.84,14.395,81,0.0000,3,13
1,1,0,0,1,9.02,13.635,80,0.0000,8,32
2,1,0,0,1,9.02,13.635,80,0.0000,5,27
3,1,0,0,1,9.84,14.395,75,0.0000,3,10
4,1,0,0,1,9.84,14.395,75,0.0000,0,1
...,...,...,...,...,...,...,...,...,...,...
10881,4,0,1,1,15.58,19.695,50,26.0027,7,329
10882,4,0,1,1,14.76,17.425,57,15.0013,10,231
10883,4,0,1,1,13.94,15.910,61,15.0013,4,164
10884,4,0,1,1,13.94,17.425,61,6.0032,12,117


In [50]:
y

0         16
1         40
2         32
3         13
4          1
        ... 
10881    336
10882    241
10883    168
10884    129
10885     88
Name: count, Length: 10886, dtype: int64

### Linear Regression

In [71]:
from sklearn.linear_model import LinearRegression

LinR = LinearRegression()
LinR.fit(X_train, y_train)

In [54]:
LinR.score(X_test, y_test)

1.0

In [55]:
LinR.coef_

array([ 1.09312861e-14, -1.84297022e-13,  1.19656473e-13,  6.26422547e-14,
       -2.85495516e-14,  2.02896460e-14, -2.02123604e-15, -7.55398637e-16,
        1.00000000e+00,  1.00000000e+00])

In [58]:
LinR.predict(X_test)

array([305., 268.,  93., ...,  71., 143.,   8.])

### SVM dengan kernel linear

In [60]:
from sklearn.svm import SVR

y_train1 = y_train.ravel()
SVM = SVR(kernel='linear', degree=3)
SVM.fit(X_train, y_train1)

In [61]:
SVM.score(X_test, y_test)

0.9999998285063599

In [64]:
SVM.coef_

array([[ 8.10629217e-04,  0.00000000e+00, -9.55115966e-04,
        -5.27459067e-05,  3.16175057e-04, -1.01504185e-04,
        -7.55772278e-05,  1.07819195e-05,  9.99528035e-01,
         9.99917780e-01]])

In [65]:
SVM.predict(X_test)

array([305.05567336, 268.06086729,  93.07680232, ...,  71.08655494,
       143.07630315,   8.09541438])

### MLP dengan hanya 1 hidden layer

In [66]:
from sklearn.neural_network import MLPRegressor

MLP = MLPRegressor(activation='tanh', hidden_layer_sizes=(100), max_iter=250)
MLP.fit(X_train, y_train)



In [67]:
MLP.predict(X_test)

array([301.00646263, 263.33746242,  89.01933968, ...,  69.63440233,
       139.9958078 ,   6.28474744])

In [68]:
MLP.score(X_test, y_test)

0.9900975007172478

### Scaler

In [74]:
from sklearn.preprocessing import MinMaxScaler
#define
scaler = MinMaxScaler()

#fit
scaler.fit(X_train, y_train)

#Transform
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

### 3.Evaluation


In [75]:
y_pred1 = LinR.predict(X_test_scaled)
y_pred2 = SVM.predict(X_test)
y_pred3 = MLP.predict(X_test_scaled)



### MAE (Mean Absolute Error)

In [76]:
from sklearn.metrics import mean_absolute_error

print ("Linear Regression:", mean_absolute_error(y_test, y_pred1))
print ("SVM:", mean_absolute_error(y_test, y_pred2))
print ("Neural Network:", mean_absolute_error(y_test, y_pred3))

Linear Regression: 189.66139956971313
SVM: 0.07028027993408802
Neural Network: 354.6771883430981


### MSE (Mean Squared Error)


In [81]:
from sklearn.metrics import mean_squared_error

print ("Linear Regression:", mean_squared_error(y_test, y_pred1))
print ("SVM:", mean_squared_error(y_test, y_pred2))
print ("Neural Network:", mean_squared_error(y_test, y_pred3))

Linear Regression: 68245.43294989587
SVM: 0.005550610796662817
Neural Network: 156835.2745283881




### RMSE

In [77]:
from sklearn.metrics import mean_squared_error

print ("Linear Regression:", mean_squared_error(y_test, y_pred1, squared=False))
print ("SVM:", mean_squared_error(y_test, y_pred2,squared=False))
print ("Neural Network:", mean_squared_error(y_test, y_pred3,squared=False))

Linear Regression: 261.23826854022724
SVM: 0.0745024214147622
Neural Network: 396.0243357779773


### MAPE (Mean Absolute Percentage Error)

In [84]:
from sklearn.metrics import mean_absolute_percentage_error

print("Linear Regression:", mean_absolute_percentage_error(y_test, y_pred1))
print("SVM:", mean_absolute_percentage_error(y_test, y_pred2))
print("Neural Network:", mean_absolute_percentage_error(y_test, y_pred3))

Linear Regression: 0.9985995102101889
SVM: 0.0042736609729552115
Neural Network: 8.856657674219477


### Serialization

In [85]:
LinR

In [86]:
import joblib

with open('model_svm.pkl','wb') as f:
    joblib.dump(LinR, f)