# model_GS_Ridge

#### Grid_Search
https://scikit-learn.org/stable/modules/model_evaluation.html#scoring-parameter  
https://www.codexa.net/hyperparameter-tuning-python/

In [1]:
# !jupyter nbconvert --to python model_GS_Ridge.ipynb

[NbConvertApp] Converting notebook model_GS_Ridge.ipynb to python
[NbConvertApp] Writing 2604 bytes to model_GS_Ridge.py


In [1]:
# Silence every warning for the rest of the session.
# Note: this is a blanket filter, so deprecation and convergence warnings
# raised by the libraries below are hidden as well.
import warnings
warnings.filterwarnings('ignore')

In [1]:
import pickle

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import RidgeClassifier

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

from sklearn.metrics import f1_score

%matplotlib inline

In [3]:
def importances(model, feature_names=None, top_n=20):
    """Plot the highest per-feature scores of a fitted model as horizontal bars.

    Tree-based estimators expose ``feature_importances_``. Linear estimators
    such as ``RidgeClassifier`` expose ``coef_`` instead, so when
    ``feature_importances_`` is missing the mean absolute coefficient per
    feature is used as the score. Coefficient magnitudes are only comparable
    across features when the inputs were standardized before fitting.

    Parameters
    ----------
    model : fitted estimator
        Must provide either ``feature_importances_`` or ``coef_``.
    feature_names : array-like, optional
        Labels aligned with the model's input columns. When omitted, the
        notebook-level variable ``feature_X`` is used, as before.
    top_n : int, default 20
        Number of highest-scoring features to draw.

    Raises
    ------
    ValueError
        If ``top_n`` is smaller than 1 or the number of labels does not match
        the number of scores.
    AttributeError
        If the model exposes neither ``feature_importances_`` nor ``coef_``.
    """
    if top_n < 1:
        # indices[-0:] would silently select every feature.
        raise ValueError("top_n must be a positive integer")

    if hasattr(model, "feature_importances_"):
        scores = np.asarray(model.feature_importances_, dtype=float)
        title = 'decision tree feature importance'
        xlabel = 'feature importance'
    elif hasattr(model, "coef_"):
        # coef_ may be 1-D or (n_classes, n_features); collapse to one
        # non-negative score per feature.
        scores = np.abs(np.atleast_2d(model.coef_)).mean(axis=0)
        title = 'linear model coefficient magnitude'
        xlabel = 'mean |coefficient|'
    else:
        raise AttributeError(
            "model has neither 'feature_importances_' nor 'coef_'"
        )

    if feature_names is None:
        # Backward-compatible default: names prepared elsewhere in the notebook.
        feature_names = feature_X
    names = np.asarray(feature_names)
    if len(names) != len(scores):
        raise ValueError(
            "feature_names has %d entries but the model has %d features"
            % (len(names), len(scores))
        )

    # argsort is ascending, so the tail holds the largest scores and the
    # biggest bar ends up at the top of the chart.
    top = np.argsort(scores)[-top_n:]
    positions = range(len(top))

    plt.barh(positions, scores[top], align='center')
    plt.yticks(positions, names[top])
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('variable')
    plt.show()

#### =========要変更===========

In [4]:
# Where the tuned model is stored: the pickle file name is built as
# path + modelName + '.pickle'. Adjust both values per experiment.
path = "../data/models/"
modelName = "model_GS_Ridge"

#### ========================

In [14]:
# Load the pre-split feature matrices (X) and targets (y) for the train,
# validation and test splits.
# NOTE: unpickling executes arbitrary code, so these files must come from a
# trusted source (they are expected to be produced by this project).
train_X, valid_X, test_X = (
    pd.read_pickle('../data/feature/train_X.pickle'),
    pd.read_pickle('../data/feature/valid_X.pickle'),
    pd.read_pickle('../data/feature/test_X.pickle'),
)

train_y, valid_y, test_y = (
    pd.read_pickle('../data/feature/train_y.pickle'),
    pd.read_pickle('../data/feature/valid_y.pickle'),
    pd.read_pickle('../data/feature/test_y.pickle'),
)

In [None]:
# Standardize the features.
# The scaler is fitted on the training split only and then applied unchanged
# to the validation AND test splits, so the model is always evaluated on data
# in the same scale it was trained on (previously test_X was left unscaled).
# StandardScaler returns plain NumPy arrays; the results are wrapped back into
# DataFrames so the column names stay available for the importance plot.
# Assumes the three X splits are pandas DataFrames — confirm against the
# feature-building notebook.
# Caution: the inputs are overwritten, so re-running this cell without
# reloading the pickles would scale already-scaled data.
def _scaled_frame(values, like):
    """Rebuild a DataFrame from scaled values using `like`'s index/columns."""
    return pd.DataFrame(values, index=like.index, columns=like.columns)

scaler = StandardScaler()
train_X = _scaled_frame(scaler.fit_transform(train_X), train_X)
valid_X = _scaled_frame(scaler.transform(valid_X), valid_X)
test_X = _scaled_frame(scaler.transform(test_X), test_X)

In [10]:
"""ハイパーパラメータの max_depth(木の深さ), 
# n_estimators(決定木の数)に関してグリッドサーチを行う
# グリッドサーチに使用するパラメータの値を用意
"""
seed=1
params = {"alpha": [25,10,4,2,1.0,0.8,0.5,0.3,0.2,0.1,0.05,0.02,0.01]}

#### =========要変更===========

In [11]:
# Base estimator handed to the grid search; `alpha` is supplied by the search
# grid, only the random seed is fixed here.
model=RidgeClassifier(random_state=seed)

#### ========================

In [12]:
# 5-fold cross-validation, shuffled with a fixed seed so the folds are
# reproducible between runs.
cv_splitter = KFold(n_splits=5, shuffle=True, random_state=seed)

# Exhaustive search over `params`, scored with F1. Train-fold scores are
# recorded as well, and per-fit progress is printed (verbose=3).
grid = GridSearchCV(
    model,
    params,
    scoring='f1',
    cv=cv_splitter,
    n_jobs=1,
    verbose=3,
    return_train_score=True,
)

In [13]:
# Run the cross-validated grid search on the training split.
grid.fit(train_X,  train_y)

Fitting 5 folds for each of 13 candidates, totalling 65 fits
[CV 1/5] END .......................................alpha=25; total time=   0.0s
[CV 2/5] END .......................................alpha=25; total time=   0.0s
[CV 3/5] END .......................................alpha=25; total time=   0.0s
[CV 4/5] END .......................................alpha=25; total time=   0.0s
[CV 5/5] END .......................................alpha=25; total time=   0.0s
[CV 1/5] END .......................................alpha=10; total time=   0.0s
[CV 2/5] END .......................................alpha=10; total time=   0.0s
[CV 3/5] END .......................................alpha=10; total time=   0.0s
[CV 4/5] END .......................................alpha=10; total time=   0.0s
[CV 5/5] END .......................................alpha=10; total time=   0.0s
[CV 1/5] END ........................................alpha=4; total time=   0.0s
[CV 2/5] END ...................................

GridSearchCV(cv=KFold(n_splits=5, random_state=1, shuffle=True),
             estimator=RidgeClassifier(random_state=1), n_jobs=1,
             param_grid={'alpha': [25, 10, 4, 2, 1.0, 0.8, 0.5, 0.3, 0.2, 0.1,
                                   0.05, 0.02, 0.01]},
             return_train_score=True, scoring='f1', verbose=3)

In [None]:
# Show the configuration of the estimator selected by the search.
print(grid.best_estimator_)

In [14]:
# Use the estimator selected by the grid search as the final model.
# This rebinds `model`, replacing the unfitted base estimator defined earlier.
model = grid.best_estimator_

In [15]:
# Persist the tuned model to <path><modelName>.pickle.
model_file = path + modelName + '.pickle'
with open(model_file, 'wb') as fh:
    pickle.dump(model, fh)

#### ========================

In [35]:
# Predict labels for the held-out test split.
# NOTE(review): the model is trained on standardized features, so test_X must
# have gone through the same fitted scaler before this call — confirm.
pred=model.predict(test_X)

In [3]:
# F1 score of the predictions against the test labels.
print(f1_score(test_y, pred))

#### ========================

In [44]:
# Keep the variable names around for the importance plot.
# Once train_X has been through StandardScaler.fit_transform it may be a plain
# NumPy array with no `.columns`; in that case fall back to the names the
# scaler recorded at fit time (available when it was fitted on a DataFrame
# with string column names), and finally to positional labels.
if hasattr(train_X, "columns"):
    feature_X = train_X.columns
else:
    feature_X = pd.Index(
        getattr(scaler, "feature_names_in_", range(np.shape(train_X)[1]))
    )
feature_y = train_y.name

In [4]:
# Draw the variable-importance chart for the tuned model.
importances(model)