In [1]:
import lightgbm as lgb
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, KFold


This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


# LightGBM

1. インスタンス
(X, y)

## ノーマル
2. fit()
3. importance
4. check_params
5. predict(self, test, test_id, name):

## k-fold
6. fit_k_hold(self, n_split=3):
7. pred_k_hold(self, test, test_id, name):

# 回帰問題|Regression

In [4]:
class model_learning(object):
    """LightGBM regression helper with hold-out and K-fold training paths.

    Workflow:
        1. ``model_learning(X, y)``  -- store the data and build a 70/30 hold-out split.
        2. ``fit()``                 -- train one booster on the hold-out split.
        3. ``importance()`` / ``check_params()`` -- inspect the trained booster.
        4. ``predict(test, test_id, name)``      -- write hold-out-model predictions.
        5. ``fit_k_hold(n_split)``   -- train one booster per K-fold split.
        6. ``pred_k_hold(test, test_id, name)``  -- write fold-averaged predictions.

    ``X`` must support ``.iloc`` and ``.columns`` and ``y`` must support ``.iloc``
    (both are used by the methods below), i.e. pandas objects are expected.
    """

    # Training budget shared by fit() and fit_k_hold().
    NUM_BOOST_ROUND = 100
    EARLY_STOPPING_ROUNDS = 20

    def __init__(self, X, y):
        """Store the data, create the hold-out split and the default parameters.

        Args:
            X: Feature table.
            y: Regression target aligned with ``X``.
        """
        self.y = y
        self.X = X

        # Reproducible 70/30 hold-out split used by fit().
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(
            self.X, self.y, test_size=0.3, random_state=0
        )
        self.lgb_train = lgb.Dataset(self.X_train, self.y_train)
        self.lgb_eval = lgb.Dataset(self.X_val, self.y_val, reference=self.lgb_train)

        self.params = {
            'task' : 'train',
            'boosting_type' : 'gbdt',
            'objective' : 'regression',
            'metric' : {'l2'},
            # Not a training parameter: read by importance() to choose the
            # importance flavour, and stripped before calling lgb.train().
            'importance_type':'gain',
            'num_leaves' : 30,
            'learning_rate' : 0.1,
            'feature_fraction' : 0.8,
            'bagging_fraction' : 0.8,
            'bagging_freq': 5,
            'zero_as_missing':True,
            'verbosity' : -1,
        }

    def _train(self, train_set, valid_set):
        """Train one booster on ``train_set`` with early stopping on ``valid_set``."""
        train_params = {
            key: value for key, value in self.params.items() if key != 'importance_type'
        }
        # Early stopping goes through the callback API: the
        # ``early_stopping_rounds`` keyword of lgb.train() no longer exists in
        # recent LightGBM releases, while the callback works on old and new ones.
        return lgb.train(
            train_params,
            train_set,
            num_boost_round=self.NUM_BOOST_ROUND,
            valid_sets=[valid_set],
            callbacks=[lgb.early_stopping(self.EARLY_STOPPING_ROUNDS)],
        )

    @staticmethod
    def _mape(y_true, y_pred):
        """Return the mean absolute percentage error as a fraction (0.1 == 10%).

        Targets equal to zero produce a division by zero (inf/nan result).
        """
        y_true = np.asarray(y_true, dtype=float)
        y_pred = np.asarray(y_pred, dtype=float)
        return float(np.mean(np.abs((y_true - y_pred) / y_true)))

    def fit(self):
        """Train on the hold-out split.

        Sets ``self.gbm`` (the trained booster) and ``self.y_pred_lgb``
        (predictions for ``self.X_val`` at the best iteration).
        """
        self.gbm = self._train(self.lgb_train, self.lgb_eval)
        self.y_pred_lgb = self.gbm.predict(self.X_val, num_iteration=self.gbm.best_iteration)

    def importance(self):
        """Return feature importances of ``self.gbm``, most important first.

        The importance flavour is taken from ``self.params['importance_type']``
        (``'gain'`` by default in this class). Requires fit() to have been called.

        Returns:
            pandas.DataFrame with columns ``'特徴'`` (feature name) and ``'importance'``.
        """
        importance_type = self.params.get('importance_type', 'split')
        self.importance_ = pd.DataFrame({
            '特徴': self.X.columns,
            'importance': self.gbm.feature_importance(importance_type=importance_type),
        }).sort_values('importance', ascending=False)

        return self.importance_

    def check_params(self):
        """Return the parameter dict held by the trained booster ``self.gbm``."""
        # lgb.train() returns a Booster, which exposes its parameters via the
        # ``params`` attribute (``get_params()`` belongs to the sklearn wrappers).
        return self.gbm.params

    def predict(self, test, test_id, name):
        """Predict ``test`` with the hold-out model and write a submission file.

        Args:
            test: Feature table to predict.
            test_id: Identifiers written in the first column.
            name: Output path; written tab-separated, without header or index.
        """
        y_pred = self.gbm.predict(test, num_iteration=self.gbm.best_iteration)
        submit = pd.DataFrame({'id':test_id, 'keiyaku_pr':y_pred})
        submit.to_csv(name, index=False, header=False, sep='\t')

    def fit_k_hold(self, n_split=3, score_func=None):
        """Train one booster per K-fold split and score each on its validation part.

        Args:
            n_split: Number of folds.
            score_func: Callable ``score_func(y_true, y_pred) -> float`` used to
                score each fold. Defaults to the mean absolute percentage error,
                matching the ``self.mape`` attribute this method fills in.

        Sets ``self.models`` (one booster per fold), ``self.score_list``
        (one score per fold) and ``self.mape`` (mean of ``score_list``).
        """
        self.n_split = n_split
        if score_func is None:
            score_func = self._mape
        kf = KFold(n_splits=self.n_split)

        self.score_list = []
        self.models = []

        for train_index, val_index in kf.split(self.X, self.y):
            X_train = self.X.iloc[train_index]
            y_train = self.y.iloc[train_index]

            X_val = self.X.iloc[val_index]
            y_val = self.y.iloc[val_index]

            lgb_train = lgb.Dataset(X_train, y_train)
            # The validation set must reference THIS fold's training set, not
            # the hold-out training set built in __init__.
            lgb_eval = lgb.Dataset(X_val, y_val, reference=lgb_train)
            gbm = self._train(lgb_train, lgb_eval)

            y_pred_lgb = gbm.predict(X_val, num_iteration=gbm.best_iteration)

            # Evaluation metric for this fold.
            self.score_list.append(score_func(y_val, y_pred_lgb))
            self.models.append(gbm)

        self.mape = np.mean(self.score_list)
        print("mean_score:", self.mape)

    def pred_k_hold(self, test, test_id, name, as_int=True):
        """Average the fold models' predictions for ``test`` and write them to CSV.

        Args:
            test: Feature table to predict.
            test_id: Identifiers written in the first column.
            name: Output path; written comma-separated, without header or index.
            as_int: When True (default, the historical behaviour) the averaged
                prediction is cast to ``int``, which truncates the fractional
                part. Pass False to keep the float predictions.

        Raises:
            RuntimeError: If no fold models are available (call fit_k_hold() first).
        """
        models = getattr(self, 'models', None)
        if not models:
            raise RuntimeError("no fold models available; call fit_k_hold() first")

        # One column per trained fold model, one row per test sample.
        test_pred = np.zeros((len(test), len(models)))

        for fold_, gbm in enumerate(models):
            test_pred[:, fold_] = gbm.predict(test, num_iteration=gbm.best_iteration)

        # Average across folds.
        pred = np.mean(test_pred, axis=1)
        if as_int:
            pred = pred.astype(int)

        submit = pd.DataFrame({'id':test_id, 'pred':pred})
        submit.to_csv(name, index=False, header=False)

# 分類問題|Classification