In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split




# Load the dataset and choose the modelling columns.
df = pd.read_csv('data.csv')
target = 'count'
# Every column except 'date' and the target is used as a predictor.
feature = df.drop(columns=['date', target]).columns

# Hold out 20% of the rows as the test set (seeded for repeatability).
train, test = train_test_split(df, train_size=0.8, random_state=2)
(train.shape, test.shape)

((1086, 6), (272, 6))

In [2]:
# Carve a 20% validation set out of the current `train` frame.
# NOTE(review): `train` is overwritten with a subset of itself, so re-running
# only this cell shrinks the training set again; re-run the split cell first.
train, val = train_test_split(train, train_size=0.8, random_state=2)
(train.shape, val.shape)

((868, 6), (218, 6))

In [3]:
# Separate predictors (X) and target (y) for each of the three splits.
X_train, y_train = train[feature], train[target]
X_val, y_val = val[feature], val[target]
X_test, y_test = test[feature], test[target]

In [4]:
from xgboost import XGBRegressor
from sklearn.metrics import r2_score
# Fit an XGBoost regressor with default hyper-parameters.
xgboost_model = XGBRegressor()
xgboost_model.fit(X_train, y_train)

# XGBRegressor evaluation metrics: R^2 on the training and validation splits.
for split_label, X_part, y_part in (
    ('훈련 평가지표', X_train, y_train),
    ('검증 평가지표', X_val, y_val),
):
    print(split_label)
    print('xgboost_model r2 : ', r2_score(y_part, xgboost_model.predict(X_part)))

훈련 평가지표
xgboost_model r2 :  0.9968320696133938
검증 평가지표
xgboost_model r2 :  0.5921421838124188


In [6]:
!pip install lightgbm
from lightgbm import LGBMRegressor
lgb_model = LGBMRegressor()
lgb_model.fit(X_train, y_train)

print('훈련 평가지표')
print('lgb_model r2 : ',r2_score(y_train,lgb_model.predict(X_train)))
print('검증 평가지표')
print('xgboost_model r2 : ',r2_score(y_val,lgb_model.predict(X_val)))

Collecting lightgbm
  Downloading lightgbm-3.2.1-py3-none-win_amd64.whl (1.0 MB)
Installing collected packages: lightgbm
Successfully installed lightgbm-3.2.1
훈련 평가지표
lgb_model r2 :  0.907284767455594
검증 평가지표
xgboost_model r2 :  0.6473413429090928


In [7]:
from sklearn.model_selection import GridSearchCV


# Re-split the full frame with a different seed; this search uses only a
# train/test pair (no separate validation frame is built here).
train, test = train_test_split(df, train_size=0.8, random_state=40)
X_train, y_train = train[feature], train[target]
X_test, y_test = test[feature], test[target]

# Hyper-parameter grid explored for the XGBoost regressor.
params = {
    "learning_rate": (0.05, 0.10, 0.15),
    "max_depth": [3, 4, 5, 6, 8],
    "min_child_weight": [1, 3, 5, 7],
    "gamma": [0.0, 0.1, 0.2],
}

# Exhaustive grid search with 3-fold cross-validation, scored by R^2.
clf = GridSearchCV(
    estimator=xgboost_model,
    param_grid=params,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=0,
)
clf.fit(X_train, y_train)
print("Best parameters:", clf.best_params_)

# Score the estimator selected by the search on the train and test splits.
grid_model = clf.best_estimator_
for split_label, X_part, y_part in (
    ('훈련 평가지표', X_train, y_train),
    ('테스트 평가지표', X_test, y_test),
):
    print(split_label)
    print('xgboost_model r2 : ', r2_score(y_part, grid_model.predict(X_part)))

Best parameters: {'gamma': 0.0, 'learning_rate': 0.05, 'max_depth': 4, 'min_child_weight': 7}
훈련 평가지표
xgboost_model r2 :  0.775150174806909
테스트 평가지표
xgboost_model r2 :  0.6934450357209212


In [9]:
# Rebuild the train/test split (seed 40) so this cell does not depend on the
# X/y frames left behind by whichever cell ran before it.
train, test = train_test_split(df,random_state=40,train_size=0.8)

X_train = train[feature]
y_train = train[target]
X_test = test[feature]
y_test = test[target]


# Hyper-parameter grid explored for the LightGBM regressor.
params = {
    'num_leaves': [7, 14, 21, 28, 31, 50],
    'learning_rate': [0.1, 0.03, 0.003],
    'max_depth': [-1, 3, 5],
    'n_estimators': [50, 100, 200, 500]
}

# Exhaustive grid search with 3-fold cross-validation, scored by R^2.
clf = GridSearchCV(estimator=lgb_model, 
                   param_grid=params,
                   cv=3,
                   scoring='r2',
                   verbose=0, 
                   n_jobs=-1                
                  )

clf.fit(X_train, y_train)
print("Best parameters:", clf.best_params_)

# Score the estimator selected by the search on the train and test splits.
# Labels corrected: these scores belong to the LightGBM model, not XGBoost.
grid_model = clf.best_estimator_
print('훈련 평가지표')
print('lgb_model r2 : ',r2_score(y_train,grid_model.predict(X_train)))
print('테스트 평가지표')
print('lgb_model r2 : ',r2_score(y_test,grid_model.predict(X_test)))

Best parameters: {'learning_rate': 0.1, 'max_depth': -1, 'n_estimators': 50, 'num_leaves': 7}
훈련 평가지표
xgboost_model r2 :  0.7511630365874991
테스트 평가지표
xgboost_model r2 :  0.6980710004798563


In [11]:
import joblib 
# Persist the most recently selected grid-search estimator to disk.
# NOTE(review): `grid_model` is whatever the last-run search cell assigned;
# the file name assumes that was the LightGBM search.
joblib.dump(value=grid_model, filename='lgbm.pkl')

['lgbm.pkl']

In [12]:
# Reload the saved estimator. joblib files are pickle-based, so only load
# files that come from a trusted source.
model = joblib.load(filename='lgbm.pkl')

In [22]:
# Smoke-test the reloaded model on the first training row (kept as a 1-row frame).
model.predict(X_train.head(1))

array([3499.43894305])