### 라이브러리 선언

In [22]:
# tensorflow 모델 (인공지능) 라이브러리
import tensorflow as tf
# 학습모델 불러오기용 라이브러리
from tensorflow.keras import models
# 변수타입을 포함해 데이터 불러오기/저장하기 라이브러리
import pickle
# 스프레드시트 형태 데이터 조작 라이브러리
import pandas as pd
import numpy as np

In [None]:
# Mount Google Drive at /content/gdrive so that the model and reference
# files stored on Drive can be opened through ordinary file paths.
from google.colab import drive
drive.mount("/content/gdrive")

### 1. 모델 및 관련 참조데이터 불러오기

In [11]:
# Location of the saved Keras model (.h5) on the mounted Google Drive.
modelPath = "/content/gdrive/MyDrive/Colab Notebooks/h5models/selloutmodel.h5"
# Load the previously saved model from that path.
loadedModel = models.load_model(modelPath)

In [12]:
# Path of the pickled reference data kept in the same folder as the model file.
refPath = "/content/gdrive/MyDrive/Colab Notebooks/h5models/selloutmodel.dump"

In [14]:
# Deserialize the reference data that accompanies the model.
# NOTE(review): pickle.load can execute arbitrary code while loading; only
# open dump files that come from a trusted source.
with open(refPath, "rb") as fr:
    loadedRef = pickle.load(fr)

### 2. 모델 재훈련

In [16]:
### 1. 추가 학습데이터를 불러와야함
### 2. 전처리를 동일하게 학습했던것처럼 해야한다.
### 3. 컴파일을 다시 해준다 (컴파일의 OPTIMIZER, LOSS는 변경 가능하다!)
### 4. 훈련 한다.

### 2-1. 추가 학습데이터 수집

In [44]:
# CSV read into orgDataset below.
dataUrl = "https://raw.githubusercontent.com/hyokwan/python-lecture/master/dataset/feature_regression_example.csv"
# CSV read into newDataset below; the additional training rows are taken from it.
dataAllUrl = "https://raw.githubusercontent.com/hyokwan/python-lecture/master/dataset/kopo_decision_tree_all_new.csv"

In [45]:
# Download both CSV files into DataFrames.
orgDataset = pd.read_csv(dataUrl)
newDataset = pd.read_csv(dataAllUrl)

In [46]:
# Columns present in orgDataset but missing from newDataset; an empty set
# means newDataset provides every column of orgDataset.
set(orgDataset.columns).difference(newDataset.columns)

set()

In [47]:
# Columns that together identify one group of rows (used for groupby and de-duplication).
groupKey = ["REGIONID","PRODUCTGROUP","ITEM"]

In [72]:
# Row-count threshold: the number of rows in the original dataset.
cntKnob = len(orgDataset)

In [73]:
# Count the QTY entries of every groupKey combination in the full dataset.
newDatasetGroup = (
    newDataset
    .groupby(groupKey)["QTY"]
    .agg(["count"])
    .reset_index()
)
# Give the count column a descriptive name.
newDatasetCnt = newDatasetGroup.rename(columns={"count": "QTY_COUNT"})
# Keep only the groups that have at least cntKnob rows; these are the
# candidates for the additional training data.
newDatasetFiltered = newDatasetCnt[newDatasetCnt["QTY_COUNT"] >= cntKnob]

In [74]:
# drop_duplicates returns a new DataFrame instead of modifying in place, so
# its result has to be assigned back; otherwise the call has no effect.
newDatasetFiltered = newDatasetFiltered.drop_duplicates(subset=groupKey)
# Renumber the rows from 0 so the first candidate can be addressed as row 0.
newDatasetFiltered = newDatasetFiltered.reset_index(drop=True)

In [75]:
# Take the key values of the first qualifying group (row 0) as the
# retraining target.
targetRegion = newDatasetFiltered.at[0, "REGIONID"]
targetProduct = newDatasetFiltered.at[0, "PRODUCTGROUP"]
targetItem = newDatasetFiltered.at[0, "ITEM"]
print(targetRegion, targetProduct, targetItem)

A01 PG01 ITEM013


In [104]:
# Select the rows of the target region / product group / item.
# .copy() makes trainDataSet an independent DataFrame, so the columns added
# to it during preprocessing do not raise SettingWithCopyWarning.
trainDataSet = newDataset.loc[
    (newDataset.REGIONID == targetRegion)
    & (newDataset.PRODUCTGROUP == targetProduct)
    & (newDataset.ITEM == targetItem)
].copy()

In [105]:
# Show (rows, columns) of the selected training data.
trainDataSet.shape

(130, 12)

### 2-2. 데이터 전처리

In [80]:
## step1: 기존 feature, label 확인
## step2: 기존 모델의 전처리함수 불러오기
## step3: feature에 대해서 전처리함수 적용

In [85]:
# Show the feature names stored with the model. `modelDict` is not defined
# anywhere in this notebook; the feature list is the first entry of the
# loaded reference data.
loadedRef[0]

['PROMOTION_LE', 'HOLIDAY_LE', 'PROMOTION_SC', 'HCLUS_SC']

In [87]:
## step1: look up the feature names and the label used by the existing model
features, label = loadedRef[0], loadedRef[1]

In [95]:
## step2: fetch the preprocessing objects saved with the existing model
featuresPrefunc = loadedRef[2]

In [89]:
## step3: feature에 대해서 전처리함수 적용

In [None]:
# Raw input columns, paired by position with the engineered names in `features`.
orgFeatures = ["PROMOTION","HOLIDAY","PRO_PERCENT","HCLUS"]

# trainDataSet is derived from a filtered selection of newDataset; work on an
# explicit copy so that adding columns never targets a slice
# (avoids pandas' SettingWithCopyWarning).
trainDataSet = trainDataSet.copy()

# Label-encode the two categorical columns.
# NOTE(review): featuresPrefunc[0] is applied to both columns and
# featuresPrefunc[1] is never used — confirm whether HOLIDAY was fitted with
# its own encoder at index 1.
labelEnco = featuresPrefunc[0]
trainDataSet[features[0]] = labelEnco.transform(trainDataSet[orgFeatures[0]])
trainDataSet[features[1]] = labelEnco.transform(trainDataSet[orgFeatures[1]])

# Scale the two numeric columns. Each is passed as a one-column DataFrame
# (double brackets) rather than as a Series.
scaledFunc = featuresPrefunc[2]
trainDataSet[features[2]] = scaledFunc.transform(trainDataSet[[orgFeatures[2]]])
scaledFunc = featuresPrefunc[3]
trainDataSet[features[3]] = scaledFunc.transform(trainDataSet[[orgFeatures[3]]])

In [135]:
# Display the training data, now including the encoded and scaled feature columns.
trainDataSet

Unnamed: 0,REGIONID,PRODUCTGROUP,PRODUCT,ITEM,YEARWEEK,YEAR,WEEK,QTY,HOLIDAY,HCLUS,PROMOTION,PRO_PERCENT,PROMOTION_LE,HOLIDAY_LE,PROMOTION_SC,HCLUS_SC
419,A01,PG01,P01,ITEM013,201501,2015,1,6,Y,1,N,0.000000,0,1,0.000000,0.25
420,A01,PG01,P01,ITEM013,201502,2015,2,4,N,4,N,0.000000,0,0,0.000000,1.00
421,A01,PG01,P01,ITEM013,201503,2015,3,7,N,4,N,0.000000,0,0,0.000000,1.00
422,A01,PG01,P01,ITEM013,201504,2015,4,17,Y,2,N,0.000000,0,1,0.000000,0.50
423,A01,PG01,P01,ITEM013,201505,2015,5,7,N,4,N,0.000000,0,0,0.000000,1.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544,A01,PG01,P01,ITEM013,201730,2017,30,3,N,4,Y,0.406754,1,0,0.964128,1.00
545,A01,PG01,P01,ITEM013,201732,2017,32,3,N,4,Y,0.406754,1,0,0.964128,1.00
546,A01,PG01,P01,ITEM013,201734,2017,34,1,Y,1,Y,0.406754,1,1,0.964128,0.25
547,A01,PG01,P01,ITEM013,201736,2017,36,1,Y,1,Y,0.406754,1,1,0.964128,0.25
