In [1]:
import shutil, os, warnings, re, datetime, multiprocessing
warnings.filterwarnings(action='ignore')
import numpy as np
import pandas as pd
import tensorflow as tf
import _pickle as pickle
from gensim.models import Doc2Vec
from itertools import chain, combinations, permutations, product, combinations_with_replacement
from sklearn.model_selection import train_test_split
from collections import namedtuple, Counter
from sklearn.preprocessing import MultiLabelBinarizer, LabelEncoder
from sklearn.feature_extraction.text import CountVectorizer
warnings.filterwarnings(action='default')

In [2]:
def ItemOneHotGenerator(data, threshold = 30):
    """
    data : Data format must be pandas Series with rows having list of items.
              reset_index() required.
              example : '6601000010,6601000024,....,6601000001'
    threshold : To exclude super sparse case of cross columns
    dependencies : collections.Counter
                              sklearn.feature_extraction.text.CountVectorizer
    """    
    cross_cnt = Counter()
    for ii, jj in data.iteritems():
        litem = sorted(set(jj.split(',')))
        cross_cnt.update(litem)
    target_lcross = set({x : cross_cnt[x] for x in cross_cnt if cross_cnt[x] > threshold}.keys()) 
    
    cross_feature = []
    for ii, jj in data.iteritems():
        litem = sorted(set(jj.split(',')))
        lcross = set(litem).intersection(target_lcross)
        cross_feature.append(lcross)
        
    lcross_feature = map(lambda x:' '.join(x), cross_feature)
    vect = CountVectorizer(tokenizer=str.split)
    mcross_feature = vect.fit_transform(lcross_feature)
    with open('./ItemOneHotGenerator.pkl','wb') as f:
        pickle.dump(vect, f)
    return pd.DataFrame(mcross_feature.todense(), columns=vect.get_feature_names())

def CrossColumnGenerator(data, threshold = 5):
    """
    data : Data format must be pandas Series with rows having list of items.
              reset_index() required.
              example : '6601000010,6601000024,....,6601000001'
    threshold : To exclude super sparse case of cross columns
    dependencies : collections.Counter
                              sklearn.feature_extraction.text.CountVectorizer
    """
    cross_cnt = Counter()
    for ii, jj in data.iteritems():
        litem = sorted(set(jj.split(',')))
        cross_cnt.update([str(kk + '_' + ll) for kk, ll in combinations(litem, 2)])
        
    target_lcross = set({x : cross_cnt[x] for x in cross_cnt if cross_cnt[x] >= threshold}.keys()) 
    
    cross_feature = []
    for ii, jj in data.iteritems():
        litem = sorted(set(jj.split(',')))
        lcross = set([str(kk + '_' + ll) for kk, ll in combinations(litem, 2)]).intersection(target_lcross)
        cross_feature.append(lcross)

    lcross_feature = map(lambda x:' '.join(x), cross_feature)
    vect = CountVectorizer(tokenizer=str.split)
    mcross_feature = vect.fit_transform(lcross_feature)
    with open('./CrossColumnGenerator.pkl','wb') as f:
        pickle.dump(vect, f)
    return pd.DataFrame(mcross_feature.todense(), columns=vect.get_feature_names())

def myDoc2Vec(data, d2v_size, model_loc, epoch=None, trainTF = False):
    """
    data : DataFrame([['DocID','Item']])
    """
    data.columns = ['docID','item']
    data1 = [(str(row['item']).split(), row['docID']) for idx, row in data.iterrows()]
    data2 = namedtuple('TaggedDocument', 'words tags')
    tagged_data2 = [data2(d, [c]) for d, c in data1]
    
    if trainTF:
        model = Doc2Vec(
            dm = 0,  # 0 : PV-DBOW
            dbow_words = 0,  # 0 : train doc-vec only(faster)
            window = 8, vector_size = d2v_size, alpha = 0.025, min_alpha = 0.025, seed = 0, sample= 1e-5, min_count=3, 
            workers=multiprocessing.cpu_count(), hs = 0, negative = 10)
        model.build_vocab(tagged_data2)
        print('New model training started')
        model.train(documents  = tagged_data2, total_examples = data.shape[0], epochs = epoch)
        model.save(model_loc)
        print('New model training done. check--',model_loc)
    else:
        model=Doc2Vec.load(model_loc)
        embedding_df = pd.DataFrame(
            data = [model.infer_vector(doc.words) for doc in tagged_data2], 
            columns = ["d2v"+str(i) for i in range(d2v_size)])
        return embedding_df
    
def train_eval_filesplit(filenames, train_rate = 0.8):
    trainfiles = []
    evalfiles = []
    filecnt = len(filenames)
    trainloop = int(round(filecnt/(filecnt - int(filecnt*train_rate)),0))
    for ii in range(filecnt):
        if ii % trainloop == 0:
            evalfiles.append(filenames[ii])
        else:
            trainfiles.append(filenames[ii])
    filedict = {'train_files':trainfiles,'eval_files':evalfiles}
    return filedict

def custom_evaluation():
    print('Start Target Label Importing')
    true_label = []
    for ii in range(len(predict_filenames)):
        print(predict_filenames[ii],'is in progress...')
        tmp = np.loadtxt(fname=predict_filenames[ii],delimiter=',',dtype='str')
        true_label.extend(tmp[:,-1].tolist())
    print('Target Label Import Process DONE')

    print('Start Prediction')
    p_result = tot_model.predict(input_fn=pred_input_fn, yield_single_examples=False)
    p_result_final = []
    for num, item in enumerate(p_result):
        if num%10000==0:
            print(num)
        if num>=len(true_label):
            break
        else:
            p_result_final.append(item['probabilities'])
    print('Labe Prediction Process DONE')

    print('Start 3, 5, 10 recommendation scenario evaluation')
    list_TopN=[3,5,10]
    for ii in range(len(list_TopN)):
        TopN = list_TopN[ii]
        calc_rate = []
        for ii in range(len(true_label)):
            sort_idx = (np.argsort(np.argsort(p_result_final[ii]))>=(len(p_result_final[ii][0])-TopN)).reshape(-1)
            calc_rate.append(int(true_label[ii] in np.array(iK['y_classes'])[sort_idx].tolist()))

        print("Accuracy : %.2f" %(np.mean(calc_rate)))
        print("[%d]개를 추천하면 그 중에 [%.2f]개를 구매함" %(TopN, np.mean(calc_rate)*TopN))
    print('Scenario Evaluation Process DONE')
    
def input_fn(data_files, batch_size, predict=False): #
    def parse_csv(value):
        columns = tf.decode_csv(value, record_defaults=[[""]]*6+[[0]])
        label = columns[-1]
        del columns[-1]
        features = dict(zip(df4.columns.tolist()[:-1],columns))
        out = features, label
        return out
    
    dataset = tf.data.TextLineDataset(data_files)
    if predict==False:
        dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size = 100))
    dataset = dataset.apply(tf.contrib.data.map_and_batch(map_func=parse_csv, 
                                                          batch_size=batch_size, 
                                                          num_parallel_calls = 16))
    dataset = dataset.cache()
    dataset = dataset.prefetch(buffer_size = 50)
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels

In [3]:
start = datetime.datetime.now()
주문_2014 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문_2014.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문_2015 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문_2015.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문_2016 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문_2016.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문_2017 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문_2017.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문_2018 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문_2018.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품_2014 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품_2014.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품_2015 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품_2015.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품_2016 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품_2016.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품_2017 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품_2017.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품_2018 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품_2018.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품출하_2014 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품출하_2014.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품출하_2015 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품출하_2015.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품출하_2016 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품출하_2016.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품출하_2017 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품출하_2017.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
주문상품출하_2018 = pd.read_csv('C:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가5차_180920/주문상품출하_2018.csv', encoding = 'utf-8', index_col=0, dtype = 'str')
상품마스터 = pd.read_csv('c:/Users/kim85/OneDrive/Project_AgileSoda/원본/추가6차_181005/상품마스터.csv',encoding ='utf-8',dtype = 'str')
조직마스터 = pd.read_csv('c:/users/kim85/OneDrive/Project_AgileSoda/원본/추가6차_181005/조직마스터.csv', encoding ='utf-8', dtype ='str')
사업장= pd.read_csv('c:/users/kim85/OneDrive/Project_AgileSoda/원본/추가6차_181005/사업장.csv', encoding ='utf-8', dtype ='str')
공사유형 = pd.read_csv('c:/users/kim85/OneDrive/Project_AgileSoda/원본/추가6차_181005/공사유형.csv', encoding ='utf-8', dtype ='str')

주문 = pd.concat([주문_2014, 주문_2015, 주문_2016, 주문_2017, 주문_2018])
주문상품 = pd.concat([주문상품_2014, 주문상품_2015, 주문상품_2016, 주문상품_2017, 주문상품_2018])
주문상품출하 = pd.concat([주문상품출하_2014, 주문상품출하_2015, 주문상품출하_2016, 주문상품출하_2017, 주문상품출하_2018])

# 주문 처리
주문 = 주문.reset_index()[['ORDE_IDEN_NUMB','CONS_IDEN_NAME','GROUPID','CLIENTID','BRANCHID',
                       'DELI_AREA_CODE','REGI_DATE_TIME','ORDE_USER_ID']].drop_duplicates(keep='first')
주문상품 = 주문상품.reset_index()[['ORDE_IDEN_NUMB','ORDE_SEQU_NUMB','GOOD_IDEN_NUMB',
                           'ORDE_REQU_QUAN']].drop_duplicates(keep='first')
주문상품 = 주문상품.groupby(['ORDE_IDEN_NUMB','GOOD_IDEN_NUMB'])['ORDE_REQU_QUAN'].agg('sum').reset_index()
주문상품출하 =  주문상품출하.reset_index()[['ORDE_IDEN_NUMB', 'DELI_STAT_FLAG']].drop_duplicates(keep='first')
주문상품출하 = 주문상품출하[주문상품출하.DELI_STAT_FLAG=='70'].ORDE_IDEN_NUMB.unique()

주문상품_주문 = pd.merge(주문상품, 주문, on = 'ORDE_IDEN_NUMB', how = 'left')
주문상품_주문_출하 = 주문상품_주문[주문상품_주문.ORDE_IDEN_NUMB.isin(주문상품출하)]
주문전체 = 주문상품_주문_출하[주문상품_주문_출하.GROUPID != '101'].drop_duplicates(keep='first')

# 상품 처리
상품마스터 = 상품마스터[['good_iden_numb','cate_id','good_name','good_spec','good_type','repre_good']].drop_duplicates(keep='first')
상품마스터 = 상품마스터.rename(columns = {'good_iden_numb':'GOOD_IDEN_NUMB'})
# repre_good - Y : 옵션대표상품, N : 단품, P : 옵션상품 // good_type - 10 : 일반, 20 : 지정,60 : 공구, 70 : 안전,80 : 보안

상품주문전체 = pd.merge(주문전체, 상품마스터, on = 'GOOD_IDEN_NUMB', how = 'left')
상품주문전체 = 상품주문전체[상품주문전체.repre_good=='N'] # 옵션상품 제외

# 사업장 처리
사업장 = pd.concat([
        사업장[['BRANCHID','AREATYPE','BRANCHBUSITYPE','BRANCHBUSICLAS','WORKID']].rename(columns={'BRANCHID':'BORGID'}), 
        사업장[['BRANCHCD','AREATYPE','BRANCHBUSITYPE','BRANCHBUSICLAS','WORKID']].rename(columns={'BRANCHCD':'BORGID'})
    ], axis=0).drop_duplicates(keep='first')

공사유형_사업장 = pd.merge(
    사업장, 
    공사유형[['WORKID','WORKNM']].drop_duplicates(keep='first'), 
    how = 'left', on = 'WORKID')

조직마스터 = 조직마스터[(조직마스터.BORGTYPECD == 'BCH') & (조직마스터.SVCTYPECD == 'BUY')] # 사업장레벨 및 구매사만
조직마스터 = pd.concat([
        조직마스터[['BORGID','BORGNM']], 
        조직마스터[['BORGCD','BORGNM']].rename(columns={'BORGCD':'BORGID'})
    ], axis=0).drop_duplicates(keep='first')

조직전체 = pd.merge(조직마스터, 공사유형_사업장, how = 'left', on = 'BORGID').rename(columns={'BORGID':'BRANCHID'})

df = pd.merge(상품주문전체,조직전체,how = 'left',on = 'BRANCHID')
# 불필요컬럼 제외
df = df.drop([
        'CONS_IDEN_NAME','GROUPID','CLIENTID','ORDE_USER_ID','good_name','good_spec','repre_good','WORKNM','BORGNM'
        ], axis = 1)
df = df.drop_duplicates(keep='first')
# 날짜컬럼추가
df["REGI_DATE"] = pd.to_datetime(df.REGI_DATE_TIME).dt.date
# 문자 -> 숫자화1
bptype = LabelEncoder()
df['BpType']=bptype.fit_transform(df.BRANCHBUSITYPE.tolist()).astype('str')
with open('./le_BpType.pkl','wb') as f:
    pickle.dump(bptype, f)
# 문자 -> 숫자화2
bpclass = LabelEncoder()
df['BpClass']=bpclass.fit_transform(df.BRANCHBUSICLAS.tolist()).astype('str')
with open('./le_BpClass.pkl','wb') as g:
    pickle.dump(bpclass, g)
# 널 데이터 제외(764건)
df = df.dropna()
df = df.drop(['BRANCHBUSITYPE','BRANCHBUSICLAS'],axis =1)
df.columns = ['OrderNum','ProductCode','ProductAmt','BpID', 'Deli_Region','OrderTime','ProductCategory','ProductClass','Region',
              'ConstructionType','OrderDate','BpType','BpClass']
duration = datetime.datetime.now()-start
m, s = divmod(duration.seconds, 60);h, m = divmod(m, 60);print("[%02d:%02d:%02d]" %(h, m, s))
df = df[['OrderNum','OrderTime','OrderDate','ProductCode','ProductCategory', 'ProductClass','ProductAmt', 
    'Deli_Region','BpID','Region','ConstructionType','BpType', 'BpClass']]
# 이상 데이터 불러오기 및 필요 데이터 필터링 부분.
df.to_csv('./Datasets/df_181106_new.csv', index = False)

In [46]:
# df = pd.read_csv('./Datasets/df_181106_new.csv', dtype='str', encoding = 'utf-8')
# df = df.drop(['OrderNum', 'OrderTime', 'Deli_Region'],axis = 1)
# df['OrderDate'] = pd.to_datetime(df['OrderDate']).dt.date
# df1 = df.groupby(['BpID','OrderDate','ProductCode','ProductCategory',
#             'ProductClass','Region','ConstructionType','BpType','BpClass'])['ProductAmt'].sum()
# df1 = df1.reset_index().sort_values(['BpID','OrderDate']).reset_index(drop = True)

In [5]:
# nx =  3 # 이전 기록 갯수(X)
# ny = 1 # 이후 기록 갯수(Y)

# targetdf=None
# Bplist = df1.BpID.unique()
# for ii,jj in enumerate(Bplist):
#     if ii % 50 ==0:
#         print(ii, len(Bplist))
#     if len(df1[df1.BpID ==jj].OrderDate.unique())>=nx+ny:
#         targetdata = df1[df1.BpID ==jj]
#         targetdate = targetdata.OrderDate.unique()
#         for kk in range(len(targetdate)-nx):
#             # (subset='ProductCode') 제거할지 추후 고려
#             x1 = targetdata[targetdata.OrderDate.isin(targetdate[kk:kk+nx])].drop_duplicates(subset='ProductCode')
#             y1 = targetdata[targetdata.OrderDate.isin(targetdate[kk+nx:kk+nx+ny])].drop_duplicates(subset='ProductCode')
#             x2 = pd.concat([x1]*y1.shape[0]).reset_index(drop=True)
#             y2 = pd.concat([y1[['OrderDate','ProductCode']]]*x1.shape[0]).sort_values(by='ProductCode').reset_index(drop=True)
#             try:
#                 targetdf = pd.concat([targetdf,pd.merge(x2, y2,left_index=True, right_index=True)])
#             except NameError:
#                 targetdf = pd.merge(x2, y2,left_index=True, right_index=True)
#     else:
#         continue
# # 프레딕트 할때는 최근 3일 결과만 준비하면 된다.
# # 조온나 오래걸림.
# targetdf.to_csv("./Datasets/df_181108_new.csv",encoding='utf-8') # 미리 tobe파일명 반영.

In [4]:
targetdf = pd.read_csv('df_181108_new.csv', dtype='str', encoding = 'utf-8', index_col= 0)
targetdf['DateGap'] = (pd.to_datetime(targetdf['OrderDate_y'])-pd.to_datetime(targetdf['OrderDate_x'])).dt.days
# targetdf['ProductAmt']=targetdf['ProductAmt'].astype('int64') # ProductAmt 재가공 필요 잇음. groupby에서부터
targetdf1 = targetdf.drop(['BpID','OrderDate_x','OrderDate_y','ProductAmt'],axis = 1)
targetdf2 = targetdf1[['Region','BpType','BpClass','ConstructionType','ProductCode_x','ProductCategory',
                       'ProductClass','DateGap','ProductCode_y']]

train_df, eval_df = train_test_split(targetdf2.values, test_size=0.2, shuffle=False)

np.savetxt(fname='C:/Users/aj901/Desktop/Wide&Deep_논문/SK_Telesis/new_dataset/train_target_2.csv', X=train_df, delimiter=',',encoding='utf-8',fmt="%s")
np.savetxt(fname='C:/Users/aj901/Desktop/Wide&Deep_논문/SK_Telesis/new_dataset/eval_target_2.csv', X=eval_df, delimiter=',',encoding='utf-8',fmt="%s")

model_dir = "./Trained_models/Models_WnD/folder_test"
n_classes = len(targetdf2.ProductCode_y.unique())
voca_classes = tuple(targetdf2.ProductCode_y.unique())

In [5]:
def input_fn(data_files, num_epochs, batch_size, predict=False):
    def parse_csv(value):
        columns = tf.decode_csv(value, record_defaults=[[""]]*7+[[0]]*1+[[""]])
        features = dict(zip(targetdf2.columns[:8], columns[:8]))
        label = columns[-1]
        out = features, label
        return out
    
    dataset = tf.data.TextLineDataset(data_files)
    if predict==False:
        dataset = dataset.apply(tf.contrib.data.shuffle_and_repeat(buffer_size = 100, count = num_epochs))
    dataset = dataset.apply(tf.contrib.data.map_and_batch(map_func=parse_csv, 
                                                          batch_size=batch_size, 
                                                          num_parallel_calls = 16))
    dataset = dataset.cache()
    dataset = dataset.prefetch(buffer_size = 50)
    iterator = dataset.make_one_shot_iterator()
    batch_features, batch_labels = iterator.get_next()
    return batch_features, batch_labels

train_filenames = ["C:/Users/aj901/Desktop/Wide&Deep_논문/SK_Telesis/new_dataset/train_target.csv"]
test_filenames = ["C:/Users/aj901/Desktop/Wide&Deep_논문/SK_Telesis/new_dataset/test_target.csv"]

def train_input_fn():
    return input_fn(data_files=train_filenames,num_epochs=int(1e6), batch_size=64)

def eval_input_fn():
    return input_fn(data_files=test_filenames, num_epochs=1, batch_size=1)

def pred_input_fn():
    return input_fn(data_files=test_filenames,num_epochs=1,batch_size=1,predict=True)

In [6]:
Region = tf.contrib.layers.sparse_column_with_hash_bucket("Region", hash_bucket_size=300)
BpType=tf.contrib.layers.sparse_column_with_hash_bucket("BpType", hash_bucket_size=300)
BpClass=tf.contrib.layers.sparse_column_with_hash_bucket("BpClass", hash_bucket_size=300)
ConstructionType=tf.contrib.layers.sparse_column_with_hash_bucket("ConstructionType", hash_bucket_size=100)
ProductCategory=tf.contrib.layers.sparse_column_with_hash_bucket("ProductCategory", hash_bucket_size=300)
ProductClass=tf.contrib.layers.sparse_column_with_hash_bucket("ProductClass", hash_bucket_size=300)
# ProductAmt=tf.contrib.layers.real_valued_column("ProductAmt", dtype=tf.int64)

DateGap = tf.contrib.layers.real_valued_column("DateGap", dtype=tf.int64)
DateGap_buckets = tf.contrib.layers.bucketized_column(
    source_column=DateGap,
    boundaries=[5*(ii+1) for ii in range(73)])

ProductCode_x = tf.contrib.layers.sparse_column_with_hash_bucket("ProductCode_x", hash_bucket_size=20000)

wide_columns = [
    ProductCode_x, 
    DateGap_buckets,
    tf.contrib.layers.crossed_column([ProductCode_x, ConstructionType], hash_bucket_size=530000)
]
deep_columns = [
    DateGap, 
#     ProductAmt,
    tf.contrib.layers.embedding_column(BpClass, dimension=8), 
    tf.contrib.layers.embedding_column(BpType, dimension=8), 
    tf.contrib.layers.embedding_column(ConstructionType, dimension=8), 
    tf.contrib.layers.embedding_column(Region, dimension=8),
    tf.contrib.layers.embedding_column(ProductCategory, dimension=8),
    tf.contrib.layers.embedding_column(ProductClass, dimension=8)
]

  return _inspect.getargspec(target)
  return f(*args, **kwds)
  return _inspect.getargspec(target)




  _get_logger().warn(msg, *args, **kwargs)


In [8]:
# model config setup for speed up (reference : tensorflow performance guide)
config = tf.ConfigProto()
config.intra_op_parallelism_threads = 128 # 8->16->32->64....
config.inter_op_parallelism_threads = 0 # 0으로 고정
_config = tf.estimator.RunConfig(
    session_config=config, 
    save_checkpoints_secs=None, 
    save_checkpoints_steps=1000,
    keep_checkpoint_max=1)

tot_model = tf.estimator.DNNLinearCombinedClassifier(
    model_dir=model_dir, 
    linear_feature_columns=wide_columns, 
    dnn_feature_columns=deep_columns, 
    dnn_hidden_units= [256,128], 
    dnn_dropout = 0.7, 
    dnn_optimizer= tf.train.AdamOptimizer,
    n_classes = n_classes,
    label_vocabulary= voca_classes,
    batch_norm = True,
    config = _config)

INFO:tensorflow:Using config: {'_model_dir': './Trained_models/Models_WnD/folder_test', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 1000, '_save_checkpoints_secs': None, '_session_config': intra_op_parallelism_threads: 128
, '_keep_checkpoint_max': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000002A2B1AC1860>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [9]:
# # https://www.tensorflow.org/api_docs/python/tf/estimator/train_and_evaluate
train_spec = tf.estimator.TrainSpec(input_fn=train_input_fn)
eval_spec = tf.estimator.EvalSpec(input_fn=eval_input_fn)
tf.estimator.train_and_evaluate(tot_model, train_spec, eval_spec)

INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps 1000 or save_checkpoints_secs None.
Instructions for updating:
Use `tf.data.experimental.shuffle_and_repeat(...)`.
Instructions for updating:
Use `tf.data.experimental.map_and_batch(...)`.
INFO:tensorflow:Calling model_fn.


  return _inspect.getargspec(target)


Instructions for updating:
The default behavior of sparse_feature_cross is changing, the default
value for hash_key will change to SPARSE_FEATURE_CROSS_DEFAULT_HASH_KEY.
From that point on sparse_feature_cross will always use FingerprintCat64
to concatenate the feature fingerprints. And the underlying
_sparse_feature_cross_op.sparse_feature_cross operation will be marked
as deprecated.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.


InvalidArgumentError: Cannot parse tensor from proto: dtype: DT_FLOAT
tensor_shape {
  dim {
    size: 530000
  }
  dim {
    size: 13936
  }
}
float_val: 0.1

	 [[node head/Const (defined at <ipython-input-9-284426665103>:4)  = Const[_class=["loc:@linea...trl/Assign"], dtype=DT_FLOAT, value=<Invalid TensorProto: dtype: DT_FLOAT tensor_shape { dim { size: 530000 } dim { size: 13936 } } float_val: 0.1>, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]

Caused by op 'head/Const', defined at:
  File "C:\Users\aj901\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\aj901\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\aj901\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\aj901\Anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\Users\aj901\Anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "C:\Users\aj901\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\aj901\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2909, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\aj901\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-9-284426665103>", line 4, in <module>
    tf.estimator.train_and_evaluate(tot_model, train_spec, eval_spec)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 471, in train_and_evaluate
    return executor.run()
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 610, in run
    return self.run_local()
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\training.py", line 711, in run_local
    saving_listeners=saving_listeners)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 354, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1207, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1237, in _train_model_default
    features, labels, model_fn_lib.ModeKeys.TRAIN, self.config)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\estimator.py", line 1195, in _call_model_fn
    model_fn_results = self._model_fn(features=features, **kwargs)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\canned\dnn_linear_combined.py", line 439, in _model_fn
    linear_sparse_combiner=linear_sparse_combiner)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\canned\dnn_linear_combined.py", line 238, in _dnn_linear_combined_model_fn
    logits=logits)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\canned\head.py", line 239, in create_estimator_spec
    regularization_losses))
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\canned\head.py", line 873, in _create_tpu_estimator_spec
    train_op = train_op_fn(regularized_training_loss)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\estimator\canned\dnn_linear_combined.py", line 227, in _train_op_fn
    scope=linear_absolute_scope)))
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 410, in minimize
    name=name)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\training\optimizer.py", line 593, in apply_gradients
    self._create_slots(var_list)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\training\ftrl.py", line 127, in _create_slots
    self._initial_accumulator_value, dtype=v.dtype, shape=v.get_shape())
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\framework\constant_op.py", line 214, in constant
    name=name).outputs[0]
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3274, in create_op
    op_def=op_def)
  File "C:\Users\aj901\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Cannot parse tensor from proto: dtype: DT_FLOAT
tensor_shape {
  dim {
    size: 530000
  }
  dim {
    size: 13936
  }
}
float_val: 0.1

	 [[node head/Const (defined at <ipython-input-9-284426665103>:4)  = Const[_class=["loc:@linea...trl/Assign"], dtype=DT_FLOAT, value=<Invalid TensorProto: dtype: DT_FLOAT tensor_shape { dim { size: 530000 } dim { size: 13936 } } float_val: 0.1>, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]


In [None]:
tot_model.evaluate(input_fn=eval_input_fn)

INFO:tensorflow:Could not find trained model in model_dir: ./Trained_models/Models_WnD/folder_test, running initialization to evaluate.
INFO:tensorflow:Calling model_fn.


  return _inspect.getargspec(target)


INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2019-02-13-01:24:41
INFO:tensorflow:Graph was finalized.


In [12]:
# # # 커스텀 평가기준
# custom_evaluation()

In [None]:
#https://www.tensorflow.org/performance/datasets_performance
#https://www.tensorflow.org/performance/performance_guide
#https://www.tensorflow.org/performance/performance_models
"""
181107 테스트로 돌려본 모델
◆ 구조를 달리해서 데이터 임포트부터 작업시작.
◆ Row를 아이템 베이스로 작업한 버전임.(3일/1일)
◎ 결과
◆ 이후 아래 데이터 임포트로 작업한 Df파일을 기반으로 아이템의 지정여부, 카테고리등의 데이터를 추가하여 실험.
"""