In [1]:
# Simulated command line for interactive runs: the argparse cell parses this
# list instead of sys.argv.
import shlex

# shlex.split applies shell-style quoting rules, so quoted values may contain
# spaces and repeated blanks do not produce empty tokens (both of which the
# previous .replace('"', '').split(' ') approach got wrong).
s = shlex.split(
    '-i "data/sic_hierarchy" -o "temp_bert" -m "xmlcnn" --mode "cat" '
    '-l "categorical" --gpu "0" --bert_bottle_neck 512 '
    '--bert_trainable_layers 3 --epoch 1 --batch_size 256 --val'
)

# IMPORTS

In [2]:
# basic
import argparse
import os,datetime

# save things
import pandas as pd
from tensorflow.keras.callbacks import CSVLogger

# model_func
from tools.model_func import *

W0812 15:30:00.865283 140245800064832 deprecation_wrapper.py:119] From /home/angela/xmtc/tools/model_func.py:21: The name tf.keras.layers.CuDNNLSTM is deprecated. Please use tf.compat.v1.keras.layers.CuDNNLSTM instead.



# ARGPARSE

In [3]:
# Command-line interface of the baseline runner. In this notebook the arguments
# are taken from the list `s` rather than from sys.argv.
parser = argparse.ArgumentParser(description = 'run baseline models')
parser.add_argument('-i','--input', required = True, type = str, help = 'input directory e.g. ./data/dl_amazon_1/')
parser.add_argument('-m','--model', required = True, type = str, help = 'model, one in: xmlcnn, attentionxml, attention,')
parser.add_argument('-l','--loss', required = True, type = str, help = "loss type: categorical, binary, masked_categorical ")
parser.add_argument('-o','--output', required = True, type = str, help = 'output directory')
parser.add_argument('--mode', required = True, type = str, help = 'cat,hierarchy')
parser.add_argument('--epoch', default = 5, type = int, help = 'epochs')
parser.add_argument('--batch_size', default = 0, type = int, help = 'batch size')
# The save_* switches default to True, so on their own they can never be turned
# off. Each one gets a --no_* counterpart that writes False to the same dest;
# the original flags are kept so existing command lines continue to work.
parser.add_argument('--save_weights', default = True, action = 'store_true', help = 'save trained weights')
parser.add_argument('--no_save_weights', dest = 'save_weights', action = 'store_false', help = 'do not save trained weights')
parser.add_argument('--save_model', default = True, action = 'store_true', help = 'save trained model architecture')
parser.add_argument('--no_save_model', dest = 'save_model', action = 'store_false', help = 'do not save trained model architecture')
parser.add_argument('--save_prediction', default = True, action = 'store_true', help = 'save top 10 prediction and corresponding probabilities (not implemented yet)')
parser.add_argument('--no_save_prediction', dest = 'save_prediction', action = 'store_false', help = 'do not save predictions')
parser.add_argument('--gpu', default = '', type = str, help = 'GPU id to use')
parser.add_argument('--bert_bottle_neck', default = 512, type = int, help = 'bottle neck dim for bert, 0 implies no bottle neck layer')
parser.add_argument('--bert_trainable_layers', default = 10, type = int, help = 'number of trainable layers in bert ')
parser.add_argument('--val', default = False, action = 'store_true',help = 'use validation set')
args = parser.parse_args(s)

# argparse validation
# Known models mapped to the batch size used when --batch_size is left at 0.
default_batch_size = {'xmlcnn':128,'attentionxml':20,'attention':25,'bert':256,}
if not os.path.exists(args.input):
    raise Exception('Input path does not exist: {}'.format(args.input))
if args.model not in default_batch_size:
    raise Exception('Unknown model: {}'.format(args.model))
if args.loss not in ['binary','categorical','masked_categorical']:
    raise Exception('Unknown loss: {}'.format(args.loss))
if args.mode not in ['cat','hierarchy']:
    raise Exception('Unknown mode: {}'.format(args.mode))

IN_DIR = args.input
if not args.batch_size:
    args.batch_size = default_batch_size[args.model]

# makedirs (rather than mkdir) also creates missing parent directories, and
# exist_ok avoids failing if the directory appears between the check and the call.
if not os.path.exists(args.output):
    os.makedirs(args.output, exist_ok = True)
    print(Coloured("Create Output Directory: {}".format(args.output)))

# Each run writes into its own <timestamp>_<model> sub-directory. The model name
# is appended after formatting so it is never interpreted as a strftime pattern.
run_stamp = datetime.datetime.now().strftime('%y%m%d_%H%M%S')
OUT_DIR = os.path.join(args.output, '{}_{}'.format(run_stamp, args.model))
os.makedirs(OUT_DIR, exist_ok = True)

# Restrict CUDA to the requested device(s); with an empty --gpu the environment
# is left untouched.
if args.gpu:
    print(Coloured("USE GPU: {}".format(args.gpu)))
    os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"]=args.gpu


[1;30;43m USE GPU: 0 [0m


# GPU

In [4]:
# Select the GPU for this session. The device id was previously hard-coded to
# "0", which silently overrode the --gpu value parsed in the argparse cell;
# "0" is now only the fallback used when --gpu is empty.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu or "0"

# input

In [5]:
# Choose the loader for the selected model; both loaders are unpacked into the
# same four slots (train inputs, train labels, test inputs, test labels).
load_inputs = get_bert_input if args.model == 'bert' else get_input
x_trains, y_trains, x_tests, y_tests = load_inputs(IN_DIR, args.mode)

# Losses whose name starts with 'masked' get their labels passed through mask_ys.
if args.loss.startswith('masked'):
    print(Coloured("MASKING INPUT"))
    y_trains = mask_ys(y_trains, IN_DIR)
    y_tests = mask_ys(y_tests, IN_DIR)

# Sizes handed to the model builders: length of the first training sequence and
# the last-axis size of every label array.
max_sequence_length = len(x_trains[0][0])
labels_dims = [labels.shape[-1] for labels in y_tests]

# Without --val the test set doubles as validation data; with it,
# get_unbiased_train_val_split replaces the training arrays and supplies the
# validation ones.
if not args.val:
    x_vals, y_vals = x_tests, y_tests
else:
    x_trains, y_trains, x_vals, y_vals = get_unbiased_train_val_split(x_trains, y_trains, IN_DIR)

LOAD EXISTING VAL INDS


# loss

In [6]:
# Maps each accepted --loss value to the loss function given to model.compile.
loss_dict = dict(
    binary = binary_cross_entropy_with_logits,
    categorical = categorical_cross_entropy_with_logits,
    masked_categorical = masked_categorical_cross_entropy_with_logits,
)

# MAIN

In [7]:
# Callbacks: a CSV logger writing to <OUT_DIR>/train.log (append disabled).
csv_log_dir = os.path.join(OUT_DIR, 'train.log')
callbacks = [CSVLogger(csv_log_dir, append = False)]

# Build the network. The session is created for every model type, but only the
# BERT builder receives it explicitly.
sess = tf.Session()
is_bert = args.model == 'bert'
if is_bert:
    model = get_bert_model(
        max_sequence_length,
        labels_dims,
        bottle_neck = args.bert_bottle_neck,
        trainable_layers = args.bert_trainable_layers,
        sess = sess,
    )
else:
    embedding_layer = get_embedding_layer(IN_DIR)
    model = get_model(
        model_name = args.model,
        max_sequence_length = max_sequence_length,
        labels_dims = labels_dims,
        embedding_layer = embedding_layer,
    )
model.summary()

# Compile: BERT uses Adam with a learning rate of 5e-5, every other model uses
# Keras' default 'adam' optimizer.
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-5) if is_bert else 'adam'
model.compile(
    loss = loss_dict[args.loss],
    optimizer = optimizer,
    metrics = [pAt1, pAt5],
)

# Train; the History object returned by fit is the cell's displayed value.
model.fit(
    x_trains,
    y_trains,
    batch_size = args.batch_size,
    epochs = args.epoch,
    validation_data = (x_vals, y_vals),
    callbacks = callbacks,
    shuffle = True,
)

W0812 15:30:13.389801 140245800064832 deprecation.py:506] From /home/angela/env/lib/python3.6/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 100)]        0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 100, 300)     15000300    input_1[0][0]                    
__________________________________________________________________________________________________
conv1d (Conv1D)                 (None, 50, 128)      76928       embedding[0][0]                  
__________________________________________________________________________________________________
conv1d_1 (Conv1D)               (None, 49, 128)      153728      embedding[0][0]                  
______________________________________________________________________________________________

<tensorflow.python.keras.callbacks.History at 0x7f8ccc263208>

# evaluate

In [8]:
# Evaluate the trained model on the test set and store the result as an extra
# row, labelled 'evaluate' in the epoch column, of the training log CSV.
test_results = model.evaluate(x_tests,y_tests)
dd = dict(zip(model.metrics_names, test_results))
dd['epoch'] = 'evaluate'
df = pd.read_csv(csv_log_dir)
# DataFrame.append is deprecated and was removed in pandas 2.0; pd.concat with a
# one-row frame gives the same result. sort=False keeps the log's column order
# (metrics missing from the evaluate row, e.g. the val_* columns, become NaN).
df = pd.concat([df, pd.DataFrame([dd])], ignore_index = True, sort = False)
df.to_csv(csv_log_dir,index = False)



In [9]:
# Re-read the training log from disk and display it as the cell's output.
pd.read_csv(csv_log_dir)

Unnamed: 0,epoch,H0_loss,H0_pAt1,H0_pAt5,H1_loss,H1_pAt1,H1_pAt5,H2_loss,H2_pAt1,H2_pAt5,...,val_H1_loss,val_H1_pAt1,val_H1_pAt5,val_H2_loss,val_H2_pAt1,val_H2_pAt5,val_H3_loss,val_H3_pAt1,val_H3_pAt5,val_loss
0,0,1.116257,0.676747,0.923616,1.587464,0.606061,0.837904,2.405892,0.477661,0.716074,...,1.292744,0.65987,0.886177,1.952355,0.542961,0.791435,2.044484,0.522578,0.781094,6.206624
1,evaluate,1.054772,0.696482,0.932703,1.483419,0.629704,0.860027,2.210981,0.514402,0.756262,...,,,,,,,,,,


# save things

In [10]:
# Persist the run's artefacts under OUT_DIR, each one guarded by its save_* flag.
if args.save_weights:
    weights_path = os.path.join(OUT_DIR, 'weights.h5')
    model.save_weights(weights_path)

if args.save_model:
    architecture_path = os.path.join(OUT_DIR, 'model.json')
    with open(architecture_path, 'w') as json_file:
        json_file.write(model.to_json())

if args.save_prediction:
    save_predictions(model, x_tests, y_tests, OUT_DIR)

# Always record the parsed arguments next to the results, as a one-row CSV.
args_frame = pd.DataFrame.from_dict([vars(args)])
args_frame.to_csv(os.path.join(OUT_DIR, 'args.csv'))


SAVE PREDICTIONS TO : temp_bert/190812_153000_xmlcnn


# close session

In [11]:
# Close the TensorFlow session that was opened before the model was built.
sess.close()