In [None]:
import config
import models
import json
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt

Main variables

In [None]:
# Benchmark to train on; expanded into the input path "./benchmarks/<dataset_name>/" when the config is built.
dataset_name = "FB13"
# Embedding model class handed to the config object; its __name__ also becomes part of the results path.
embedding_model = models.TransD
nb_name = 'train_test.ipynb' # file name of this notebook; needed for converting/copying it at the end of the notebook

Hyperparameters

In [None]:
# Training hyperparameters; they are pushed into the config object in a later cell.
batch_size    = None # mini-batch size (takes precedence over n_batches when it is not None)
n_batches     = 200 # number of batches (only used when batch_size is None)
n_epochs      = 1000 # number of training epochs, passed to con.set_train_times()
learning_rate = 1.0 # passed to con.set_alpha()
margin        = 1.0 # passed to con.set_margin()
k             = 100 # embedding dimension
bern          = 1 # use Bernoulli distribution for generating negative training examples
opt_method    = 'adadelta' # optimizer name, passed to con.set_opt_method()
score_norm    = 'l2' # only implemented in TransD so far

Logging

In [None]:
# Logging options forwarded to con.set_log_on(); their exact semantics are defined in config.Config.
log_on = 1
log_type = 'batch' # the plotting cell further down handles 'epoch' and 'batch'
log_print = True

Create config instance and set hyperparameters

In [None]:
# Build the config object and push the settings from the cells above into it.
con = config.Config()
dataset_path = "./benchmarks/{}/".format(dataset_name)
con.set_in_path(dataset_path)
con.set_test_triple_classification(True)
con.set_log_on(log_on, log_type=log_type, log_print=log_print)
con.set_train_times(n_epochs)
# An explicit mini-batch size wins; otherwise fall back to a fixed number of batches.
if batch_size is not None:
    con.set_batch_size(batch_size)
else:
    con.set_nbatches(n_batches)
con.set_alpha(learning_rate)
con.set_bern(bern)
con.set_dimension(k)
con.set_margin(margin)
# con.set_ent_neg_rate(1)
# con.set_rel_neg_rate(0)
con.set_opt_method(opt_method)

Device settings: GPU and CPU

In [None]:
# ___CPU___
# Number of worker threads handed to the config object.
con.set_work_threads(8)

# ___GPU___
# Expose only the CUDA device with index 1 to this process.
# NOTE(review): this only takes effect if CUDA has not been initialised yet —
# confirm that the `config`/`models` imports at the top do not already do so.
os.environ["CUDA_VISIBLE_DEVICES"]="1"

## Training

Train the model

In [None]:
# defining the export path: ./results/<dataset name>/<model class name>/<unix timestamp>
# (the timestamp is taken when this cell runs, so every run gets its own directory)
export_path = "./results/{}/{}/{}".format(
    dataset_name,
    embedding_model.__name__,
    int(time.time()))

# create export_path dir if it does not exist
def ensure_dir(file_path):
    """Create the parent directory of ``file_path`` if it does not exist yet.

    Only the directory part of the path (``os.path.dirname``) is created, so a
    path that should itself become a directory must end with a "/".

    Does nothing when ``file_path`` has no directory component. Raises
    ``OSError`` when the directory cannot be created, including when a
    non-directory already occupies that path.
    """
    directory = os.path.dirname(file_path)
    if not directory:
        # Bare file name: there is nothing to create, and os.makedirs('') would raise.
        return
    try:
        os.makedirs(directory)
    except OSError:
        # An already existing directory is fine (it may have been created
        # between a check and the call); anything else is a real failure.
        if not os.path.isdir(directory):
            raise
# ensure_dir() creates only the dirname of its argument, hence the trailing "/".
export_dir = export_path + '/'
ensure_dir(export_dir)

export_notice = "Model will be exported to {}. \n".format(export_path)
print(export_notice)

# Where the trained model is written.
model_checkpoint_path = "{}/model.pt".format(export_path)
con.set_export_files(model_checkpoint_path)

# Model parameters will be exported to json files automatically.
embedding_json_path = "{}/embedding.vec.json".format(export_path)
con.set_out_files(embedding_json_path)

# Initialize experimental settings, then hand the model class to the configuration.
con.init()
con.set_model(model=embedding_model, score_norm=score_norm)

# Train the model (%time reports how long the run took).
%time con.run()

# Save training log: con.learning_log is turned into a DataFrame and written
# as CSV into the export directory. `data` is reused by the plotting cell.
data = pd.DataFrame(con.learning_log)
data.to_csv("{}/learning_log.csv".format(export_path))

Plot and save the learning curve

In [None]:
# Plot the loss (left axis) and the validation accuracy (right axis) and save
# the figure as SVG in the export directory. Only 'epoch' and 'batch' logs are
# plotted; any other log_type leaves this cell without output.
if log_type in ('epoch', 'batch'):
    per_epoch = (log_type == 'epoch')

    if per_epoch:
        # Clip the loss axis between the lowest loss and the loss stored at
        # row n_epochs // 10 of the log.
        y_min = data.epoch_loss.min()
        y_max = data.epoch_loss.iloc[n_epochs // 10]
        loss_axis_opts = dict(xlim=[0, n_epochs],
                              ylim=[y_min, y_max],
                              title='Training Loss and Validation Accuracy',
                              xlabel='Epoch',
                              ylabel='Loss')
    else:
        loss_axis_opts = dict(title='Training Loss and Validation Accuracy',
                              xlabel='Batch (absolute)',
                              ylabel='Loss')

    fig, ax1 = plt.subplots()
    ax1.set(**loss_axis_opts)

    # Left axis: loss, against the epoch column or the row index for batch logs.
    if per_epoch:
        ax1.plot(data.epoch, data.epoch_loss, label='Loss')
    else:
        ax1.plot(data.batch_loss, label='Batch Loss')
    ax1.legend(loc=2)

    # Right axis: validation accuracy on a twin y-axis.
    ax2 = ax1.twinx()
    if per_epoch:
        ax2.plot(data.epoch, data.valid_acc, label='Valid Acc', color='orange')
    else:
        ax2.plot(data.valid_acc, label='Valid Acc', color='orange')
    ax2.set(ylabel='Accuracy')
    ax2.legend(loc=1)

    fig.set_size_inches(18, 12)
    learning_curve_path = "{}/learning_curve.svg".format(export_path)
    fig.savefig(learning_curve_path)

## Testing

In [None]:
# Attach the model to the config again, load the exported model file and run the test.
# NOTE(review): the author found the repeated set_model() call necessary for
# test() to run without knowing why — confirm against config.Config.
con.set_model(embedding_model,
              score_norm=score_norm) # don't know why this was necessary to run test()
con.import_variables("{}/model.pt".format(export_path)) # loading model via torch.load()
con.test()

Get average accuracy and save to file

In [None]:
from ctypes import c_float

# Read the C float exported as `aveAcc` by the native library behind con.lib
# (presumably filled in by con.test() — verify against the library source).
res = c_float.in_dll(con.lib, 'aveAcc')
avg_acc = res.value
# print() as a function call: valid on Python 3, still correct on Python 2,
# and consistent with the print() call in the training cell.
print("Average accuracy in the test set is {}".format(avg_acc))

In [None]:
# Persist the test accuracy as a one-line text file in the export directory.
accuracy_file = "{}/test_accuracy.txt".format(export_path)
with open(accuracy_file, 'w') as f:
    summary_line = "Average accuracy in the test set is {}".format(avg_acc)
    f.write(summary_line)

## Saving history and the notebook

In [None]:
# Name of the HTML export: the notebook name with only its extension swapped.
# os.path.splitext touches just the trailing extension, whereas str.replace
# would also rewrite a ".ipynb" occurring in the middle of the name.
nb_name_html = os.path.splitext(nb_name)[0] + '.html'

Save the current notebook

In [None]:
from IPython.display import display,Javascript
# Ask the notebook front end to save a checkpoint of this notebook.
# NOTE(review): `IPython.notebook` is the classic Notebook JavaScript API and is
# presumably unavailable in JupyterLab; the save also runs in the browser, so it
# may not have finished when the next cell starts — confirm.
display(Javascript('IPython.notebook.save_checkpoint();'))

Save a (converted) HTML file of this notebook

In [None]:
import subprocess

# Convert this notebook to HTML with nbconvert.
# The command is an argument list rather than a whitespace-split string, so a
# notebook name containing spaces stays a single argument.
bashCommand = ["jupyter", "nbconvert", "--to", "html", nb_name]
process = subprocess.Popen(bashCommand,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
# stderr is piped as well, so `error` really holds the command's error output.
output, error = process.communicate()
if process.returncode != 0:
    # Report the failure instead of hiding it; the run continues as before.
    print("nbconvert failed with exit code {}: {}".format(process.returncode, error))

Save a history file

In [None]:
# Export the IPython input history of this session to history.ipynb (moved to the results directory afterwards).
%notebook history.ipynb

Move saved files to results directory

In [None]:
import subprocess

# Move the history notebook and the HTML export into the results directory.
# Arguments are passed as a list (no whitespace splitting), and the destination
# goes last instead of using `mv -t`, which is a GNU-only option.
bashCommand = ["mv", "history.ipynb", nb_name_html, "{}/".format(export_path)]
process = subprocess.Popen(bashCommand,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
# stderr is piped as well, so `error` really holds the command's error output.
output, error = process.communicate()
if process.returncode != 0:
    # Report the failure instead of hiding it; the run continues as before.
    print("mv failed with exit code {}: {}".format(process.returncode, error))

Copy current notebook to results directory

In [None]:
import subprocess

# Copy this notebook into the results directory.
# Arguments are passed as a list so that names containing spaces are not split.
bashCommand = ["cp", nb_name, "{}/".format(export_path)]
process = subprocess.Popen(bashCommand,
                           stdout=subprocess.PIPE,
                           stderr=subprocess.PIPE)
# stderr is piped as well, so `error` really holds the command's error output.
output, error = process.communicate()
if process.returncode != 0:
    # Report the failure instead of hiding it; the run continues as before.
    print("cp failed with exit code {}: {}".format(process.returncode, error))