In [None]:
import config
import models
import json
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt

Main variables

In [None]:
# Directory holding the benchmark data; handed to con.set_in_path() in the config cell.
dataset_path = "./benchmarks/FB15K/"
# Model class (not an instance): passed to con.set_model(), and its __name__ is used
# to build the results directory.
embedding_model = models.TransE
# File name of this notebook. It is passed to nbconvert and copied into the results
# directory by the last cells, so it has to match the actual file name on disk.
nb_name = 'train_test.ipynb' # necessary for saving the files in the end of notebook

Create config instance and set parameters

In [None]:
# Build the configuration object and point it at the benchmark directory.
con = config.Config()
con.set_in_path(dataset_path)
# con.set_test_triple_classification(True)
con.set_log_on(1)
con.set_work_threads(8)
# The epoch count is kept in a variable because the learning-curve cell reuses it
# as the upper x-axis limit.
n_epochs = 500
con.set_train_times(n_epochs)
con.set_nbatches(100)
# NOTE(review): the remaining setters look like optimiser / loss / negative-sampling
# hyper-parameters (alpha presumably the learning rate, margin the ranking-loss margin,
# *_neg_rate the number of corrupted samples) — confirm against config.Config.
con.set_alpha(0.001)
con.set_bern(0)
con.set_dimension(100)
con.set_margin(1.0)
con.set_ent_neg_rate(1)
con.set_rel_neg_rate(0)
con.set_opt_method("SGD")

## Training

Train the model

In [None]:
# Each run gets its own results directory: ./results/<model class name>/<unix timestamp>
export_path = "./results/{}/{}".format(embedding_model.__name__, int(time.time()))

# create export_path dir if it does not exist
def ensure_dir(file_path):
    """Create the directory that contains ``file_path`` if it does not exist yet.

    Only the directory part of ``file_path`` (``os.path.dirname``) is created,
    including any missing parents. To create a directory itself, pass its path
    with a trailing ``/``.

    Args:
        file_path: Path of a file (or a directory path ending in ``/``).

    Returns:
        None.

    Raises:
        OSError: If the directory cannot be created and does not already exist.
    """
    directory = os.path.dirname(file_path)
    if not directory:
        # A bare file name lives in the current working directory; there is
        # nothing to create and os.makedirs('') would raise.
        return
    try:
        os.makedirs(directory)
    except OSError:
        # Tolerate "already exists" (including the case where another process
        # created it between a check and the call); re-raise real failures.
        if not os.path.isdir(directory):
            raise
# The trailing "/" matters: ensure_dir() creates os.path.dirname() of its argument,
# so without it only the parent of export_path would be created.
ensure_dir(export_path + '/')

print("Model will be exported to {}. \n".format(export_path))



# File the trained model is exported to.
con.set_export_files("{}/model.pt".format(export_path))

# Model parameters will be exported to json files automatically.
con.set_out_files("{}/embedding.vec.json".format(export_path))

# Initialize experimental settings.
# NOTE(review): init() is called before set_model(); presumably the library
# requires this order — confirm before reordering these calls.
con.init()

# Pass the model to the configuration
con.set_model(embedding_model)

# Train the model (%time reports how long the run took).
%time con.run()

# Save training log: con.learning_log is written as a CSV with columns epoch, loss.
pd.DataFrame(con.learning_log, columns=['epoch', 'loss']).to_csv("{}/learning_log.csv".format(export_path))

Plot and save the learning curve

In [None]:
# The training log is saved with columns ['epoch', 'loss'], so column 1 is the loss.
data = np.array(con.learning_log)

# Explicit figure/axes instead of the implicit pyplot state, sized up front.
fig, axes = plt.subplots(figsize=(18, 12))
axes.plot(data[:, 1])
axes.set_xlim([0, n_epochs])
# Fixed upper bound kept from the original notebook; losses above it are clipped.
axes.set_ylim([0, 200000])
# Label the figure so the saved SVG can be read on its own.
axes.set_xlabel("epoch")
axes.set_ylabel("loss")
axes.set_title("{} learning curve".format(embedding_model.__name__))
fig.savefig("{}/learning_curve.svg".format(export_path))

## Testing

In [None]:
# The model is handed to the config a second time before testing.
# NOTE(review): the original author found that test() would not run without this
# repeated set_model() call; the reason is unknown — investigate before removing.
con.set_model(embedding_model)
con.test()

Get average accuracy and save to file

In [None]:
from ctypes import c_float
# Read the symbol 'aveAcc' from the shared library held in con.lib as a C float.
res = c_float.in_dll(con.lib, 'aveAcc')
avg_acc = res.value
# Function-call form of print: produces the same output on Python 2 and also runs
# on Python 3, matching the print(...) call used in the training cell.
print("Average accuracy in the test set is {}".format(avg_acc))

In [None]:
# Persist the accuracy summary next to the other artefacts of this run.
accuracy_report = "Average accuracy in the test set is {}".format(avg_acc)
with open("{}/test_accuracy.txt".format(export_path), 'w') as f:
    # leaving the `with` block closes the file
    f.write(accuracy_report)

## Saving history and the notebook

In [None]:
# Name of the HTML file produced from this notebook. Only the final extension is
# swapped; str.replace would also rewrite any '.ipynb' occurring earlier in the name.
nb_name_html = os.path.splitext(nb_name)[0] + '.html'

Save the current notebook

In [None]:
from IPython.display import display,Javascript
# Ask the notebook front end to save a checkpoint by running JavaScript in the browser.
# NOTE(review): this executes in the browser, presumably asynchronously, so the save
# may not have completed when the next cell starts; it also depends on the
# `IPython.notebook` JavaScript object being available in the front end — confirm.
display(Javascript('IPython.notebook.save_checkpoint();'))

Save a converted HTML copy of this notebook

In [None]:
import subprocess

# Pass the arguments as a list so a notebook name containing spaces stays a single
# argument (splitting a command string on whitespace would break it apart).
nbconvert_cmd = ["jupyter", "nbconvert", "--to", "html", nb_name]
# Capture stderr as well; with only stdout piped, `error` would always be None.
process = subprocess.Popen(nbconvert_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
output, error = process.communicate()
if process.returncode != 0:
    # Report the failure instead of silently continuing without the HTML file.
    print("nbconvert failed with exit code {}:\n{}".format(
        process.returncode, error.decode("utf-8", "replace")))

Save a history file

In [None]:
# %notebook is an IPython magic; here it writes the session's input history to
# history.ipynb in the current directory. The next cell moves that file into the
# results directory.
%notebook history.ipynb

Move saved files to results directory

In [None]:
import shutil

# Move the history notebook and the HTML export into the results directory.
# shutil.move replaces the shell call: `mv -t` is a GNU extension, the command
# string was split on whitespace (breaking paths with spaces), and a failed move
# went unnoticed because the exit code was never checked.
for artefact in ("history.ipynb", nb_name_html):
    if os.path.exists(artefact):
        # Spell out the destination file so an existing copy is overwritten, as mv does.
        shutil.move(artefact, os.path.join(export_path, os.path.basename(artefact)))
    else:
        # Keep going with the remaining files, but make the gap visible.
        print("Skipping {}: file not found".format(artefact))

Copy current notebook to results directory

In [None]:
import shutil

# Copy this notebook into the results directory. shutil.copy replaces the shell
# call: the command string was split on whitespace (breaking paths with spaces)
# and a failed copy went unnoticed because the exit code was never checked.
if os.path.exists(nb_name):
    shutil.copy(nb_name, os.path.join(export_path, os.path.basename(nb_name)))
else:
    # nb_name must match the notebook's real file name for the copy to work.
    print("Skipping {}: file not found".format(nb_name))