In [None]:
import os, pickle
import tensorflow as tf
from keras.models import *
from keras.optimizers.optimizer_experimental.adamw import AdamW

from gpt import data
from gpt.settings import *
from gpt.train.generator import *
from gpt.train.model import *
from gpt.train.callbacks import *
from gpt.utils import *

In [None]:
# Restrict TensorFlow to the first physical GPU when at least one is reported;
# otherwise just announce that the run stays on the CPU.
gpus = tf.config.experimental.list_physical_devices('GPU')

if not gpus:
	print('Using CPU :(')

else:

	try:
		tf.config.experimental.set_visible_devices(gpus[0], 'GPU')
		print('Using GPU :)')

	except RuntimeError as error:
		# The failure is only reported, so the rest of the notebook still runs
		print(error)

In [None]:
# Fetch the tokenizer together with the training and validation token sequences
tokenizer, train_tokens, val_tokens = data.get_data()

# Report how many tokens each split holds, with thousands separators
train_size = len(train_tokens)
val_size = len(val_tokens)

print('Train size:', format(train_size, ',.0f'))
print('Val size:', format(val_size, ',.0f'))

# Decode the first MAX_CONTEXT training tokens as a quick sanity check of the data.
# NOTE(review): the meaning of the positional `True` flag is defined by the tokenizer — confirm there.
preview_text = tokenizer.decode(train_tokens[:MAX_CONTEXT], True)
print('\n' + preview_text)

In [None]:
# Build the training generator and preview the first sample of its first batch:
# the raw token ids as two aligned rows, then the decoded tokens.
train_generator = BatchGenerator(train_tokens, size = STEP_PER_EPOCH)
x, y = train_generator[0]

# String form of every id in the first input row and the first target row
input_cells = [str(token) for token in x[0]]
target_cells = [str(token) for token in y[0]]

# A column is as wide as the longer of the two ids it holds, so both rows line up
column_widths = [max(len(a), len(b)) for a, b in zip(input_cells, target_cells)]

for cells in (input_cells, target_cells):
	print(''.join(cell.ljust(width) + ' | ' for cell, width in zip(cells, column_widths)))

print()
# NOTE(review): the two positional `True` flags are interpreted by the tokenizer — confirm their meaning there.
print_tokens(tokenizer.decode(x[0], True, True))
print_tokens(tokenizer.decode(y[0], True, True))

In [None]:
# Build the network with one output class per vocabulary entry
vocab_size = len(tokenizer.vocab)
model = create_model(vocab_size)

# NOTE(review): the learning rate starts at 0.0; presumably the LRScheduler callback
# passed to model.fit sets the real value during training — confirm.
adamw_optimizer = AdamW(
	learning_rate = 0.0,
	weight_decay = WEIGHT_DECAY,
	beta_1 = BETA_1,
	beta_2 = BETA_2,
	clipnorm = CLIP_GRADIENTS
)

model.compile(
	optimizer = adamw_optimizer,
	loss = 'sparse_categorical_crossentropy',
	metrics = ['accuracy']
)

model.summary()

In [None]:
# Resume a previous run when all three checkpoint files exist in OUTPUT_DIR;
# otherwise training starts from epoch 0 with the freshly compiled model.
init_epoch = 0

logs_path = os.path.join(OUTPUT_DIR, 'logs.pkl')
weights_path = os.path.join(OUTPUT_DIR, 'model.h5')
optimizer_path = os.path.join(OUTPUT_DIR, 'optimizer.pkl')

if all(os.path.exists(path) for path in (logs_path, weights_path, optimizer_path)):

	# pickle.load can execute arbitrary code: only resume from files this project wrote itself.
	# The `with` block closes the file handle as soon as the logs are read.
	with open(logs_path, 'rb') as logs_file:
		logs = pickle.load(logs_file)

	# Continue counting from the last epoch recorded in the saved logs
	init_epoch = logs['epochs'][-1]

	if NUM_ACCUMULATIONS > 1:
		# NOTE(review): the call order below is kept exactly as written. The short silent fit
		# presumably lets the optimizer / gradient accumulator create its variables before the
		# saved weights and optimizer state are restored over them — confirm against
		# load_state and reset_accumulator before reordering anything.
		load_state(model.optimizer, optimizer_path)
		model.fit(BatchGenerator(train_tokens, size = NUM_ACCUMULATIONS + 1), batch_size = BATCH_SIZE, epochs = 1, shuffle = False, verbose = 0)
		model.load_weights(weights_path)
		reset_accumulator(model)
		load_state(model.optimizer, optimizer_path)

	else:
		model.load_weights(weights_path)
		load_state(model.optimizer, optimizer_path)

In [None]:
# Validation batches are drawn from the held-out tokens, VAL_STEPS batches per pass
validation_generator = BatchGenerator(val_tokens, size = VAL_STEPS)

# Callbacks come from gpt.train.callbacks and are handed to fit in this order
training_callbacks = [LRScheduler(), SaveModel(), SaveLogs()]

# Train from init_epoch (0 on a fresh run) up to NUM_EPOCHS
model.fit(
	train_generator,
	validation_data = validation_generator,
	epochs = NUM_EPOCHS,
	initial_epoch = init_epoch,
	batch_size = BATCH_SIZE,
	validation_batch_size = BATCH_SIZE,
	shuffle = False,
	callbacks = training_callbacks
)

In [None]:
# Load the weights stored in best_model.h5 before evaluating / generating.
# NOTE(review): this path is hard-coded, whereas the resume cell builds its paths from
# OUTPUT_DIR — confirm that both point at the same directory.
best_weights_path = './output/best_model.h5'
model.load_weights(best_weights_path)

In [None]:
# Score the model on batches drawn from the validation tokens.
# NOTE(review): the generator is sized with STEP_PER_EPOCH here, while validation during
# training uses VAL_STEPS — confirm that the larger/smaller evaluation size is intended.
evaluation_generator = BatchGenerator(val_tokens, size = STEP_PER_EPOCH)
model.evaluate(evaluation_generator, batch_size = BATCH_SIZE)

In [None]:
# Text handed to predict as the starting point for generation (empty by default).
# Named `prompt` so the builtin input() is not shadowed for the rest of the kernel session.
prompt = ""

# The sampling settings below are forwarded to predict unchanged
predict(
	model,
	prompt,
	tokenizer,
	max_length = 200,
	keep_input = True,
	temperature = 0.7,
	top_p = 0.95,
	no_repeat = 1.0,
	verbose = True,
	max_print_line_length = 160
)