In [None]:
# set up logging
import logging

# Root-logger settings: every record is prefixed with a timestamp, its level
# and the emitting logger's name; records at INFO and above are shown.
LOG_FORMAT = "%(asctime)s - %(levelname)s - %(name)s -   %(message)s"
LOG_DATE_FORMAT = "%m/%d/%Y %H:%M:%S"

logging.basicConfig(format=LOG_FORMAT, datefmt=LOG_DATE_FORMAT, level=logging.INFO)

In [None]:
# make deterministic
from mingpt.utils import set_seed

# Fixed seed handed to minGPT's seeding helper; change it here to try another run.
SEED = 42
set_seed(SEED)

In [None]:
import datetime
import os
import shutil
import string

import numpy as np
import torch
import torch.nn as nn
from torch.nn import functional as F
from torch.utils.data.dataloader import DataLoader
from tqdm import tqdm

from mingpt.examiner import Examiner
from mingpt.marker_dataset import MarkerDataset
from mingpt.math_dataset import MathDataset
from mingpt.md import MemData
from mingpt.model import GPT, GPTConfig, GPT1Config
from mingpt.trainer import Trainer, TrainerConfig
%load_ext autoreload
%autoreload 2

In [None]:
# create a dataset
# Rebuild the working directory `run` as a fresh copy of `data`, so anything a
# previous run left in `run` is discarded. shutil replaces the `!rm` / `!cp`
# shell escapes so the cell does not depend on a POSIX shell and stops with an
# exception if the copy cannot be made, instead of continuing with a missing
# or stale `run` directory.
shutil.rmtree('run', ignore_errors=True)  # like `rm -rf`: a missing directory is not an error
shutil.copytree('data', 'run')            # raises if `data` is missing or `run` still exists
fn_data = 'run/numbers__list_prime_factors.txt'

In [None]:
# Add memory data structure to training data
# MD is the shared MemData object of this notebook: later cells pass it to
# every MathDataset / MarkerDataset and to the Examiner, and read
# MD.vocab_size / MD.block_size when configuring the model.
memory_slots = 7  # number of memory slots requested from MemData
MD = MemData(memory_slots)
# NOTE(review): presumably derives the memory-slot data from fn_data and may
# write files under run/ — confirm in mingpt.md before relying on it.
MD.initiate_mem_slot_data(fn_data)

In [None]:
# Paths of the train / test split files inside the working directory.
fn_train = 'run/train_numbers__list_prime_factors.txt'
fn_test = 'run/test_numbers__list_prime_factors.txt'

# Main (math) dataset built from the training split; MD is the shared MemData object.
train_dataset = MathDataset(fname=fn_train, MD=MD)

In [None]:
# Show the block size and vocabulary size that the model config is built from,
# one value per line.
print(MD.block_size, MD.vocab_size, sep="\n")

In [None]:
# initialize a baby GPT model
# Architecture knobs kept in one place: 2 layers, 4 heads, 128-wide embeddings.
model_hparams = dict(n_layer=2, n_head=4, n_embd=128)

# Vocabulary and block size come from the MemData object so the model matches the data.
mconf = GPTConfig(MD.vocab_size, MD.block_size, **model_hparams)
model = GPT(mconf)

In [None]:
# ---- Run settings ----------------------------------------------------------
max_it = 100       # not referenced anywhere in the visible cells
main_epoch = 1     # epochs used when training on the main (math) dataset
marker_epoch = 1   # epochs for the marker dataset; only used by the disabled toggle below
current_it = 0     # only used by the disabled checkpoint call below

# Timestamp naming the experiment folder for the (disabled) checkpoint call.
exp_folder = datetime.datetime.now().strftime('%Y-%m-%d~%H:%M:%S')
examiner = Examiner(MD)


def _load_splits(dataset_cls):
    """Build the (train, test) pair of ``dataset_cls`` from ``fn_train`` and ``fn_test``.

    The train split is constructed first, then the test split; both share the
    notebook-level ``MD`` object.
    """
    train_split = dataset_cls(fname=fn_train, MD=MD)
    test_split = dataset_cls(fname=fn_test, MD=MD)
    return train_split, test_split


# ---- Starting dataset --------------------------------------------------------
# Switch between main training and marker training
print("Loading Main Dataset\n")
train_dataset, test_dataset = _load_splits(MathDataset)
epoch = main_epoch

# To start from the marker dataset instead, replace the three statements above with:
# print("Loading Marker Dataset\n")
# train_dataset, test_dataset = _load_splits(MarkerDataset)
# epoch = marker_epoch

# ---- Trainer configuration ---------------------------------------------------
# One config object is shared by every Trainer created in this cell;
# final_tokens is computed once, from the dataset loaded above.
tconf = TrainerConfig(
    max_epochs=epoch,
    batch_size=1024,
    learning_rate=6e-4,
    lr_decay=True,
    warmup_tokens=1024,
    final_tokens=50*len(train_dataset)*(14+1),
    num_workers=0,
)

# ---- First round ---------------------------------------------------------------
print("Training-------------------\n")
trainer = Trainer(model, train_dataset, test_dataset, tconf)
# Training and checkpointing are disabled for this first round, so the exams
# below run against the model exactly as it is when this cell starts.
#trainer.train()
#trainer.save_checkpoint(exp_folder, str(current_it))


print("Exam and new dataset-------------\n")
# NOTE(review): the label above suggests exam() also produces the data used by
# the marker round that follows — confirm in mingpt.examiner.
examiner.exam(fn_train, train_dataset, trainer, 10000)
examiner.exam(fn_test, test_dataset, trainer, 10000)

# ---- Marker round --------------------------------------------------------------
train_dataset, test_dataset = _load_splits(MarkerDataset)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()

# ---- Main (math) round ---------------------------------------------------------
train_dataset, test_dataset = _load_splits(MathDataset)
trainer = Trainer(model, train_dataset, test_dataset, tconf)
trainer.train()

# ---- Final exam on both splits (no fourth argument this time) ------------------
examiner.exam(fn_train, train_dataset, trainer)
examiner.exam(fn_test, test_dataset, trainer)

In [None]:
# Rebuild the marker train / test datasets (train first, then test) from the split files.
train_dataset, test_dataset = (
    MarkerDataset(fname=split_path, MD=MD) for split_path in (fn_train, fn_test)
)