In [1]:
# Configure the root logger once for the whole notebook: INFO and above,
# each record prefixed with a timestamp, its level and the logger name.
import logging

_log_settings = {
    "level": logging.INFO,
    "datefmt": "%m/%d/%Y %H:%M:%S",
    "format": "%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
}
logging.basicConfig(**_log_settings)

In [2]:
# Make runs deterministic by seeding through the project's helper
# before anything else in the notebook executes.
from mingpt.utils import set_seed

SEED = 42
set_seed(SEED)

In [3]:
import numpy as np
import torch
import string
import os
from tqdm import tqdm
import torch.nn as nn
from torch.nn import functional as F
import datetime
from mingpt.md import MemData
from mingpt.math_dataset import MathDataset
from mingpt.model import GPT, GPTConfig, GPT1Config
from torch.utils.data.dataloader import DataLoader
from mingpt.trainer import Trainer, TrainerConfig
from mingpt.adaptive_examiner import AdaptiveExaminer
%load_ext autoreload
%autoreload 2

In [4]:
# Create a fresh working copy of the dataset.
# The previous run/ and models/ directories are discarded, then the pristine
# data/ directory is copied to run/. Done with shutil instead of shell
# commands so the cell works on any platform and fails loudly when data/ is
# missing instead of continuing with an empty working directory.
import shutil

for stale_dir in ('run', 'models'):
    # ignore_errors=True: a directory that does not exist yet is not an error
    shutil.rmtree(stale_dir, ignore_errors=True)
shutil.copytree('data', 'run')
# models/ is intentionally not recreated here (the original `mkdir models`
# line was already disabled).
fn_data = 'run/numbers__list_prime_factors.txt'

In [5]:
# Add memory data structure to training data
# memory_slots is handed to MemData as its first argument.
# NOTE(review): the meaning of debug=500 is defined inside MemData and is not
# visible from this notebook.
memory_slots = 7
MD = MemData(memory_slots, debug=500)
# Passes the raw data file path to MD; presumably this is the step that
# prepares the train/test files read by later cells - TODO confirm in mingpt.md.
MD.initiate_mem_slot_data(fn_data)

In [6]:
# Paths of the test and training files inside the run/ working copy.
fn_test = 'run/test_numbers__list_prime_factors.txt'
fn_train = 'run/train_numbers__list_prime_factors.txt'
# Initial training dataset built with marker_data=0.5. The training loop
# further down reassigns train_dataset with its own marker_data every round.
train_dataset = MathDataset(fname=fn_train, MD=MD, marker_data=0.5)

In [7]:
#MD.tensor2string(train_dataset[4][1])

In [8]:
# Show MD.block_size, MD.vocab_size and MD.max_trg, one value per line.
print(MD.block_size, MD.vocab_size, MD.max_trg, sep="\n")

262
102
27


In [9]:
# Load a previously saved model instead of building a new one.
# The construction of a fresh GPT is kept below, commented out, for reference:
#mconf = GPTConfig(MD.vocab_size, MD.block_size,
#                  n_layer=4, n_head=8, n_embd=256)
#model = GPT(mconf)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code - only load checkpoints from a trusted source.
# NOTE(review): the path is relative to the working directory and the file is
# named '9.pth' although the training loop resumes at iteration 7 - confirm
# this is the intended checkpoint.
# NOTE(review): newer PyTorch releases may require weights_only=False to load
# a whole pickled model object - verify against the installed version.
model = torch.load('9.pth')

In [None]:
# Adaptive training rounds: each round builds the datasets, trains the model,
# saves a checkpoint named after the round number, and then runs
# examiner.exam on the training file.
max_it = 100        # the loop stops once current_it reaches this value
current_it = 7      # round to start from
batch_size = 10

# Rounds below warmup_its train for 1 epoch with marker_data=0.0; later rounds
# train for 10 epochs with marker_data=0.2.
# NOTE(review): 7 matches memory_slots set earlier in the notebook - confirm
# whether this threshold should simply reuse that value.
warmup_its = 7

# NOTE(review): the ':' characters in the timestamp are not valid in Windows
# paths; kept unchanged so existing folder names stay comparable.
exp_folder = 'models/' + datetime.datetime.now().strftime('%Y-%m-%d~%H:%M:%S')
examiner = AdaptiveExaminer(MD, ac=10, max_batch=batch_size)

while current_it < max_it:
    warming_up = current_it < warmup_its

    # Wait until the working memory is filled, then use 10 epochs
    epoch = 1 if warming_up else 10
    # Use marker data once the working memory is full
    marker_data = 0.0 if warming_up else 0.2

    # Rebuild both datasets at the start of every round
    print("Marker Data: ", str(marker_data))
    train_dataset = MathDataset(fname=fn_train, MD=MD, marker_data=marker_data)
    test_dataset = MathDataset(fname=fn_test, MD=MD, marker_data=0.0)

    # Trainer Config
    # NOTE(review): final_tokens scales with MD.vocab_size + 1; confirm this is
    # the intended per-example factor for the learning-rate decay schedule.
    tconf = TrainerConfig(
        max_epochs=epoch,
        batch_size=batch_size,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=epoch*len(train_dataset)*(MD.vocab_size+1),
        num_workers=6,
    )

    # Train this round and checkpoint it under the round number
    print("Training: ", str(current_it))
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()
    trainer.save_checkpoint(exp_folder, str(current_it))

    # Examine the model and create new dataset
    print("Exam and new dataset-------------\n")
    print("Training exam \n")
    examiner.exam(fn_train, trainer)
    # The test exam itself is currently disabled; only its banner is printed.
    print("Test exam \n")
    #examiner.exam(fn_test, trainer, test=True)

    current_it += 1

12/20/2020 08:57:39 - INFO - mingpt.trainer -   saving models/2020-12-20~08:57:39/7.pth
  0%|          | 0/450 [00:00<?, ?it/s]

Marker Data:  0.2
Training:  7
Exam and new dataset-------------

Training exam 

450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:04<00:00, 104.86it/s]
Iiter 12 Score: 0/13:   3%|▎         | 13/450 [00:00<00:05, 79.79it/s]

Adaptive Compute Iteration:  0
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:04<00:00, 110.05it/s]
Iiter 12 Score: 0/13:   3%|▎         | 13/450 [00:00<00:05, 79.13it/s]

Adaptive Compute Iteration:  1
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:04<00:00, 111.06it/s]
Iiter 12 Score: 0/13:   3%|▎         | 13/450 [00:00<00:05, 78.19it/s]

Adaptive Compute Iteration:  2
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:04<00:00, 111.45it/s]
Iiter 12 Score: 0/13:   3%|▎         | 13/450 [00:00<00:05, 74.81it/s]

Adaptive Compute Iteration:  3
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:04<00:00, 108.55it/s]
Iiter 12 Score: 0/13:   3%|▎         | 13/450 [00:00<00:06, 68.56it/s]

Adaptive Compute Iteration:  4
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:04<00:00, 94.86it/s] 
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  5
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:05<00:00, 83.69it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  6
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.77it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  7
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.71it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  8
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.44it/s]
12/20/2020 08:58:28 - INFO - mingpt.trainer -   saving models/2020-12-20~08:57:39/8.pth
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  9
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
Final score: 0/450 = 0.00% correct
Test exam 

Marker Data:  0.2
Training:  8
Exam and new dataset-------------

Training exam 

450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.59it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  0
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 71.98it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  1
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.49it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  2
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.01it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  3
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.42it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  4
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.57it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  5
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 71.71it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  6
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.17it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  7
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.24it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  8
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.37it/s]
12/20/2020 08:59:31 - INFO - mingpt.trainer -   saving models/2020-12-20~08:57:39/9.pth
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  9
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
Final score: 0/450 = 0.00% correct
Test exam 

Marker Data:  0.2
Training:  9
Exam and new dataset-------------

Training exam 

450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.32it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  0
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.24it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  1
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 449 Score: 0/450: 100%|██████████| 450/450 [00:06<00:00, 72.06it/s]
Iiter 2 Score: 0/2:   0%|          | 0/450 [00:00<?, ?it/s]

Adaptive Compute Iteration:  2
Result: 0/450 = 0.00% correct
Predictions: 0/450 = 0.00% correct
450


Iiter 272 Score: 0/273:  58%|█████▊    | 263/450 [00:03<00:02, 68.17it/s]

In [None]:
# Scratch check: True == True evaluates to True, and int(True) is 1.
int(True == True)

In [None]:
# Scratch check: booleans interpolated into an f-string render as the
# words "True" and "False".
cat, hat = True, False
print(f"This is a {cat}, and this is a {hat}")

In [None]:
# Scratch check: append mode creates foo.txt when it is missing and otherwise
# adds the line after the existing content.
with open("foo.txt", mode="a") as out_file:
    out_file.write("3new line\n")

In [None]:
# Scratch check: bool is a subclass of int, so True compares equal to 1.
True == 1

In [None]:
# Scratch check: Python 3 cannot order None against an int, so min(None, 1)
# raises TypeError. The error is caught and shown as the cell's result so the
# notebook still runs top to bottom.
try:
    result = min(None, 1)
except TypeError as err:
    result = f"TypeError: {err}"
result

In [None]:
# Scratch check: `and` returns its last operand when every operand is truthy;
# -1 is non-zero and therefore truthy, so this evaluates to 1.
-1 and 1