In [1]:
# set up logging
import logging
logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO,
)

In [2]:
# make deterministic
from mingpt.utils import set_seed
set_seed(42)

In [3]:
import numpy as np
import torch
import string
import os
from tqdm import tqdm
import torch.nn as nn
from torch.nn import functional as F
import datetime
from mingpt.md import MemData
from mingpt.marker_dataset import MarkerDataset
from mingpt.math_dataset import MathDataset
from mingpt.model import GPT, GPTConfig, GPT1Config
from torch.utils.data.dataloader import DataLoader
from mingpt.trainer import Trainer, TrainerConfig
from mingpt.examiner import Examiner
%load_ext autoreload
%autoreload 2

In [4]:
#create a dataset
!rm -rf run models
!cp -r data run
!mkdir models
fn_data = 'run/numbers__list_prime_factors.txt'

In [5]:
# Add memory data structure to training data
memory_slots = 7
MD = MemData(memory_slots)
MD.initiate_mem_slot_data(fn_data)

In [6]:
fn_test = 'run/test_numbers__list_prime_factors.txt'
fn_train = 'run/train_numbers__list_prime_factors.txt'
train_dataset = MathDataset(fname=fn_train, MD=MD)

In [7]:
print(MD.block_size)
print(MD.vocab_size)
print(MD.max_trg)

260
102
27


In [8]:
# initialize a baby GPT model
mconf = GPTConfig(MD.vocab_size, MD.block_size,
                  n_layer=4, n_head=8, n_embd=256)
model = GPT(mconf)

12/17/2020 05:09:49 - INFO - mingpt.model -   number of parameters: 3.278336e+06


In [9]:
max_it = 100
main_epoch = 1
marker_epoch = 1
current_it = 0

exp_folder = 'models/' + datetime.datetime.now().strftime('%Y-%m-%d~%H:%M:%S')
examiner = Examiner(MD)

while(current_it < max_it):

    # Switch between main training and marker training
    if current_it % 2 == 0:
        print("Loading Main Dataset\n")
        train_dataset = MathDataset(fname=fn_train, MD=MD)
        test_dataset = MathDataset(fname=fn_test, MD=MD)
        epoch = main_epoch
    else:
        print("Loading Marker Dataset\n")
        train_dataset = MarkerDataset(fname=fn_train, MD=MD)
        test_dataset = MarkerDataset(fname=fn_test, MD=MD)
        epoch = marker_epoch
    
    # Trainer Config
    tconf = TrainerConfig(max_epochs=epoch, batch_size=256, learning_rate=6e-4,
                      lr_decay=True, warmup_tokens=1024, final_tokens=epoch*len(train_dataset)*(MD.vocab_size+1),
                      num_workers=0)
    
    # Create the first training round
    print("Training: ", str(current_it))
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    #trainer.train()
    trainer.save_checkpoint(exp_folder, str(current_it))
    
    # Examine the model and create new dataset
    if current_it % 2 == 0:
        print("Exam and new dataset-------------\n")
        print("Training exam \n")
        examiner.exam(fn_train, train_dataset, trainer, 5000)
        print("Test exam \n")
        examiner.exam(fn_test, test_dataset, trainer, 5000)
    
    current_it += 1

Loading Main Dataset

Training:  0


12/17/2020 05:09:50 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/0.pth
Iiter 2:   0%|          | 0/900 [00:00<?, ?it/s]

Exam and new dataset-------------

Training exam 



Iiter 899: 100%|██████████| 900/900 [00:02<00:00, 301.30it/s]
Iiter 17:  14%|█▍        | 14/100 [00:00<00:00, 139.74it/s]

Final score: 0/784 = 0.00% correct
Saving new files to disk...
Test exam 



Iiter 99: 100%|██████████| 100/100 [00:00<00:00, 141.82it/s]
12/17/2020 05:09:54 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/1.pth
12/17/2020 05:09:54 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/2.pth
Iiter 3:   0%|          | 0/784 [00:00<?, ?it/s]

Final score: 0/87 = 0.00% correct
Saving new files to disk...
Loading Marker Dataset

Training:  1
Loading Main Dataset

Training:  2
Exam and new dataset-------------

Training exam 



Iiter 783: 100%|██████████| 784/784 [00:02<00:00, 284.17it/s]
Iiter 17:  11%|█▏        | 10/87 [00:00<00:00, 99.83it/s]

Final score: 0/600 = 0.00% correct
Saving new files to disk...
Test exam 



Iiter 86: 100%|██████████| 87/87 [00:00<00:00, 144.90it/s]
12/17/2020 05:09:57 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/3.pth
12/17/2020 05:09:57 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/4.pth
Iiter 10:   2%|▏         | 10/600 [00:00<00:05, 99.74it/s]

Final score: 0/68 = 0.00% correct
Saving new files to disk...
Loading Marker Dataset

Training:  3
Loading Main Dataset

Training:  4
Exam and new dataset-------------

Training exam 



Iiter 599: 100%|██████████| 600/600 [00:03<00:00, 195.17it/s]
Iiter 4:   6%|▌         | 4/68 [00:00<00:02, 23.01it/s]

Final score: 0/455 = 0.00% correct
Saving new files to disk...
Test exam 



Iiter 67: 100%|██████████| 68/68 [00:00<00:00, 81.34it/s]
12/17/2020 05:10:01 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/5.pth
12/17/2020 05:10:01 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/6.pth
Iiter 3:   0%|          | 0/455 [00:00<?, ?it/s]

Final score: 0/45 = 0.00% correct
Saving new files to disk...
Loading Marker Dataset

Training:  5
Loading Main Dataset

Training:  6
Exam and new dataset-------------

Training exam 



Iiter 454: 100%|██████████| 455/455 [00:04<00:00, 110.05it/s]
Iiter 4:   9%|▉         | 4/45 [00:00<00:01, 24.11it/s]

Final score: 0/415 = 0.00% correct
Saving new files to disk...
Test exam 



Iiter 44: 100%|██████████| 45/45 [00:00<00:00, 63.06it/s]
12/17/2020 05:10:06 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/7.pth
12/17/2020 05:10:06 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/8.pth
Iiter 4:   0%|          | 0/415 [00:00<?, ?it/s]

Final score: 0/27 = 0.00% correct
Saving new files to disk...
Loading Marker Dataset

Training:  7
Loading Main Dataset

Training:  8
Exam and new dataset-------------

Training exam 



Iiter 414: 100%|██████████| 415/415 [00:03<00:00, 112.47it/s]
Iiter 4:  15%|█▍        | 4/27 [00:00<00:01, 22.57it/s]

Final score: 0/295 = 0.00% correct
Saving new files to disk...
Test exam 



Iiter 26: 100%|██████████| 27/27 [00:00<00:00, 50.83it/s]
12/17/2020 05:10:10 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/9.pth
12/17/2020 05:10:10 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/10.pth
Iiter 3:   0%|          | 0/295 [00:00<?, ?it/s]

Final score: 0/11 = 0.00% correct
Saving new files to disk...
Loading Marker Dataset

Training:  9
Loading Main Dataset

Training:  10
Exam and new dataset-------------

Training exam 



Iiter 294: 100%|██████████| 295/295 [00:02<00:00, 110.94it/s]
Iiter 4:  36%|███▋      | 4/11 [00:00<00:00, 22.77it/s]

Final score: 0/141 = 0.00% correct
Saving new files to disk...
Test exam 



Iiter 10: 100%|██████████| 11/11 [00:00<00:00, 41.71it/s]
12/17/2020 05:10:13 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/11.pth
12/17/2020 05:10:13 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/12.pth
Iiter 3:   0%|          | 0/141 [00:00<?, ?it/s]

Final score: 0/3 = 0.00% correct
Saving new files to disk...
Loading Marker Dataset

Training:  11
Loading Main Dataset

Training:  12
Exam and new dataset-------------

Training exam 



Iiter 140: 100%|██████████| 141/141 [00:01<00:00, 86.28it/s]
Iiter 2: 100%|██████████| 3/3 [00:00<00:00, 36.93it/s]
12/17/2020 05:10:15 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/13.pth
12/17/2020 05:10:15 - INFO - mingpt.trainer -   saving models/2020-12-17~05:09:49/14.pth
Iiter 1:   0%|          | 0/60 [00:00<?, ?it/s]

torch.Size([1, 178])
torch.Size([19, 176])
tensor([[61, 84, 94, 95,  7, 95, 83, 80,  7, 91, 93, 84, 88, 80,  7, 81, 76, 78,
         95, 90, 93, 94,  7, 90, 81,  7, 42, 44, 44, 46, 43, 41, 44, 40, 21,  1,
         73, 73, 73, 73, 34, 73, 34, 73, 73, 34, 73, 34, 73, 34, 73, 73, 34, 73,
         34,  2, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 34, 73,
         34, 73, 34,  2, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73,
         73, 73, 73, 73, 73,  2, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
         15, 15, 95, 95, 95, 95, 95,  2, 95, 95, 15, 21, 15, 21, 15, 21, 15, 21,
         15, 21, 15, 21, 15, 21, 15, 21, 15,  2, 95, 95, 44, 95, 21, 59, 38, 98,
         44, 15, 21, 59, 38, 98, 44, 15, 21, 15, 21,  2,  2,  3, 42, 19,  7, 43,
         19,  7, 45, 19,  7, 41, 43, 19,  7, 47, 49, 19,  7, 43, 49, 47]])
tensor([[61, 84, 94,  ...,  0,  0,  0],
        [61, 84, 94,  ...,  0,  0,  0],
        [61, 84, 94,  ...,  0,  0,  0],
        ...,
        [61, 84, 94,

Iiter 59: 100%|██████████| 60/60 [00:01<00:00, 57.07it/s]
Iiter 0: 100%|██████████| 1/1 [00:00<00:00, 1392.53it/s]

torch.Size([1, 188])
torch.Size([1, 191])
tensor([[61, 84, 94, 95,  7, 95, 83, 80,  7, 91, 93, 84, 88, 80,  7, 81, 76, 78,
         95, 90, 93, 94,  7, 90, 81,  7, 48, 42, 49, 41, 45, 43, 46, 21,  1, 21,
         15, 21, 15, 21, 15, 21, 15, 21, 15, 21, 15, 21, 15, 21, 15, 21, 15, 21,
          2, 21, 59, 38, 21, 59, 38, 21, 59, 38, 21, 59, 38, 21, 59, 38, 21, 59,
         38, 21,  2, 38, 59, 38, 59, 38, 21, 59, 38, 21, 59, 38, 21, 59, 38, 21,
         59, 38, 21, 59,  2, 95, 44, 99, 95, 44, 99, 95, 44, 99, 95, 99, 95, 99,
         95, 99, 95, 99, 95, 99,  2, 95, 44, 99, 44, 99, 44, 99, 44, 99, 95, 44,
         99, 95, 44, 99, 95, 44, 99, 95,  2, 15, 15, 15, 15, 15, 15, 15, 15, 15,
         15, 15, 15, 15, 15, 15, 15, 15, 15, 15,  2, 95, 44, 95, 44, 98, 44, 98,
         98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 98,  2,  3, 42, 19,  7, 41,
         41, 19,  7, 44, 47, 41, 41, 41]])
tensor([[61, 84, 94, 95,  7, 95, 83, 80,  7, 91, 93, 84, 88, 80,  7, 81, 76, 78,
         95, 90, 93, 94,


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


ValueError: max() arg is an empty sequence

In [None]:
test = list(range(10))

In [None]:
test[:20]

In [None]:
test[3:]

In [None]:
test = ['', '', 'cat', '']
for i in test:
    print(i)

In [None]:
sum([1, 2, 3, ])

In [None]:
X = torch.tensor([0.1, 0.5, -1.0, 0, 1.2, 0])

In [None]:
X = X[X > 0.1]

In [None]:
X