In [1]:
# Configure root logging: timestamp, level, logger name, then the message.
import logging

logging.basicConfig(
    level=logging.INFO,
    datefmt="%m/%d/%Y %H:%M:%S",
    format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
)

In [2]:
# make deterministic
# Seed the run once, up front, through mingpt's set_seed helper with a fixed value (42).
# NOTE(review): which random generators set_seed covers is defined in mingpt.utils — confirm there.
from mingpt.utils import set_seed
set_seed(42)

In [3]:
import numpy as np
import torch
import string
import os
from tqdm import tqdm
import torch.nn as nn
from torch.nn import functional as F
import datetime
from mingpt.md import MemData
from mingpt.math_dataset import MathDataset
from mingpt.model import GPT, GPTConfig, GPT1Config
from torch.utils.data.dataloader import DataLoader
from mingpt.trainer import Trainer, TrainerConfig
from mingpt.adaptive_examiner import AdaptiveExaminer
%load_ext autoreload
%autoreload 2

In [4]:
#create a dataset
# Start from a clean slate: delete the previous working copy (`run`) and the
# `models` directory, then copy the `data` directory to `run`.
# WARNING: everything under `models` is removed each time this cell runs.
!rm -rf run models
!cp -r data run
# `models` is not recreated here; the mkdir below is left disabled.
#!mkdir models
# Source file inside the fresh working copy; handed to MD.initiate_mem_slot_data in a later cell.
fn_data = 'run/numbers__list_prime_factors.txt'

In [5]:
# Add memory data structure to training data
# memory_slots is the size handed to MemData; MD is then initialised from fn_data.
# NOTE(review): presumably initiate_mem_slot_data also produces the train/test files
# read in the next cell — confirm in mingpt.md.
memory_slots = 7
MD = MemData(memory_slots)
MD.initiate_mem_slot_data(fn_data)

In [6]:
# Paths of the test and train files inside the `run` working copy.
fn_test = 'run/test_numbers__list_prime_factors.txt'
fn_train = 'run/train_numbers__list_prime_factors.txt'
# Initial training dataset built with marker_data=0.5. The training loop further down
# rebuilds train_dataset on every iteration with its own marker_data value.
train_dataset = MathDataset(fname=fn_train, MD=MD, marker_data=0.5)

In [7]:
#MD.tensor2string(train_dataset[4][1])

In [8]:
# Show the sizes exposed by MD, one per line: block_size, vocab_size, max_trg.
print(MD.block_size, MD.vocab_size, MD.max_trg, sep="\n")

268
102
28


In [9]:
# Resume from a saved checkpoint instead of initializing a fresh baby GPT model.
# To start from scratch, use the commented-out construction below instead.
#mconf = GPTConfig(MD.vocab_size, MD.block_size,
#                  n_layer=4, n_head=8, n_embd=256)
#model = GPT(mconf)
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code — only
# load checkpoints you trust. Presumably '9.pth' holds a whole pickled model object
# (the result is handed straight to Trainer); if so, PyTorch releases that default to
# weights_only=True will refuse it unless weights_only=False is passed — confirm.
model = torch.load('9.pth')

In [10]:
# Iteration bookkeeping: numbering starts at 7 and the loop stops before max_it.
max_it = 100
current_it = 7

# One timestamped checkpoint folder per run of this cell.
run_stamp = datetime.datetime.now().strftime('%Y-%m-%d~%H:%M:%S')
exp_folder = 'models/' + run_stamp
examiner = AdaptiveExaminer(MD)

while current_it < max_it:

    # Before iteration 7 (working memory not yet filled): 1 epoch, no marker data.
    # From iteration 7 on: 5 epochs with marker data at 0.2.
    memory_full = current_it >= 7
    epoch = 5 if memory_full else 1
    marker_data = 0.2 if memory_full else 0.0

    # Rebuild both datasets each round; only the training set receives marker data.
    print("Marker Data: ", marker_data)
    train_dataset = MathDataset(fname=fn_train, MD=MD, marker_data=marker_data)
    test_dataset = MathDataset(fname=fn_test, MD=MD, marker_data=0.0)

    # Trainer config; final_tokens scales with the epoch count and dataset length.
    final_tokens = epoch * len(train_dataset) * (MD.vocab_size + 1)
    tconf = TrainerConfig(
        max_epochs=epoch,
        batch_size=358,
        learning_rate=6e-4,
        lr_decay=True,
        warmup_tokens=1024,
        final_tokens=final_tokens,
        num_workers=6,
    )

    # Train this round and checkpoint it under the iteration number.
    print("Training: ", current_it)
    trainer = Trainer(model, train_dataset, test_dataset, tconf)
    trainer.train()
    trainer.save_checkpoint(exp_folder, str(current_it))

    # Examine the model on the training file (third argument stays 5000).
    print("Exam and new dataset-------------\n")
    print("Training exam \n")
    examiner.exam(fn_train, trainer, 5000)
    # The test-file exam remains disabled; its header is still printed.
    print("Test exam \n")
    #examiner.exam(fn_test, trainer)

    current_it += 1

Marker Data:  0.2
Training:  7


epoch 1 iter 25: train loss 1.51930. lr 5.991565e-04: 100%|██████████| 26/26 [00:11<00:00,  2.21it/s]
12/18/2020 12:39:49 - INFO - mingpt.trainer -   test loss: 1.480862


Best Loss:  1.480862021446228


epoch 2 iter 25: train loss 1.38583. lr 5.965892e-04: 100%|██████████| 26/26 [00:11<00:00,  2.25it/s]
12/18/2020 12:40:01 - INFO - mingpt.trainer -   test loss: 1.388755


Best Loss:  1.388754924138387


epoch 3 iter 25: train loss 1.39443. lr 5.922484e-04: 100%|██████████| 26/26 [00:11<00:00,  2.24it/s]
12/18/2020 12:40:14 - INFO - mingpt.trainer -   test loss: 1.356751


Best Loss:  1.3567509253819783


epoch 4 iter 25: train loss 1.25347. lr 5.862644e-04: 100%|██████████| 26/26 [00:11<00:00,  2.23it/s]
12/18/2020 12:40:26 - INFO - mingpt.trainer -   test loss: 1.349944


Best Loss:  1.349943995475769


epoch 5 iter 25: train loss 1.42814. lr 5.786254e-04: 100%|██████████| 26/26 [00:11<00:00,  2.23it/s]
12/18/2020 12:40:38 - INFO - mingpt.trainer -   test loss: 1.322997
12/18/2020 12:40:38 - INFO - mingpt.trainer -   saving models/2020-12-18~12:39:36/7.pth
  0%|          | 0/9000 [00:00<?, ?it/s]

Best Loss:  1.3229968547821045
Exam and new dataset-------------

Training exam 



Iiter 8999 Score: 0/1346: 100%|██████████| 9000/9000 [00:04<00:00, 2093.13it/s]
  0%|          | 0/1 [00:00<?, ?it/s]

Adaptive Compute Iteration:  0
Result: 0/1346 = 0.00% correct
Predictions: 0/1346 = 0.00% correct


Iiter 0 Score: 0/1346: 100%|██████████| 1/1 [00:02<00:00,  2.82s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Adaptive Compute Iteration:  1
Result: 0/1346 = 0.00% correct
Predictions: 0/1346 = 0.00% correct


Iiter 0 Score: 0/2692: 100%|██████████| 1/1 [00:05<00:00,  5.64s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Adaptive Compute Iteration:  2
Result: 0/2692 = 0.00% correct
Predictions: 0/2692 = 0.00% correct


Iiter 0 Score: 0/5384: 100%|██████████| 1/1 [00:11<00:00, 11.93s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

Adaptive Compute Iteration:  3
Result: 0/5384 = 0.00% correct
Predictions: 0/5384 = 0.00% correct


Iiter 1 Score: 0/1764: 100%|██████████| 2/2 [00:04<00:00,  2.42s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

Adaptive Compute Iteration:  4
Result: 0/1764 = 0.00% correct
Predictions: 0/1764 = 0.00% correct


Iiter 1 Score: 0/3526: 100%|██████████| 2/2 [00:08<00:00,  4.49s/it]
  0%|          | 0/2 [00:00<?, ?it/s]

Adaptive Compute Iteration:  5
Result: 0/3526 = 0.00% correct
Predictions: 0/3526 = 0.00% correct


Iiter 1 Score: 0/7050: 100%|██████████| 2/2 [00:18<00:00,  9.04s/it]
  0%|          | 0/3 [00:00<?, ?it/s]

Adaptive Compute Iteration:  6
Result: 0/7050 = 0.00% correct
Predictions: 0/7050 = 0.00% correct


Iiter 2 Score: 0/14098: 100%|██████████| 3/3 [00:40<00:00, 13.61s/it]
  0%|          | 0/5 [00:00<?, ?it/s]

Adaptive Compute Iteration:  7
Result: 0/14098 = 0.00% correct
Predictions: 0/14098 = 0.00% correct


 80%|████████  | 4/5 [00:03<00:00,  1.18it/s]


RuntimeError: CUDA out of memory. Tried to allocate 5.38 GiB (GPU 0; 23.65 GiB total capacity; 12.14 GiB already allocated; 2.30 GiB free; 20.69 GiB reserved in total by PyTorch)

In [None]:
# Scratch check: the comparison yields True, and int() of True is 1.
int(True == True)

In [None]:
# Scratch check: booleans interpolate into an f-string as "True" / "False".
cat, hat = True, False
print(f"This is a {cat}, and this is a {hat}")

In [None]:
# Scratch check: append one line to foo.txt (append mode creates the file if missing).
with open("foo.txt", mode="a") as f:
    f.write("3new line\n")

In [None]:
# Scratch check: bool is a subclass of int, so True compares equal to 1.
True == 1

In [None]:
# Scratch check: Python 3 cannot order None against an int, so min(None, 1) raises
# TypeError. Catch it and display the message instead, so this cell no longer aborts
# a Restart & Run All.
try:
    min_outcome = min(None, 1)
except TypeError as exc:
    min_outcome = f"TypeError: {exc}"
min_outcome

In [None]:
# Scratch check: `and` returns its second operand when the first is truthy;
# -1 is truthy, so this evaluates to 1.
-1 and 1