In [1]:
import math
import time

import torch.utils.data
from erutils.command_line_interface import fprint

from modules.models import PGT
from utils.utils import DatasetPGT, make2d, save_model, get_config_by_name

In [11]:

# Run configuration: mini-batch size, fraction of the corpus to train on and
# the model preset name handed to get_config_by_name.
batch = 4
percentage = 0.15
MODEL = 'PGT-Cs'

# Report the GPU only when one is present, so this cell does not abort on a
# CPU-only machine before the configuration below is built.
if torch.cuda.is_available():
    prp = torch.cuda.get_device_properties("cuda")
    fprint(
        f'DEVICES : {torch.cuda.get_device_name()} | {prp.name} |'
        f' {prp.total_memory / 1e9} GB Memory')
else:
    prp = None
    fprint('DEVICES : CUDA is not available')

data_path = 'data/PGT-DATA.txt'
dataset = DatasetPGT(batch_size=batch)
Config = get_config_by_name(MODEL, dataset.vocab_size)
Config.load = False  # False -> train from scratch; True -> resume from a checkpoint
Config.data_path = data_path

# Read the corpus through a context manager so the file handle is closed
# deterministically (the bare open(...).read() left it to the garbage collector).
with open(Config.data_path, 'r', encoding="utf8") as rd:
    data = rd.read()

# Keep only the leading `percentage` of the corpus, counted in characters.
tvl = len(data)
use_tvl = tvl * percentage
selected_count = int(use_tvl)
print(f'TOTAL DATA : {tvl}')
print(f'SELECTED DATA : {selected_count}')
selected_data = data[:selected_count]

# Persist the selected slice so the exact training text can be inspected later.
with open('selected.txt', 'w', encoding='utf8') as wr:
    wr.write(selected_data)

[1;36mDEVICES : NVIDIA GeForce GTX 1050 | NVIDIA GeForce GTX 1050 | 2.147221504 GB Memory
TOTAL DATA : 1432801
SELECTED DATA : 214920


In [12]:

# Hand the chunk length from the model config and the selected text to the
# dataset, then build it. The attributes are set before init() is called;
# NOTE(review): presumably init() reads them to tokenize/chunk the text — confirm
# against DatasetPGT. The recorded run of this cell took roughly nine minutes.
dataset.chunk = Config.chunk
dataset.src = selected_data
dataset.init()

100%|██████████| 831/831 [08:53<00:00,  1.56it/s]


In [15]:

# Build the data loader, the model (fresh or restored from 'model.pt'), the loss
# and the optimizer, then wrap the model with torch.compile.
Config.batch_size = batch
dataloader = torch.utils.data.DataLoader(dataset=dataset, batch_size=Config.batch_size, num_workers=3,
                                         pin_memory=True)

if Config.load:
    fprint('Loading Model ...')
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary
    # code; only load checkpoints that come from a trusted source.
    loaded = torch.load('model.pt', 'cpu')
    # Checkpoints written from a torch.compile-wrapped model carry an
    # '_orig_mod.' prefix on every key; strip it so the plain PGT module
    # accepts the weights. Keys without the prefix pass through unchanged.
    compiled_prefix = '_orig_mod.'
    model_state = {
        (key[len(compiled_prefix):] if key.startswith(compiled_prefix) else key): value
        for key, value in loaded['model'].items()
    }
    model = PGT(config=Config).to('cpu')
    model.load_state_dict(model_state)
    model = model.to(Config.device)
    fprint(f'Model Loaded With {sum(p.numel() for p in model.parameters()) / 1e6} Million Parameters')
else:
    fprint('Creating Model ...')
    loaded = None  # the training cell reads loaded['epoch'] only when Config.load is set
    model = PGT(config=Config).to('cpu').to(Config.device)
    fprint(f'Model Created With {sum(p.numel() for p in model.parameters()) / 1e6} Million Parameters')

# Loss and optimizer are built the same way for both paths.
criterion = torch.nn.CrossEntropyLoss(ignore_index=-1)  # targets equal to -1 are ignored
optimizer = model.configure_optimizer(Config)
if loaded is not None:
    # Restore optimizer state only after the model has been moved to its device.
    optimizer.load_state_dict(loaded['optimizer'])

model = torch.compile(model)

[1;36mCreating Model ...
[1;36mModel Created With 20.462394 Million Parameters


In [16]:
# Let cuDNN benchmark its available algorithms and cache the fastest one.
# NOTE(review): this mainly affects cuDNN convolution kernels — confirm it has
# any effect for the PGT model.
torch.backends.cudnn.benchmark = True

In [17]:
# Training loop. A fresh run and a resumed run share one loop; they differ only
# in the starting epoch, in whether an attention mask is passed to the model,
# and in the checkpoint file name.
total_iterations = len(dataset) // Config.batch_size
# Fixed probe prompt, generated from after every checkpoint to eyeball progress.
question = dataset.encode('hello how are you').to(Config.device)
question = question['input_ids'].to(Config.device)
mxl = math.ceil(len(dataset) / Config.batch_size)  # iterations per epoch shown in the progress line
print('TRAINING IS ABOUT TO START')

resuming = bool(Config.load)
start_epoch = loaded['epoch'] if resuming else 0
checkpoint_name = 'modified_model.pt' if resuming else 'model.pt'

# torch.compile wraps the module and prefixes every state_dict key with
# '_orig_mod.'. Save from the underlying module so the checkpoint can be loaded
# back into a plain PGT instance.
raw_model = getattr(model, '_orig_mod', model)

for epoch in range(start_epoch, Config.epochs):
    loss_avg = 0
    st = time.time()
    for i, (inp, label) in enumerate(dataloader):
        inp = make2d(inp.type(torch.long)).to(Config.device)
        label = make2d(label.type(torch.long)).to(Config.device)
        # NOTE(review): only the resumed run passes an attention mask; the fresh
        # run calls the model without one. Kept as in the original — confirm
        # whether this difference is intended.
        if resuming:
            predict = model(inputs=inp, attention_mask=(inp != dataset.tokenizer.pad_token_id))
        else:
            predict = model(inputs=inp)
        optimizer.zero_grad(set_to_none=True)
        # CrossEntropyLoss expects the class dimension second, hence the permute.
        loss = criterion(predict.permute(0, 2, 1), label.view(-1, label.size(-1)))
        loss_value = loss.item()  # read once; every .item() call forces a device sync
        loss_avg += loss_value
        loss.backward()
        optimizer.step()
        # NOTE(review): the criterion uses the default 'mean' reduction, so the
        # extra division by batch_size scales the reported numbers down. Left
        # unchanged so logs stay comparable with earlier runs.
        fprint(
            f'\rEPOCH : [{epoch + 1}/{Config.epochs}] | LOSS : {loss_value / Config.batch_size} | EPOCH LOSS AVG : {(loss_avg / (i + 1)) / Config.batch_size} | ITER : {i + 1}/{mxl} | DEVICE : {Config.device} | EPOCH TIME {int(time.time() - st)} SEC',
            end='')

    print()
    # Checkpoint and sample every fifth epoch.
    if (epoch + 1) % 5 == 0:
        print()
        save_model(model=raw_model.state_dict(), optimizer=optimizer.state_dict(), epochs=Config.epochs,
                   epoch=epoch + 1,
                   name=checkpoint_name)
        fprint('==> MODEL SAVED SUCCESSFULLY')
        # Sampling needs no gradients; skipping autograd bookkeeping saves GPU memory.
        with torch.no_grad():
            predictions = model.generate(idx=question, eos=dataset.tokenizer.eos_token_id,
                                         generate=256)
        fprint(f'QUESTION : {dataset.decode(question)}')
        fprint(f'PREDICTION : {dataset.decode(predictions)}')


TRAINING IS ABOUT TO START
EPOCH : [1/1000] | LOSS : 1.217929720878601 | EPOCH LOSS AVG : 1.4383814415106406 | ITER : 208/208 | DEVICE : cuda | EPOCH TIME 119 SECC[1;36m6m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[1;36m[

KeyboardInterrupt: 

In [33]:
# Encode a prompt, right-pad it (and its attention mask) to the model's chunk
# length, and generate a continuation.
# Use the tokenizer's pad id, as the training loop does, rather than a literal 0;
# fall back to 0 when the tokenizer defines no pad token.
pad_id = dataset.tokenizer.pad_token_id
if pad_id is None:
    pad_id = 0

# Follow Config.device like the other cells instead of hard-coding .cuda().
inc = dataset.encode('hello how are you are you ok ?')['input_ids'].to(Config.device)
mask = (inc != pad_id)

# Allocate the padded buffers directly with the right dtype on the target device.
voac = torch.full((1, Config.chunk), pad_id, dtype=torch.long, device=Config.device)
voam = torch.zeros((1, Config.chunk), dtype=torch.long, device=Config.device)
voac[:, :inc.size(-1)] = inc    # prompt tokens first, padding after
voam[:, :mask.size(-1)] = mask  # 1 over real tokens, 0 over padding
print(f'inc : {voac.shape}')
print(f'Mask : {voam.shape}')

# Inference only: disable autograd so no graph is kept while sampling.
with torch.no_grad():
    predictions = model.generate(idx=voac, eos=dataset.tokenizer.eos_token_id, attention_mask=voam,
                                 generate=256)

inc : torch.Size([1, 256])
Mask : torch.Size([1, 256])


In [22]:
# Show the token ids the dataset's encoder produces for the probe prompt.
# NOTE(review): this cell's execution count is out of order with its
# neighbours; re-run the notebook top to bottom before sharing.
dataset.encode('hello how are you are you ok ?')['input_ids']

tensor([[ 101, 7592, 2129, 2024, 2017, 2024, 2017, 7929, 1029,  102]])

In [34]:
# Decode the ids stored in `predictions` by the generation cell back to text and print them.
print(dataset.decode(predictions))

big nba? that is an basics stuff, i's hard actually stuff, when it's an athlete do you politics. i's politics when the republican players, i guess so, but that! yes, crazy! so i's trend through took, when the ocean do appreciate dance is hard to do you go to us much amazon much instead that delicious following in 2018've am so do you a lot that delicious bird hello! did you like comic books now i guess the news, so crazy! it looks like the prequels i like amazon prime. i do. i also feel pooh i hello! i only fish when the black to hear about the cloned i can've tried to copyright that. that was just a cat was copy cat million living was just recently that in the republican being there any 100 thought of seasons. i guess chan patent on a trend for 100 " copy cat following his i guess people that would be true white house. dogs were able to tell me as some though it lol so different times! people are so what do you a republican party was a free himself? hello! hello! hello! hello! wow tha