In [1]:
from simulator import blogm, bSqc, Neg
from Llama2 import LlamaPredictor
import torch
from math import prod
from functools import reduce
import pandas
from utils import dtype, device, pauli, basis, torch_data

In [2]:
seed = 0
test = True
file = f'seed{seed}'
train_ratio = 8/9
batch = 500

mdl = LlamaPredictor(L_max=35,
                     L=4,
                     n_embd=12, 
                     n_layer=6, 
                     n_head=6, 
                     vocab_size=3, 
                     dropout_prob=0.0).to(device)
total=0 # find size of the model
for p in mdl.parameters():
    total+=prod(p.shape)
total#, True_fid(mdl, psi)

6692

In [3]:
for N in range(4, 36, 2):
    torch.manual_seed(seed)
    prepseq, shadow_state, rhoS = torch_data(f'../data/data_{N}na.pickle', shuffle=True)
    train_size = int(prepseq.shape[0]*train_ratio)
    test_size = prepseq.shape[0]-train_size
    
    prepseq = torch.cat([prepseq+1, torch.zeros(prepseq.shape[0],1).to(prepseq.dtype).to(device)], -1)
    
    prepseq_train, prepseq_test = prepseq[:train_size], prepseq[train_size:]
    shadow_state_train, shadow_state_test = shadow_state[:train_size], shadow_state[train_size:]
    rhoS_train, rhoS_test = rhoS[:train_size], rhoS[train_size:]
    
    # split in batches
    prepseq_train = prepseq_train.view(-1, batch, N-2)
    shadow_state_train = shadow_state_train.view(-1, batch, 4)
    rhoS_train = rhoS_train.view(-1, batch, 4, 4)

    prepseq_test = prepseq_test.view(-1, batch, N-2)
    shadow_state_test = shadow_state_test.view(-1, batch, 4)
    rhoS_test = rhoS_test.view(-1, batch, 4, 4)
    
    mdl = LlamaPredictor(L_max=35,
                     L=prepseq_train.shape[1]-1,
                     n_embd=12, 
                     n_layer=6, 
                     n_head=6, 
                     vocab_size=3, 
                     dropout_prob=0.0).to(device)
    # load new model
    if N > 4:
        mdl.load_state_dict(torch.load(f'{file}/models/mpo_N={N-2}_na.pt'))
    # load old model
    # mdl.load_state_dict(torch.load(f'{file}/models/mpo_N={N}_na.pt'))
    
    optimizer = torch.optim.Adam(mdl.parameters(), lr=1e-3) # 0.0001
    l = {'train Sqc':[], 'test Sqc':[], 'test Neg':[], 'loss':[]}
    
    for epoch in range(5):
        # Train
        print('='*50+'   Train   '+'='*50)
        mdl.train()
        for i in range(prepseq_train.shape[0]):
            rhoC = mdl(prepseq_train[i])
            l['train Sqc'].append(bSqc(rhoS_train[i], rhoC).mean().item())
            optimizer.zero_grad()
            probs = torch.bmm(torch.bmm(shadow_state_train[i].unsqueeze(1), rhoC), shadow_state_train[i].conj().unsqueeze(-1)).view(-1).real
            loss = -probs.log().mean()
            loss.backward()
            optimizer.step()
            l['loss'].append(loss.item())
            if (i+1)%100 == 0:
                trainS = torch.tensor(l['train Sqc'])[-i:].mean().item()
                loss = torch.tensor(l['loss'])[-i:].mean().item()
                print('epoch:  %3d | step:  %3d | N:  %d | train Sqc: %.4f | loss: %.4f' %(epoch, i, N, trainS, loss))
        # Test
        if test:
            with torch.no_grad():
                print('='*50+'   Test   '+'='*50)
                mdl.eval()
                for i in range(prepseq_test.shape[0]):
                    rhoC = mdl(prepseq_test[i])
                    l['test Sqc'].append(bSqc(rhoS_test[i], rhoC).mean().item())
                    l['test Neg'].append(Neg(rhoS_test[i], rhoC).mean().item())
                    if (i+1)%100 == 0:
                        testS = torch.tensor(l['test Sqc'])[-i:].mean().item()
                        testN = torch.tensor(l['test Neg'])[-i:].mean().item()
                        print('epoch:  %3d | step:  %3d | N:  %d | test Sqc: %.4f | test Neg: %.4f' %(epoch, i, N, testS, testN))
        torch.save(l, f'{file}/record/gpt_N={N}_na.pt')
        torch.save(mdl.state_dict(), f'{file}/models/gpt_N={N}_na.pt')

epoch:    0 | step:   99 | N:  4 | train Sqc: 1.5496 | loss: 1.4015
epoch:    0 | step:  199 | N:  4 | train Sqc: 1.4735 | loss: 1.3947
epoch:    0 | step:  299 | N:  4 | train Sqc: 1.4471 | loss: 1.3922
epoch:    0 | step:  399 | N:  4 | train Sqc: 1.4336 | loss: 1.3910
epoch:    0 | step:  499 | N:  4 | train Sqc: 1.4251 | loss: 1.3901
epoch:    0 | step:  599 | N:  4 | train Sqc: 1.4196 | loss: 1.3896
epoch:    0 | step:  699 | N:  4 | train Sqc: 1.4154 | loss: 1.3892
epoch:    0 | step:  799 | N:  4 | train Sqc: 1.4120 | loss: 1.3889
epoch:    0 | step:  899 | N:  4 | train Sqc: 1.4096 | loss: 1.3886
epoch:    0 | step:  999 | N:  4 | train Sqc: 1.4078 | loss: 1.3885
epoch:    0 | step:  1099 | N:  4 | train Sqc: 1.4059 | loss: 1.3882
epoch:    0 | step:  1199 | N:  4 | train Sqc: 1.4044 | loss: 1.3881
epoch:    0 | step:  1299 | N:  4 | train Sqc: 1.4033 | loss: 1.3880
epoch:    0 | step:  1399 | N:  4 | train Sqc: 1.4023 | loss: 1.3879
epoch:    0 | step:  1499 | N:  4 | train Sq

IndexError: index 1600 is out of bounds for dimension 0 with size 1600

In [4]:
prepseq_train.shape

torch.Size([2400, 500, 2])