In [1]:
import torch
from kwisatzHaderach import KwisatzHaderach
import json
import os
import tqdm
import numpy as np
from datagen import generate_dataset, generate_dataset_memory, generate_dataset_memory_bh

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between ``a`` and ``b``.

    The squared element-wise difference is summed over the last dimension,
    so the result has the broadcast shape of the inputs minus that dimension.
    A constant 1e-12 is added under the square root, which means identical
    inputs yield 1e-6 rather than exactly 0 (presumably to keep the square
    root differentiable at zero distance).
    """
    squared_diff = (a - b).pow(2)
    return (squared_diff.sum(dim=-1) + 1e-12).sqrt()

def loss_fn(pr_acc, gt_acc, num_neighbors):
    """Neighbor-weighted loss between predicted and ground-truth vectors.

    Args:
        pr_acc: predicted tensor; the last dimension is reduced by
            ``euclidean_distance``.
        gt_acc: ground-truth tensor, broadcast-compatible with ``pr_acc``.
        num_neighbors: tensor of neighbor counts used to weight the
            distances (must broadcast against the distance tensor —
            TODO confirm its shape against the model). ``None`` or an empty
            tensor selects uniform weighting.

    Returns:
        A tuple ``(weighted_loss, mean_distance)`` of scalar tensors:
        ``mean(importance * distance**0.5)`` and the plain mean distance.
    """
    gamma = 0.5
    neighbor_scale = 1/100
    euclidean_distances = euclidean_distance(pr_acc, gt_acc)

    if num_neighbors is None or num_neighbors.numel() == 0:
        # No neighbor information: weight every element equally. This check
        # has to happen BEFORE torch.max, which raises on an empty tensor.
        importance = 1.0
    else:
        # Particles with more neighbors get more importance; the largest
        # weight is normalised to 1. exp(s*n) / max(exp(s*n)) is written as
        # exp(s*(n - max(n))) so large counts cannot overflow to inf/inf.
        importance = torch.exp(neighbor_scale * (num_neighbors - torch.max(num_neighbors)))

    return torch.mean(importance *
                        euclidean_distances**gamma), torch.mean(euclidean_distances)

In [3]:
def get_new_pos_vel(acc, pos, vel, dt=0.01):
    """Advance position and velocity by one step of size ``dt``.

    The velocity is updated from the acceleration first and the position is
    then moved with the *updated* velocity (semi-implicit Euler).

    Returns:
        ``(new_pos, new_vel)``.
    """
    stepped_vel = vel + acc * dt
    return pos + stepped_vel * dt, stepped_vel

In [4]:
def train_epoch(model, file, batch_size, loss_fn, optimizer, device):
    """Run one training pass over the samples stored in a JSON file.

    The file must contain a list of dicts with the keys ``masses``, ``pos``,
    ``vel``, ``pos_next1`` and ``pos_next2``. For every sample the model is
    rolled out for two steps (the second step is fed the model's own first
    prediction) and both predictions are scored with ``loss_fn`` against
    ``pos_next1`` / ``pos_next2``, each weighted 0.5.

    Args:
        model: callable ``model(pos, vel, masses) -> (pos, vel)`` exposing
            ``num_neighbors``, which is read after each forward pass.
        file: path of the JSON file to train on.
        batch_size: samples per optimizer step; a trailing partial batch is
            dropped.
        loss_fn: callable returning ``(loss, mean_distance)``.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.

    Prints the mean batch loss and the mean unweighted distance.
    """
    model.train()
    with open(file) as f:
        data = json.load(f)

    num_batches = len(data) // batch_size
    all_losses = []
    all_dists = []
    for i in tqdm.tqdm(range(num_batches)):
        batch = data[i*batch_size:(i+1)*batch_size]
        m = torch.tensor([b['masses'] for b in batch], dtype=torch.float32).to(device)
        pos0 = torch.tensor([b['pos'] for b in batch], dtype=torch.float32).to(device)
        vel0 = torch.tensor([b['vel'] for b in batch], dtype=torch.float32).to(device)
        pos1 = torch.tensor([b['pos_next1'] for b in batch], dtype=torch.float32).to(device)
        pos2 = torch.tensor([b['pos_next2'] for b in batch], dtype=torch.float32).to(device)

        optimizer.zero_grad()
        losses = []
        # Samples are pushed through the model one at a time; gradients are
        # accumulated through the averaged batch loss below.
        for j in range(len(batch)):
            sample_masses = m[j].unsqueeze(1)

            # Step 1: predict from the ground-truth initial state.
            pr_pos1, pr_vel1 = model(pos0[j], vel0[j], sample_masses)
            loss1, dists1 = loss_fn(pr_pos1, pos1[j], model.num_neighbors)
            # Log the raw distance. The 0.5 factor belongs to the loss
            # weighting only; applying it here halved the reported L2 and
            # made it disagree with the value printed by val().
            all_dists.append(dists1.item())

            # Step 2: continue the rollout from the model's own prediction.
            pr_pos2, _ = model(pr_pos1, pr_vel1, sample_masses)
            loss2, dists2 = loss_fn(pr_pos2, pos2[j], model.num_neighbors)
            all_dists.append(dists2.item())

            losses.append(0.5*loss1 + 0.5*loss2)

        total_loss = sum(losses) / len(batch)
        all_losses.append(total_loss.item())
        total_loss.backward()

        optimizer.step()

    if not all_losses:
        # Fewer samples than batch_size: nothing ran, so there is nothing to
        # average (the division below would raise ZeroDivisionError).
        print('Train: no full batch available, nothing was trained')
        return

    print(f'Train Loss: {sum(all_losses)/len(all_losses)}, Train L2: {sum(all_dists)/len(all_dists)}')

def train_epoch_memory(model, data, batch_size, loss_fn, optimizer, device, use_custom_loss=False):
    """Run one training pass over an in-memory dataset of acceleration targets.

    ``data`` is a sequence of dicts with the keys ``masses``, ``pos``,
    ``vel`` and ``acc``. The model predicts the acceleration for each sample
    and is scored against ``acc``. Only this single step is trained here; a
    multi-step rollout exists in ``train_epoch_memory_black_hole_info``.

    Args:
        model: callable ``model(pos, vel, masses) -> acc`` exposing
            ``num_neighbors`` (only read when ``use_custom_loss`` is true).
        data: list of sample dicts.
        batch_size: samples per optimizer step; a trailing partial batch is
            dropped.
        loss_fn: callable returning ``(loss, mean_distance)``; used only
            when ``use_custom_loss`` is true.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.
        use_custom_loss: if false, the loss is the mean Euclidean distance.

    Prints the mean batch loss and the mean distance.
    """
    model.train()

    num_batches = len(data) // batch_size
    all_losses = []
    all_dists = []
    for i in tqdm.tqdm(range(num_batches)):
        batch = data[i*batch_size:(i+1)*batch_size]
        m = torch.tensor([b['masses'] for b in batch], dtype=torch.float32).to(device)
        pos0 = torch.tensor([b['pos'] for b in batch], dtype=torch.float32).to(device)
        vel0 = torch.tensor([b['vel'] for b in batch], dtype=torch.float32).to(device)
        acc0 = torch.tensor([b['acc'] for b in batch], dtype=torch.float32).to(device)

        optimizer.zero_grad()
        losses = []
        for j in range(len(batch)):
            sample_masses = m[j].unsqueeze(1)
            sample_acc0 = acc0[j]

            pr_acc0 = model(pos0[j], vel0[j], sample_masses)

            if use_custom_loss:
                loss0, dists0 = loss_fn(pr_acc0, sample_acc0, model.num_neighbors)
            else:
                loss0 = torch.mean(euclidean_distance(pr_acc0, sample_acc0))
                dists0 = loss0

            # Keep a plain float for logging. Appending the tensor itself
            # kept every sample's autograd graph alive until the end of the
            # epoch and made the summary print a tensor repr.
            all_dists.append(dists0.item())

            losses.append(loss0)

        # NOTE(review): the constant 128 rescales the loss (and therefore the
        # gradients and the printed "Train Loss"); its origin is not visible
        # in this file — confirm it is intentional.
        total_loss = 128 * sum(losses) / len(batch)
        all_losses.append(total_loss.item())
        total_loss.backward()

        optimizer.step()

    if not all_losses:
        # Fewer samples than batch_size: avoid dividing by zero below.
        print('Train: no full batch available, nothing was trained')
        return

    print(f'Train Loss: {sum(all_losses)/len(all_losses)}, Train L2: {sum(all_dists)/len(all_dists)}')

def train_epoch_memory_black_hole_info(model, data, batch_size, loss_fn, optimizer, device,use_custom_loss=False):
    """Run one training pass for the model variant that receives black-hole state.

    ``data`` is a sequence of dicts with the keys ``masses``, ``pos``,
    ``vel``, ``acc``, ``acc_next1`` and ``bh_index``. For every sample the
    model predicts the acceleration, the state is advanced with
    ``get_new_pos_vel`` and a second acceleration is predicted from that
    advanced state. Both predictions contribute to the loss with weight 0.5.

    Args:
        model: callable ``model(pos, vel, masses, bh_pos, bh_vel, bh_masses)``
            returning the acceleration; must expose ``num_neighbors`` when
            ``use_custom_loss`` is true.
        data: list of sample dicts.
        batch_size: samples per optimizer step; a trailing partial batch is
            dropped.
        loss_fn: callable returning ``(loss, mean_distance)``; used only
            when ``use_custom_loss`` is true.
        optimizer: optimizer stepped once per batch.
        device: device the batch tensors are moved to.
        use_custom_loss: if false, the loss is the mean Euclidean distance.

    Prints the mean batch loss and the mean distance.
    """
    model.train()

    num_batches = len(data) // batch_size
    all_losses = []
    all_dists = []
    for i in tqdm.tqdm(range(num_batches)):
        batch = data[i*batch_size:(i+1)*batch_size]
        m = torch.tensor([b['masses'] for b in batch], dtype=torch.float32).to(device)
        pos0 = torch.tensor([b['pos'] for b in batch], dtype=torch.float32).to(device)
        vel0 = torch.tensor([b['vel'] for b in batch], dtype=torch.float32).to(device)
        acc0 = torch.tensor([b['acc'] for b in batch], dtype=torch.float32).to(device)
        acc1 = torch.tensor([b['acc_next1'] for b in batch], dtype=torch.float32).to(device)
        # Index (or indices — TODO confirm the schema of 'bh_index') selecting
        # the black-hole particle(s) of each sample.
        black_hole_indexes = torch.tensor(np.array([b['bh_index'] for b in batch]), dtype=torch.long)

        optimizer.zero_grad()
        losses = []
        for j in range(len(batch)):
            sample_masses = m[j].unsqueeze(1)
            sample_pos0 = pos0[j]
            sample_vel0 = vel0[j]
            bh_index = black_hole_indexes[j]

            # Step 0: predict from the ground-truth state.
            pr_acc0 = model(sample_pos0, sample_vel0, sample_masses,
                            sample_pos0[bh_index], sample_vel0[bh_index], sample_masses[bh_index])

            if use_custom_loss:
                loss0, dists0 = loss_fn(pr_acc0, acc0[j], model.num_neighbors)
            else:
                loss0 = torch.mean(euclidean_distance(pr_acc0, acc0[j]))
                dists0 = loss0

            all_dists.append(dists0.item())

            # Step 1: advance the state with the predicted acceleration and
            # predict again from the model's own rollout.
            pr_pos1, pr_vel1 = get_new_pos_vel(pr_acc0, sample_pos0, sample_vel0)

            pr_acc1 = model(pr_pos1, pr_vel1, sample_masses,
                            pr_pos1[bh_index], pr_vel1[bh_index], sample_masses[bh_index])

            if use_custom_loss:
                loss1, dists1 = loss_fn(pr_acc1, acc1[j], model.num_neighbors)
            else:
                loss1 = torch.mean(euclidean_distance(pr_acc1, acc1[j]))
                dists1 = loss1

            all_dists.append(dists1.item())

            losses.append(loss0 * 0.5 + loss1 * 0.5)

        total_loss = sum(losses) / len(batch)
        all_losses.append(total_loss.item())
        total_loss.backward()

        optimizer.step()

    if not all_losses:
        # Fewer samples than batch_size: avoid dividing by zero below.
        print('Train: no full batch available, nothing was trained')
        return

    print(f'Train Loss: {sum(all_losses)/len(all_losses)}, Train L2: {sum(all_dists)/len(all_dists)}')

        

def val(model, val_dir, batch_size, loss_fn, device):
    """Evaluate ``model`` on every JSON file in ``val_dir`` and print the result.

    Each file must contain a list of dicts with the keys ``masses``, ``pos``,
    ``vel``, ``pos_next1`` and ``pos_next2``. For every sample the model is
    rolled out for two steps and both predictions are scored with
    ``loss_fn`` (weights 0.5 each). The loss is averaged per file, then over
    files; the distance is averaged over all individual predictions.

    Args:
        model: callable ``model(pos, vel, masses) -> (pos, vel)`` exposing
            ``num_neighbors``.
        val_dir: directory whose entries are all opened as JSON files.
        batch_size: unused; samples are evaluated one at a time.
        loss_fn: callable returning ``(loss, mean_distance)``.
        device: device the sample tensors are moved to.

    Files that contain no samples are skipped. If nothing could be
    evaluated, a notice is printed instead of the metrics.
    """
    files = os.listdir(val_dir)

    model.eval()
    all_losses = []
    all_dists = []
    with torch.no_grad():
        for file in files:
            with open(os.path.join(val_dir, file)) as f:
                data = json.load(f)
            if not data:
                # An empty file would otherwise divide by zero below.
                continue

            loss = 0
            for sample in data:
                sample_masses = torch.tensor(sample['masses'], dtype=torch.float32).unsqueeze(1).to(device)
                sample_pos0 = torch.tensor(sample['pos'], dtype=torch.float32).to(device)
                sample_vel0 = torch.tensor(sample['vel'], dtype=torch.float32).to(device)
                sample_pos1 = torch.tensor(sample['pos_next1'], dtype=torch.float32).to(device)
                sample_pos2 = torch.tensor(sample['pos_next2'], dtype=torch.float32).to(device)

                # Step 1: predict from the ground-truth initial state.
                pr_pos1, pr_vel1 = model(sample_pos0, sample_vel0, sample_masses)
                loss1, dist1 = loss_fn(pr_pos1, sample_pos1, model.num_neighbors)
                loss += 0.5*loss1
                all_dists.append(dist1.item())

                # Step 2: continue from the model's own prediction.
                pr_pos2, _ = model(pr_pos1, pr_vel1, sample_masses)
                loss2, dist2 = loss_fn(pr_pos2, sample_pos2, model.num_neighbors)
                loss += 0.5*loss2
                all_dists.append(dist2.item())

            loss = loss / len(data)
            all_losses.append(loss.item())

            # Drop the references to the last sample's tensors before asking
            # the CUDA allocator to release its cached blocks.
            del sample_masses, sample_pos0, sample_vel0, sample_pos1, sample_pos2
            del pr_pos1, pr_vel1, pr_pos2
            torch.cuda.empty_cache()

    if not all_losses:
        print('Val: no validation samples found')
        return

    print(f'Val Loss: {sum(all_losses)/len(all_losses)}, Val L2: {sum(all_dists)/len(all_dists)}')


            

In [5]:
def train(model, train_dir, val_dir, batch_size, loss_fn, optimizer, num_epochs, weights_dir=None, device='cuda', eval=True):
    """Train ``model`` on every file in ``train_dir`` and save the result.

    Args:
        model: module to train; moved to ``device``.
        train_dir: directory whose files are each passed to ``train_epoch``.
        val_dir: directory passed to ``val`` after every epoch.
        batch_size, loss_fn, optimizer: forwarded to ``train_epoch`` / ``val``.
        num_epochs: number of passes over ``train_dir``.
        weights_dir: optional directory of ``model_<n>.pt`` checkpoints. The
            checkpoint with the highest ``<n>`` is loaded before training.
        device: device for the model and the loaded weights.
        eval: run ``val`` after each epoch when true.

    Returns:
        The trained model. Its state dict is written to
        ``./models/model_<k>.pt`` where ``k`` is one more than the resumed
        checkpoint index, or 0 if nothing was resumed.
    """
    model.to(device)

    # Defined up front so the final save also works when weights_dir is None
    # (previously that path raised NameError).
    last_model = 0
    if weights_dir is not None:
        # Collect (index, file name) for entries named like model_<n>.pt and
        # compare numerically; a plain string sort ranks model_10 below
        # model_2 and would resume from a stale checkpoint.
        checkpoints = []
        for name in os.listdir(weights_dir):
            try:
                checkpoints.append((int(name.split('_')[1].split('.')[0]), name))
            except (IndexError, ValueError):
                continue
        if checkpoints:
            latest_index, latest_name = max(checkpoints)
            try:
                model.load_state_dict(torch.load(os.path.join(weights_dir, latest_name), map_location=device))
                last_model = latest_index + 1
            except Exception as err:
                # Best effort, as before: fall back to a fresh start, but say
                # so and let KeyboardInterrupt/SystemExit propagate.
                print(f'Could not load weights from {latest_name} ({err}); starting from scratch')
                last_model = 0

    train_files = os.listdir(train_dir)
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}')
        for t_file in train_files:
            full_path = os.path.join(train_dir, t_file)
            train_epoch(model, full_path, batch_size, loss_fn, optimizer, device)

        if eval:
            val(model, val_dir, batch_size, loss_fn, device)

    torch.save(model.state_dict(), f'./models/model_{last_model}.pt')

    return model

def train_memory(model, train_data, val_dir, batch_size, loss_fn, optimizer, num_epochs, weights_dir=None, device='cuda', eval=True, use_custom_loss=False):
    """Train ``model`` on an in-memory dataset and save the result.

    Args:
        model: module to train; moved to ``device``.
        train_data: list of sample dicts forwarded to ``train_epoch_memory``.
        val_dir: directory passed to ``val`` after every epoch.
        batch_size, loss_fn, optimizer: forwarded to the epoch function.
        num_epochs: number of passes over ``train_data``.
        weights_dir: optional directory of ``model_<n>.pt`` checkpoints. The
            checkpoint with the highest ``<n>`` is loaded before training.
        device: device for the model and the loaded weights.
        eval: run ``val`` after each epoch when true.
            NOTE(review): ``val`` unpacks two values from ``model(...)``
            while ``train_epoch_memory`` treats the output as a single
            tensor — confirm ``eval=True`` works with these models.
        use_custom_loss: forwarded to ``train_epoch_memory``.

    Returns:
        The trained model. Its state dict is written to
        ``./models/model_<k>.pt`` where ``k`` is one more than the resumed
        checkpoint index, or 0 if nothing was resumed.
    """
    model.to(device)

    # Defined up front so the final save also works when weights_dir is None
    # (previously that path raised NameError).
    last_model = 0
    if weights_dir is not None:
        # Collect (index, file name) for entries named like model_<n>.pt.
        # Entries with other names are skipped instead of crashing the sort.
        checkpoints = []
        for name in os.listdir(weights_dir):
            try:
                checkpoints.append((int(name.split('_')[1].split('.')[0]), name))
            except (IndexError, ValueError):
                continue
        if checkpoints:
            latest_index, latest_name = max(checkpoints)
            try:
                model.load_state_dict(torch.load(os.path.join(weights_dir, latest_name), map_location=device))
                print(f'Loaded weights from {latest_name}')
                last_model = latest_index + 1
            except Exception as err:
                # Best effort, as before: fall back to a fresh start, but say
                # so and let KeyboardInterrupt/SystemExit propagate.
                print(f'Could not load weights from {latest_name} ({err}); starting from scratch')
                last_model = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}')
        train_epoch_memory(model, train_data, batch_size, loss_fn, optimizer, device, use_custom_loss)

        if eval:
            val(model, val_dir, batch_size, loss_fn, device)

    torch.save(model.state_dict(), f'./models/model_{last_model}.pt')

    return model

def train_memory_black_hole_info(model, train_data, val_dir, batch_size, loss_fn, optimizer, num_epochs, weights_dir=None, device='cuda', eval=True, use_custom_loss=False):
    """Train the black-hole-aware model on an in-memory dataset and save it.

    Args:
        model: module to train; moved to ``device``.
        train_data: list of sample dicts forwarded to
            ``train_epoch_memory_black_hole_info``.
        val_dir: directory passed to ``val`` after every epoch.
        batch_size, loss_fn, optimizer: forwarded to the epoch function.
        num_epochs: number of passes over ``train_data``.
        weights_dir: optional directory of ``model_<n>.pt`` checkpoints. The
            checkpoint with the highest ``<n>`` is loaded before training.
        device: device for the model and the loaded weights.
        eval: run ``val`` after each epoch when true.
            NOTE(review): ``val`` calls the model with three arguments and
            unpacks two return values, whereas the epoch function here calls
            it with six arguments and uses a single return value — confirm
            ``eval=True`` works with this model.
        use_custom_loss: forwarded to the epoch function.

    Returns:
        The trained model. Its state dict is written to
        ``./modelsbh/model_<k>.pt`` where ``k`` is one more than the resumed
        checkpoint index, or 0 if nothing was resumed.
    """
    model.to(device)

    # Defined up front so the final save also works when weights_dir is None
    # (previously that path raised NameError).
    last_model = 0
    if weights_dir is not None:
        # Collect (index, file name) for entries named like model_<n>.pt.
        # Entries with other names are skipped instead of crashing the sort.
        checkpoints = []
        for name in os.listdir(weights_dir):
            try:
                checkpoints.append((int(name.split('_')[1].split('.')[0]), name))
            except (IndexError, ValueError):
                continue
        if checkpoints:
            latest_index, latest_name = max(checkpoints)
            try:
                model.load_state_dict(torch.load(os.path.join(weights_dir, latest_name), map_location=device))
                print(f'Loaded weights from {latest_name}')
                last_model = latest_index + 1
            except Exception as err:
                # Best effort, as before: fall back to a fresh start, but say
                # so and let KeyboardInterrupt/SystemExit propagate.
                print(f'Could not load weights from {latest_name} ({err}); starting from scratch')
                last_model = 0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}')
        train_epoch_memory_black_hole_info(model, train_data, batch_size, loss_fn, optimizer, device, use_custom_loss)

        if eval:
            val(model, val_dir, batch_size, loss_fn, device)

    torch.save(model.state_dict(), f'./modelsbh/model_{last_model}.pt')

    return model

In [7]:

# Driver cell: repeatedly generate a fresh in-memory dataset, train on it for
# one epoch, and checkpoint the model after every round.

# Find the highest-numbered checkpoint in ./models (files named model_<n>.pt).
model_files = os.listdir('./models/')
model_files.sort(key=lambda x: int(x.split('_')[1].split('.')[0]))
try:
    last_model_id = model_files[-1].split('_')[1].split('.')[0]
except IndexError:
    # Empty directory: start at -1 so the first checkpoint is model_0.pt.
    last_model_id = -1
last_model_id = int(last_model_id)

model = KwisatzHaderach(activation=True, layer_channels=[32, 64, 64, 3], calc_neighbors=True)



optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
# Multiplies the learning rate by 0.7 on every scheduler.step(); verbose=True
# is what prints the "Adjusting learning rate" lines in the output.
# NOTE(review): `verbose` is deprecated in recent PyTorch releases — confirm.
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.7, verbose=True)

for i in range(40):
    # Remove all files from the ./train directory. The dataset used below is
    # held in memory, so nothing in this cell reads these files back.
    files = os.listdir('./train')
    for file in files:
        os.remove(os.path.join('./train', file))
    
    dataset = generate_dataset_memory(2)

    
    # One epoch per round. train_memory reloads the newest checkpoint from
    # ./models and also saves the model itself before returning.
    model = train_memory(model, dataset, './val', 16, loss_fn, optimizer, 1, './models', device='cpu', eval=False, use_custom_loss=True)
    del dataset
    last_model_id += 1
    # NOTE(review): appears to write the same weights train_memory just saved
    # under the same index — confirm whether both saves are needed.
    torch.save(model.state_dict(), f'./models/model_{last_model_id}.pt')

    # Decay the learning rate on rounds 0, 5, 10, ... (including right after
    # the very first round).
    if i % 5 == 0:
        scheduler.step()



Adjusting learning rate of group 0 to 1.0000e-02.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.70s/it]


Epoch 0


100%|██████████| 125/125 [01:55<00:00,  1.09it/s]


Train Loss: 12.99312296295166, Train L2: 895.5973510742188
Adjusting learning rate of group 0 to 7.0000e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.02s/it]


Loaded weights from model_0.pt
Epoch 0


100%|██████████| 125/125 [01:47<00:00,  1.17it/s]


Train Loss: 10.77434513092041, Train L2: 2.9191479682922363
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.19s/it]


Loaded weights from model_1.pt
Epoch 0


100%|██████████| 125/125 [02:05<00:00,  1.00s/it]


Train Loss: 8.72150765991211, Train L2: 4.118039131164551
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.02s/it]


Loaded weights from model_2.pt
Epoch 0


100%|██████████| 125/125 [02:10<00:00,  1.05s/it]


Train Loss: 8.358292507171631, Train L2: 4.102955341339111
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.12s/it]


Loaded weights from model_3.pt
Epoch 0


100%|██████████| 125/125 [02:08<00:00,  1.03s/it]


Train Loss: 10.180093845367432, Train L2: 4.6953043937683105
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:27<00:00, 13.80s/it]


Loaded weights from model_4.pt
Epoch 0


100%|██████████| 125/125 [01:44<00:00,  1.20it/s]


Train Loss: 11.061240169525146, Train L2: 2.7201366424560547
Adjusting learning rate of group 0 to 4.9000e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.83s/it]


Loaded weights from model_5.pt
Epoch 0


100%|██████████| 125/125 [01:48<00:00,  1.15it/s]


Train Loss: 9.346398872375488, Train L2: 2.762451171875
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.95s/it]


Loaded weights from model_6.pt
Epoch 0


100%|██████████| 125/125 [01:54<00:00,  1.09it/s]


Train Loss: 9.209357730865479, Train L2: 2.613548755645752
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.20s/it]


Loaded weights from model_7.pt
Epoch 0


100%|██████████| 125/125 [02:23<00:00,  1.14s/it]


Train Loss: 6.286519239425659, Train L2: 3.806878089904785
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.26s/it]


Loaded weights from model_8.pt
Epoch 0


100%|██████████| 125/125 [01:49<00:00,  1.15it/s]


Train Loss: 9.675443399429321, Train L2: 2.19943904876709
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.51s/it]


Loaded weights from model_9.pt
Epoch 0


100%|██████████| 125/125 [01:43<00:00,  1.21it/s]


Train Loss: 9.729029230117797, Train L2: 1.9369064569473267
Adjusting learning rate of group 0 to 3.4300e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:24<00:00, 12.48s/it]


Loaded weights from model_10.pt
Epoch 0


100%|██████████| 125/125 [01:46<00:00,  1.18it/s]


Train Loss: 9.66268892288208, Train L2: 2.3451273441314697
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.68s/it]


Loaded weights from model_11.pt
Epoch 0


100%|██████████| 125/125 [01:50<00:00,  1.13it/s]


Train Loss: 8.222564723968507, Train L2: 1.9699162244796753
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.83s/it]


Loaded weights from model_12.pt
Epoch 0


100%|██████████| 125/125 [02:00<00:00,  1.03it/s]


Train Loss: 8.076228811264038, Train L2: 3.0113697052001953
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.99s/it]


Loaded weights from model_13.pt
Epoch 0


100%|██████████| 125/125 [01:46<00:00,  1.17it/s]


Train Loss: 10.39714260482788, Train L2: 2.239941358566284
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:27<00:00, 13.62s/it]


Loaded weights from model_14.pt
Epoch 0


100%|██████████| 125/125 [01:55<00:00,  1.08it/s]


Train Loss: 8.030325267791747, Train L2: 2.651132106781006
Adjusting learning rate of group 0 to 2.4010e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:24<00:00, 12.45s/it]


Loaded weights from model_15.pt
Epoch 0


100%|██████████| 125/125 [01:56<00:00,  1.07it/s]


Train Loss: 7.530250862121582, Train L2: 1.77272629737854
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.81s/it]


Loaded weights from model_16.pt
Epoch 0


100%|██████████| 125/125 [01:52<00:00,  1.11it/s]


Train Loss: 8.414287223815919, Train L2: 2.382615566253662
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.37s/it]


Loaded weights from model_17.pt
Epoch 0


100%|██████████| 125/125 [02:00<00:00,  1.04it/s]


Train Loss: 8.023587375640869, Train L2: 2.6674954891204834
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:24<00:00, 12.29s/it]


Loaded weights from model_18.pt
Epoch 0


100%|██████████| 125/125 [01:50<00:00,  1.14it/s]


Train Loss: 8.04315986251831, Train L2: 1.8228856325149536
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.67s/it]


Loaded weights from model_19.pt
Epoch 0


100%|██████████| 125/125 [01:41<00:00,  1.23it/s]


Train Loss: 10.733504608154297, Train L2: 1.7445303201675415
Adjusting learning rate of group 0 to 1.6807e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:28<00:00, 14.23s/it]


Loaded weights from model_20.pt
Epoch 0


100%|██████████| 125/125 [01:55<00:00,  1.08it/s]


Train Loss: 8.138146560668945, Train L2: 2.3792786598205566
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.20s/it]


Loaded weights from model_21.pt
Epoch 0


100%|██████████| 125/125 [02:03<00:00,  1.01it/s]


Train Loss: 6.982952257156372, Train L2: 1.9964630603790283
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.98s/it]


Loaded weights from model_22.pt
Epoch 0


100%|██████████| 125/125 [01:48<00:00,  1.15it/s]


Train Loss: 7.845343364715577, Train L2: 1.8220278024673462
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.52s/it]


Loaded weights from model_23.pt
Epoch 0


100%|██████████| 125/125 [01:55<00:00,  1.09it/s]


Train Loss: 7.777977603912354, Train L2: 1.9789986610412598
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 13.00s/it]


Loaded weights from model_24.pt
Epoch 0


100%|██████████| 125/125 [01:59<00:00,  1.04it/s]


Train Loss: 9.248685935974121, Train L2: 2.409541130065918
Adjusting learning rate of group 0 to 1.1765e-03.
Generating dataset with 2 scenes...


 50%|█████     | 1/2 [00:24<00:24, 24.60s/it]


KeyboardInterrupt: 

In [39]:
from kwisatzHaderach_bh import KwisatzHaderachBH

In [40]:
'''
model_files = os.listdir('./modelsbh/')
model_files.sort(key=lambda x: int(x.split('_')[1].split('.')[0]))
try:
    last_model_id = model_files[-1].split('_')[1].split('.')[0]
except IndexError:
    last_model_id = -1
last_model_id = int(last_model_id)

model = KwisatzHaderachBH(activation=True, layer_channels=[64, 64, 32, 3], calc_neighbors=False)



optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.7, verbose=True)

for i in range(40):
    # remove all files from train directory
    files = os.listdir('./train')
    for file in files:
        os.remove(os.path.join('./train', file))
    
    dataset = generate_dataset_memory_bh(2, window_size=2)

    
    model = train_memory_black_hole_info(model, dataset, './val', 16, loss_fn, optimizer, 1, './modelsbh', device='cpu', eval=False, use_custom_loss=False)
    del dataset
    last_model_id += 1
    torch.save(model.state_dict(), f'./modelsbh/model_{last_model_id}.pt')

    if i % 5 == 0:
        scheduler.step()
       ''' 

Adjusting learning rate of group 0 to 1.0000e-02.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:35<00:00, 17.78s/it]


Epoch 0


100%|██████████| 124/124 [09:32<00:00,  4.61s/it]


Train Loss: 36337.63641502972, Train L2: 36337.63685174124
Adjusting learning rate of group 0 to 7.0000e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:27<00:00, 13.50s/it]


Loaded weights from model_0.pt
Epoch 0


100%|██████████| 124/124 [10:03<00:00,  4.87s/it]


Train Loss: 3.3436206175434973, Train L2: 3.343620580320637
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:34<00:00, 17.38s/it]


Loaded weights from model_1.pt
Epoch 0


100%|██████████| 124/124 [09:36<00:00,  4.65s/it]


Train Loss: 3.364552910289457, Train L2: 3.3645528859248564
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:34<00:00, 17.03s/it]


Loaded weights from model_2.pt
Epoch 0


100%|██████████| 124/124 [09:49<00:00,  4.75s/it]


Train Loss: 2.7546715986344124, Train L2: 2.754671580248302
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:34<00:00, 17.38s/it]


Loaded weights from model_3.pt
Epoch 0


100%|██████████| 124/124 [09:40<00:00,  4.68s/it]


Train Loss: 2.3741487841452322, Train L2: 2.3741488050699475
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:24<00:00, 12.07s/it]


Loaded weights from model_4.pt
Epoch 0


100%|██████████| 124/124 [09:33<00:00,  4.62s/it]


Train Loss: 2.3355996589506827, Train L2: 2.3355996712231106
Adjusting learning rate of group 0 to 4.9000e-03.
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:26<00:00, 13.13s/it]


Loaded weights from model_5.pt
Epoch 0


100%|██████████| 124/124 [09:12<00:00,  4.46s/it]


Train Loss: 2.854159482063786, Train L2: 2.854159466471643
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:25<00:00, 12.68s/it]


Loaded weights from model_6.pt
Epoch 0


100%|██████████| 124/124 [09:16<00:00,  4.49s/it]


Train Loss: 2.4385847785780506, Train L2: 2.4385847684686945
Generating dataset with 2 scenes...


100%|██████████| 2/2 [00:24<00:00, 12.03s/it]


Loaded weights from model_7.pt
Epoch 0


  6%|▌         | 7/124 [00:33<09:21,  4.80s/it]


KeyboardInterrupt: 