In [1]:
import torch
import os
import shutil
import numpy as np
import csv
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import torch
import pandas as pd
import lightning as L

from minerva.models.nets.time_series.gans import TTSGAN_Generator, TTSGAN_Discriminator, TTSGAN_Encoder, GAN
from GANModels import Discriminator, Generator, Encoder

from lightning.pytorch.callbacks import Callback

import random

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
experiment_number = 0
version = 0
n_epochs = 5
batch_size = 64
SAC = ['Sit', 'Stand', 'Walk', 'Upstairs', 'Downstairs', 'Run']
beta1 = 0.9
beta2 = 0.999
gen_lr = 0.0001
dis_lr = 0.0003
assimetrical_percentage = 1.0

### Testing diferences

In [3]:
random_seed = 1

def set_seed(seed: int = 42):  
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    L.seed_everything(seed)

In [4]:
set_seed(random_seed)
model_minerva = GAN(generator = TTSGAN_Generator(seq_len = 60, channels = 6),
            discriminator = TTSGAN_Discriminator(seq_len = 60, channels = 6), 
            loss_gen = torch.nn.MSELoss(),
            loss_dis = torch.nn.MSELoss(),
            assimetrical_percentage = assimetrical_percentage,
            generator_lr = gen_lr,
            discriminator_lr = dis_lr,
            beta1 = beta1,
            beta2 = beta2,
            )

Seed set to 1


In [5]:
set_seed(random_seed)
model_original = GAN(generator = Generator(seq_len = 60, channels = 6),
            discriminator = Discriminator(seq_len = 60, channels = 6), 
            loss_gen = torch.nn.MSELoss(),
            loss_dis = torch.nn.MSELoss(),
            assimetrical_percentage = assimetrical_percentage,
            generator_lr = gen_lr,
            discriminator_lr = dis_lr,
            beta1 = beta1,
            beta2 = beta2,
            )

Seed set to 1


In [6]:
model_minerva.dis.state_dict()['backbone.1.2.0.fn.1.queries.weight'][0]

tensor([-0.1054,  0.0696,  0.0651,  0.1097, -0.0539, -0.0156,  0.1353, -0.0771,
        -0.0711,  0.0347,  0.1340, -0.0310, -0.0241,  0.1115,  0.0739,  0.0278,
         0.0253,  0.0159,  0.1108,  0.0020, -0.0836, -0.0784,  0.0193, -0.0114,
         0.0876,  0.1100,  0.1261,  0.1124,  0.0176, -0.0458, -0.0304,  0.0395,
         0.0677, -0.0576,  0.1264,  0.0260, -0.0136,  0.0486,  0.1052, -0.0581,
         0.0976, -0.1115,  0.0542,  0.0449,  0.0065,  0.0874,  0.1387, -0.0968,
         0.0566,  0.0924])

In [7]:
model_minerva.gen.state_dict()['blocks.1.0.fn.1.keys.weight'][0]

tensor([ 0.2117,  0.3071, -0.2280, -0.2999, -0.2248, -0.0936,  0.1826,  0.1497,
        -0.2084,  0.2531])

In [8]:
model_minerva.dis.state_dict()

OrderedDict([('backbone.0.cls_token',
              tensor([[[ 1.1295,  0.7398, -1.5574, -0.9263,  0.3087, -0.6228, -1.2084,
                         0.6669,  0.3504,  0.9009,  0.2003, -0.6825,  0.1045,  1.1122,
                         0.2311, -1.0828,  0.7424, -0.7340, -1.8336,  0.6437,  1.3474,
                        -0.0538,  0.3054,  0.9420, -0.7003,  0.4152,  0.1127,  0.4619,
                         1.4493,  0.3032, -1.3763, -0.9452,  0.5707, -0.4768,  0.8129,
                         0.9659,  0.4636, -0.4988, -0.7091, -1.4446, -1.1530, -0.0099,
                         1.4437, -1.4314,  1.0107, -0.1559,  0.5795, -1.7134, -0.8778,
                        -0.0329]]])),
             ('backbone.0.positions',
              tensor([[ 0.3892, -0.7337, -0.7574,  0.5029,  0.2913,  0.9252, -2.5734,  1.2299,
                       -1.7022, -0.5089,  0.7091,  1.4022,  1.6235,  0.1440, -0.0924, -0.7493,
                        0.1327, -0.0086,  1.5081, -0.8705,  0.4132, -1.2860,  1.7205,  

In [9]:
model_original.dis.state_dict()

OrderedDict([('0.cls_token',
              tensor([[[ 1.1295,  0.7398, -1.5574, -0.9263,  0.3087, -0.6228, -1.2084,
                         0.6669,  0.3504,  0.9009,  0.2003, -0.6825,  0.1045,  1.1122,
                         0.2311, -1.0828,  0.7424, -0.7340, -1.8336,  0.6437,  1.3474,
                        -0.0538,  0.3054,  0.9420, -0.7003,  0.4152,  0.1127,  0.4619,
                         1.4493,  0.3032, -1.3763, -0.9452,  0.5707, -0.4768,  0.8129,
                         0.9659,  0.4636, -0.4988, -0.7091, -1.4446, -1.1530, -0.0099,
                         1.4437, -1.4314,  1.0107, -0.1559,  0.5795, -1.7134, -0.8778,
                        -0.0329]]])),
             ('0.positions',
              tensor([[ 0.3892, -0.7337, -0.7574,  0.5029,  0.2913,  0.9252, -2.5734,  1.2299,
                       -1.7022, -0.5089,  0.7091,  1.4022,  1.6235,  0.1440, -0.0924, -0.7493,
                        0.1327, -0.0086,  1.5081, -0.8705,  0.4132, -1.2860,  1.7205,  0.9241,
          

Os MODELOS são parecidos, pelomenos a olho nu

In [10]:
set_seed(random_seed)
tensor_1 = torch.rand(64, 100)
set_seed(random_seed)
tensor_2 = torch.rand(64, 6, 60)

tensor_1.shape, tensor_2.shape

Seed set to 1
Seed set to 1


(torch.Size([64, 100]), torch.Size([64, 6, 60]))

In [11]:
set_seed(random_seed)
a = model_minerva.gen(tensor_1)
set_seed(random_seed)
b = model_original.gen(tensor_1)

(a == b).squeeze().float().mean().bool().item()

Seed set to 1


Seed set to 1


True

In [12]:
set_seed(random_seed)
d = model_original.dis(tensor_2)
set_seed(random_seed)
e = model_minerva.dis(tensor_2)

(d == e).float().mean().bool().item()

Seed set to 1
Seed set to 1


True

Saidas são idênticas com a mesma seed, logo não há diferenças práticas nos modelos, agora vamos ver nos treinamentos

In [13]:
device_number = 0
if torch.cuda.is_available():
    torch.cuda.set_device(device_number)

device = (
    f"cuda:{device_number}"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
display(f"Using {device} device")



'Using cpu device'

In [14]:
torch.cuda.current_device()

RuntimeError: No CUDA GPUs are available

In [15]:
newpath = r'../../../standardize_view'

In [16]:
dataNames = os.listdir(newpath)
dataNames.sort()
dataNames

['KuHar', 'MotionSense', 'RealWorld_thigh', 'RealWorld_waist', 'UCI', 'WISDM']

In [17]:
X = []
y = []   
for dataName in dataNames:
    print(dataName)
    dfTr = pd.read_csv(newpath + '/' + dataName + '/train.csv')
    X_tr = dfTr.values[:,:360].reshape(-1,6,60)
    y_tr = dfTr.values[:,-1].astype(np.int32)
    X.append(X_tr)
    y.append(y_tr)
X = np.concatenate(X, axis=0)
y = np.concatenate(y, axis=0)
X.shape, y.shape

KuHar
MotionSense
RealWorld_thigh
RealWorld_waist
UCI
WISDM


((36788, 6, 60), (36788,))

In [18]:
set_seed()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle = True, random_state = 42)

X_train = torch.tensor(X_train.astype(np.float32), dtype=torch.float32, device=device).detach()
X_test = torch.tensor(X_test.astype(np.float32), dtype=torch.float32, device=device).detach()
y_train = torch.tensor(y_train.astype(np.float32), dtype=torch.float32, device=device).detach()
y_test = torch.tensor(y_test.astype(np.float32), dtype=torch.float32, device=device).detach()

X_train.shape, X_test.shape, y_train.shape, y_test.shape 

Seed set to 42


(torch.Size([29430, 6, 60]),
 torch.Size([7358, 6, 60]),
 torch.Size([29430]),
 torch.Size([7358]))

In [19]:
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset, random_split

class CustomDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

class CustomDataModule(L.LightningDataModule):
    def __init__(self, data, labels, batch_size=16, val_split=0.2, num_workers=8):
        super().__init__()
        self.data = data
        self.labels = labels
        self.batch_size = batch_size
        self.val_split = val_split
        self.num_workers = num_workers

    def setup(self, stage=None):
        dataset = CustomDataset(self.data, self.labels)
        val_size = int(len(dataset) * self.val_split)
        train_size = len(dataset) - val_size
        self.train_dataset, self.val_dataset = random_split(dataset, [train_size, val_size])

    def train_dataloader(self):
        return DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True, num_workers=self.num_workers)

    def val_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

    def test_dataloader(self):
        return DataLoader(self.val_dataset, batch_size=self.batch_size, num_workers=self.num_workers)

# Supondo que X seja seu dado com forma (36788, 6, 60)
#X = np.random.rand(36788, 6, 60).astype(np.float32)  # Certifique-se de que os dados sejam do tipo float32
#y = np.random.randint(0, 2, size=(36788,)).astype(np.float32)  # Rótulos fictícios para fins de exemplo

X = X.astype(np.float32)
y = y.astype(int)
X_tensor = torch.tensor(X)
y_tensor = torch.tensor(y)

set_seed()
# Crie o DataModule
data_module = CustomDataModule(X_tensor, y_tensor, batch_size=64)

# Configure o DataModule
data_module.setup()

# Acesse os dataloaders
train_loader = data_module.train_dataloader()
val_loader = data_module.val_dataloader()

# Exemplo de iteração através do train_loader
print(len(train_loader))
print(len(val_loader))
for batch in train_loader:
    X_batch, y_batch = batch
    print(X_batch.shape, y_batch.shape)  # Deve imprimir torch.Size([64, 6, 60]) torch.Size([64])
    break

Seed set to 42


460
115
torch.Size([64, 6, 60]) torch.Size([64])


In [20]:
X_batch = X_batch.to('cuda:0')

RuntimeError: No CUDA GPUs are available

In [21]:
from lightning.pytorch.callbacks import ModelCheckpoint
from callbacks import TsneGeneratorCallback, TsneEncoderCallback, KNNValidationCallback, MyPrintingCallback
from lightning.pytorch.callbacks import Callback

save_dir = './training'
name = 'ttsgan_error_hunt'

class SamplePrintingCallback(Callback):
    #Test callback just to do a start/end test for our training
    def __init__(self, test_batch = None, seed = 42):
        super().__init__()
        self.seed = seed
        self.batch = test_batch
        self.z = torch.tensor(np.random.normal(0, 1, (batch_size, 100)), dtype=torch.float, device='cuda')

    def on_train_epoch_end(self, trainer, pl_module):
        if self.batch is None:
            set_seed(self.seed)
            self.batch = next(iter(trainer.datamodule.val_dataloader()))
        

        set_seed(self.seed)
        #print(self.z.device, self.batch.device, pl_module.device)
        print()
        #print(self.z)
        #print(self.batch)
        print(pl_module.dis(self.batch))
        print(pl_module.gen(self.z))
        
printcallback = SamplePrintingCallback(test_batch=X_batch)

RuntimeError: No CUDA GPUs are available

In [None]:
from lightning.pytorch.loggers.csv_logs import CSVLogger
version = 'minerva'
logger = CSVLogger(save_dir=save_dir, name=name, version=version)

In [None]:
if device.split(':')[0] == 'cuda':
    accelerator = 'cuda'

trainer = L.Trainer(accelerator=accelerator, devices=1,
                    callbacks=[printcallback], 
                    deterministic=True,
                    logger=logger, max_epochs=n_epochs) #max_steps=50000

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
set_seed()
trainer.fit(model = model_minerva, datamodule = data_module)

Seed set to 42
/usr/local/lib/python3.10/dist-packages/lightning/fabric/loggers/csv_logs.py:268: Experiment logs directory ./training/ttsgan_error_hunt/minerva exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory ./training/ttsgan_error_hunt/minerva/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name     | Type                 | Params | Mode 
----------------------------------------------------------
0 | gen      | TTSGAN_Generator     | 65.3 K | train
1 | dis      | TTSGAN_Discriminator | 97.0 K | train
2 | loss_gen | MSELoss              | 0      | train
3 | loss_dis | MSELoss              | 0      | train
----------------------------------------------------------
162 K     Trainable params
0         Non-trainable params
162 K     Total params
0.649     Total estimated model para

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Epoch 0: 100%|██████████| 460/460 [00:37<00:00, 12.42it/s, v_num=erva]     

Seed set to 42



tensor([[0.8877],
        [1.0460],
        [0.7173],
        [1.2805],
        [0.9052],
        [0.5703],
        [0.3915],
        [0.4610],
        [0.7342],
        [0.3074],
        [1.1011],
        [0.8002],
        [0.3547],
        [0.3305],
        [0.9731],
        [0.4941],
        [0.3740],
        [0.4119],
        [0.6142],
        [0.4482],
        [0.7448],
        [0.8907],
        [1.0959],
        [0.3029],
        [0.2919],
        [0.7220],
        [0.3369],
        [0.2627],
        [0.7488],
        [0.3807],
        [0.3624],
        [1.0615],
        [0.8348],
        [0.3400],
        [0.2973],
        [0.8574],
        [0.6493],
        [0.3376],
        [0.2849],
        [1.0366],
        [0.3999],
        [0.4533],
        [0.2345],
        [0.3654],
        [0.4373],
        [0.6439],
        [0.5783],
        [0.8959],
        [0.2822],
        [0.6583],
        [0.3715],
        [1.0026],
        [0.5724],
        [0.4347],
        [0.8279],
        [

Seed set to 42



tensor([[0.8977],
        [0.9669],
        [0.8161],
        [1.1961],
        [0.8571],
        [0.6603],
        [0.3705],
        [0.3410],
        [0.6852],
        [0.3407],
        [1.0584],
        [0.9113],
        [0.3716],
        [0.3043],
        [0.9190],
        [0.4318],
        [0.4109],
        [0.4147],
        [0.5830],
        [0.3254],
        [0.5107],
        [1.1615],
        [1.0843],
        [0.3319],
        [0.3368],
        [0.9139],
        [0.3592],
        [0.3064],
        [0.8083],
        [0.3365],
        [0.3395],
        [1.0989],
        [0.5341],
        [0.3625],
        [0.2565],
        [1.1318],
        [0.7764],
        [0.5184],
        [0.3200],
        [0.7564],
        [0.3635],
        [0.3684],
        [0.3210],
        [0.3554],
        [0.3880],
        [0.6271],
        [0.7338],
        [1.1631],
        [0.3401],
        [0.6516],
        [0.3278],
        [1.4272],
        [0.6691],
        [0.4147],
        [0.9509],
        [

Seed set to 42



tensor([[0.9197],
        [0.8952],
        [0.7949],
        [1.1764],
        [0.8083],
        [0.8280],
        [0.8484],
        [0.9655],
        [0.8273],
        [1.1678],
        [0.8783],
        [1.0468],
        [0.7697],
        [1.0195],
        [0.8997],
        [1.0049],
        [0.6723],
        [1.0040],
        [0.6586],
        [0.9192],
        [1.0250],
        [0.9685],
        [1.2689],
        [0.5528],
        [1.0043],
        [0.9927],
        [0.7440],
        [0.9769],
        [0.9685],
        [1.1138],
        [0.5753],
        [1.1561],
        [0.8990],
        [1.1841],
        [0.8275],
        [0.6265],
        [0.9369],
        [0.7387],
        [0.6451],
        [1.0507],
        [0.9373],
        [0.9459],
        [0.9792],
        [1.0952],
        [0.8648],
        [0.6563],
        [0.7323],
        [1.0717],
        [1.0599],
        [0.7185],
        [0.6557],
        [1.1043],
        [0.6885],
        [1.0893],
        [1.0563],
        [

Seed set to 42



tensor([[1.0790],
        [0.9049],
        [0.7263],
        [1.2173],
        [0.9110],
        [0.7888],
        [0.8560],
        [1.0143],
        [0.9387],
        [1.0512],
        [0.8228],
        [0.9646],
        [0.8686],
        [1.0023],
        [0.7930],
        [1.0248],
        [0.9415],
        [1.0183],
        [0.6600],
        [0.9640],
        [0.6044],
        [1.0732],
        [1.2521],
        [0.6656],
        [0.9611],
        [0.8966],
        [0.7791],
        [0.9253],
        [1.0485],
        [1.0340],
        [0.7949],
        [1.2570],
        [0.5815],
        [1.0784],
        [0.9581],
        [0.7628],
        [1.0551],
        [0.7758],
        [0.9881],
        [1.0336],
        [0.9852],
        [0.9124],
        [0.9998],
        [0.9499],
        [0.9757],
        [0.5808],
        [0.6437],
        [1.0591],
        [1.0117],
        [0.8360],
        [0.8600],
        [1.1836],
        [0.3271],
        [1.0470],
        [1.1675],
        [

Seed set to 42



tensor([[ 0.9818],
        [ 1.0334],
        [ 0.3116],
        [ 1.0041],
        [ 0.9552],
        [ 0.7750],
        [ 0.9156],
        [ 1.0180],
        [ 1.1208],
        [ 1.0827],
        [ 0.7688],
        [ 0.1787],
        [ 0.8954],
        [ 1.0375],
        [ 0.9233],
        [ 1.0336],
        [ 0.9803],
        [ 1.0264],
        [ 1.0669],
        [ 0.9633],
        [ 0.5034],
        [ 0.9023],
        [ 0.7478],
        [ 0.8774],
        [ 0.9727],
        [ 0.8862],
        [ 0.8376],
        [ 0.9812],
        [ 1.1288],
        [ 0.9695],
        [ 0.8766],
        [ 1.1363],
        [ 0.4994],
        [ 1.0814],
        [ 1.0344],
        [ 0.2478],
        [ 1.2722],
        [ 0.8480],
        [ 1.1785],
        [ 1.1822],
        [ 1.0680],
        [ 0.9590],
        [ 1.0222],
        [ 0.9200],
        [ 0.9664],
        [ 0.1119],
        [-0.0739],
        [ 0.9112],
        [ 0.9962],
        [ 1.1867],
        [ 0.9789],
        [ 0.7220],
        [ 0

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 460/460 [00:36<00:00, 12.51it/s, v_num=erva]


In [None]:
version = 'original'
logger = CSVLogger(save_dir=save_dir, name=name, version=version)

In [None]:
trainer = L.Trainer(accelerator=accelerator, devices=1,
                    callbacks=[printcallback], 
                    deterministic=True,
                    logger=logger, max_epochs=n_epochs) #max_steps=50000

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


In [None]:
set_seed()
trainer.fit(model = model_original, datamodule = data_module)

Seed set to 42
/usr/local/lib/python3.10/dist-packages/lightning/fabric/loggers/csv_logs.py:268: Experiment logs directory ./training/ttsgan_error_hunt/original exists and is not empty. Previous log files in this directory will be deleted when the new ones are saved!
/usr/local/lib/python3.10/dist-packages/lightning/pytorch/callbacks/model_checkpoint.py:654: Checkpoint directory ./training/ttsgan_error_hunt/original/checkpoints exists and is not empty.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name     | Type          | Params | Mode 
---------------------------------------------------
0 | gen      | Generator     | 65.3 K | train
1 | dis      | Discriminator | 97.0 K | train
2 | loss_gen | MSELoss       | 0      | train
3 | loss_dis | MSELoss       | 0      | train
---------------------------------------------------
162 K     Trainable params
0         Non-trainable params
162 K     Total params
0.649     Total estimated model params size (MB)
138       Modules in train mode
0 

Epoch 0: 100%|██████████| 460/460 [00:37<00:00, 12.38it/s, v_num=inal]     

Seed set to 42



tensor([[0.8877],
        [1.0460],
        [0.7173],
        [1.2805],
        [0.9052],
        [0.5703],
        [0.3915],
        [0.4610],
        [0.7342],
        [0.3074],
        [1.1011],
        [0.8002],
        [0.3547],
        [0.3305],
        [0.9731],
        [0.4941],
        [0.3740],
        [0.4119],
        [0.6142],
        [0.4482],
        [0.7448],
        [0.8907],
        [1.0959],
        [0.3029],
        [0.2919],
        [0.7220],
        [0.3369],
        [0.2627],
        [0.7488],
        [0.3807],
        [0.3624],
        [1.0615],
        [0.8348],
        [0.3400],
        [0.2973],
        [0.8574],
        [0.6493],
        [0.3376],
        [0.2849],
        [1.0366],
        [0.3999],
        [0.4533],
        [0.2345],
        [0.3654],
        [0.4373],
        [0.6439],
        [0.5783],
        [0.8959],
        [0.2822],
        [0.6583],
        [0.3715],
        [1.0026],
        [0.5724],
        [0.4347],
        [0.8279],
        [

Seed set to 42



tensor([[0.8977],
        [0.9669],
        [0.8161],
        [1.1961],
        [0.8571],
        [0.6603],
        [0.3705],
        [0.3410],
        [0.6852],
        [0.3407],
        [1.0584],
        [0.9113],
        [0.3716],
        [0.3043],
        [0.9190],
        [0.4318],
        [0.4109],
        [0.4147],
        [0.5830],
        [0.3254],
        [0.5107],
        [1.1615],
        [1.0843],
        [0.3319],
        [0.3368],
        [0.9139],
        [0.3592],
        [0.3064],
        [0.8083],
        [0.3365],
        [0.3395],
        [1.0989],
        [0.5341],
        [0.3625],
        [0.2565],
        [1.1318],
        [0.7764],
        [0.5184],
        [0.3200],
        [0.7564],
        [0.3635],
        [0.3684],
        [0.3210],
        [0.3554],
        [0.3880],
        [0.6271],
        [0.7338],
        [1.1631],
        [0.3401],
        [0.6516],
        [0.3278],
        [1.4272],
        [0.6691],
        [0.4147],
        [0.9509],
        [

Seed set to 42



tensor([[0.9197],
        [0.8952],
        [0.7949],
        [1.1764],
        [0.8083],
        [0.8280],
        [0.8484],
        [0.9655],
        [0.8273],
        [1.1678],
        [0.8783],
        [1.0468],
        [0.7697],
        [1.0195],
        [0.8997],
        [1.0049],
        [0.6723],
        [1.0040],
        [0.6586],
        [0.9192],
        [1.0250],
        [0.9685],
        [1.2689],
        [0.5528],
        [1.0043],
        [0.9927],
        [0.7440],
        [0.9769],
        [0.9685],
        [1.1138],
        [0.5753],
        [1.1561],
        [0.8990],
        [1.1841],
        [0.8275],
        [0.6265],
        [0.9369],
        [0.7387],
        [0.6451],
        [1.0507],
        [0.9373],
        [0.9459],
        [0.9792],
        [1.0952],
        [0.8648],
        [0.6563],
        [0.7323],
        [1.0717],
        [1.0599],
        [0.7185],
        [0.6557],
        [1.1043],
        [0.6885],
        [1.0893],
        [1.0563],
        [

Seed set to 42



tensor([[1.0790],
        [0.9049],
        [0.7263],
        [1.2173],
        [0.9110],
        [0.7888],
        [0.8560],
        [1.0143],
        [0.9387],
        [1.0512],
        [0.8228],
        [0.9646],
        [0.8686],
        [1.0023],
        [0.7930],
        [1.0248],
        [0.9415],
        [1.0183],
        [0.6600],
        [0.9640],
        [0.6044],
        [1.0732],
        [1.2521],
        [0.6656],
        [0.9611],
        [0.8966],
        [0.7791],
        [0.9253],
        [1.0485],
        [1.0340],
        [0.7949],
        [1.2570],
        [0.5815],
        [1.0784],
        [0.9581],
        [0.7628],
        [1.0551],
        [0.7758],
        [0.9881],
        [1.0336],
        [0.9852],
        [0.9124],
        [0.9998],
        [0.9499],
        [0.9757],
        [0.5808],
        [0.6437],
        [1.0591],
        [1.0117],
        [0.8360],
        [0.8600],
        [1.1836],
        [0.3271],
        [1.0470],
        [1.1675],
        [

Seed set to 42



tensor([[ 0.9818],
        [ 1.0334],
        [ 0.3116],
        [ 1.0041],
        [ 0.9552],
        [ 0.7750],
        [ 0.9156],
        [ 1.0180],
        [ 1.1208],
        [ 1.0827],
        [ 0.7688],
        [ 0.1787],
        [ 0.8954],
        [ 1.0375],
        [ 0.9233],
        [ 1.0336],
        [ 0.9803],
        [ 1.0264],
        [ 1.0669],
        [ 0.9633],
        [ 0.5034],
        [ 0.9023],
        [ 0.7478],
        [ 0.8774],
        [ 0.9727],
        [ 0.8862],
        [ 0.8376],
        [ 0.9812],
        [ 1.1288],
        [ 0.9695],
        [ 0.8766],
        [ 1.1363],
        [ 0.4994],
        [ 1.0814],
        [ 1.0344],
        [ 0.2478],
        [ 1.2722],
        [ 0.8480],
        [ 1.1785],
        [ 1.1822],
        [ 1.0680],
        [ 0.9590],
        [ 1.0222],
        [ 0.9200],
        [ 0.9664],
        [ 0.1119],
        [-0.0739],
        [ 0.9112],
        [ 0.9962],
        [ 1.1867],
        [ 0.9789],
        [ 0.7220],
        [ 0

`Trainer.fit` stopped: `max_epochs=5` reached.


Epoch 4: 100%|██████████| 460/460 [00:37<00:00, 12.37it/s, v_num=inal]


In [None]:
model_original.state_dict()

OrderedDict([('gen.pos_embed',
              tensor([[[ 1.0248e-02,  1.0360e-02, -2.9554e-02, -4.2383e-03,  2.3925e-02,
                         9.6189e-03, -1.1730e-02,  8.1592e-03,  2.2645e-03, -5.2288e-03],
                       [ 6.2791e-03,  2.6720e-02,  7.6978e-03, -5.0326e-02,  4.1301e-02,
                        -1.4274e-02, -1.5863e-02,  2.2053e-02,  3.2037e-02, -1.2513e-02],
                       [-1.6386e-02,  4.7751e-02,  1.5294e-02, -7.9552e-03, -2.3331e-02,
                        -2.6255e-02,  5.3737e-02, -1.1874e-02,  3.3942e-02, -3.5555e-02],
                       [-5.3432e-03,  1.1627e-02, -1.2300e-02,  5.4383e-03,  1.8347e-02,
                         2.6152e-03,  2.1938e-02, -8.9914e-03,  1.0873e-02, -9.4082e-03],
                       [-1.3422e-02, -7.7636e-03, -3.4532e-02,  2.4385e-02, -5.8325e-02,
                        -1.5592e-02,  1.6240e-02, -3.9460e-02,  2.4721e-02, -2.5885e-03],
                       [-2.5097e-02, -2.0131e-02, -1.3591e-03,  3.6879e-02

In [None]:
model_minerva.state_dict()

OrderedDict([('gen.pos_embed',
              tensor([[[ 1.0248e-02,  1.0360e-02, -2.9554e-02, -4.2383e-03,  2.3925e-02,
                         9.6189e-03, -1.1730e-02,  8.1592e-03,  2.2645e-03, -5.2288e-03],
                       [ 6.2791e-03,  2.6720e-02,  7.6978e-03, -5.0326e-02,  4.1301e-02,
                        -1.4274e-02, -1.5863e-02,  2.2053e-02,  3.2037e-02, -1.2513e-02],
                       [-1.6386e-02,  4.7751e-02,  1.5294e-02, -7.9552e-03, -2.3331e-02,
                        -2.6255e-02,  5.3737e-02, -1.1874e-02,  3.3942e-02, -3.5555e-02],
                       [-5.3432e-03,  1.1627e-02, -1.2300e-02,  5.4383e-03,  1.8347e-02,
                         2.6152e-03,  2.1938e-02, -8.9914e-03,  1.0873e-02, -9.4082e-03],
                       [-1.3422e-02, -7.7636e-03, -3.4532e-02,  2.4385e-02, -5.8325e-02,
                        -1.5592e-02,  1.6240e-02, -3.9460e-02,  2.4721e-02, -2.5885e-03],
                       [-2.5097e-02, -2.0131e-02, -1.3591e-03,  3.6879e-02

Depois do treinamento são exatamente iguais, logo não há diferença neste sentido

Tem diferente funções de geração de dados

In [None]:
set_seed()
gen_z = torch.tensor(np.random.normal(0, 1, (10, 100)), dtype=torch.float, device='cuda')
set_seed()
z = torch.cuda.FloatTensor(np.random.normal(0, 1, (10, 100))).cuda(non_blocking=True)

z==gen_z


Seed set to 42
Seed set to 42
  z = torch.cuda.FloatTensor(np.random.normal(0, 1, (10, 100))).cuda(non_blocking=True)


tensor([[True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True],
        [True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True, True, True, True, True, True, True, True, 

São iguais

Tem diferente formas de gerar os otimizadores

In [None]:
set_seed()
dis_optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model_original.parameters()), dis_lr, (beta1, beta2))
set_seed()
opt_d = torch.optim.Adam(model_original.parameters(), lr=dis_lr, betas=(beta1, beta2))

list(filter(lambda p: p.requires_grad, model_original.parameters())) == list(model_original.parameters())

Seed set to 42
Seed set to 42


True

In [None]:
for param_group in opt_d.param_groups:
    print(param_group)
                
    

{'params': [Parameter containing:
tensor([[[ 1.0248e-02,  1.0360e-02, -2.9554e-02, -4.2383e-03,  2.3925e-02,
           9.6189e-03, -1.1730e-02,  8.1592e-03,  2.2645e-03, -5.2288e-03],
         [ 6.2791e-03,  2.6720e-02,  7.6978e-03, -5.0326e-02,  4.1301e-02,
          -1.4274e-02, -1.5863e-02,  2.2053e-02,  3.2037e-02, -1.2513e-02],
         [-1.6386e-02,  4.7751e-02,  1.5294e-02, -7.9552e-03, -2.3331e-02,
          -2.6255e-02,  5.3737e-02, -1.1874e-02,  3.3942e-02, -3.5555e-02],
         [-5.3432e-03,  1.1627e-02, -1.2300e-02,  5.4383e-03,  1.8347e-02,
           2.6152e-03,  2.1938e-02, -8.9914e-03,  1.0873e-02, -9.4082e-03],
         [-1.3422e-02, -7.7636e-03, -3.4532e-02,  2.4385e-02, -5.8325e-02,
          -1.5592e-02,  1.6240e-02, -3.9460e-02,  2.4721e-02, -2.5885e-03],
         [-2.5097e-02, -2.0131e-02, -1.3591e-03,  3.6879e-02, -2.3112e-02,
          -2.1271e-02,  2.5391e-02, -3.4227e-02,  7.7941e-03, -4.6964e-03],
         [-4.1610e-03, -3.2823e-02, -6.8587e-03,  2.6761e-03

Da na mesma

In [None]:
#primeiro teste com gen_batch_size = 64
a1 = [ 0.2117,  0.3071, -0.2280, -0.2999, -0.2248, -0.0936,  0.1826,  0.1497,
        -0.2084,  0.2531]
b1 = [-0.1054,  0.0696,  0.0651,  0.1097, -0.0539, -0.0156,  0.1353, -0.0771,
        -0.0711,  0.0347,  0.1340, -0.0310, -0.0241,  0.1115,  0.0739,  0.0278,
         0.0253,  0.0159,  0.1108,  0.0020, -0.0836, -0.0784,  0.0193, -0.0114,
         0.0876,  0.1100,  0.1261,  0.1124,  0.0176, -0.0458, -0.0304,  0.0395,
         0.0677, -0.0576,  0.1264,  0.0260, -0.0136,  0.0486,  0.1052, -0.0581,
         0.0976, -0.1115,  0.0542,  0.0449,  0.0065,  0.0874,  0.1387, -0.0968,
         0.0566,  0.0924]

a2= [ 0.2140,  0.3163, -0.2180, -0.3175, -0.2393, -0.0739,  0.1704,  0.1528,
        -0.2084,  0.2525]

b2 = [-0.1026,  0.0713,  0.0590,  0.1209, -0.0619, -0.0136,  0.1322, -0.0719,
        -0.0719,  0.0277,  0.1410, -0.0292, -0.0203,  0.1165,  0.0797,  0.0191,
         0.0206,  0.0099,  0.1120, -0.0021, -0.0860, -0.0783,  0.0226, -0.0063,
         0.0877,  0.1186,  0.1228,  0.1083,  0.0239, -0.0452, -0.0319,  0.0328,
         0.0773, -0.0639,  0.1307,  0.0267, -0.0176,  0.0472,  0.1010, -0.0626,
         0.0942, -0.1185,  0.0574,  0.0400,  0.0173,  0.0840,  0.1388, -0.0918,
         0.0537,  0.0962]

a1 == a2, b1 == b2

(False, False)

In [None]:
#agora com gen_batch_size = 16
c1 = [ 0.2117,  0.3071, -0.2280, -0.2999, -0.2248, -0.0936,  0.1826,  0.1497,
        -0.2084,  0.2531]
d1 = [-0.1054,  0.0696,  0.0651,  0.1097, -0.0539, -0.0156,  0.1353, -0.0771,
        -0.0711,  0.0347,  0.1340, -0.0310, -0.0241,  0.1115,  0.0739,  0.0278,
         0.0253,  0.0159,  0.1108,  0.0020, -0.0836, -0.0784,  0.0193, -0.0114,
         0.0876,  0.1100,  0.1261,  0.1124,  0.0176, -0.0458, -0.0304,  0.0395,
         0.0677, -0.0576,  0.1264,  0.0260, -0.0136,  0.0486,  0.1052, -0.0581,
         0.0976, -0.1115,  0.0542,  0.0449,  0.0065,  0.0874,  0.1387, -0.0968,
         0.0566,  0.0924]

c2 = [ 0.2153,  0.3137, -0.2262, -0.3076, -0.2292, -0.0854,  0.1762,  0.1532,
        -0.2116,  0.2511]

d2 = [-0.1049,  0.0726,  0.0616,  0.1155, -0.0549, -0.0097,  0.1389, -0.0829,
        -0.0701,  0.0359,  0.1324, -0.0351, -0.0156,  0.1165,  0.0749,  0.0201,
         0.0257,  0.0149,  0.1168, -0.0018, -0.0888, -0.0768,  0.0186, -0.0152,
         0.0907,  0.1105,  0.1205,  0.1104,  0.0096, -0.0408, -0.0294,  0.0395,
         0.0697, -0.0614,  0.1255,  0.0250, -0.0096,  0.0494,  0.1049, -0.0586,
         0.0988, -0.1117,  0.0568,  0.0460,  0.0042,  0.0872,  0.1372, -0.0949,
         0.0555,  0.0947]

c1 == a1, d1 == b1, c2 == a2, d2 == b2

(True, True, False, False)

Portanto o gen_batch_size faz diferença, não absurda mas talvez sulficiente para melhorar todos os dados

In [None]:
#mesmo gen batch size mas clipando normalmente o grad norm
#sem o norm depois de uma época
dis2 = [ 0.2140,  0.3163, -0.2180, -0.3175, -0.2393, -0.0739,  0.1704,  0.1528,
        -0.2084,  0.2525]

gen2 = [-0.1026,  0.0713,  0.0590,  0.1209, -0.0619, -0.0136,  0.1322, -0.0719,
        -0.0719,  0.0277,  0.1410, -0.0292, -0.0203,  0.1165,  0.0797,  0.0191,
         0.0206,  0.0099,  0.1120, -0.0021, -0.0860, -0.0783,  0.0226, -0.0063,
         0.0877,  0.1186,  0.1228,  0.1083,  0.0239, -0.0452, -0.0319,  0.0328,
         0.0773, -0.0639,  0.1307,  0.0267, -0.0176,  0.0472,  0.1010, -0.0626,
         0.0942, -0.1185,  0.0574,  0.0400,  0.0173,  0.0840,  0.1388, -0.0918,
         0.0537,  0.0962]

#com o norm depois de uma época
dis3 = [ 0.2156,  0.3181, -0.2187, -0.3189, -0.2399, -0.0721,  0.1690,  0.1543,
        -0.2098,  0.2511]

gen3 = [-0.1024,  0.0718,  0.0586,  0.1215, -0.0619, -0.0151,  0.1317, -0.0718,
        -0.0718,  0.0283,  0.1406, -0.0295, -0.0207,  0.1162,  0.0803,  0.0184,
         0.0203,  0.0104,  0.1130, -0.0026, -0.0860, -0.0786,  0.0226, -0.0057,
         0.0877,  0.1183,  0.1227,  0.1080,  0.0244, -0.0453, -0.0318,  0.0331,
         0.0778, -0.0626,  0.1299,  0.0268, -0.0185,  0.0469,  0.1001, -0.0631,
         0.0943, -0.1178,  0.0561,  0.0402,  0.0171,  0.0843,  0.1380, -0.0921,
         0.0538,  0.0968]

dis2 == dis3, gen2 == gen3

(False, False)

portanto o gran_norm faz diferença, porra