In [1]:
import clip
import dnnlib
import legacy
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import math
from torchvision.utils import make_grid
import pandas as pd
import PIL 
import matplotlib.pyplot as plt
import pickle
import os
import random
import torch.optim.lr_scheduler as lrs
import numpy as np

In [21]:

path = os.getcwd().replace("\\", "/") + '/MLP_dataset_cpu/Nueva carpeta/'

with open(path + "clip_embeddings_cpu.pkl", 'rb') as f:
    clip_embeddings = pickle.load(f)

with open(path + "latent_vectors_cpu.pkl", 'rb') as f:
    latent_vectors = pickle.load(f)

In [22]:
print(clip_embeddings)

tensor([[ 0.2051,  0.0146, -0.2891,  ...,  0.0631,  0.2717,  0.1092],
        [-0.0046, -0.3018, -0.3093,  ...,  0.3403,  0.2842, -0.0611],
        [ 0.0673, -0.3384, -0.5674,  ...,  0.4553,  0.3196, -0.3755],
        ...,
        [-0.1978, -0.1085, -0.3662,  ...,  0.1799, -0.2476,  0.0675],
        [ 0.2529,  0.2294,  0.2333,  ...,  0.2362, -0.2019,  0.4663],
        [ 0.4688, -0.2537, -0.3467,  ...,  0.8003, -0.1028,  0.0372]],
       dtype=torch.float16)


In [4]:

MLP_path = os.getcwd().replace("\\", "/") + '/MLP_dataset_std1/'

clip_embedding_ = []
latent_vector_ = []
for i in range(0, len(os.listdir(MLP_path))//2):
    with open(MLP_path+'clip_embeddings_' + str(i) + '.pkl', 'rb') as f:
        clip_embedding_.append(pickle.load(f))
    with open(MLP_path+'latent_vectors_' + str(i) + '.pkl', 'rb') as f:
        latent_vector_.append(pickle.load(f))

# put all the embeddings and latent vectors in one tensor
clip_embedding = torch.cat(clip_embedding_)
latent_vector = torch.cat(latent_vector_)

dataset = torch.utils.data.TensorDataset(clip_embedding, latent_vector)

# split the dataset into train, validation and test sets
train, val= torch.utils.data.random_split(dataset, [int(len(dataset)*0.85), int(len(dataset)*0.15)])
train_loader = torch.utils.data.DataLoader(train, batch_size=48, shuffle=True)
val_loader = torch.utils.data.DataLoader(val, batch_size=48, shuffle=True)

In [5]:
# define an auoenconder class 
class Autoencoder(nn.Module):
    def __init__(self, input_dim, output_dim, n_hidden=3):
        super(Autoencoder, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_dim = 256
        self = 256
        self.n_hidden = n_hidden
        self.layers = nn.ModuleList()
        if n_hidden==0:
            self.layers.append(nn.Linear(self.input_dim, self.output_dim))
        else:
            self.layers.append(nn.Linear(self.input_dim, self.hidden_dim))
            for i in range(self.n_hidden-1):
                self.layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
            self.layers.append(nn.Linear(self.hidden_dim, self.output_dim))
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
        
    def forward(self, x):
        for i in range(self.n_hidden):
            x = self.relu(self.layers[i](x))
        x = self.layers[-1](x)
        return x

In [7]:
# reduce the dimensionality of the clip_embeddings using autoencoder

class Encoder(nn.Module):
  def __init__(self):
    super(Encoder, self).__init__()
    self.layer1 = nn.Linear(512,256)
    self.layer2 = nn.Linear(256,128)
    self.layer3 = nn.Linear(128, 64)
  
  def forward(self,x):
    out = self.layer1(x)
    out = self.layer2(out)
    return self.layer3(out)
    
# Decoder definition with a fully-connected layer and 3 BN-ReLU-COnv blocks and 
class Decoder(nn.Module):
  def __init__(self):
    super(Decoder, self).__init__()
    self.layer1 = nn.Linear(64,128)
    self.layer2 = nn.Linear(128,256)
    self.layer3 = nn.Linear(256,512)

    def forward(self,x):
        out = self.layer1(x)
        out = self.layer2(out)
        return self.layer3(out)
    
  
class Autoencoder(nn.Module):
  def __init__(self):
    super(Autoencoder, self).__init__()
    # Initialize the encoder and decoder using a dimensionality out_features for the vector z
    self.encoder = Encoder()
    self.decoder = Decoder()
    self.loss = nn.MSELoss(reduction='sum')

  def forward(self,x):
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return encoded, decoded
  
  def backward(self, decoded, x):
    # Reconstruction loss
    recon_loss = self.loss(decoded, x)
    return recon_loss

# Print summary of the mode
print('MNIST Autoencoder Definition')
autoencoder = Autoencoder()
print(autoencoder)


MNIST Autoencoder Definition
Autoencoder(
  (encoder): Encoder(
    (layer1): Linear(in_features=512, out_features=256, bias=True)
    (layer2): Linear(in_features=256, out_features=128, bias=True)
    (layer3): Linear(in_features=128, out_features=64, bias=True)
  )
  (decoder): Decoder(
    (layer1): Linear(in_features=64, out_features=128, bias=True)
    (layer2): Linear(in_features=128, out_features=256, bias=True)
    (layer3): Linear(in_features=256, out_features=512, bias=True)
  )
  (loss): MSELoss()
)


In [13]:
# train the autoencoder
def train(model, train_loader, optimizer, epochs, log_interval=100):
    model.train()
    losses = []
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    for epoch in range(epochs):
        loss_epoch = []
        for data in train_loader:
            data = data
            print(data.shape)
            print(type(data))
            print(data)
            optimizer.zero_grad()
            _, decoded = model(data)
            loss = model.backward(decoded, data)
            loss.backward()
            optimizer.step()
            loss_epoch.append(loss.item())
        batch_loss = np.mean(loss_epoch)
        losses.append(batch_loss)
        print('Epoch: {} \tLoss: {:.6f}'.format(epoch, batch_loss))
    return losses

In [14]:

dataset = torch.utils.data.TensorDataset(clip_embedding)
len(dataset), len(dataset[0])

(10040, 1)

In [15]:
clip_dataset = torch.utils.data.TensorDataset(clip_embedding)
train_loader = torch.utils.data.DataLoader(clip_dataset, batch_size=12)


In [16]:
clip_autoencoder = Autoencoder()
clip_autoencoder.cuda()
clip_autoencoder.train()
optimizer = torch.optim.Adam(clip_autoencoder.parameters(), lr=1e-3)
losses = train(clip_autoencoder, train_loader, optimizer, epochs=10)


AttributeError: 'list' object has no attribute 'shape'