In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torchvision
import sklearn.datasets
import sklearn.cluster
import sklearn.preprocessing
import torchvision.models
import torchvision.transforms

In [3]:
# Load the scikit-learn digits dataset: 1797 digits stored as 8x8 images.
numeros = sklearn.datasets.load_digits()
imagenes, Y = numeros['images'], numeros['target']
n_imagenes = len(imagenes)
print(imagenes.shape, Y.shape)

(1797, 8, 8) (1797,)


In [27]:
# Use the first N_TRAIN digits as the training set.
N_TRAIN = 600
X_train = imagenes[:N_TRAIN]
Y_train = Y[:N_TRAIN]

# Plain tensors carry autograd support themselves; the deprecated
# torch.autograd.Variable wrapper is not needed.
train = torch.Tensor(X_train).float()
targets = torch.Tensor(Y_train).long()

# Per-pixel standardization using statistics taken over the training samples.
mean = train.mean(dim=0)
std = train.std(dim=0)
std[std == 0] = 1.0  # pixels that never change: avoid dividing by zero

# Broadcasting applies the (8, 8) statistics to every sample at once,
# replacing the original per-sample Python loop.
train = (train - mean) / std

# Insert the single channel dimension expected by Conv2d: (N, 1, 8, 8).
x_train = train.unsqueeze(1)
x_train.shape

torch.Size([600, 1, 8, 8])

In [176]:
# Shape probe: feed the training batch through freshly created (untrained)
# layers and print the tensor size after each one. The input is 1 channel, 8x8.
# Note: the two transposed convolutions tried here use different kernel/stride
# settings than the decoder of the Autoencoder class; both end at 8x8.
probe_layers = (
    torch.nn.Conv2d(1, 16, kernel_size=4, stride=1),           # in-channels=1, out-channels=16: 8x8 -> 5x5
    torch.nn.Conv2d(16, 6, kernel_size=3),                     # 16 -> 6 channels: 5x5 -> 3x3
    torch.nn.ConvTranspose2d(6, 16, kernel_size=2),            # 6 -> 16 channels: 3x3 -> 4x4
    torch.nn.ConvTranspose2d(16, 1, kernel_size=2, stride=2),  # 16 -> 1 channel: 4x4 -> 8x8
)

# Output of the first convolution is kept under its own name.
new_tensor = probe_layers[0](x_train)
print(new_tensor.size())

# Chain the remaining layers, reporting the size after each step.
feature_maps = new_tensor
for n in probe_layers[1:]:
    feature_maps = n(feature_maps)
    print(feature_maps.size())


torch.Size([600, 16, 5, 5])
torch.Size([600, 6, 3, 3])
torch.Size([600, 16, 4, 4])
torch.Size([600, 1, 8, 8])


In [186]:
# Define the autoencoder
class Autoencoder(torch.nn.Module):
    """Convolutional autoencoder made of two Conv2d and two ConvTranspose2d layers.

    The layers are chained directly, with no activation function in between.
    For an input of shape (N, 1, 8, 8) the encoder produces (N, 6, 3, 3) and
    the decoder maps that back to (N, 1, 8, 8).

    Attributes:
        encoder: Sequential of Conv2d(1->16, kernel 4) and Conv2d(16->6, kernel 3).
        decoder: Sequential of ConvTranspose2d(6->16, kernel 3) and
            ConvTranspose2d(16->1, kernel 4).
    """

    def __init__(self):
        super().__init__()

        # Layers are created in encoder-then-decoder order so that parameter
        # initialization consumes the random generator in the same sequence.
        conv_in = torch.nn.Conv2d(1, 16, kernel_size=4, stride=1)
        conv_latent = torch.nn.Conv2d(16, 6, kernel_size=3)
        self.encoder = torch.nn.Sequential(conv_in, conv_latent)

        deconv_latent = torch.nn.ConvTranspose2d(6, 16, kernel_size=3)
        deconv_out = torch.nn.ConvTranspose2d(16, 1, kernel_size=4, stride=1)
        self.decoder = torch.nn.Sequential(deconv_latent, deconv_out)

    def forward(self, x):
        """Encode ``x`` and decode it again, returning the reconstruction."""
        return self.decoder(self.encoder(x))

In [187]:
# Initialize the model, the loss and the optimizer
SEED = 0
# The layer weights are drawn at random when the model is built; seeding right
# before construction makes the initialization repeatable on a fresh kernel.
torch.manual_seed(SEED)

num_epochs = 300
model = Autoencoder()
distance = torch.nn.MSELoss()  # reconstruction error between output and input
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1E-3)


In [None]:
# Training: every epoch is a single optimizer step on the whole x_train batch,
# with the input itself as the reconstruction target.
loss_ar = np.zeros(num_epochs)  # loss value recorded for each epoch
for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()

    output = model(x_train)
    loss = distance(output, x_train)

    loss.backward()
    optimizer.step()

    # Read the scalar once and reuse it for both the history and the log line.
    epoch_loss = loss.item()
    loss_ar[epoch] = epoch_loss
    print('epoch [{}/{}], loss:{:.4f}'.format(epoch+1, num_epochs, epoch_loss))

epoch [1/300], loss:0.9367
epoch [2/300], loss:0.7831
epoch [3/300], loss:0.6829
epoch [4/300], loss:0.6621
epoch [5/300], loss:0.6010
epoch [6/300], loss:0.5758
epoch [7/300], loss:0.5541
epoch [8/300], loss:0.5261
epoch [9/300], loss:0.5032
epoch [10/300], loss:0.4888
epoch [11/300], loss:0.4654
epoch [12/300], loss:0.4382
epoch [13/300], loss:0.4224
epoch [14/300], loss:0.4133
epoch [15/300], loss:0.4018
epoch [16/300], loss:0.3873
epoch [17/300], loss:0.3741
epoch [18/300], loss:0.3653
epoch [19/300], loss:0.3578
epoch [20/300], loss:0.3484
epoch [21/300], loss:0.3389
epoch [22/300], loss:0.3310
epoch [23/300], loss:0.3237
epoch [24/300], loss:0.3154
epoch [25/300], loss:0.3069
epoch [26/300], loss:0.2995
epoch [27/300], loss:0.2932
epoch [28/300], loss:0.2872
epoch [29/300], loss:0.2813
epoch [30/300], loss:0.2755
epoch [31/300], loss:0.2703
epoch [32/300], loss:0.2657
epoch [33/300], loss:0.2610
epoch [34/300], loss:0.2564
epoch [35/300], loss:0.2524
epoch [36/300], loss:0.2489
e

In [None]:
# Loss recorded at each training epoch, drawn on the current axes.
ax = plt.gca()
ax.plot(np.arange(num_epochs), loss_ar)
ax.set_xlabel('epoch')
ax.set_ylabel('loss')

In [None]:
# These passes are only used for inspection, so gradient tracking is disabled
# to avoid building an autograd graph for the whole batch.
with torch.no_grad():
    x_transform = model(x_train)           # reconstructions from the full autoencoder
    latent_space = model.encoder(x_train)  # encoder output only

In [None]:
# For five training digits show, column by column: the input image, its
# reconstruction by the autoencoder, and one channel of the latent representation.
plt.figure(figsize=(14,14))
offset = 16  # index of the first sample to display

# (source tensor, channel index) for each grid row, from top to bottom.
# The grid is declared as 5x5 but only these three rows are filled.
row_sources = (
    (x_train, 0),       # original images
    (x_transform, 0),   # images reconstructed by the autoencoder
    (latent_space, 2),  # one of the channels of the latent representation
)

for i in range(5):
    sample = i + offset
    for row, (source, channel) in enumerate(row_sources):
        plt.subplot(5, 5, (i + 1) + 5 * row)
        plt.imshow(source[sample][channel].detach().numpy())
