In [1]:
# make necessary imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.utils.data
from torch.optim import *
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from pathlib import Path
import pandas as pd
import numpy as np

import sys
# Temporarily extend the module search path with "../src" (resolved relative to
# the kernel's working directory) so the project-local modules can be imported.
sys.path.append("../src")
from AE import AEwrapper
from helpers import draw_3d_plotly, make_NoisySwissRoll, create_word2vec_sample
# Drop the last sys.path entry again -- expected to be the "../src" appended
# above, provided the imports did not append to sys.path themselves.
# Being the cell's final expression, the popped value is echoed as cell output.
sys.path.pop()

'../src'

# Trying out different dimensionality-reduction techniques on the noisy Swiss roll dataset

In [2]:
# Build the dataset with the project helper. It returns three values, unpacked
# here as the noisy data, a noise-free counterpart and per-point colours
# (meaning inferred from the variable names -- confirm in helpers.py).
# NOTE(review): 10000 is presumably the number of points and 12 the number of
# extra noise dimensions (the resulting array has 15 columns) -- TODO confirm.
data, data_no_noise, colors = make_NoisySwissRoll(10000, 12)

In [3]:
# Sanity check: display the dimensions (rows, columns) of the generated data.
data.shape

(10000, 15)

In [4]:
from sklearn.manifold import SpectralEmbedding

# Baseline: embed the noisy data into a low-dimensional space with a spectral
# embedding so the result can be plotted in 3-D.
N_COMPONENTS = 3     # dimensionality of the embedding
SPECTRAL_SEED = 0    # fixed seed so the eigen-solver start vector, and hence
                     # the embedding, is the same on every run

embedding = SpectralEmbedding(n_components=N_COMPONENTS, random_state=SPECTRAL_SEED)
data_reduced = embedding.fit_transform(data)

In [5]:
# Plot the spectral embedding with the project helper, passing the per-point
# colours returned by make_NoisySwissRoll.
draw_3d_plotly(data_reduced, colors)

In [7]:
# ---- Configuration ---------------------------------------------------------
OUTPUT_dim = 3               # second AEwrapper argument; presumably the size of
                             # the encoded representation -- confirm in AE.py
INPUT_dim = data.shape[1]    # number of features, read from the data itself
EPOCH_NUM = 20
BATCH_SIZE = 100
TRAIN_FRACTION = 0.8         # 80-20 train/validation split
SEED = 0

# Seed PyTorch's global RNG so the random split, the shuffling of the training
# loader and any torch-based weight initialisation are repeatable.
torch.manual_seed(SEED)

# Explicit float32 copy of the data (same values the legacy torch.Tensor(...)
# constructor produced).
swiss_roll_dat = torch.tensor(data, dtype=torch.float32)

# Take the observation count from the data and give the validation set the
# remainder, so the two sizes always add up to the dataset length
# (random_split raises otherwise).
OBSERVATIONS = len(swiss_roll_dat)
n_train = int(OBSERVATIONS * TRAIN_FRACTION)
n_val = OBSERVATIONS - n_train

# Split into training and testing sets 80-20
train_set, val_set = torch.utils.data.random_split(swiss_roll_dat, [n_train, n_val])

# prepare data loaders
trainloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

ae = AEwrapper(INPUT_dim, OUTPUT_dim, trainloader, testloader)

# NOTE(review): in the previously recorded run the reported losses stop
# changing after the first epoch (~0.141) -- worth checking the input scaling
# and the optimiser settings inside AEwrapper.
for epoch in range(1, EPOCH_NUM + 1):
    ae.train_AE(epoch)
    ae.test_AE(epoch)

====> Epoch: 1 Average loss: 0.1434
====> Test set loss: 0.1400
====> Epoch: 2 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 3 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 4 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 5 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 6 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 7 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 8 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 9 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 10 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 11 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 12 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 13 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 14 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 15 Average loss: 0.1410
====> Test set loss: 0.1400
====> Epoch: 16 Average loss: 0.14

In [8]:
# Encode the whole dataset with the trained model and convert the result to a
# NumPy array for plotting.
# * no_grad(): this is pure inference, so no autograd graph needs to be built.
# * .detach() is kept as a cheap safeguard; .cpu() is a no-op for CPU tensors
#   and keeps .numpy() working if the output lives on another device.
with torch.no_grad():
    data_reduced_ae = ae.model.encode(swiss_roll_dat).detach().cpu().numpy()

In [11]:
# Plot the codes produced by the model's encoder, coloured like the other plots.
draw_3d_plotly(data_reduced_ae, colors)

In [10]:
# Reference plot: the second array returned by make_NoisySwissRoll
# (presumably the roll without the added noise -- confirm in helpers.py).
draw_3d_plotly(data_no_noise, colors)

# Trying the autoencoder on word2vec vectors

In [14]:
##################################################
##################################################
######### Preparing data for PyTorch #############
##################################################
##################################################

DATADIR = "../samples_word2vec"
FILE = "small_w2v.csv"
PATH_TO_DATA = Path(DATADIR)/FILE
BATCH_SIZE = 100
TRAIN_FRACTION = 0.8      # 80-20 train/validation split
SEED = 0
# The feature columns are named "1" .. "300" in the CSV header.
FEATURE_COLUMNS = [str(i) for i in range(1, 301)]

# Seed PyTorch's global RNG so the random split and the shuffling of the
# training loader are repeatable.
torch.manual_seed(SEED)

# read the data in, convert it into PyTorch tensor
# usecols makes pandas parse only the needed columns; the trailing selection
# restores the "1".."300" order because usecols keeps the file's column order.
word2vec_dat_or = pd.read_csv(PATH_TO_DATA, usecols=FEATURE_COLUMNS)[FEATURE_COLUMNS]
word2vec_dat = torch.tensor(word2vec_dat_or.to_numpy(), dtype=torch.float32)

# Take the observation count from the file instead of assuming 10**4 rows, and
# give the validation set the remainder so the sizes always add up to the
# dataset length (random_split raises otherwise).
OBSERVATIONS = len(word2vec_dat)
n_train = int(OBSERVATIONS * TRAIN_FRACTION)
n_val = OBSERVATIONS - n_train

# Split into training and testing sets 80-20
train_set, val_set = torch.utils.data.random_split(word2vec_dat, [n_train, n_val])

# prepare data loaders
trainloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
testloader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)

In [15]:
##################################################
##################################################
#########         Training AE        #############
##################################################
##################################################

OUTPUT_dim = 2                       # second AEwrapper argument; presumably the
                                     # size of the encoded representation
INPUT_dim = word2vec_dat.shape[1]    # number of features, read from the tensor
EPOCH_NUM = 10
SEED = 0

# Seed PyTorch's global RNG so batch shuffling and any torch-based weight
# initialisation are repeatable.
torch.manual_seed(SEED)

# NOTE: this rebinds `ae`; any model previously stored under that name is no
# longer reachable through it.
ae = AEwrapper(INPUT_dim, OUTPUT_dim, trainloader, testloader)

# One training pass followed by one evaluation pass per epoch (1-based).
for epoch in range(1, EPOCH_NUM + 1):
    ae.train_AE(epoch)
    ae.test_AE(epoch)

====> Epoch: 1 Average loss: 0.0006
====> Test set loss: 0.0003
====> Epoch: 2 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 3 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 4 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 5 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 6 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 7 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 8 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 9 Average loss: 0.0003
====> Test set loss: 0.0003
====> Epoch: 10 Average loss: 0.0003
====> Test set loss: 0.0003
