In [1]:
import sys
import pandas as pd
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
sys.path.append("../../")
from utils.evaluation import evaluate
from utils.metrics import Metrics


In [2]:
import pickle
from git import Repo

# Resolve the repository root so the data paths below work no matter which
# sub-directory the notebook is started from.
repo = Repo(".", search_parent_directories=True)
git_root = repo.git.rev_parse("--show-toplevel")


def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened inside a ``with`` block so its handle is closed as soon
    as the object has been read, instead of being left open until garbage
    collection.

    Args:
        path: Location of the pickle file on disk.

    Returns:
        The object that was pickled into the file.
    """
    # NOTE: unpickling can execute arbitrary code; only load files that were
    # produced by this project's own pipeline.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Training split
X_Train_pd = _load_pickle(f"{git_root}/data/splits/train/X_pandas.pck")
y_Train_pd = _load_pickle(f"{git_root}/data/splits/train/y_pandas.pck")

# Validation split
X_Val_pd = _load_pickle(f"{git_root}/data/splits/val/X_pandas.pck")
y_Val_pd = _load_pickle(f"{git_root}/data/splits/val/y_pandas.pck")

In [3]:
# Convert every pandas split into a float32 tensor, in the order
# train features, train targets, validation features, validation targets.
X_Train, y_Train, X_Val, y_Val = (
    torch.tensor(frame.values, dtype=torch.float32)
    for frame in (X_Train_pd, y_Train_pd, X_Val_pd, y_Val_pd)
)

In [15]:
class Generator(torch.nn.Module):
    """Fully connected generator that maps a latent vector to a sample.

    The network is a stack of ``Linear`` layers, each followed by a
    ``LeakyReLU``, and a final ``Linear`` projection without activation.
    Calling ``Generator()`` with no arguments builds the original
    256 -> 512 -> 1024 -> 2048 -> 4000 -> 5045 -> 5045 architecture, with the
    same module order (and therefore the same ``state_dict`` keys).

    Args:
        latent_dim: Width of the input noise vector.
        hidden_sizes: Output width of each hidden ``Linear`` layer, in order.
            May be empty, in which case only the output projection is built.
        output_dim: Width of the generated sample.
        negative_slope: Slope used by every ``LeakyReLU`` for negative inputs.
    """

    def __init__(
        self,
        latent_dim=256,
        hidden_sizes=(512, 1024, 2048, 4000, 5045),
        output_dim=5045,
        negative_slope=0.1,
    ):
        super().__init__()

        # Kept on the module so callers can size their noise batches from it.
        self.latent_dim = latent_dim

        widths = [latent_dim, *hidden_sizes]
        modules = []
        # One (Linear, LeakyReLU) pair per hidden layer.
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            modules.append(torch.nn.Linear(in_features, out_features))
            modules.append(torch.nn.LeakyReLU(negative_slope))
        # Final projection has no activation.
        modules.append(torch.nn.Linear(widths[-1], output_dim))

        self.layers = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Return the generated batch for the latent batch ``x``."""
        return self.layers(x)
# Module-level generator instance; its parameters are handed to g_optimizer
# and it is sampled in the training loop.
G = Generator()

In [16]:
class Discriminator(torch.nn.Module):
    """Fully connected discriminator that scores a sample with one logit.

    The network is a stack of ``Linear`` layers, each followed by a
    ``LeakyReLU``, and a final ``Linear`` projection without activation, so
    the output is a raw logit (no sigmoid is applied here). Calling
    ``Discriminator()`` with no arguments builds the original
    5045 -> 5045 -> 5045 -> 2000 -> 300 -> 1 architecture, with the same
    module order (and therefore the same ``state_dict`` keys).

    Args:
        input_dim: Width of one input sample.
        hidden_sizes: Output width of each hidden ``Linear`` layer, in order.
            May be empty, in which case only the output projection is built.
        output_dim: Number of logits produced per sample.
        negative_slope: Slope used by every ``LeakyReLU`` for negative inputs.
    """

    def __init__(
        self,
        input_dim=5045,
        hidden_sizes=(5045, 5045, 2000, 300),
        output_dim=1,
        negative_slope=0.1,
    ):
        super().__init__()

        widths = [input_dim, *hidden_sizes]
        modules = []
        # One (Linear, LeakyReLU) pair per hidden layer.
        for in_features, out_features in zip(widths[:-1], widths[1:]):
            modules.append(torch.nn.Linear(in_features, out_features))
            modules.append(torch.nn.LeakyReLU(negative_slope))
        # Final projection has no activation.
        modules.append(torch.nn.Linear(widths[-1], output_dim))

        self.layers = torch.nn.Sequential(*modules)

    def forward(self, x):
        """Return the logits for the batch ``x``."""
        return self.layers(x)

# Module-level discriminator instance; its parameters are handed to
# d_optimizer and it scores real and generated batches in the training loop.
D = Discriminator()

In [17]:
@torch.no_grad()
def label_from_logits(y_hat: torch.Tensor, threshold = 0.5) -> torch.Tensor:
    """Turn raw logits into hard 0/1 labels.

    Args:
        y_hat: Tensor of logits.
        threshold: Probability that must be strictly exceeded for a 1 label.

    Returns:
        A float tensor shaped like ``y_hat`` holding 1.0 where
        ``sigmoid(y_hat) > threshold`` and 0.0 elsewhere, computed with
        gradient tracking disabled.
    """
    probabilities = torch.sigmoid(y_hat)
    return probabilities.gt(threshold).float()

In [25]:
# Training hyper-parameters: mini-batch size, number of epochs, learning rate.
batch_size, epochs, lr = 512, 2, 0.001

# One Adam optimizer per network, both with the same learning rate and betas.
g_optimizer, d_optimizer = (
    torch.optim.Adam(network.parameters(), lr=lr, betas=(0.5, 0.999))
    for network in (G, D)
)

In [26]:
# Pair the training features with their targets ...
train_dataset = torch.utils.data.TensorDataset(X_Train, y_Train)
# ... and serve them in shuffled mini-batches of `batch_size` samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=batch_size,
)

In [27]:

# Binary cross-entropy loss that is fed raw logits.
criterion = nn.BCEWithLogitsLoss()

# A single 256-dimensional noise vector, drawn once.
fixed_noise = torch.randn((1, 256))

# Label convention used during training: 1 marks real data, 0 marks generated data.
real_label, fake_label = 1., 0

In [28]:
# Adversarial training loop for the generator G and the discriminator D.
# Code adapted from https://pytorch.org/tutorials/beginner/dcgan_faces_tutorial.html
# One loss value per batch is appended to G_losses / D_losses.
G_losses = []
D_losses = []
# NOTE(review): `iters` is initialised here but never updated inside this loop.
iters = 0

print("Starting Training Loop...")
# For each epoch
for epoch in range(epochs):
    # For each batch in the dataloader (tqdm wraps the loader for a progress bar)
    for data in tqdm(train_loader):
        
        ############################
        # (1) Update D: maximize log(D(x)) + log(1 - D(G(z))), which is the same as
        #     minimizing BCE with real samples labelled 1 and generated samples labelled 0.
        ###########################
        ## Train with an all-real batch
        d_optimizer.zero_grad()
        # Size is read from the batch itself rather than from `batch_size`.
        b_size = data[0].size(0)
        # Only the features (data[0]) are used; the targets in data[1] are ignored.
        # squeeze(1) drops dim 1 when it has size 1, then each sample is flattened to one row.
        real_data = data[0].squeeze(1).reshape(b_size, -1)
        # Target vector filled with the real label (1).
        label = torch.full((b_size,), real_label, dtype=torch.float)
        # Forward pass of the real batch through D; flatten the logits to shape (b_size,)
        output = D(real_data).view(-1)
        # First half of D's loss: BCE on the all-real batch
        errD_real = criterion(output, label)
        # Compute gradients for D in the backward pass
        errD_real.backward()

        ## Train with an all-fake batch
        # Batch of latent vectors z; the width 256 must match G's first Linear layer.
        noise = torch.randn(b_size, 256)
        # Generate a fake batch with G
        fake = G(noise)
        # Reuse the target tensor, now filled with the fake label (0).
        label.fill_(fake_label)
        # Classify the fake batch with D; detach() keeps this backward pass from reaching G.
        output = D(fake.detach()).view(-1)
        # Second half of D's loss: BCE on the all-fake batch
        errD_fake = criterion(output, label)
        # These gradients are accumulated (summed) with those of the real batch
        errD_fake.backward()
        
        # Total D loss; used only for logging, both halves were already backpropagated.
        errD = errD_real + errD_fake
        # Update D
        d_optimizer.step()
        
        ############################
        # (2) Update G: maximize log(D(G(z))), i.e. minimize -log(D(G(z)))
        ###########################
        # Vanishing gradient: either the generator or the discriminator becomes too strong
        g_optimizer.zero_grad()
        # For the generator's loss the fake batch is labelled as real; the original
        # author notes this as a workaround for the vanishing-gradient problem.
        label.fill_(real_label)
        # D was just updated, so run the (non-detached) fake batch through it again.
        output = D(fake).view(-1)
        # G's loss based on this output
        errG = criterion(output, label)
        # Compute gradients for G. This pass also flows through D; D's gradients are
        # cleared by d_optimizer.zero_grad() at the start of the next iteration.
        errG.backward()
        # Update G
        g_optimizer.step()
        
        # Record the per-batch losses as plain Python floats.
        G_losses.append(errG.item())
        D_losses.append(errD.item())
        
    # Checkpoint on epochs 1, 11, 21, ... torch.save(G, ...) stores the whole module
    # object (not only its state_dict) under a path relative to the working directory.
    if epoch % 10 == 1:
        torch.save(G, 'Generator_epoch_{}.pth'.format(epoch))
        print('Model saved.')

Starting Training Loop...


  0%|          | 0/143 [00:00<?, ?it/s]

KeyboardInterrupt: 