# Training Notebook

##### Import Packages

In [1]:
import pandas as pd

import GPUtil

import argparse
import json
import logging
import os

import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision
import torchvision.models
import torchvision.transforms as transforms

from typing import List

## Set up logger to get details of errors
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

##### Import Data

In [2]:
coef_df_binary = pd.read_csv("https://raw.githubusercontent.com/jcox22/Sagemaker_practice_gan/main/rank_1_curves.csv")
coef_df_binary = coef_df_binary.drop(columns = ['Unnamed: 0'])

##### Training Dataset

In [3]:
train_tensor = torch.tensor(coef_df_binary.to_numpy())
train_ds = torch.utils.data.TensorDataset(train_tensor)
train_loader = torch.utils.data.DataLoader(train_ds, batch_size=128, shuffle=True)

##### Set Parameters

In [4]:
# k is for number of nodes in each hidden layer of NN
k = 1000

# For number of inputs (32 binary digits)
input_length = 32
output_length = input_length

# Model Parameters
epochs = 1
batch_size = 256
lr = 0.001
momentum = 0.9

# Needed later on for save_model
model_dir = '/models'
data_dir = '/training'

##### Create NN Classes

In [5]:
class Generator(nn.Module):
    def __init__(self, output_length: int):
        super(Generator, self).__init__()
        self.dense_layer = nn.Linear(output_length, k)
        self.dense_layer2 = nn.Linear(k, k)
        self.dense_layer3 = nn.Linear(k, k)
        self.dense_layer4 = nn.Linear(k, output_length)

    def forward(self, x):
        l1 = self.dense_layer(x)
        l2 = self.dense_layer2(F.relu(l1))
        l3 = self.dense_layer3(F.relu(l2))
        l4 = self.dense_layer4(F.relu(l3))
        return F.sigmoid(l4)
    
class Discriminator(nn.Module):
    def __init__(self, input_length: int):
        super(Discriminator, self).__init__()
        self.dense_layer = nn.Linear(int(input_length), k)
        self.dense_layer2 = nn.Linear(k, k)
        self.dense_layer3 = nn.Linear(k, k)
        self.dense_layer4 = nn.Linear(k, 1)

    def forward(self, x):
        l1 = self.dense_layer(x)
        l2 = self.dense_layer2(F.relu(l1))
        l3 = self.dense_layer3(F.relu(l2))
        l4 = self.dense_layer4(F.relu(l3))
        return F.sigmoid(l4)

##### Set up for training function

In [7]:
# Store on GPU else cpu
device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info("Device Type: {}".format(device))

# Call generator and discriminator
generator = Generator(output_length)
discriminator = Discriminator(input_length)

# Make sure it is on device
generator = generator.to(device)
discriminator = discriminator.to(device)

# Loss
loss = nn.BCELoss().to(device)

# Choose optimizer
optimizer = torch.optim.Adam(generator.parameters(), lr=lr)
#optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=momentum)

##### Training Loop

In [11]:
for epoch in range(0, epochs):
    running_loss = 0.0
    for batch in train_loader:
        noise = torch.randint(0, 2, size=(batch_size, output_length)).float()
        noise = noise.to(device)
    
        # Generate examples of data
        true_labels = [1] * batch_size
        true_labels = torch.tensor(true_labels).float()
        true_labels = true_labels.to(device).resize_((batch_size, 1))
            
        true_data = coef_df_binary.sample(batch_size).values
        true_data = torch.tensor(true_data).float()
        true_data = true_data.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        #outputs = model(inputs)
        #G_of_noise = generator(noise)
        #loss = criterion(outputs, labels)
        #loss.backward()
        #optimizer.step()
        G_of_noise = generator(noise)
        D_of_G_of_noise = discriminator(G_of_noise)
        generator_loss = loss(D_of_G_of_noise, true_labels)
        generator_loss.backward()
        optimizer.step()
            
        # Train the discriminator on the true/generated data
        optimizer.zero_grad()
        true_discriminator_out = discriminator(true_data)
        true_discriminator_loss = loss(true_discriminator_out, true_labels)

        # add .detach() here think about this
        generator_discriminator_out = discriminator(G_of_noise.detach()) # introduce new d_of_g_of_noise without gradient
        generator_discriminator_loss = loss(generator_discriminator_out, torch.zeros(batch_size).to(device).resize_((batch_size, 1)))
        discriminator_loss = (true_discriminator_loss + generator_discriminator_loss) / 2
        discriminator_loss.backward()
        optimizer.step()

        # print statistics
        running_loss += generator_loss.item()
        #if batch % 2000 == 1999:  # print every 2000 mini-batches
            #print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 2000))
            #print(f"Loss is {generator_loss.item()}.  Running loss is {running_loss/2000}.  Discriminator loss is {discriminator_loss.item()}.  True Discriminator Loss is {true_discriminator_loss.item()}")
            #print(GPUtil.showUtilization())
            #running_loss = 0.0
            #print(torch.cuda.memory_summary(device))
            #print(torch.cuda.list_gpu_processes(device))
        # print(running_loss/10)

    print("Finished Training")

Finished Training


##### Saving the Model

In [17]:
torch.save({
        'generator_state_dict': generator.state_dict(),
        'optimizer_state_dict': optimizer.state_dict()
    }, output_model)

NameError: name 'output_model' is not defined

##### Function to change binary to decimal

In [13]:
def extract(G_of_noise):

    G_numpy = G_of_noise.detach()   

    curves = []

    for i in range(len(G_numpy)):
        c1 = int(G_numpy[i][0].round())
        c2 = ((-1)**(int(G_numpy[i][1].round())))*(int(G_numpy[i][2].round()))
        c3 = int(G_numpy[i][3].round())
        c4 = (-1)**(int(G_numpy[i][4].round()))*(int("".join([str(int(y)) for y in G_numpy[i][5:18].round()]), 2))
        c6 = (-1)**(int(G_numpy[i][12].round()))*(int("".join([str(int(y)) for y in G_numpy[i][19:].round()]), 2))
    
        coef = [c1,c2,c3,c4,c6]
        curves.append(coef)
    return curves