# Training

### Import Packages and data

In [2]:
import pandas as pd

import argparse
import json
import logging
import os

import torch
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.parallel
import torch.optim
import torch.utils.data
import torch.utils.data.distributed
import torchvision
import torchvision.models
import torchvision.transforms as transforms

from typing import List

## Set up logger to get details of errors
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

In [17]:
# Importing Data
rawdata = pd.read_csv('s3://sagemaker-studio-dfml0t4nnx4/clean_data.csv')
rawdata.drop(columns = ['lmfdb_label', 'rank'])

coef = rawdata.drop(columns = ['Unnamed: 0','lmfdb_label'])
coef['a1'] = pd.to_numeric(coef['a1'], errors='coerce')
coef['a6'] = pd.to_numeric(coef['a6'], errors='coerce')
coef = coef.dropna()

  interactivity=interactivity, compiler=compiler, result=result)


In [18]:
coef.describe()

Unnamed: 0,a1,a2,a3,a4,a6,rank
count,480517.0,480517.0,480517.0,480517.0,480517.0,480517.0
mean,0.496051,-0.059665,0.35082,-3700028000.0,4902672000000000.0,0.724176
std,0.499985,0.800867,0.477227,803268100000.0,5.447199e+18,0.648116
min,0.0,-1.0,0.0,-423406400000000.0,-9.801624e+20,0.0
25%,0.0,-1.0,0.0,-21179.0,-45263.0,0.0
50%,0.0,0.0,0.0,-814.0,6.0,1.0
75%,1.0,1.0,1.0,-12.0,52644.0,1.0
max,1.0,1.0,1.0,996148600000.0,3.353391e+21,3.0


### Making Data Binary

Making the data binary allows us to use the sigmoid function as the output layer of the nn, and it helps us make sure that the output is in whole numbers rather than decimals.  

In [19]:
# Create new dataframe to work with
binaryraw = coef

# Set limits for coefficients
binaryraw = binaryraw[binaryraw['a4'] > -(2**13)]
binaryraw = binaryraw[binaryraw['a4'] < 2**13]
binaryraw = binaryraw[binaryraw['a6'] > -(2**13)]
binaryraw = binaryraw[binaryraw['a6'] < 2**13]
binarycoef = binaryraw

In [20]:
binarycoef.describe()

Unnamed: 0,a1,a2,a3,a4,a6,rank
count,177460.0,177460.0,177460.0,177460.0,177460.0,177460.0
mean,0.465316,-0.036093,0.355889,-103.498067,28.793244,0.828626
std,0.498797,0.805042,0.478783,465.556261,2573.171074,0.682717
min,0.0,-1.0,0.0,-8141.0,-8191.0,0.0
25%,0.0,-1.0,0.0,-233.0,-547.0,0.0
50%,0.0,0.0,0.0,-52.0,2.0,1.0
75%,1.0,1.0,1.0,11.0,619.0,1.0
max,1.0,1.0,1.0,8191.0,8191.0,3.0


In [21]:
# Binary for a1
binarycoef['a1b'] = binarycoef['a1'].astype('int')

# Binary for a2
# The first coefficient is for the sign, 0 means positive, 1 means negative
binarycoef['a2b1'] = binarycoef['a2'].apply(lambda x : 0 if x != -1 else 1) 
binarycoef['a2b2'] = binarycoef['a2'].apply(lambda x : 1 if x != 0 else 0) 

# Binary for a3
binarycoef['a3b'] = binarycoef['a3'].apply(lambda x : 1 if x != 0 else 0)

# Binary for a4
### First I find whether it is positive or negative
binarycoef['a4b1'] = binarycoef['a4'].apply(lambda x : 1 if x < 0 else 0)
### Then I create a function that produces a binary list representation of an integer 
def create_binary_list_from_int(number: int) -> List[int]:
    """Creates a list of the binary representation of a positive integer

    Args:
        number: An integer

    Returns:
        The binary representation of the provided positive integer number as a list.
    """
    if number < 0 or type(number) is not int:
        raise ValueError("Only Positive integers are allowed")
    data = [int(x) for x in list(bin(number))[2:]]
    data = ([0] * (13 - len(data))) + data

    return data
### Then I run the integer to binary list function on the value of a4 and create column 'a4b'
binarycoef1 = binarycoef.copy()
binarycoef1['a4b'] = (binarycoef['a4'].abs().astype(int)).apply(create_binary_list_from_int)
### Then create columns from that list
a4binary = pd.DataFrame(binarycoef1['a4b'].to_list(), columns = ['a4b2', 'a4b3', 'a4b4', 'a4b5', 'a4b6', 'a4b7', 'a4b8', 'a4b9', 
                                                                'a4b10', 'a4b11', 'a4b12', 'a4b13', 'a4b14'])
rank_df1 = pd.concat([binarycoef.reset_index(), a4binary], axis = 1)

# Binary for a6 (similar process to a4)
rank_df1['a6b1'] = rank_df1['a6'].apply(lambda x : 1 if x < 0 else 0)
rank_values2 = binarycoef.copy(deep=False)
rank_values2['a6b'] = (binarycoef['a6'].abs().astype(int)).apply(create_binary_list_from_int)
a6binary = pd.DataFrame(rank_values2['a6b'].to_list(), columns = ['a6b2', 'a6b3', 'a6b4', 'a6b5', 'a6b6', 'a6b7', 'a6b8', 'a6b9', 
                                                                 'a6b10', 'a6b11', 'a6b12', 'a6b13', 'a6b14'])
coef_df_binary1 = pd.concat([rank_df1.reset_index(), a6binary], axis = 1)

In [22]:
coef_df_binary = coef_df_binary1[['a1b', 'a2b1', 'a2b2', 'a3b', 'a4b1', 'a4b2', 'a4b3', 'a4b4', 'a4b5', 'a4b6', 'a4b7', 'a4b8', 'a4b9', 
                                  'a4b10', 'a4b11', 'a4b12', 'a4b13', 'a4b14', 'a6b1', 'a6b2', 'a6b3', 'a6b4', 'a6b5', 'a6b6', 'a6b7', 
                                  'a6b8', 'a6b9', 'a6b10', 'a6b11', 'a6b12', 'a6b13', 'a6b14', 'rank']]

In [23]:
def BinaryToDecimal(num):
    return int(num, 2)
    

In [24]:
BinaryToDecimal('111')

7

##### Filtering data 

Making all the curves rank 1

In [25]:
coef_df_binary = coef_df_binary[coef_df_binary['rank'] == 1]
coef_df_binary = coef_df_binary.drop(columns = ['rank'])

In [26]:
coef_df_binary.describe()

Unnamed: 0,a1b,a2b1,a2b2,a3b,a4b1,a4b2,a4b3,a4b4,a4b5,a4b6,...,a6b5,a6b6,a6b7,a6b8,a6b9,a6b10,a6b11,a6b12,a6b13,a6b14
count,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,...,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0,91430.0
mean,0.471465,0.340829,0.648934,0.356294,0.685081,0.00187,0.006989,0.021295,0.112108,0.197977,...,0.303675,0.353943,0.394181,0.431937,0.457333,0.482861,0.464705,0.48401,0.419676,0.468479
std,0.499188,0.473991,0.477306,0.478906,0.464486,0.043207,0.083308,0.144367,0.315501,0.398477,...,0.459846,0.478194,0.488677,0.495348,0.498179,0.499709,0.498755,0.499747,0.493509,0.499008
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Setting Parameters

In [27]:
k = 100
input_length = 32
output_length = input_length

epochs = 1
batch_size = 16
lr = 0.001
# For optimizer, momentum of gradient descent
momentum = 0.9
# Needed later on for save_model
model_dir = '/models'
data_dir = '/training'

### Define the NN's

In [28]:
class Generator(nn.Module):
    def __init__(self, output_length: int):
        super(Generator, self).__init__()
        self.dense_layer = nn.Linear(output_length, k)
        self.dense_layer2 = nn.Linear(k, k)
        self.dense_layer3 = nn.Linear(k, output_length)

    def forward(self, x):
        l1 = self.dense_layer(x)
        l2 = self.dense_layer2(F.relu(l1))
        l3 = self.dense_layer3(F.relu(l2))
        return F.sigmoid(l3)
    
class Discriminator(nn.Module):
    def __init__(self, input_length: int):
        super(Discriminator, self).__init__()
        self.dense_layer = nn.Linear(int(input_length), k)
        self.dense_layer2 = nn.Linear(k, k)
        self.dense_layer3 = nn.Linear(k, 1)

    def forward(self, x):
        l1 = self.dense_layer(x)
        l2 = self.dense_layer2(F.relu(l1))
        l3 = self.dense_layer3(F.relu(l2))
        return F.sigmoid(l3)

In [3]:
# Test GPU Cuda

a_full = torch.randn(10240, 10240, dtype=torch.double, device='cuda')
b_full = torch.randn(10240, 10240, dtype=torch.double, device='cuda')
ab_full = a_full @ b_full
mean = ab_full.abs().mean()  # 80.7277

a = a_full.float()
b = b_full.float()

# Do matmul at TF32 mode.
ab_tf32 = a @ b  # takes 0.016s on GA100
error = (ab_tf32 - ab_full).abs().max()  # 0.1747
relative_error = error / mean  # 0.0022

# Do matmul with TF32 disabled.
torch.backends.cuda.matmul.allow_tf32 = False
ab_fp32 = a @ b  # takes 0.11s on GA100
error = (ab_fp32 - ab_full).abs().max()  # 0.0031
relative_error = error / mean  # 0.000039

AttributeError: 'CUDAModule' object has no attribute 'matmul'

In [4]:
torch.cuda.is_available()

True

### Define the Training Function

##### Sampling approach

In [41]:
%%time

# Store on GPU else cpu
device = "cuda" if torch.cuda.is_available() else "cpu"
logger.info("Device Type: {}".format(device))

generator = Generator(output_length)
discriminator = Discriminator(input_length)

generator = generator.to(device)
discriminator = discriminator.to(device)

loss = nn.BCELoss().to(device)

print(device)
print(str(torch.cuda.memory_allocated(device=None)))
print(torch.cuda.memory_summary(device))

# Choose optimizer
optimizer = torch.optim.Adam(generator.parameters(), lr=lr)
#optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=momentum)

for epoch in range(0, epochs):
    running_loss = 0.0
    for i in range(100000):
        # get the inputs
        #inputs, labels = data
        #inputs, labels = inputs.to(device), labels.to(device)
        noise = torch.randint(0, 2, size=(batch_size, output_length)).float()
        noise = noise.to(device)
    
        # Generate examples of data
        true_labels = [1] * batch_size
        true_labels = torch.tensor(true_labels).float()
        true_labels = true_labels.to(device)
            
        true_data = coef_df_binary.sample(16).values
        true_data = torch.tensor(true_data).float()
        true_data = true_data.to(device)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        #outputs = model(inputs)
        #G_of_noise = generator(noise)
        #loss = criterion(outputs, labels)
        #loss.backward()
        #optimizer.step()
        G_of_noise = generator(noise)
        D_of_G_of_noise = discriminator(G_of_noise)
        generator_loss = loss(D_of_G_of_noise, true_labels)
        generator_loss.backward()
        optimizer.step()
            
        # Train the discriminator on the true/generated data
        optimizer.zero_grad()
        true_discriminator_out = discriminator(true_data)
        true_discriminator_loss = loss(true_discriminator_out, true_labels)

        # add .detach() here think about this
        generator_discriminator_out = discriminator(G_of_noise.detach()) # introduce new d_of_g_of_noise without gradient
        generator_discriminator_loss = loss(generator_discriminator_out, torch.zeros(batch_size).to(device))
        discriminator_loss = (true_discriminator_loss + generator_discriminator_loss) / 2
        discriminator_loss.backward()
        optimizer.step()

        # print statistics
        running_loss += generator_loss.item()
        if i % 2000 == 1999:  # print every 2000 mini-batches
        #print("[%d, %5d] loss: %.3f" % (epoch + 1, i + 1, running_loss / 2000))
            print(f"Loss is {generator_loss.item()}.  Running loss is {running_loss/2000}.  Discriminator loss is {discriminator_loss.item()}")
            running_loss = 0.0
            print(torch.cuda.memory_summary(device))
            print(torch.cuda.list_gpu_processes(device))
        # print(running_loss/10)

    print("Finished Training")

cuda
3775389184
|                  PyTorch CUDA memory summary, device ID 0                 |
|---------------------------------------------------------------------------|
|            CUDA OOMs: 0            |        cudaMalloc retries: 0         |
|        Metric         | Cur Usage  | Peak Usage | Tot Alloc  | Tot Freed  |
|---------------------------------------------------------------------------|
| Allocated memory      |    3600 MB |    5200 MB |   74412 MB |   70811 MB |
|       from large pool |    3600 MB |    5200 MB |    6000 MB |    2400 MB |
|       from small pool |       0 MB |       0 MB |   68412 MB |   68411 MB |
|---------------------------------------------------------------------------|
| Active memory         |    3600 MB |    5200 MB |   74412 MB |   70811 MB |
|       from large pool |    3600 MB |    5200 MB |    6000 MB |    2400 MB |
|       from small pool |       0 MB |       0 MB |   68412 MB |   68411 MB |
|-----------------------------------------------

AttributeError: module 'torch.cuda' has no attribute 'list_gpu_processes'

In [32]:
!watch -n 2 nvidia-smi

[?1l>---------------------------------------------------------------------------+[2B3;1HFri Sep 24 14:53:15 2021[1;75H7[3;19H7[24;80H[1;75H9[3;19H9[24;80H[1;74H21[3;18H21[24;80H[1;75H3[3;19H3[24;80H[1;75H5[3;19H5[24;80H[1;75H7[3;19H7[24;80H[1;75H9[3;19H9[24;80H[1;74H31[3;18H31[24;80H[1;75H3[3;19H3[24;80H[1;75H5[3;19H5[24;80H[1;75H7[3;19H7[24;80H[1;75H9[3;19H9[24;80H[1;74H41[3;18H41[24;80H[1;75H4[3;19H4[24;80H[1;75H6[3;19H6[24;80H[1;75H8[3;19H8[24;80H[1;74H50[3;18H50[24;80H[1;75H2[3;19H2[24;80H[1;75H4[3;19H4[24;80H[1;75H6[3;19H6[24;80H[1;75H8[3;19H8[24;80H[1;72H4:00[3;16H4:00[24;80H[1;75H2[3;19H2[24;80H[1;75H4[3;19H4[24;80H[1;75H6[3;19H6[24;80H[1;75H8[3;19H8[24;80H[1;74H10[3;18H10[24;80H[1;75H2[3;19H2[24;80H[1;75H4[3;19H4[24;80H[1;75H6[3;19H6[24;80H[1;75H8[3;19H8[24;80H[1;74H20[3;18H20[24;80H[1;75H2[3;19H2[24;80H[1;75H5[3;19H5[24;80H[1;75H7[3;19H7[24;80H[1;75H9[3;19H9[24;80H[1

In [30]:
!nvidia-smi --loop=1

Fri Sep 24 14:50:01 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.142.00   Driver Version: 450.142.00   CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            On   | 00000000:00:1E.0 Off |                    0 |
| N/A   43C    P0    27W /  70W |   5920MiB / 15109MiB |     16%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [20]:
generator(noise).round()

tensor([[1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 

In [54]:
def extract(G_of_noise):

    G_numpy = G_of_noise.detach()   

    curves = []

    for i in range(len(G_numpy)):
        c1 = int(G_numpy[i][0].round())
        c2 = ((-1)**(int(G_numpy[i][1].round())))*(int(G_numpy[i][2].round()))
        c3 = int(G_numpy[i][3].round())
        c4 = (-1)**(int(G_numpy[i][4].round()))*(int("".join([str(int(y)) for y in G_numpy[i][5:18].round()]), 2))
        c6 = (-1)**(int(G_numpy[i][12].round()))*(int("".join([str(int(y)) for y in G_numpy[i][19:].round()]), 2))
    
        coef = [c1,c2,c3,c4,c6]
        curves.append(coef)
    return curves

In [50]:
G_numpy = G_of_noise.detach()   
c2 = ((-1)**(int(G_numpy[1][1].round())))*(int(G_numpy[1][2].round()))
c2

0

In [55]:
extract(G_of_noise)

[[0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121],
 [0, 0, 0, -7839, 5121]]

In [49]:
curves

NameError: name 'curves' is not defined

In [45]:
curves

NameError: name 'curves' is not defined

In [42]:
G_of_noise.round()

tensor([[1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0., 1., 1.],
        [1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 0., 0., 0., 

In [27]:
generator(noise).round()

tensor([[1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0.],
        [1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 1.,
         1., 0., 1., 1., 1., 0., 0., 

In [8]:
train()

NameError: name 'train' is not defined

In [9]:
generator(torch.tensor([[1.0]]).to(device))



tensor([[0.5722, 0.4668, 0.5969, 0.4940, 0.4720]], device='cuda:0',
       grad_fn=<SigmoidBackward>)

In [10]:
torch.round(generator(noise))

tensor([[1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [1., 0., 1., 0., 0.]], device='cuda:0', grad_fn=<RoundBackward>)

In [41]:
noise

tensor([[1., 0., 0., 1., 0.],
        [1., 1., 0., 1., 0.],
        [1., 1., 1., 0., 0.],
        [1., 1., 1., 0., 0.],
        [0., 0., 0., 0., 0.],
        [1., 1., 1., 1., 0.],
        [1., 0., 0., 0., 1.],
        [1., 1., 0., 1., 0.],
        [0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 1.],
        [0., 1., 0., 0., 1.],
        [1., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0.],
        [0., 1., 0., 1., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 0., 1., 1.]], device='cuda:0')

In [12]:
true_data = coef.sample(16).values
true_data = torch.tensor(true_data).float()

true_data

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00,  3.6100e+02, -1.3718e+04],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00, -7.0000e+01, -2.1600e+02],
        [ 0.0000e+00,  1.0000e+00,  0.0000e+00, -6.5000e+01, -2.1000e+02],
        [ 1.0000e+00,  1.0000e+00,  1.0000e+00, -1.2031e+07, -1.5911e+10],
        [ 1.0000e+00, -1.0000e+00,  1.0000e+00, -6.1000e+01, -9.1000e+01],
        [ 1.0000e+00, -1.0000e+00,  1.0000e+00,  2.2000e+02,  6.9500e+02],
        [ 1.0000e+00, -1.0000e+00,  1.0000e+00, -1.9400e+02, -5.4700e+02],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00, -1.0000e+00, -1.1000e+01],
        [ 1.0000e+00,  1.0000e+00,  1.0000e+00, -3.6300e+02, -2.8080e+03],
        [ 0.0000e+00,  1.0000e+00,  1.0000e+00,  9.4985e+04,  3.0376e+06],
        [ 1.0000e+00,  0.0000e+00,  1.0000e+00, -5.9000e+01, -1.8000e+01],
        [ 0.0000e+00,  1.0000e+00,  0.0000e+00, -3.8100e+02,  1.1430e+03],
        [ 0.0000e+00, -1.0000e+00,  0.0000e+00, -1.6000e+02,  8.3500e+02],
        [ 0.0000e+00,  1.

In [7]:
# For later on with larger network
def _save_model(model, model_dir):
    logger.info("Saving the model.")
    path = os.path.join(model_dir, "generator.pth")
    # recommended way from http://pytorch.org/docs/master/notes/serialization.html
    #torch.save(model.cpu().state_dict(), path)
    #torch.save(model.cpu().state_dict(),'.')


In [20]:
_train()

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


RuntimeError: Expected object of device type cuda but got device type cpu for argument #2 'mat1' in call to _th_addmm

In [8]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    # Not needed
    parser.add_argument(
        "--workers",
        type=int,
        default=2,
        metavar="W",
        help="number of data loading workers (default: 2)",
    )
    
    parser.add_argument(
        "--epochs",
        type=int,
        default=1,
        metavar="E",
        help="number of total epochs to run (default: 2)",
    )
    parser.add_argument(
        "--batch_size", type=int, default=16, metavar="BS", help="batch size (default: 16)"
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.001,
        metavar="LR",
        help="initial learning rate (default: 0.001)",
    )
    parser.add_argument(
        "--momentum", type=float, default=0.9, metavar="M", help="momentum (default: 0.9)"
    )
    # Not needed
    parser.add_argument(
        "--dist_backend", type=str, default="gloo", help="distributed backend (default: gloo)"
    )
    
    parser.add_argument("--hosts", type=json.loads, default=["algo-1","algo-2"])
    #parser.add_argument("--current-host", type=str, default=os.environ["SM_CURRENT_HOST"])
    parser.add_argument("--current-host", type=str, default='algo-1')
    #parser.add_argument("--model-dir", type=str, default=os.environ["SM_MODEL_DIR"])
    parser.add_argument("--model-dir", type=str, default='/opt/ml/model')
    #parser.add_argument("--data-dir", type=str, default=os.environ["SM_CHANNEL_TRAINING"])
    parser.add_argument("--data-dir", type=str, default='/opt/ml/input/data/training')
    parser.add_argument("--num-gpus", type=int, default=1)

    _train(parser.parse_args())

usage: __main__.py [-h] [--workers W] [--epochs E] [--batch_size BS] [--lr LR]
                   [--momentum M] [--dist_backend DIST_BACKEND]
                   [--hosts HOSTS] [--current-host CURRENT_HOST]
                   [--model-dir MODEL_DIR] [--data-dir DATA_DIR]
                   [--num-gpus NUM_GPUS]
__main__.py: error: unrecognized arguments: -f /home/ec2-user/.local/share/jupyter/runtime/kernel-5475c9eb-8d7d-4471-8228-e1041f22ee37.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
def model_fn(model_dir):
    logger.info("model_fn")
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = Net()
    if torch.cuda.device_count() > 1:
        logger.info("Gpu count: {}".format(torch.cuda.device_count()))
        model = nn.DataParallel(model)

    with open(os.path.join(model_dir, "model.pth"), "rb") as f:
        model.load_state_dict(torch.load(f))
    return model.to(device)