In [50]:
# set up a GPU
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID" 
os.environ["CUDA_VISIBLE_DEVICES"]="0, 1"


In [51]:
# test GPU working (output should be "Using -- cuda")
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print( "Using --", device )

Using -- cuda


In [52]:
# example code from : https://medium.com/analytics-vidhya/a-simple-neural-network-classifier-using-pytorch-from-scratch-7ebb477422d2

# prepare data
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader


X, Y = make_classification(
  n_samples=100, n_features=4, n_redundant=0,
  n_informative=3,  n_clusters_per_class=2, n_classes=3
)

In [53]:
# split into training/test
X_train, X_test, Y_train, Y_test = train_test_split(
  X, Y, test_size=0.33, random_state=42)



In [54]:
print( X.shape)

(100, 4)


In [55]:
# define data loader for training data -- note the defining of two required methods (getitem and len)

class Data(Dataset):
  def __init__(self, X_train, y_train):
    # need to convert float64 to float32 else 
    # will get the following error
    # RuntimeError: expected scalar type Double but found Float
    self.X = torch.from_numpy(X_train.astype(np.float32))
    # need to convert float64 to Long else 
    # will get the following error
    # RuntimeError: expected scalar type Long but found Float
    self.y = torch.from_numpy(y_train).type(torch.LongTensor)
    self.len = self.X.shape[0]
  
  def __getitem__(self, index):
    return self.X[index], self.y[index]
  def __len__(self):
    return self.len

In [56]:
# instantiate the data loader for the actual data
traindata = Data(X_train, Y_train)

# accessing e.g. 
# print( traindata[23:25])

# load into a DataLoader iterator so that accessing will return a batch at a time

batch_size = 4
trainloader = DataLoader(traindata, batch_size=batch_size, 
                         shuffle=True, num_workers=2)




In [57]:
# build network

import torch.nn as nn

# number of features (len of X cols)
input_dim = 4

# number of hidden nodes
hidden_layers = 25

# number of classes (unique of y)
output_dim = 3
class Network(nn.Module):
  def __init__(self):
    super(Network, self).__init__()
    self.linear1 = nn.Linear(input_dim, hidden_layers)
    self.linear2 = nn.Linear(hidden_layers, output_dim)
  
  def forward_layer1( self, x ):
    x = torch.sigmoid(self.linear1(x))
    return x
  
  def forward_layer2( self, x ):
    x = self.linear2(x)
    return x 
  
  def forward(self, x):
    y = self.forward_layer1(x)
    y = self.forward_layer2(y)
    return y
    


In [58]:
# instantiate network
clf = Network()
# define loss and optimizer algorithm
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(clf.parameters(), lr=0.1)

print( clf.parameters )

## load model to GPU
clf.to(device)

<bound method Module.parameters of Network(
  (linear1): Linear(in_features=4, out_features=25, bias=True)
  (linear2): Linear(in_features=25, out_features=3, bias=True)
)>


Network(
  (linear1): Linear(in_features=4, out_features=25, bias=True)
  (linear2): Linear(in_features=25, out_features=3, bias=True)
)

In [62]:
# test_input = torch.randn(1,1,1,4)
# print( test_input )

# #print( clf( test_input ) )

# xx = clf.forward_layer1( test_input )
# print( " Output layer 1 = ", xx )

# yy = clf.forward_layer2( xx )
# print( " Output layer 2 = ", yy )

# zz = clf( test_input )
# print(" clf direct call : ", zz)

for i, inputs, labels in trainloader:
    print( inputs )
    print( labels )
    print( i )
 


ValueError: not enough values to unpack (expected 3, got 2)

In [63]:
## GPU version of dataloader / training

# training epochs
epochs = 8
for epoch in range(epochs):
  running_loss = 0.0
  # iterate over trainloader (see above)
  for inputs, labels in trainloader:
    inputs, labels = inputs.to(device), labels.to(device)
    # set optimizer to zero grad to remove previous epoch gradients
    optimizer.zero_grad()
    # forward propagation
    outputs = clf(inputs)
    loss = criterion(outputs, labels)
    # backward propagation
    loss.backward()
    # optimize
    optimizer.step()
    running_loss += loss.item()

  # display statistics
  print(f'[ {epoch + 1} ] loss: {running_loss / 2000:.5f}')

[ 1,    18 ] loss: 0.00933
[ 2,    18 ] loss: 0.00871
[ 3,    18 ] loss: 0.00797
[ 4,    18 ] loss: 0.00759
[ 5,    18 ] loss: 0.00683
[ 6,    18 ] loss: 0.00669
[ 7,    18 ] loss: 0.00618
[ 8,    18 ] loss: 0.00587


In [27]:
# training epochs
epochs = 8
for epoch in range(epochs):
  running_loss = 0.0
  # enumerate over trainloader (see above)
  for i, data in enumerate(trainloader, 0):
    inputs, labels = data
    # set optimizer to zero grad to remove previous epoch gradients
    optimizer.zero_grad()
    # forward propagation
    outputs = clf(inputs)
    loss = criterion(outputs, labels)
    # backward propagation
    loss.backward()
    # optimize
    optimizer.step()
    running_loss += loss.item()

  # display statistics
  print(f'[ {epoch + 1}, {i + 1:5d} ] loss: {running_loss / 2000:.5f}')



[ 1,    17 ] loss: 0.00904
[ 2,    17 ] loss: 0.00813
[ 3,    17 ] loss: 0.00741
[ 4,    17 ] loss: 0.00696
[ 5,    17 ] loss: 0.00648
[ 6,    17 ] loss: 0.00621
[ 7,    17 ] loss: 0.00597
[ 8,    17 ] loss: 0.00573


In [13]:
# save network state for later re-use
PATH = '../net-states/testing-only.pth'
torch.save(clf.state_dict(), PATH)


In [15]:

# load network state 
clf = None

clf = Network()
clf.load_state_dict( torch.load(PATH))



<All keys matched successfully>

In [16]:
# instantiate data loader for test data
testdata = Data(X_test, Y_test)
testloader = DataLoader(testdata, batch_size=batch_size, 
                        shuffle=True, num_workers=2)

In [19]:
# a single run of inference / forward prop only
outputs = clf(inputs)
print( outputs )

tensor([[-0.4092, -0.1135,  0.2849],
        [-0.5373, -0.8778,  1.1463],
        [-0.4509, -1.2103,  1.4541]], grad_fn=<AddmmBackward0>)


In [20]:
# apply max over 3 tuple (outputs) -- note, indexing starts at 0, so remember to add one

__, predicted = torch.max(outputs, dim = 1)
print(predicted)


tensor([2, 2, 2])


In [21]:
# complete run through testing data set

correct, total = 0, 0
# no need to calculate gradients during inference
with torch.no_grad():
  for data in testloader:
    inputs, labels = data
    # calculate output by running through the network
    outputs = clf(inputs)
    # get the predictions
    __, predicted = torch.max(outputs.data, 1)
    # update results
    total += labels.size(0)
    correct += (predicted == labels).sum().item()
print(f'Accuracy of the network on the {len(testdata)} test data: {100 * correct // total} %')

Accuracy of the network on the 33 test data: 54 %
