In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

import os

import pandas as pd # to read csv and handle dataframe

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data
from torch.autograd import Variable


import torchvision
import torchvision.transforms as transforms

matplotlib.rcParams['figure.figsize'] = (20,10)

data = np.load('../data/cullpdb+profile_5926.npy')
data = data.reshape((-1,700,57))

data.shape
x = data[:,:,:22]
y = data[:,:,22:31] # (brug 35 hvis du vil have solvent properties med)
print('Fuldt datasæt shape:')
print('X: ', x.shape)
print('Y: ', y.shape)

x_train = x[:5430]
y_train = y[:5430]

x_test = x[5435:5690]
y_test = y[5435:5690]
print('Splittet ud i training og testing:')
print('(Train) X: ', x_train.shape)
print('(Train) Y: ', y_train.shape)
print('(Test)  X: ', x_test.shape)
print('(Test)  Y: ', y_test.shape)

torch_X_train = torch.from_numpy(x_train).type(torch.LongTensor)
torch_Y_train = torch.from_numpy(y_train).type(torch.LongTensor)
torch_X_test  = torch.from_numpy(x_test).type(torch.LongTensor)
torch_Y_test  = torch.from_numpy(y_test).type(torch.LongTensor)

Fuldt datasæt shape:
X:  (5926, 700, 22)
Y:  (5926, 700, 9)
Splittet ud i training og testing:
(Train) X:  (5430, 700, 22)
(Train) Y:  (5430, 700, 9)
(Test)  X:  (255, 700, 22)
(Test)  Y:  (255, 700, 9)


In [2]:
# Hyper Parameters
EPOCH = 1               # train the training data n times, to save time, we just train 1 epoch
BATCH_SIZE = 50
LR = 0.001              # learning rate
DOWNLOAD_MNIST = False

In [3]:
# Data Loader for easy mini-batch return in training, the image batch shape will be (50, 1, 28, 28)
#train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True)
train_loader_mine = torch.utils.data.DataLoader(dataset=torch_X_train, batch_size=BATCH_SIZE, shuffle=False)

In [91]:
# pick 2000 samples to speed up testing
# Alt det her er fra den kode jeg har stjålet fra et sted - det er ikke noget vi skal bruge til noget
test_data = torchvision.datasets.MNIST(root='./mnist/', train=False)
test_x = torch.unsqueeze(test_data.test_data, dim=1).type(torch.FloatTensor)[:2000]/255.   # shape from (2000, 28, 28) to (2000, 1, 28, 28), value in range(0,1)
test_y = test_data.test_labels[:2000]

In [8]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Sequential(         # input shape (700, 22)
            nn.Conv1d(
                in_channels=22,            # input height
                out_channels=10,           # n_filters / feature maps
                kernel_size=5,              # filter size
                stride=1,                   # filter movement/step
                padding=2,                  # if want same width and length of this image after Conv2d, padding=(kernel_size-1)/2 if stride=1
            ),                              # output shape (300, 22)
            nn.ReLU(),                      # activation
            #nn.MaxPool1d(kernel_size=2),    # choose max value in 2x2 area, output shape (300, 11)
        )
        self.conv2 = nn.Sequential(         # input shape (300, 11)
            nn.Conv1d(10, 9, 5, 1, 2),     # output shape (32, 11)
            nn.ReLU(),                      # activation
            #nn.MaxPool1d(2),                # output shape (32, 7, 7)
        )
        # Out-lag skal have softmax (check at sum=1)
#        self.out = nn.Linear(32 * 11, 9)   # fully connected layer, output 10 classes

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
#        x = x.view(x.size(0), -1)           # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        output = self.out(x)
        return output, x    # return x for visualization

In [9]:
cnn = CNN()
print(cnn) # net architecture

CNN(
  (conv1): Sequential(
    (0): Conv1d(22, 10, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv1d(10, 9, kernel_size=(5,), stride=(1,), padding=(2,))
    (1): ReLU()
  )
)


In [10]:
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)   # optimize all cnn parameters
loss_func = nn.CrossEntropyLoss() # the target label is not one-hotted

In [90]:
for epoch in range(EPOCH):
    for step, (b_x, b_y) in enumerate(train_loader):   # gives batch data, normalize x when iterate train_loader

        output = cnn(b_x)[0]               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

        if step % 50 == 0:
            test_output, last_layer = cnn(test_x)
            pred_y = torch.max(test_output, 1)[1].data.numpy()
            accuracy = float((pred_y == test_y.data.numpy()).astype(int).sum()) / float(test_y.size(0))
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

Epoch:  0 | train loss: 2.2904 | test accuracy: 0.22
Epoch:  0 | train loss: 0.5798 | test accuracy: 0.77
Epoch:  0 | train loss: 0.5650 | test accuracy: 0.89
Epoch:  0 | train loss: 0.3613 | test accuracy: 0.90
Epoch:  0 | train loss: 0.1626 | test accuracy: 0.93
Epoch:  0 | train loss: 0.0915 | test accuracy: 0.93
Epoch:  0 | train loss: 0.1209 | test accuracy: 0.95
Epoch:  0 | train loss: 0.2732 | test accuracy: 0.96
Epoch:  0 | train loss: 0.1618 | test accuracy: 0.96
Epoch:  0 | train loss: 0.0209 | test accuracy: 0.97
Epoch:  0 | train loss: 0.0869 | test accuracy: 0.97
Epoch:  0 | train loss: 0.1375 | test accuracy: 0.97
Epoch:  0 | train loss: 0.0733 | test accuracy: 0.96
Epoch:  0 | train loss: 0.1371 | test accuracy: 0.97
Epoch:  0 | train loss: 0.0956 | test accuracy: 0.96
Epoch:  0 | train loss: 0.0491 | test accuracy: 0.97
Epoch:  0 | train loss: 0.0508 | test accuracy: 0.97
Epoch:  0 | train loss: 0.0327 | test accuracy: 0.98
Epoch:  0 | train loss: 0.0479 | test accuracy

In [11]:
for epoch in range(EPOCH):
    for step, b_x in enumerate(torch_X_train):   # gives batch data, normalize x when iterate train_loader
        b_y = torch_Y_train[step]
        output = cnn(b_x)#[0]               # cnn output
        loss = loss_func(output, b_y)   # cross entropy loss
        optimizer.zero_grad()           # clear gradients for this training step
        loss.backward()                 # backpropagation, compute gradients
        optimizer.step()                # apply gradients

        if step % 50 == 0:
            test_output, last_layer = cnn(torch_X_test)
            pred_y = torch.max(test_output, 1)[1].data.numpy()
            accuracy = float((pred_y == torch_Y_test.data.numpy()).astype(int).sum()) / float(torch_Y_test.size(0))
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.data.numpy(), '| test accuracy: %.2f' % accuracy)

RuntimeError: Expected 3-dimensional input for 3-dimensional weight [10, 22, 5], but got 2-dimensional input of size [700, 22] instead