In [1]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import random

## Using our CNN
Now we will add our `train_test` and batchify methods from last time so that we can use our CNN on a dataset!

In [2]:
class BIOF510_AC:
    """Tutorial wrapper that trains a small convolutional autoencoder.

    The instance stores a dataset; ``train_test`` fits a fresh ``AutoCNN``
    to reconstruct that dataset and returns the trained network.

    Args:
        data: Sequence of images. ``train_test`` stacks slices of it with
            ``np.asarray`` and feeds them to ``nn.Conv2d(1, ...)``, so each
            item is expected to be array-like with a single leading channel
            (e.g. shape ``(1, H, W)``).
    """

    def __init__(self, data):
        self.data = data

    class AutoCNN(nn.Module):
        """Convolutional autoencoder: two conv+pool stages, then two
        unpool+transposed-conv stages that mirror them."""

        def __init__(self):
            super().__init__()

            # Encoder: the convolution layers.
            self.conv1 = nn.Conv2d(1, 16, 3)
            self.conv2 = nn.Conv2d(16, 32, 3)

            # Max pooling - the indices of the max values are needed for
            # unpooling, hence return_indices=True.
            self.pool = nn.MaxPool2d(kernel_size=2, return_indices=True)

            # Unpooling undoes the effect of pooling in the decoder.
            self.unpool = nn.MaxUnpool2d(2)
            self.tanh = nn.Tanh()
            self.relu = nn.ReLU()

            # Decoder: the transposed convolution layers.
            self.unconv1 = nn.ConvTranspose2d(32, 16, 3)
            self.unconv2 = nn.ConvTranspose2d(16, 1, 3)

        def forward(self, x):
            """Encode ``x`` and decode it back to the same spatial size.

            Args:
                x: Float tensor with one channel, e.g. ``(N, 1, H, W)``.

            Returns:
                Tensor with the same shape as ``x``; values pass through a
                final ``tanh``.
            """
            # Each un-padded 3x3 convolution trims one pixel from every edge
            # and each 2x2 pooling halves the size (rounding down), e.g.
            # 28x28 -> 26x26 -> 13x13 (pooling) -> 11x11 -> 5x5 (pooling).
            # Because pooling rounds down, the unpool layers must be told the
            # exact size to restore. Remember the size seen just before each
            # pooling step instead of hard-coding the 28x28 case.
            x = self.relu(self.conv1(x))
            size_before_pool1 = tuple(x.shape[-2:])
            x, indices1 = self.pool(x)

            x = self.relu(self.conv2(x))
            size_before_pool2 = tuple(x.shape[-2:])
            x, indices2 = self.pool(x)

            # Decoder mirrors the encoder in reverse order.
            x = self.unpool(x, indices2, output_size=size_before_pool2)
            x = self.relu(self.unconv1(x))

            x = self.unpool(x, indices1, output_size=size_before_pool1)
            x = self.tanh(self.unconv2(x))

            return x

    def train_test(self, n_epochs=3, batch_size=16, lr=0.01):
        """Train a new ``AutoCNN`` to reconstruct ``self.data``.

        No parameter optimisation is done in this tutorial; the defaults
        reproduce the original fixed settings (3 epochs, batches of 16,
        SGD with learning rate 0.01).

        Args:
            n_epochs: Number of passes over the data.
            batch_size: Number of images per batch.
            lr: Learning rate for SGD.

        Returns:
            The trained ``AutoCNN`` (left in training mode).

        Raises:
            ValueError: If the data produces no batches.
        """
        batches = batchify_autoencoder(self.data, batch_size=batch_size)
        if not batches:
            raise ValueError('train_test needs at least one sample in data')

        neural_network = BIOF510_AC.AutoCNN()
        optimizer = optim.SGD(neural_network.parameters(), lr=lr)
        loss_function = nn.MSELoss()

        neural_network.train()

        for _ in range(n_epochs):
            error = 0.0
            for batch in batches:
                optimizer.zero_grad()

                # The input is also the reconstruction target, so build the
                # tensor once and reuse it.
                inputs = torch.tensor(np.asarray(batch).astype(np.float32))

                predictions = neural_network(inputs)
                loss = loss_function(predictions, inputs)

                loss.backward()
                optimizer.step()

                # .item() yields a plain Python float for accumulation.
                error += loss.item()

            # Mean of the per-batch MSE values over this epoch.
            print('Error: {:.4f}'.format(error / len(batches)))

        return neural_network
        
   


''' Utility Function - function to turn the data into batches'''

def batchify_autoencoder(data,batch_size=16):
    """Split ``data`` into consecutive batches of ``batch_size`` items.

    Every item ends up in exactly one batch, in the original order. The
    final batch is shorter when ``len(data)`` is not a multiple of
    ``batch_size``.

    Args:
        data: Any sliceable sequence supporting ``len`` (list, array, ...).
        batch_size: Maximum number of items per batch; must be >= 1.

    Returns:
        A list of slices of ``data``; empty when ``data`` is empty.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')

    # Slicing clamps at the end of the sequence, so the last (possibly
    # partial) batch needs no special case - and a final *full* batch is
    # no longer dropped when len(data) is an exact multiple of batch_size.
    return [data[start:start + batch_size]
            for start in range(0, len(data), batch_size)]

In [3]:
# Fetch the MNIST dataset (downloaded under ./data/MNIST on first use).
data = torchvision.datasets.MNIST(root='./data/MNIST', download=True)

# Pull the label tensor and the raw image tensor off the dataset object;
# from here on `data` refers to the images only.
labels = data.targets
data = data.data

# Build a list of float64 images, each rescaled by its own maximum pixel
# value and shaped (1, 28, 28).
# NOTE(review): assumes no image is entirely zero, otherwise 1/max divides
# by zero - confirm for other datasets.
newdata = []
for image in data:
    image = np.ravel(image).astype(np.float64)
    scale = 1 / image.max()
    image *= scale
    newdata.append(image.reshape(1, 28, 28))

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw/train-images-idx3-ubyte.gz


31.0%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

98.2%IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)

100.0%


Extracting ./data/MNIST/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz


112.7%
  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


Extracting ./data/MNIST/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/MNIST/raw

Processing...
Done!


In [4]:
# Wrap the prepared images in the tutorial class, then train the autoencoder.
# train_test() prints one "Error: ..." line per epoch and returns the network.
testclass = BIOF510_AC(newdata)
model = testclass.train_test()

Error: tensor(0.0443)
Error: tensor(0.0167)
Error: tensor(0.0126)
