In [1]:
# http://pytorch.org/
from os.path import exists
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())
cuda_output = !ldconfig -p|grep cudart.so|sed -e 's/.*\.\([0-9]*\)\.\([0-9]*\)$/cu\1\2/'
accelerator = cuda_output[0] if exists('/dev/nvidia0') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.1-{platform}-linux_x86_64.whl torchvision
import torch

tcmalloc: large alloc 1073750016 bytes == 0x58eba000 @  0x7ff6695652a4 0x591a07 0x5b5d56 0x502e9a 0x506859 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x502209 0x502f3d 0x506859 0x504c28 0x502540 0x502f3d 0x507641 0x504c28 0x502540 0x502f3d 0x507641


In [2]:
from keras.datasets import imdb
from keras.preprocessing import sequence
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

Using TensorFlow backend.


In [0]:
vocab_size = 1000
# cut texts after this number of words (among top max_features most common words)
sentence_max_lenght = 150


In [4]:
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=vocab_size)

                                                             
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=sentence_max_lenght, padding='pre')
x_test = sequence.pad_sequences(x_test, maxlen=sentence_max_lenght)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)
print('labels of y_train', set(y_train))

x_train, y_train = torch.LongTensor(x_train).to(device), torch.FloatTensor(y_train).to(device)
x_test, y_test = torch.LongTensor(x_test).to(device), torch.FloatTensor(y_test).to(device)

Loading data...
Downloading data from https://s3.amazonaws.com/text-datasets/imdb.npz
25000 train sequences
25000 test sequences
Pad sequences (samples x time)
x_train shape: (25000, 150)
x_test shape: (25000, 150)
labels of y_train {0, 1}


In [0]:
class Net4IMDB(torch.nn.Module):
    def __init__(self, vocab_size,
                 embedding_size,
                 hidden_size,
                 output_size):
        super(Net4IMDB, self).__init__()
        # write your model component at here
        self.embeddings = torch.nn.Embedding(vocab_size,embedding_size)
        self.linear1 = torch.nn.Linear(embedding_size,hidden_size)
        self.linear2 = torch.nn.Linear(hidden_size,output_size)
        self.sig = torch.nn.Sigmoid()
    def forward(self, x):
        # write model forward at here
        emb = self.embeddings(x).mean(dim=1)
        h_relu = self.linear1(emb).clamp(min=0)
        h_sigmoid = self.sig(h_relu)
        y_pred = self.linear2(h_sigmoid)
        return y_pred

In [0]:
from torch.nn import Sequential, Embedding, ReLU, Tanh, Sigmoid, Linear

class Net4IMDB(torch.nn.Module):
    def __init__(self, vocab_size, embedding_size, 
                 hidden_size, output_size):
        super(Net4IMDB, self).__init__()
        self.embeddings = Embedding(vocab_size, embedding_size)
        self.layer = Sequential(Linear(embedding_size, hidden_size), 
                                 ReLU(), 
                                 Linear(hidden_size, output_size), 
                                 Tanh())
        
    def forward(self, x):
        emb = self.embeddings(x).mean(dim=1)
        y_pred = self.layer(emb)
        return y_pred

In [7]:
embedding_size = 200
hidden_size = 200
output_size = 1
print('Build model')
model = Net4IMDB(vocab_size, 
                            embedding_size, 
                            hidden_size, 
                            output_size).to(device)
print(model)
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=1e-1)

Build model
Net4IMDB(
  (embeddings): Embedding(1000, 200)
  (layer): Sequential(
    (0): Linear(in_features=200, out_features=200, bias=True)
    (1): ReLU()
    (2): Linear(in_features=200, out_features=1, bias=True)
    (3): Tanh()
  )
)


In [0]:
def binary_accuracy(preds, y):
    #round predictions to the closest integer
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.sum()/len(correct)
    return acc

In [9]:
epochs = 100
batch_size = 2000
x_train_batch = torch.split(x_train, batch_size, dim=0)
y_train_batch = torch.split(y_train, batch_size, dim=0)
for epoch in range(epochs):
    for i,(x,y) in enumerate(zip(x_train_batch, y_train_batch)):
        y_pred = model.forward(x).squeeze(1)
        loss = criterion(y_pred, y)
        accuracy = binary_accuracy(y_pred, y)
        print('epoch %d | step %d | loss %.4f | accuacy %.2f'%(epoch, i, loss.item(), accuracy.item()), end='\r')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()



In [0]:
with torch.no_grad():
    y_pred = model.forward(x_test).squeeze(1)
    binary_accuracy(y_pred, y_test)
    print('accuracy on test: %.4f'%(accuracy))

accuracy on test: 0.7560


In [0]:
!nvidia-smi

Mon Dec 17 06:21:59 2018       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 396.44                 Driver Version: 396.44                    |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   30C    P8    27W / 149W |      0MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
|  No ru