In [1]:
import os
import json
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm, tqdm_notebook
import keras
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from basic_transformer.models.basic_transformer import BasicTransformer
from basic_transformer import utils as local_util

Using TensorFlow backend.


In [2]:
DIM = 16
NUM_WORDS = 2_000
TEXT_COLUMN = 'review'
LABEL_COLUMN = 'sentiment'
LABEL_MAPPING = {'negative': 0, 'positive': 1}
MAX_SEQ_LEN = 500
# dataloaders
BATCH_SIZE = 16
SHUFFLE = True
DEBUG_DF = False

In [3]:
# get model
model = BasicTransformer(dim=DIM, num_embeddings=NUM_WORDS, embedding_dim=DIM)

In [4]:
# df = pd.read_csv("/media/can/MyData/datasets/yelp/df.csv")
df = pd.read_csv("/media/can/MyData/datasets/imdb-50k-movie-review/IMDB Dataset.csv")

In [5]:
# generate a fake df for debugging
if DEBUG_DF:
    n_positive, n_negative = 5000, 5000
    positive_label, negative_label = 'positive', 'negative'
    positive_text = ' '.join(['good'] * 10)
    negative_text = ' '.join(['bad'] * 10)
    df = [(positive_text, positive_label)] * n_positive + [(negative_text, negative_label)] * n_negative
    df = pd.DataFrame(df)
    df.columns = ('review', 'sentiment')
    df

In [6]:
datagen = local_util.dataset_generator.TextDataset(df=df, 
                                                   num_words=NUM_WORDS, 
                                                   text_column=TEXT_COLUMN, 
                                                   label_column=LABEL_COLUMN, 
                                                   label_mapping=LABEL_MAPPING, 
                                                   max_seq_len=400)

In [7]:
dataloader = DataLoader(dataset=datagen,  
                        batch_size=BATCH_SIZE, 
                        num_workers=multiprocessing.cpu_count(), 
                        shuffle=SHUFFLE)

In [8]:
model.cuda()

BasicTransformer(
  (embed_layer): Embedding(2001, 16)
  (linear): Linear(in_features=16, out_features=16, bias=True)
  (linear_clf): Linear(in_features=16, out_features=1, bias=True)
)

In [9]:
# train
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [10]:
losses = list()
running_loss = 0.0
every = 200
for epoch in range(2):
    for i, x in tqdm_notebook(enumerate(dataloader), total=len(dataloader)):
        inputs = x['seq']
        labels = x['label'].float()

        inputs = inputs.cuda()
        labels = labels.cuda()

        #
        optimizer.zero_grad()

        # 
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if i % 2 == 0:
            print('loss:', loss)

        # print statistics
        running_loss += loss.item()
        if i % every == every - 1:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / every))
            running_loss = 0.0

HBox(children=(IntProgress(value=0, max=3125), HTML(value='')))

  return F.binary_cross_entropy(input, target, weight=self.weight, reduction=self.reduction)


loss: tensor(0.9450, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.9654, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.8511, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7649, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7693, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.8033, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7588, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7633, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6825, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7241, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7348, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7369, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7034, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.7347, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6729, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6711, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6950, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7004, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6817, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7066, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6940, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6423, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7191, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7556, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6684, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7014, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.7470, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6495, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7000, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7217, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6682, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6901, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7191, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7084, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6880, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6710, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6939, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6618, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6811, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.6972, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6923, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7492, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7009, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6704, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6791, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7094, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7051, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7011, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7019, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6599, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6957, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6942, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.6900, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6567, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6777, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7147, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6825, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6872, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7028, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6833, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6731, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6829, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6724, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7292, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6907, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.6697, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6942, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7017, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6941, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6971, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6799, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6863, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6982, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7044, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6926, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7028, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6865, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6939, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.6799, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6870, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6984, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6865, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6948, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6887, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6731, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7041, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7068, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7224, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7077, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6822, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7034, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.6771, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6772, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6856, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6843, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6827, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6693, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6668, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6716, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6900, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6990, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6413, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6810, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7104, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

loss: tensor(0.6830, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6945, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6880, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6893, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6690, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.7063, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6715, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6891, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6923, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6829, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6869, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6815, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor(0.6942, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
loss: tensor

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/can/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-299e5cbde9d4>", line 18, in <module>
    loss.backward()
  File "/home/can/anaconda3/lib/python3.7/site-packages/torch/tensor.py", line 118, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/home/can/anaconda3/lib/python3.7/site-packages/torch/autograd/__init__.py", line 93, in backward
    allow_unreachable=True)  # allow_unreachable flag
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/can/anaconda3/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2033, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'KeyboardInterrupt' object has no attribute '_render_traceback_'

During handling of the ab

KeyboardInterrupt: 

In [24]:
import numpy as np
import torch

In [20]:
emb_layer = torch.nn.Embedding(num_embeddings=17, embedding_dim=8)

In [34]:
# emb_layer(torch.tensor([0, 0, 10, 16, 17]))