In [6]:
from keras import layers
from keras.layers import Activation

In [7]:
def _get_attention(num_units, features_dim, hidden_dim):
    features = layers.Input((features_dim,))
    hidden = layers.Input((hidden_dim,))
    w1 = layers.Dense(num_units)
    w2 = layers.Dense(num_units)
    outputs = layers.Dense(1)
    score = outputs(Activation('tanh', name='tanh')(w1 * features + w2 * hidden))
    attention_weights = Activation('softmax')(score, axis=1)
    context_vector = attention_weights * features
    context_vector = tf.reduce_sum(context_vector, axis=1)
    model = Model(inputs=[features, hidden], outputs=[context_vector, attention_weights])
    return model

In [8]:
m = _get_attention(10, 10, 10)

TypeError: Expected float32, got <keras.layers.core.Dense object at 0x7f49d60f9a58> of type 'Dense' instead.

In [1]:
import ml_util

In [3]:
import ml_utils

In [4]:
from ml_u

<module 'ml_utils' from '/home/can/Desktop/kaggle_projects/ml_utils/__init__.py'>

In [None]:

import os
import json
import multiprocessing
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from tqdm import tqdm, tqdm_notebook
import keras
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from basic_transformer.models.basic_transformer import BasicTransformer
from basic_transformer import utils as local_util
RANDOM_SEED = 43
np.random.seed(RANDOM_SEED)

In [None]:
DATASET = 'yelp'  # 'yelp' or 'imdb'
DIM = 128
NUM_WORDS = 5_000
TEXT_COLUMN = 'review'
LABEL_COLUMN = 'sentiment'
LABEL_MAPPING = {'negative': 0, 'positive': 1}
MAX_SEQ_LEN = 128
# dataloaders
BATCH_SIZE = 16
SHUFFLE = True
DEBUG_DF = False

In [None]:
# get model
model = BasicTransformer(dim=DIM, num_embeddings=NUM_WORDS, embedding_dim=DIM)

In [None]:
if DATASET == 'yelp':
    df = pd.read_csv("/media/can/MyData/datasets/yelp/df.csv")
    df['sentiment'] = df['stars'].replace({1: 'negative', 5: 'positive'})
    df = df.rename(columns={'text': 'review'})
    df = df.sample(n=50_000, random_state=RANDOM_SEED)
elif DATASET == 'imdb':
    df = pd.read_csv("/media/can/MyData/datasets/imdb-50k-movie-review/IMDB Dataset.csv")
else:
    raise ValueError("Invalid data: {}".format(str(DATASET)))

In [None]:
txts_positive = df[df['sentiment'] == 'positive']['review'].sample(3).tolist()
txts_negative = df[df['sentiment'] == 'negative']['review'].sample(3).tolist()

In [None]:
lens = df['review'].apply(lambda x: len(x.split(' ')))
lens.mean()

In [None]:
# generate a fake df for debugging
if DEBUG_DF:
    n_positive, n_negative = 5000, 5000
    positive_label, negative_label = 'positive', 'negative'
    positive_text = ' '.join(['good'] * 10)
    negative_text = ' '.join(['bad'] * 10)
    df = [(positive_text, positive_label)] * n_positive + [(negative_text, negative_label)] * n_negative
    df = pd.DataFrame(df)
    df.columns = ('review', 'sentiment')
    df

In [None]:
datagen = local_util.dataset_generator.TextDataset(df=df, 
                                                   num_words=NUM_WORDS, 
                                                   text_column=TEXT_COLUMN, 
                                                   label_column=LABEL_COLUMN, 
                                                   label_mapping=LABEL_MAPPING, 
                                                   max_seq_len=MAX_SEQ_LEN)

In [None]:
dataloader = DataLoader(dataset=datagen,  
                        batch_size=BATCH_SIZE, 
                        num_workers=multiprocessing.cpu_count(), 
                        shuffle=SHUFFLE)

In [None]:
model.cuda()

In [None]:
# train
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [None]:
losses = list()
running_loss = 0.0
every = 200
for epoch in range(2):
    for i, x in tqdm_notebook(enumerate(dataloader), total=len(dataloader)):
        inputs = x['seq']
        labels = x['label'].float()

        inputs = inputs.cuda()
        labels = labels.cuda()

        #
        optimizer.zero_grad()

        # 
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        if i % 2 == 0:
            print('loss:', loss)

        # print statistics
        running_loss += loss.item()
        if i % every == every - 1:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / every))
            running_loss = 0.0

In [None]:
import numpy as np
import torch

In [None]:
emb_layer = torch.nn.Embedding(num_embeddings=17, embedding_dim=8)

In [None]:
# emb_layer(torch.tensor([0, 0, 10, 16, 17]))