In [1]:
from torch import nn, ones
from torch.autograd import Variable
from torchvision import models
from torch.nn.init import kaiming_normal
from torch import np
import torch
import torch.nn.functional as F
import random
import numpy as np

# Playing with embeddings - simple classifier

## Encoding functions

In [2]:
vocab = ['<BEGIN>','<STOP>','clear', 'cloudy', 'haze','partly_cloudy',
    'agriculture','artisinal_mine','bare_ground','blooming',
    'blow_down','conventional_mine','cultivation','habitation',
    'primary','road','selective_logging','slash_burn','water'
    ]

word_to_ix = { word: i for i, word in enumerate(vocab) }
print(word_to_ix)
one_hot_mapping = {k:np.eye(19)[v] for k,v in word_to_ix.items()}
# print(one_hot_mapping)

{'<BEGIN>': 0, '<STOP>': 1, 'clear': 2, 'cloudy': 3, 'haze': 4, 'partly_cloudy': 5, 'agriculture': 6, 'artisinal_mine': 7, 'bare_ground': 8, 'blooming': 9, 'blow_down': 10, 'conventional_mine': 11, 'cultivation': 12, 'habitation': 13, 'primary': 14, 'road': 15, 'selective_logging': 16, 'slash_burn': 17, 'water': 18}


In [3]:
def label_to_target(word_to_ix, label):
    return Variable(torch.LongTensor(
            list(map(lambda w: word_to_ix[w], label))
        ))

## Decoding functions

In [4]:
ix_to_word = {v: k for k, v in word_to_ix.items()}

In [5]:
flatten = lambda l: [item for sublist in l for item in sublist]

def predictions_to_label(ix_to_word, predictions):
    predictions = F.softmax(predictions)
    _, preds = torch.max(predictions.data, 1)
    return list(map(lambda ix: ix_to_word[ix], flatten(preds.tolist())))

## Batch creation

In [6]:
def gen_batch(n, vocab):
    batch = []
    for _ in range(n):
        batch.append(random.choice(vocab))
    return batch

## Model Definition

In [7]:
class EmbeddingIdentity(nn.Module):
    """ Testing weight sharing
    """
    
    def __init__(self, vocab, repr_dim, num_rnn_layers):
        super(EmbeddingIdentity, self).__init__()
        self.label_to_ix = { label: i for i, label in enumerate(vocab) }
        self.embeds = nn.Embedding(len(vocab), repr_dim)
        self.rnn = nn.LSTM(input_size=repr_dim,
                            hidden_size=repr_dim,
                            num_layers=num_rnn_layers,
                            batch_first = True)
        self.fc = nn.Linear(repr_dim, len(vocab))
        
        # link embedding and decoding weight
        self.fc.weight = self.embeds.weight
    
    def toVariable(self, x):
        return Variable(torch.LongTensor(
            list(map(lambda lbl: self.label_to_ix[lbl], x))
        ))
    
    def forward(self, x, hidden=None):
        x = self.toVariable(x)          # Tensor with a single integer
        f = self.embeds(x).unsqueeze(1) # Dim 1x5 --> unsqueeze --> 1x1x5
        f, hidden = self.rnn(f, hidden) # Dim output: 1x1x5, Dim hidden: 2x1x5
        f = self.fc(f.contiguous().squeeze(1)) # Dim 1x19
        return f
        

## Playground

In [8]:
model = EmbeddingIdentity(vocab,5, 2)

In [9]:
output = model(["slash_burn"])
predictions_to_label(ix_to_word, output)

['road']

In [10]:
batch = gen_batch(10, vocab)

In [11]:
output = model(batch)
predictions_to_label(ix_to_word, output)

['bare_ground',
 'bare_ground',
 'road',
 'bare_ground',
 'road',
 'road',
 'bare_ground',
 'road',
 'bare_ground',
 'bare_ground']

In [12]:
target = label_to_target(word_to_ix,batch)
batch

['road',
 'selective_logging',
 'slash_burn',
 'partly_cloudy',
 'water',
 'slash_burn',
 'blow_down',
 'blooming',
 'clear',
 'cultivation']

In [13]:
loss = F.cross_entropy(output, target)

In [14]:
# loss.backward()
loss

Variable containing:
 3.1522
[torch.FloatTensor of size 1]

# Wrapping up for multi epoch training

In [15]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [16]:
for epoch in range(500):
    X = gen_batch(500, vocab)
    y = label_to_target(word_to_ix,X)
    y_pred = model(X)
    loss = criterion(y_pred, y)
    print(epoch, loss.data[0])
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

0 3.0327892303466797
1 3.008108377456665
2 3.017551898956299
3 3.005432367324829
4 2.9789912700653076
5 2.9626643657684326
6 2.976961135864258
7 2.970008611679077
8 2.9364094734191895
9 2.941561222076416
10 2.9203896522521973
11 2.9210522174835205
12 2.909860849380493
13 2.8913323879241943
14 2.866137981414795
15 2.8700342178344727
16 2.8909213542938232
17 2.8746190071105957
18 2.836191177368164
19 2.8244004249572754
20 2.824340343475342
21 2.794071912765503
22 2.7842977046966553
23 2.7581307888031006
24 2.762523889541626
25 2.741461753845215
26 2.6938722133636475
27 2.6771738529205322
28 2.6585323810577393
29 2.642106056213379
30 2.622920513153076
31 2.574413299560547
32 2.5437347888946533
33 2.529202938079834
34 2.5020318031311035
35 2.4690897464752197
36 2.4492805004119873
37 2.360297441482544
38 2.376901865005493
39 2.321429967880249
40 2.263556718826294
41 2.2434051036834717
42 2.1823229789733887
43 2.1596827507019043
44 2.1252403259277344
45 2.0954039096832275
46 2.06956815719604

362 0.06461622565984726
363 0.06301934272050858
364 0.06491687148809433
365 0.0638754591345787
366 0.06364478915929794
367 0.06462763249874115
368 0.06405320018529892
369 0.06570523977279663
370 0.0616951659321785
371 0.06168324500322342
372 0.061845507472753525
373 0.05868249386548996
374 0.06011258065700531
375 0.06058717891573906
376 0.05790421739220619
377 0.06199761480093002
378 0.059761274605989456
379 0.0572650283575058
380 0.056322094053030014
381 0.05799992382526398
382 0.056367985904216766
383 0.05416860431432724
384 0.05654078349471092
385 0.05512376129627228
386 0.05502678453922272
387 0.0556374192237854
388 0.05215751752257347
389 0.05562286823987961
390 0.05423089489340782
391 0.05449938029050827
392 0.05499233305454254
393 0.051911719143390656
394 0.05303974449634552
395 0.051560044288635254
396 0.0517883338034153
397 0.050359077751636505
398 0.04952573776245117
399 0.05212709680199623
400 0.04838406667113304
401 0.04988162964582443
402 0.050857122987508774
403 0.0504196

In [17]:
batch2 = gen_batch(30, vocab)

In [18]:
output2 = model(batch2)

In [19]:
batch2

['clear',
 'habitation',
 'conventional_mine',
 'slash_burn',
 'blooming',
 'water',
 'road',
 'conventional_mine',
 'partly_cloudy',
 '<BEGIN>',
 'artisinal_mine',
 'habitation',
 'cloudy',
 'selective_logging',
 'artisinal_mine',
 'conventional_mine',
 'slash_burn',
 'agriculture',
 'artisinal_mine',
 'conventional_mine',
 'habitation',
 'artisinal_mine',
 'blow_down',
 'agriculture',
 'blow_down',
 'water',
 'conventional_mine',
 'habitation',
 'selective_logging',
 'primary']

In [20]:
predictions_to_label(ix_to_word, output2)

['clear',
 'habitation',
 'conventional_mine',
 'slash_burn',
 'blooming',
 'water',
 'road',
 'conventional_mine',
 'partly_cloudy',
 '<BEGIN>',
 'artisinal_mine',
 'habitation',
 'cloudy',
 'selective_logging',
 'artisinal_mine',
 'conventional_mine',
 'slash_burn',
 'agriculture',
 'artisinal_mine',
 'conventional_mine',
 'habitation',
 'artisinal_mine',
 'blow_down',
 'agriculture',
 'blow_down',
 'water',
 'conventional_mine',
 'habitation',
 'selective_logging',
 'primary']

# Sequence prediction

In [21]:
from torch.nn.utils.rnn import pad_packed_sequence, pack_padded_sequence

In [22]:
seq_vocab = ['clear', 'cloudy', 'haze','partly_cloudy',
    'agriculture','artisinal_mine','bare_ground','blooming',
    'blow_down','conventional_mine','cultivation','habitation',
    'primary','road','selective_logging','slash_burn','water'
    ]


In [23]:
def gen_batch_sequences(n, seq_vocab):
    batch = []
    for _ in range(n):
        seq = ['<BEGIN>']
        for _ in range(random.randint(1,16)):
            seq.append(random.choice(seq_vocab))
        seq.append('<STOP>')
        seq = list(dict.fromkeys(seq)) # Remove duplicate while keeping order
        batch.append(seq)
    return batch

In [24]:
batch_seq = gen_batch_sequences(10, seq_vocab)

In [25]:
batch_seq

[['<BEGIN>', 'bare_ground', '<STOP>'],
 ['<BEGIN>',
  'water',
  'clear',
  'primary',
  'blooming',
  'haze',
  'cultivation',
  'selective_logging',
  'artisinal_mine',
  'partly_cloudy',
  'conventional_mine',
  '<STOP>'],
 ['<BEGIN>', 'artisinal_mine', '<STOP>'],
 ['<BEGIN>',
  'selective_logging',
  'haze',
  'conventional_mine',
  'blooming',
  'agriculture',
  'artisinal_mine',
  'blow_down',
  'slash_burn',
  'habitation',
  '<STOP>'],
 ['<BEGIN>', 'primary', '<STOP>'],
 ['<BEGIN>', 'artisinal_mine', 'haze', '<STOP>'],
 ['<BEGIN>',
  'habitation',
  'selective_logging',
  'water',
  'agriculture',
  'blooming',
  'haze',
  '<STOP>'],
 ['<BEGIN>',
  'cultivation',
  'haze',
  'partly_cloudy',
  'road',
  'conventional_mine',
  'artisinal_mine',
  'clear',
  '<STOP>'],
 ['<BEGIN>', 'habitation', '<STOP>'],
 ['<BEGIN>', 'habitation', '<STOP>']]

In [26]:
def seqlabels_to_target(word_to_ix, list_seq):
    return list(map(lambda s: torch.LongTensor(
                list(map(lambda label: word_to_ix[label], s))
            ), list_seq))

In [27]:
embeds = nn.Embedding(len(vocab), 5, padding_idx=0)

In [28]:
seq_tensors = seqlabels_to_target(word_to_ix, batch_seq)

In [29]:
Variable(seq_tensors[1])

Variable containing:
  0
 18
  2
 14
  9
  4
 12
 16
  7
  5
 11
  1
[torch.LongTensor of size 12]

In [30]:
embeds(Variable(seq_tensors[1]))

Variable containing:
 0.0000  0.0000  0.0000  0.0000  0.0000
 0.0506 -1.3719 -0.2585 -0.1785  0.6526
-1.0919  0.1947 -0.1726  1.1912  1.6100
 1.3168 -0.0635 -1.2732  0.4671 -0.8394
 0.3536  1.1308  1.1159  1.6579  2.1703
-1.5680 -0.1476 -0.8719 -0.3190  0.7808
 0.7385 -0.7012 -0.6762 -1.0369  0.2994
-1.0733 -1.7238 -1.3880 -0.3421  1.5200
 0.0005  0.5236  1.6549 -0.5829  0.3660
 0.2306  1.4325  0.7247 -0.7757 -2.6264
-0.0849 -2.8016  1.3666  1.0155 -0.1480
 1.2792 -0.4146 -0.3290 -0.3934 -0.2931
[torch.FloatTensor of size 12x5]

In [31]:
Variable(seq_tensors[3])

Variable containing:
  0
 16
  4
 11
  9
  6
  7
 10
 17
 13
  1
[torch.LongTensor of size 11]

In [32]:
embeds(Variable(seq_tensors[3]))

Variable containing:
 0.0000  0.0000  0.0000  0.0000  0.0000
-1.0733 -1.7238 -1.3880 -0.3421  1.5200
-1.5680 -0.1476 -0.8719 -0.3190  0.7808
-0.0849 -2.8016  1.3666  1.0155 -0.1480
 0.3536  1.1308  1.1159  1.6579  2.1703
-0.2308 -0.8565 -1.3905  0.0961  0.1172
 0.0005  0.5236  1.6549 -0.5829  0.3660
 0.8778 -1.2870 -0.0061 -1.4612  2.4435
-0.1125  1.6002  0.9264  0.1641  2.1513
-0.6048 -0.6636 -0.3441 -1.0603  2.2499
 1.2792 -0.4146 -0.3290 -0.3934 -0.2931
[torch.FloatTensor of size 11x5]

Seems like it will be a pain to work with variable size input. It would need my custom data loader. Hence I would create a custom one directly for Amazon dataset

In [33]:
from torch.utils.data.dataset import Dataset
from torchvision import transforms
import pandas as pd
import os

from torch import np, from_numpy # Numpy like wrapper

class TagsDataset(Dataset):
    """Dataset wrapping target labels for Kaggle - Planet Amazon from Space competition.

    Arguments:
        A CSV file path
        Path to image folder
        Extension of images
    """

    def __init__(self, csv_path, vocab_mapping):
    
        self.df = pd.read_csv(csv_path)
        self.vocab_mapping = vocab_mapping

        self.tags = self.df['tags'].str.split()

        
    def __getitem__(self, index):
        vocab = self.vocab_mapping
        tags = []
        tags.append(vocab['<BEGIN>'])
        tags.extend([vocab[tag] for tag in self.tags[index]])
        tags.append(vocab['<STOP>'])
        
        tags = torch.Tensor(tags)
        
        return tags, tags

    def __len__(self):
        return len(self.df.index)


    def collate_fn(self, data):
        """Creates mini-batch tensors for tags with variable size

        Args:
            data: list of tuple (input, target). 
                - input: torch tensor of shape (?); variable length.
                - target: torch tensor of same shape (?); variable length.
        Returns:
            inputs: torch tensor of shape (batch_size, padded_length).
            targets: torch tensor of shape (batch_size, padded_length).
            lengths: list; valid length for each padded tags.
        """
        # Sort a data list by target length (descending order).
        data.sort(key=lambda x: len(x[1]), reverse=True)
        _, tags = zip(*data)

        # Merge tags (from tuple of 1D tensor to 2D tensor).
        lengths = [len(tag) for tag in tags]
        targets = torch.zeros(len(tags), max(lengths)).long()
        for i, tag in enumerate(tags):
            end = lengths[i]
            targets[i, :end] = tag[:end]        
        return targets, targets, lengths


In [34]:
X_train = TagsDataset('./data/train.csv',word_to_ix)

In [35]:
train_loader = torch.utils.data.DataLoader(dataset=X_train, 
                                              batch_size=100,
                                              shuffle=True,
                                              num_workers=1,
                                              collate_fn=X_train.collate_fn)

In [55]:
class SeqPred(nn.Module):
    """ Testing weight sharing + Variable Length sequence
    """
    
    def __init__(self, vocab_size, embed_dim, num_rnn_layers):
        super(SeqPred, self).__init__()
 
        self.vocab_size = vocab_size
        self.embeds = nn.Embedding(vocab_size, embed_dim) # , padding_idx=0 Ignore the <start> (0 in vocab) for gradient
        self.rnn = nn.LSTM(embed_dim, embed_dim, num_rnn_layers, batch_first = True)
        self.fc = nn.Linear(embed_dim, vocab_size)
        self.n_layers = num_rnn_layers
        
        # link embedding and decoding weight
        self.fc.weight = self.embeds.weight
    
    
    def forward(self, tags, lengths, hidden=None):
        embed = self.embeds(tags)
        print(embed.size())
        print(len(lengths))
        packed = pack_padded_sequence(embed, lengths, batch_first=True)
        out, hidden = self.rnn(packed, hidden)
        out = self.fc(out.data) #Unpack PackedSeq and feed to FC
        return out, hidden

    def genTags(self, inputs, states):
        tag_ids = []
        inputs = self.embeds(inputs)
        for i in range(self.vocab_size):                    # maximum sampling length
            hiddens, states = self.rnn(inputs, states)      # (batch_size, 1, hidden_size)
            outputs = self.fc(hiddens.squeeze(1))           # (batch_size, vocab_size)
            # outputs = F.softmax(outputs)
            predicted = outputs.max(1)[1]
            tag_ids.append(predicted)
            inputs = self.embeds(predicted)
        tag_ids = torch.cat(tag_ids, 1)                     # (batch_size, 19)
        return tag_ids.squeeze()

In [56]:
model = SeqPred(19, 5, 2)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [57]:
epoch =0
for batch_idx, (data, target, lengths) in enumerate(train_loader):
    data = Variable(data)
    target = Variable(target)
    targets = pack_padded_sequence(target, lengths, batch_first=True)[0]
    
    model.zero_grad()
    
    outputs, _ = model(data,lengths)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    
    if batch_idx % 100 == 0:
        print('Train Epoch: {:03d} [{:05d}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, batch_idx * len(data), len(train_loader) * len(data),
            100. * batch_idx / len(train_loader), loss.data[0]))

torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 10, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 10, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9

torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 10, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 7, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 7, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 9, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 10, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8, 5])
100
torch.Size([100, 8

In [39]:
# Set initial states
state = (Variable(torch.zeros(2, 1, 5)),
             Variable(torch.zeros(2, 1, 5)))

In [40]:
start = Variable(torch.rand(1, 1).mul(19).long(), volatile=True)
start

Variable containing:
 14
[torch.LongTensor of size 1x1]

In [41]:
start0 = Variable(torch.zeros(1, 1).long(), volatile=True)
start0

Variable containing:
 0
[torch.LongTensor of size 1x1]

In [42]:
model.genTags(start,state)

Variable containing:
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
 14
[torch.LongTensor of size 19]