In [48]:
import torch
from torch import nn
from torch.nn import functional as F

In [159]:
MAX_LENGTH = 20
BATCH_SIZE = 5

In [133]:
class Encoder(nn.Module):
    
    def __init__(self, hidden_size, dropout_p = 0.2):
        super().__init__()
        self.embedding = nn.Linear(2, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(input_size = hidden_size, hidden_size = hidden_size, batch_first=True)
        
    def forward(self, x):
        "x : [N, L, 2]"
        embedded = self.dropout(self.embedding(x))
        encoder_outputs, encoder_hidden = self.lstm(embedded)
        return  encoder_outputs, encoder_hidden

In [51]:
class Decoder(nn.Module):
    
    def __init__(self, hidden_size, dropout_p = 0.2):
        super().__init__()
        self.embedding = nn.Linear(2, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(input_size = hidden_size, hidden_size = hidden_size, batch_first=True)
        
        
        self.first_decoder_input = torch.rand((BATCH_SIZE, 1, hidden_size))
        self.W1 = nn.Linear(hidden_size, hidden_size)
        self.W2 = nn.Linear(hidden_size, hidden_size)
        self.V = nn.Linear(hidden_size, 1)
        
      
    def attn_scores(encoder_outputs, decoder_hidden):
        return self.V(F.tanh(self.W1(encoder_outputs) + self.W2(decoder_hidden))).squeeze(-1)
        
    def forward(self, encoder_outputs, encoder_hidden, target_tensor, target_lengths):
        '''
        encoder_hidden = [N, 1, H] 
        encoder_outputs = [N, L, H] 
        decoder_target = [N, L+1]
        target_tensor = [N, L, 2]
        target_lengths = [N, 1]
        '''
        
        batch_size, max_len, _ = encoder_outputs.size()
        encoder_outputs = torch.cat((self.first_decoder_input, encoder_outputs), dim=1)
        decoder_hidden = encoder_hidden
        decoder_input = torch.rand((batch_size, 1, 2), 
                                    dtype=torch.long)
        decoder_outputs = []
        
        for i in range(max_len + 1):
            
            decoder_input_embed = self.dropout(self.embedding(decoder_input))
            decoder_output, *decoder_hidden = self.lstm(decoder_input_embed, decoder_hidden)
            
            '''decoder_output = [N, 1, H]'''
            raw_attn_scores = attn_scores(encoder_outputs, decoder_output)
            '''
            raw_attn_scores = [N, L+1]
            mask attn_scores and softmax over only needed dictionary'''
            ones = torch.full(size = (batch_size, max_len + 1), fill_value = 1., dtype=torch.float)
            mask = mask.masked_fill(torch.cumsum(ones, dim=-1) > target_lengths + 1., float('-inf'))
        
            masked_attn_scores = raw_attn_scores * mask
            #masked_attn_softmax_scores = F.softmax(masked_attn_scores, dim=-1)
            
            decoder_outputs.append(masked_attn_scores)
            
            decoder_input = target_tensor[:, i, :].unsqueeze(1)
            
        decoder_outputs = torch.concat(decoder_outputs, dim=1)
        return decoder_outputs, 
        
    
    def calculate_loss(decoder_outputs, decoder_target, target_lengths):
        loss = 0
        b_sz, max_len = decoder_target.size()
        for i in range(b_sz):
            weights = torch.zeros((max_len))
            weights[:target_lengths[i]+1] = 1.
            for c in range(max_len):
                loss += F.functional.cross_entropy(input = decoder_outputs[i, c, ...].unsqueeze(0), 
                                                   target= decoder_target[i, c], 
                                                   weight=weights,
                                                   ignore_index = -1)
        return loss
            

In [134]:
encoder = Encoder(128)

In [135]:
decoder = Decoder(128)

In [62]:
import numpy as np
import itertools

def solve_tsp_dynamic(points):
    def length(x_coord, y_coord):
        return np.linalg.norm(np.asarray(x_coord) - np.asarray(y_coord))
    #calc all lengths
    all_distances = [[length(x,y) for y in points] for x in points]
    #initial value - just distance from 0 to every other point + keep the track of edges
    A = {(frozenset([0, idx+1]), idx+1): (dist, [0,idx+1]) for idx,dist in enumerate(all_distances[0][1:])}
    cnt = len(points)
    for m in range(2, cnt):
        B = {}
        for S in [frozenset(C) | {0} for C in itertools.combinations(range(1, cnt), m)]:
              for j in S - {0}:
                B[(S, j)] = min( [(A[(S-{j},k)][0] + all_distances[k][j], A[(S-{j},k)][1] + [j]) \
                                  for k in S if k != 0 and k!=j])  #this will use 0th index of tuple for ordering, the same as if key=itemgetter(0) used
        A = B
    res = min([(A[d][0] + all_distances[0][d[1]], A[d][1]) for d in iter(A)])
    return np.asarray(res[1]) + 1 # 0 for padding

In [63]:
tsp_coords = [np.random.uniform(size=[_,2]) for _ in range(7, 12)]
tsp_coords

[array([[0.24654737, 0.28955304],
        [0.81493453, 0.32096205],
        [0.76229006, 0.5276015 ],
        [0.3558138 , 0.6140508 ],
        [0.3980779 , 0.78049744],
        [0.33152915, 0.96015033],
        [0.00903356, 0.70169921]]),
 array([[0.63434193, 0.48949929],
        [0.22053111, 0.31208303],
        [0.32576242, 0.91834259],
        [0.72666392, 0.65154926],
        [0.93390254, 0.23901986],
        [0.68041478, 0.80601716],
        [0.90774641, 0.09020232],
        [0.73596878, 0.4775064 ]]),
 array([[0.0831995 , 0.54500851],
        [0.46698088, 0.68325647],
        [0.40556722, 0.39281098],
        [0.67254393, 0.96984177],
        [0.73834042, 0.9472338 ],
        [0.25537716, 0.69658391],
        [0.32134913, 0.55591329],
        [0.31439819, 0.52454627],
        [0.66510867, 0.70989666]]),
 array([[0.34096657, 0.96257525],
        [0.20009105, 0.99308967],
        [0.69466211, 0.78232001],
        [0.57594894, 0.36426997],
        [0.11037123, 0.7763809 ],
        

In [73]:
batch_size=5

In [69]:
tsp_tours = [solve_tsp_dynamic(c.tolist()) for c in tsp_coords]
tsp_tours

[array([1, 2, 3, 4, 5, 6, 7]),
 array([1, 8, 5, 7, 2, 3, 6, 4]),
 array([1, 6, 2, 4, 5, 9, 3, 8, 7]),
 array([ 1,  2,  5,  9,  8,  4, 10,  6,  3,  7]),
 array([ 1,  8,  9, 11,  2,  6,  3, 10,  5,  7,  4])]

In [65]:
max_len = max([len(x) for x in tsp_tours])
max_len

11

In [229]:
np.array([np.pad(row, (0, max_len-len(row)), constant_values = -1.) for row in tsp_tours])

array([[ 1,  2,  3,  4,  5,  6,  7, -1, -1, -1, -1],
       [ 1,  8,  5,  7,  2,  3,  6,  4, -1, -1, -1],
       [ 1,  6,  2,  4,  5,  9,  3,  8,  7, -1, -1],
       [ 1,  2,  5,  9,  8,  4, 10,  6,  3,  7, -1],
       [ 1,  8,  9, 11,  2,  6,  3, 10,  5,  7,  4]])

In [245]:
target_tensor = np.zeros((batch_size, max_len+1))
target_tensor[:, :max_len] = np.array([np.pad(row, (0, max_len-len(row)), constant_values = -1.) for row in tsp_tours])
target_tensor

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7., -1., -1., -1., -1.,  0.],
       [ 1.,  8.,  5.,  7.,  2.,  3.,  6.,  4., -1., -1., -1.,  0.],
       [ 1.,  6.,  2.,  4.,  5.,  9.,  3.,  8.,  7., -1., -1.,  0.],
       [ 1.,  2.,  5.,  9.,  8.,  4., 10.,  6.,  3.,  7., -1.,  0.],
       [ 1.,  8.,  9., 11.,  2.,  6.,  3., 10.,  5.,  7.,  4.,  0.]])

In [246]:
input_tensor = np.array([np.pad(x, pad_width = ((0, max_len - len(x)), (0, 0)), constant_values = np.array([0., 0.])) for x in tsp_coords])
input_tensor

array([[[0.24654737, 0.28955304],
        [0.81493453, 0.32096205],
        [0.76229006, 0.5276015 ],
        [0.3558138 , 0.6140508 ],
        [0.3980779 , 0.78049744],
        [0.33152915, 0.96015033],
        [0.00903356, 0.70169921],
        [0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       [[0.63434193, 0.48949929],
        [0.22053111, 0.31208303],
        [0.32576242, 0.91834259],
        [0.72666392, 0.65154926],
        [0.93390254, 0.23901986],
        [0.68041478, 0.80601716],
        [0.90774641, 0.09020232],
        [0.73596878, 0.4775064 ],
        [0.        , 0.        ],
        [0.        , 0.        ],
        [0.        , 0.        ]],

       [[0.0831995 , 0.54500851],
        [0.46698088, 0.68325647],
        [0.40556722, 0.39281098],
        [0.67254393, 0.96984177],
        [0.73834042, 0.9472338 ],
        [0.25537716, 0.69658391],
        [0.32134913, 0.55591329],
        [0

In [247]:
input_tensor = torch.from_numpy(input_tensor).float()
decoder_target = torch.from_numpy(target_tensor).long()

In [248]:
target_lengths = torch.tensor([7, 8, 9, 10, 11]).unsqueeze(-1).int()

In [249]:
target_lengths

tensor([[ 7],
        [ 8],
        [ 9],
        [10],
        [11]], dtype=torch.int32)

In [266]:
input_tensor.dtype, decoder_target.dtype

(torch.float32, torch.int64)

In [275]:
decoder_target

tensor([[ 1,  2,  3,  4,  5,  6,  7, -1, -1, -1, -1,  0],
        [ 1,  8,  5,  7,  2,  3,  6,  4, -1, -1, -1,  0],
        [ 1,  6,  2,  4,  5,  9,  3,  8,  7, -1, -1,  0],
        [ 1,  2,  5,  9,  8,  4, 10,  6,  3,  7, -1,  0],
        [ 1,  8,  9, 11,  2,  6,  3, 10,  5,  7,  4,  0]])

In [276]:
input_tensor.shape, decoder_target.shape, target_lengths.shape

(torch.Size([5, 11, 2]), torch.Size([5, 12]), torch.Size([5, 1]))

In [277]:
encoder_outputs, encoder_hidden = encoder(input_tensor)

In [278]:
encoder_outputs.size()

torch.Size([5, 11, 128])

In [279]:
encoder_outputs.size(), [x.size() for x in encoder_hidden]

(torch.Size([5, 11, 128]), [torch.Size([1, 5, 128]), torch.Size([1, 5, 128])])

In [283]:
decoder_outputs = decoder(encoder_outputs, encoder_hidden, input_tensor, target_lengths)

tensor([[1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -inf],
        [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])
tensor([[1., 1., 1., 1., 1., 1., 1., 1., -inf, -inf, -inf, -inf],
 

In [284]:
decoder_outputs

[tensor([[-0.0305,  0.1693,  0.1382,  0.1344,  0.1676,  0.1633,  0.1728,  0.1659,
           0.1714,  0.1932,  0.1977,  0.1842],
         [-0.0316,  0.1048,  0.1168,  0.1543,  0.1521,  0.1274,  0.1306,  0.1215,
           0.0934,  0.1142,  0.1344,  0.1549],
         [-0.2054,  0.1482,  0.1449,  0.1686,  0.1283,  0.1282,  0.1517,  0.1382,
           0.1510,  0.1334,  0.1266,  0.1252],
         [-0.1243,  0.1600,  0.1840,  0.1593,  0.1572,  0.1846,  0.1700,  0.1661,
           0.1811,  0.1893,  0.1360,  0.1638],
         [ 0.0793,  0.1350,  0.1309,  0.1629,  0.1731,  0.1909,  0.1784,  0.1733,
           0.1375,  0.1233,  0.1836,  0.1687]], grad_fn=<SqueezeBackward1>),
 tensor([[-0.0470,  0.1556,  0.1247,  0.1211,  0.1543,  0.1502,  0.1595,  0.1526,
           0.1582,  0.1798,  0.1843,  0.1707],
         [-0.0209,  0.1181,  0.1301,  0.1678,  0.1655,  0.1409,  0.1441,  0.1351,
           0.1070,  0.1278,  0.1480,  0.1685],
         [-0.1967,  0.1607,  0.1574,  0.1814,  0.1406,  0.1412,  0.

In [319]:
loss = calculate_loss(decoder_outputs, decoder_target)

In [320]:
loss

tensor(2.4689, grad_fn=<NllLossBackward0>)

In [318]:
def calculate_loss(decoder_outputs, decoder_target):
    
    return F.cross_entropy(
        decoder_outputs.view(-1, decoder_outputs.size(-1)),
        decoder_target.view(-1),
        ignore_index=-1
    )

In [294]:
class Decoder(nn.Module):
    
    def __init__(self, hidden_size, dropout_p = 0.2):
        super().__init__()
        self.embedding = nn.Linear(2, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.lstm = nn.LSTM(input_size = hidden_size, hidden_size = hidden_size, batch_first=True)
        
        
        self.first_decoder_input = torch.rand((BATCH_SIZE, 1, hidden_size))
        self.W1 = nn.Linear(hidden_size, hidden_size)
        self.W2 = nn.Linear(hidden_size, hidden_size)
        self.V = nn.Linear(hidden_size, 1)
        
      
    def attn_scores(self, encoder_outputs, decoder_hidden):
        return self.V(F.tanh(self.W1(encoder_outputs) + self.W2(decoder_hidden))).squeeze(-1)
        
    def forward(self, encoder_outputs, encoder_hidden, target_tensor, target_lengths):
        '''
        encoder_hidden = [N, 1, H] 
        encoder_outputs = [N, L, H] 
        target_tensor = [N, L, 2]
        target_lengths = [N, 1]
        '''
        
        batch_size, max_len, _ = encoder_outputs.size()
        encoder_outputs = torch.cat((self.first_decoder_input, encoder_outputs), dim=1)
        decoder_hidden = encoder_hidden
        self.first_decoder_input
        decoder_outputs = []
        
        for i in range(max_len + 1):
            if i == 0:
                decoder_input_embed = self.first_decoder_input
            else:
                decoder_input_embed = self.embedding(decoder_input)
                
            decoder_output, decoder_hidden = self.lstm(decoder_input_embed, decoder_hidden)
            
            '''decoder_output = [N, 1, H]'''
            raw_attn_scores = self.attn_scores(encoder_outputs, decoder_output)
            '''
            
            raw_attn_scores = [N, L+1]
            mask attn_scores and softmax over only needed dictionary'''
            
            ones = torch.full(size = (batch_size, max_len + 1), fill_value = 1., dtype=torch.float)
            #mask = ones.masked_fill(torch.cumsum(ones, dim=-1) > target_lengths + 1., float('-inf'))
            
            masked_attn_scores = raw_attn_scores.masked_fill(torch.cumsum(ones, dim=-1) > target_lengths + 1., float('-inf'))
            masked_attn_softmax_scores = F.softmax(masked_attn_scores, dim=-1)
            
            decoder_outputs.append(masked_attn_softmax_scores)
            
            decoder_input = target_tensor[:, i-1, :].unsqueeze(1)
            
        decoder_outputs = torch.stack(decoder_outputs, dim=1)
        return decoder_outputs
        
    
    def calculate_loss(decoder_outputs, decoder_target):
        loss = F.cross_entropy(
            decoder_outputs.view(-1, decoder_outputs.size(-1)),
            decoder_target.view(-1),
            ignore_index=-1)
        return loss
            
            
decoder = Decoder(128)