## Loading the data, padding (based on 2.0)

In [1]:
import sys
import os
import numpy as np
import torch

In [7]:
a=torch.randn(50,184)
a.shape

torch.Size([50, 184])

In [9]:
a[:,1:50].shape

torch.Size([50, 49])

In [105]:
zeros = np.zeros(5, dtype=int)
np.concatenate((a,zeros))

array([1, 1, 2, 3, 4, 5, 1, 0, 0, 0, 0, 0])

In [2]:
def read_chinese_data(inputfilename):
    with open(inputfilename, "r") as inputfile:
        sentences = []
        collection_words = []
        collection_labels = []
        for line in inputfile:
            if line[0] == '#':
                continue
            columns = line.split()
            #print(words)
            if columns == []:
                sentences.append((''.join(collection_words), collection_labels))
                collection_words = []
                collection_labels = []
                continue
            collection_words.append(columns[1])
            collection_labels += [1] + ([0] * (len(columns[1]) - 1))
            
    return sentences

In [3]:
train_sentences = read_chinese_data('/scratch/lt2316-h20-resources/zh_gsd-ud-train.conllu')

In [4]:
test_sentences = read_chinese_data('/scratch/lt2316-h20-resources/zh_gsd-ud-test.conllu')

In [5]:
print( train_sentences[:2] )

[('看似簡單，只是二選一做決擇，但其實他們代表的是你周遭的親朋好友，試著給你不同的意見，但追根究底，最後決定的還是自己。', [1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]), ('其便當都是買來的，就算加熱也是由媽媽負責（後來揭曉其實是避免帶來厄運），父親則在電視台上班。', [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1])]


In [6]:
def index_chars(sentences):
    char_set = set((char for sen in sentences for char in sen))
    char_list = [0, '<START>','<END>'] + list(char_set)
    return char_list, {char:i for i,char in enumerate(char_list)}

In [7]:
char_list, char_index = index_chars([x[0] for x in train_sentences + test_sentences])

In [8]:
char_list[:10]

[0, '<START>', '<END>', '猾', '術', '査', '大', '擲', '腸', '臭']

In [9]:
import itertools
dict(itertools.islice(char_index.items(), 10))

{0: 0,
 '<START>': 1,
 '<END>': 2,
 '猾': 3,
 '術': 4,
 '査': 5,
 '大': 6,
 '擲': 7,
 '腸': 8,
 '臭': 9}

In [10]:
def convert_sentence(sentence, index_dict, add_tags=True): # turn sentence to [int]
    indexed_sentence = [index_dict[x] for x in sentence]
    if add_tags:
        start_idx, end_idx = index_dict['<START>'], index_dict['<END>']
        return [start_idx]+indexed_sentence+[end_idx]
    return indexed_sentence

In [11]:
convert_sentence(train_sentences[0][0], char_index)[:10]

[1, 477, 1851, 1253, 1661, 1636, 2487, 1347, 2430, 2605]

In [12]:
def pad_lengths(sentences, max_length, padding=0):
    return [x + ([padding] * (max_length - len(x))) for x in sentences]

In [13]:
[ len(x) for x in pad_lengths([convert_sentence(xy[0], char_index) for xy in train_sentences[:3]], 50) ]


[60, 50, 50]

In [14]:
from torch.utils.data import DataLoader, Dataset
# Dataset()
batching = DataLoader(train_sentences, shuffle=True, batch_size=5)

In [15]:
def create_dataset(x, device="cpu"):
    converted = [(convert_sentence(x1[0], char_index), x1[1]) for x1 in x] # change (sen,labels) to (idxs, labels)
    X, y = zip(*converted)
    lengths = [len(x2) for x2 in X] # list of sen_lengths
    padded_X = pad_lengths(X, max(lengths)) # pad all sentences to same length
    Xt = torch.LongTensor(padded_X).to(device)
    padded_y = pad_lengths(y, max(lengths), padding=-1)
    yt = torch.LongTensor(padded_y).to(device)
    lengths_t = torch.LongTensor(lengths).to(device)
    return Xt, lengths_t, yt

In [16]:
train_X_tensor, train_lengths_tensor, train_y_tensor = create_dataset(train_sentences, "cuda:2")
test_X_tensor, test_lengths_tensor, test_y_tensor = create_dataset(test_sentences, "cuda:2")

In [17]:
# train_X_tensor[0]
train_X_tensor.shape

torch.Size([3997, 184])

In [18]:
train_y_tensor.shape

torch.Size([3997, 184])

In [19]:
import numpy as np
np.array([2])

array([2])

## Packing the sequences for RNN

In [20]:
testtensor = torch.randn((10,100,200))

In [21]:
testtensor.shape

torch.Size([10, 100, 200])

In [22]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# import torch

In [23]:
testlengths = torch.randint(1, 100, (10,)) # 10 ints from 0~99

In [24]:
testlengths.size(), testlengths # 10 ints from 0~99

(torch.Size([10]), tensor([55, 18, 84, 15, 16, 49, 10, 81, 33, 23]))

In [25]:
packed = pack_padded_sequence(testtensor, testlengths, batch_first=True, enforce_sorted=False)

In [26]:
testtensor.shape

torch.Size([10, 100, 200])

In [27]:
packed

PackedSequence(data=tensor([[-1.1378, -1.3203,  0.3465,  ...,  1.7615, -0.1950,  0.7516],
        [-1.0073, -1.1699,  2.1020,  ..., -1.3888,  0.2716,  0.1707],
        [-0.8821, -0.1149,  0.1548,  ..., -1.7127, -0.6041, -1.4357],
        ...,
        [-0.5486,  0.9946,  0.3260,  ...,  2.5292,  0.2625,  0.0137],
        [ 0.3048, -2.4559,  1.6755,  ..., -0.3890, -1.1695,  1.2055],
        [ 0.8466, -1.5087,  0.0897,  ...,  0.6626,  0.0715, -0.7769]]), batch_sizes=tensor([10, 10, 10, 10, 10, 10, 10, 10, 10, 10,  9,  9,  9,  9,  9,  8,  7,  7,
         6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,
         3,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1]), sorted_indices=tensor([2, 7, 0, 5, 8, 9, 1, 4, 3, 6]), unsorted_indices=tensor([2, 6, 0, 8, 7, 3, 9, 1, 4, 5]))

In [28]:
len(packed.batch_sizes)

84

In [29]:
unpacked = pad_packed_sequence(packed, batch_first=True, total_length=100)

In [30]:
unpacked

(tensor([[[-0.8821, -0.1149,  0.1548,  ..., -1.7127, -0.6041, -1.4357],
          [-0.1984,  0.5625,  1.3907,  ..., -0.8502,  0.3741,  1.1749],
          [-0.6078, -1.5637, -2.4217,  ...,  0.7867,  0.0665, -0.5499],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[ 1.2804,  1.5981,  1.0911,  ...,  0.1076,  1.2622, -0.1663],
          [ 1.2860,  0.8111, -0.8793,  ...,  1.0539, -0.0384, -0.3184],
          [-0.5289,  1.3340,  0.7162,  ..., -0.0407, -1.9918, -1.5672],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-1.1378, -1.3203,  0.3465,  ...,  1.7615, -0.1950,  0.7516],
          [-0.2176,  1.1322,

In [31]:
unpacked[0]

tensor([[[-0.8821, -0.1149,  0.1548,  ..., -1.7127, -0.6041, -1.4357],
         [-0.1984,  0.5625,  1.3907,  ..., -0.8502,  0.3741,  1.1749],
         [-0.6078, -1.5637, -2.4217,  ...,  0.7867,  0.0665, -0.5499],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 1.2804,  1.5981,  1.0911,  ...,  0.1076,  1.2622, -0.1663],
         [ 1.2860,  0.8111, -0.8793,  ...,  1.0539, -0.0384, -0.3184],
         [-0.5289,  1.3340,  0.7162,  ..., -0.0407, -1.9918, -1.5672],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-1.1378, -1.3203,  0.3465,  ...,  1.7615, -0.1950,  0.7516],
         [-0.2176,  1.1322, -0.9195,  ...,  0

In [32]:
unpacked[0].size()

torch.Size([10, 100, 200])

## Batching (based on 1.0, 1.1, 1.2)

In [33]:
class Batcher:
    def __init__(self, X, lengths, y, device, batch_size=50, max_iter=None):
        self.X = X
        self.lengths = lengths # We need the lengths to efficiently use the padding.
        self.y = y
        self.device = device
        self.batch_size=batch_size
        self.max_iter = max_iter
        self.curr_iter = 0
        
    def __iter__(self):
        return self
    
    def __next__(self):
        if self.curr_iter == self.max_iter:
            raise StopIteration
        permutation = torch.randperm(self.X.size()[0], device=self.device)
        permX = self.X[permutation]
        permlengths = self.lengths[permutation]
        permy = self.y[permutation]
        splitX = torch.split(permX, self.batch_size)
        splitlengths = torch.split(permlengths, self.batch_size)
        splity = torch.split(permy, self.batch_size)
        
        self.curr_iter += 1
        return zip(splitX, splitlengths, splity)

In [34]:
b = Batcher(train_X_tensor, train_lengths_tensor, train_y_tensor, torch.device('cuda:2'), max_iter=100)

In [35]:
testbatching = next(b)

In [36]:
testbatching

<zip at 0x7f1c4ae1c040>

In [37]:
testbatch = next(testbatching)

In [38]:
testbatch

(tensor([[   1, 2693, 1942,  ...,    0,    0,    0],
         [   1, 1517, 2676,  ...,    0,    0,    0],
         [   1, 1979,   74,  ...,    0,    0,    0],
         ...,
         [   1,  791,    6,  ...,    0,    0,    0],
         [   1, 2446, 3281,  ...,    0,    0,    0],
         [   1,  916, 1740,  ...,    0,    0,    0]], device='cuda:2'),
 tensor([50, 61, 21, 35, 27, 26, 27, 31, 42, 37, 17, 70, 28, 52, 29, 34, 18, 51,
         24, 33, 81, 21, 28, 62, 61, 29, 50, 39, 47, 16, 30, 49, 22, 26, 24, 59,
         53, 28, 51, 37, 32, 77, 17, 38, 39, 31, 26, 30, 24, 33],
        device='cuda:2'),
 tensor([[ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1],
         ...,
         [ 1,  0,  0,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1]], device='cuda:2'))

## Modeling

In [39]:
import torch.nn as nn

In [43]:
emb = nn.Embedding(len(char_index), 200, 0).to("cuda:2") # vocab_size, embed_dim_size, pad_id

In [44]:
testX, testlengths, testy = testbatch

In [45]:
testembs = emb(testX)

In [46]:
testembs

tensor([[[ 1.3313, -0.1083, -0.1623,  ...,  0.6618, -0.3960,  0.6848],
         [ 0.6811,  0.8222, -1.1983,  ...,  0.3099, -2.0272, -0.6533],
         [-0.7029, -0.0723, -0.4438,  ...,  0.6376, -0.1063,  1.3729],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 1.3313, -0.1083, -0.1623,  ...,  0.6618, -0.3960,  0.6848],
         [-1.1540, -0.6256,  1.0599,  ...,  0.7053,  0.6215, -0.2059],
         [ 0.9920, -0.8448,  0.3381,  ...,  0.3543,  0.8014, -1.0057],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 1.3313, -0.1083, -0.1623,  ...,  0.6618, -0.3960,  0.6848],
         [ 0.4250, -0.8220,  0.8770,  ...,  0

In [47]:
testembs.size()  #B,len_sen, embed

torch.Size([50, 184, 200])

In [48]:
testembs.device

device(type='cuda', index=2)

In [49]:
testlstm = nn.LSTM(200, 150, batch_first=True).to("cuda:2")

In [50]:
testembspadded = pack_padded_sequence(testembs, testlengths.to("cpu"), batch_first=True, enforce_sorted=False)

In [51]:
testoutput, teststate = testlstm(testembspadded) # out, (_,_)

In [52]:
testoutput

PackedSequence(data=tensor([[-0.0644,  0.2255,  0.0190,  ...,  0.2447, -0.0979,  0.1634],
        [-0.0644,  0.2255,  0.0190,  ...,  0.2447, -0.0979,  0.1634],
        [-0.0644,  0.2255,  0.0190,  ...,  0.2447, -0.0979,  0.1634],
        ...,
        [-0.0678, -0.2112,  0.0997,  ...,  0.0505,  0.0151, -0.2896],
        [-0.0409, -0.1709, -0.0353,  ..., -0.0584, -0.2017, -0.1920],
        [-0.0883, -0.2539,  0.0350,  ...,  0.0267,  0.0591,  0.1822]],
       device='cuda:2', grad_fn=<CudnnRnnBackward>), batch_sizes=tensor([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 49, 47,
        46, 46, 46, 44, 43, 43, 40, 40, 37, 35, 32, 30, 28, 26, 25, 23, 22, 21,
        21, 19, 18, 16, 16, 16, 15, 15, 15, 15, 15, 14, 14, 13, 11,  9,  8,  7,
         7,  7,  7,  7,  7,  6,  6,  4,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,
         2,  2,  2,  2,  2,  1,  1,  1,  1]), sorted_indices=tensor([20, 41, 11, 23,  1, 24, 35, 36, 13, 17, 38,  0, 26, 31, 28,  8, 44, 27,
        43,  9, 39, 

In [53]:
testunpacked = pad_packed_sequence(testoutput, batch_first=True)

In [54]:
testunpacked[0].size() # B, max_sen_len, embed

torch.Size([50, 81, 150])

In [55]:
testsigm = nn.Sigmoid().to("cuda:2")

In [56]:
testoutput2 = testsigm(testunpacked[0])

In [57]:
testoutput2.size()

torch.Size([50, 81, 150])

In [58]:
testlin = nn.Linear(150, 2).to("cuda:2")

In [59]:
testoutput3 = testlin(testoutput2)

In [60]:
testoutput3.size()

torch.Size([50, 81, 2])

In [61]:
testsoft = nn.LogSoftmax(2).to("cuda:2")

In [62]:
testoutput4 = testsoft(testoutput3)

In [63]:
testoutput4

tensor([[[-0.6907, -0.6956],
         [-0.7096, -0.6769],
         [-0.6848, -0.7015],
         ...,
         [-0.6864, -0.6999],
         [-0.6864, -0.6999],
         [-0.6864, -0.6999]],

        [[-0.6907, -0.6956],
         [-0.6993, -0.6871],
         [-0.6785, -0.7081],
         ...,
         [-0.6864, -0.6999],
         [-0.6864, -0.6999],
         [-0.6864, -0.6999]],

        [[-0.6907, -0.6956],
         [-0.6715, -0.7153],
         [-0.6571, -0.7306],
         ...,
         [-0.6864, -0.6999],
         [-0.6864, -0.6999],
         [-0.6864, -0.6999]],

        ...,

        [[-0.6907, -0.6956],
         [-0.6770, -0.7096],
         [-0.6851, -0.7013],
         ...,
         [-0.6864, -0.6999],
         [-0.6864, -0.6999],
         [-0.6864, -0.6999]],

        [[-0.6907, -0.6956],
         [-0.6879, -0.6984],
         [-0.7010, -0.6854],
         ...,
         [-0.6864, -0.6999],
         [-0.6864, -0.6999],
         [-0.6864, -0.6999]],

        [[-0.6907, -0.6956],
       

In [64]:
testy_short = testy[:, :max(testlengths)]

In [65]:
testy_short

tensor([[ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1],
        ...,
        [ 1,  0,  0,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1]], device='cuda:2')

In [66]:
testy_short.size()

torch.Size([50, 81])

In [67]:
max(testlengths)

tensor(81, device='cuda:2')

In [68]:
testpermuted = testoutput4.permute(0, 2, 1)

In [69]:
testpermuted

tensor([[[-0.6907, -0.7096, -0.6848,  ..., -0.6864, -0.6864, -0.6864],
         [-0.6956, -0.6769, -0.7015,  ..., -0.6999, -0.6999, -0.6999]],

        [[-0.6907, -0.6993, -0.6785,  ..., -0.6864, -0.6864, -0.6864],
         [-0.6956, -0.6871, -0.7081,  ..., -0.6999, -0.6999, -0.6999]],

        [[-0.6907, -0.6715, -0.6571,  ..., -0.6864, -0.6864, -0.6864],
         [-0.6956, -0.7153, -0.7306,  ..., -0.6999, -0.6999, -0.6999]],

        ...,

        [[-0.6907, -0.6770, -0.6851,  ..., -0.6864, -0.6864, -0.6864],
         [-0.6956, -0.7096, -0.7013,  ..., -0.6999, -0.6999, -0.6999]],

        [[-0.6907, -0.6879, -0.7010,  ..., -0.6864, -0.6864, -0.6864],
         [-0.6956, -0.6984, -0.6854,  ..., -0.6999, -0.6999, -0.6999]],

        [[-0.6907, -0.7140, -0.7029,  ..., -0.6864, -0.6864, -0.6864],
         [-0.6956, -0.6727, -0.6835,  ..., -0.6999, -0.6999, -0.6999]]],
       device='cuda:2', grad_fn=<PermuteBackward>)

In [70]:
nllloss = nn.NLLLoss(ignore_index=-1).to("cuda:2")

In [71]:
nllloss(testpermuted, testy_short)

tensor(0.6930, device='cuda:2', grad_fn=<NllLoss2DBackward>)

In [72]:
class Segmenter(nn.Module):
    def __init__(self, vocab_size, emb_size):
        super().__init__()
        
        self.vocab_size = vocab_size
        self.emb_size = emb_size
        
        self.emb = nn.Embedding(self.vocab_size, self.emb_size, 0)
        self.lstm = nn.LSTM(self.emb_size, 150, batch_first=True)
        self.sig1 = nn.Sigmoid()
        self.lin = nn.Linear(150, 2)
        self.softmax = nn.LogSoftmax(2)
        
    def forward(self, x, lengths):
        embs = self.emb(x)
        packed = pack_padded_sequence(embs, lengths.to("cpu"), batch_first=True, enforce_sorted=False)
        output1, _ = self.lstm(packed)
        unpacked, _ = pad_packed_sequence(output1, batch_first=True)
        output2 = self.sig1(unpacked)
        output3 = self.lin(output2)
        return self.softmax(output3)
        

In [73]:
import torch.optim as optim

In [74]:
def train(X, lengths, y, vocab_size, emb_size, batch_size, epochs, device, model=None):
    b = Batcher(X, lengths, y, device, batch_size=batch_size, max_iter=epochs)
    if not model:
        m = Segmenter(vocab_size, emb_size).to(device)
    else:
        m = model
    loss = nn.NLLLoss(ignore_index=-1)
    optimizer = optim.Adam(m.parameters(), lr=0.005)
    epoch = 0
    for split in b:
        tot_loss = 0
        for batch in split:
            optimizer.zero_grad()
            
            print('input:', batch[0].shape, batch[1].shape, 'y:', batch[2].shape)
            
            o = m(batch[0], batch[1])
            
            print('output:',o.shape)
            print('output permuted:', o.permute(0,2,1).shape)
            print('expects:', batch[2][:, :max(batch[1])].shape)
            
            l = loss(o.permute(0,2,1), batch[2][:, :max(batch[1])])
            tot_loss += l
            l.backward()
            optimizer.step()
            break
        print("Total loss in epoch {} is {}.".format(epoch, tot_loss))
        break
        epoch += 1
    return m

In [75]:
model = train(train_X_tensor, train_lengths_tensor, train_y_tensor, len(char_index), 200, 50, 30, "cuda:2")

input: torch.Size([50, 184]) torch.Size([50]) y: torch.Size([50, 184])
output: torch.Size([50, 101, 2])
output permuted: torch.Size([50, 2, 101])
expects: torch.Size([50, 101])
Total loss in epoch 0 is 0.6775771975517273.


## Evaluation

In [64]:
model.eval()

Segmenter(
  (emb): Embedding(3648, 200, padding_idx=0)
  (lstm): LSTM(200, 150, batch_first=True)
  (sig1): Sigmoid()
  (lin): Linear(in_features=150, out_features=2, bias=True)
  (softmax): LogSoftmax(dim=2)
)

In [65]:
with torch.no_grad():
    rawpredictions = model(test_X_tensor, test_lengths_tensor)

In [66]:
rawpredictions.size()

torch.Size([500, 156, 2])

In [67]:
rawpredictions

tensor([[[-5.5307e+00, -3.9711e-03],
         [-4.9722e-04, -7.6067e+00],
         [-1.7355e+01,  0.0000e+00],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-1.1877e+01, -6.9141e-06],
         [-2.7702e-02, -3.6001e+00],
         [-7.6969e+00, -4.5432e-04],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-6.5656e+00, -1.4089e-03],
         [-1.6689e-06, -1.3334e+01],
         [-6.5181e+00, -1.4776e-03],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        ...,

        [[-5.2260e+00, -5.3896e-03],
         [-5.4596e-05, -9.8150e+00],
         [-1.3359e+01, -1.5497e-06],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-1.4282e+01, -5.9605e-07

In [68]:
import math
math.log2(0.9), math.log2(0.8)

(-0.15200309344504995, -0.3219280948873623)

In [69]:
predictions = torch.argmax(rawpredictions, 2)

In [70]:
predictions

tensor([[1, 0, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        ...,
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1]], device='cuda:2')

In [71]:
predictions.size()

torch.Size([500, 156])

In [72]:
predictions[0]

tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:2')

In [73]:
test_sentences[0]

('然而，這樣的處理也衍生了一些問題。', [1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1])

In [74]:
test_y_tensor[0]

tensor([ 1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  0,  1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], device='cuda:2')

In [75]:
test_lengths_tensor[0]

tensor(17, device='cuda:2')

In [76]:
collectpreds = []
collecty = []

In [77]:
for i in range(test_X_tensor.size(0)):
    collectpreds.append(predictions[i][:test_lengths_tensor[i]])
    collecty.append(test_y_tensor[i][:test_lengths_tensor[i]])

In [78]:
collecty

[tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
         0, 1, 1, 1, 0, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
         1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
         1, 1, 0, 1, 1, 1, 1, 1], device='cuda:2'),
 tensor([1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
         1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
         0, 0, 0, 1, 1], device='c

In [79]:
allpreds = torch.cat(collectpreds)

In [80]:
allpreds.size()

torch.Size([19206])

In [81]:
classes = torch.cat(collecty)

In [82]:
allpreds, classes

(tensor([1, 0, 1,  ..., 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1,  ..., 1, 0, 1], device='cuda:2'))

In [83]:
classes.size()

torch.Size([19206])

In [84]:
classes = classes.float()
allpreds = allpreds.float()

In [85]:
tp = sum(classes * allpreds)
fp = sum(classes * (~allpreds.bool()).float())
tn = sum((~classes.bool()).float() * (~allpreds.bool()).float())
fn = sum((~classes.bool()).float() * allpreds)

tp, fp, tn, fn

(tensor(11339., device='cuda:2'),
 tensor(673., device='cuda:2'),
 tensor(6418., device='cuda:2'),
 tensor(776., device='cuda:2'))

In [86]:
accuracy = (tp + tn) / (tp + fp + tn + fn)
accuracy

tensor(0.9246, device='cuda:2')

In [87]:
recall = tp / (tp + fn)
recall

tensor(0.9359, device='cuda:2')

In [88]:
precision = tp / (tp + fp)
precision

tensor(0.9440, device='cuda:2')

In [89]:
f1 = (2 * recall * precision) / (recall + precision)
f1

tensor(0.9399, device='cuda:2')