## Loading the data, padding (based on 2.0)

In [23]:
import sys
import os
import numpy as np
import torch

In [24]:
def read_chinese_data(inputfilename):
    with open(inputfilename, "r") as inputfile:
        sentences = []
        collection_words = []
        collection_labels = []
        for line in inputfile:
            if line[0] == '#':
                continue
            columns = line.split()
            #print(words)
            if columns == []:
                sentences.append((''.join(collection_words), collection_labels))
                collection_words = []
                collection_labels = []
                continue
            collection_words.append(columns[1])
            collection_labels += [1] + ([0] * (len(columns[1]) - 1))
            
    return sentences

In [25]:
train_sentences = read_chinese_data('/scratch/lt2316-h20-resources/zh_gsd-ud-train.conllu')

In [26]:
test_sentences = read_chinese_data('/scratch/lt2316-h20-resources/zh_gsd-ud-test.conllu')

In [107]:
print( train_sentences[:2] )

[('看似簡單，只是二選一做決擇，但其實他們代表的是你周遭的親朋好友，試著給你不同的意見，但追根究底，最後決定的還是自己。', [1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1]), ('其便當都是買來的，就算加熱也是由媽媽負責（後來揭曉其實是避免帶來厄運），父親則在電視台上班。', [1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1])]


In [27]:
def index_chars(sentences):
    char_set = set((char for sen in sentences for char in sen))
    char_list = [0, '<START>','<END>'] + list(char_set)
    return char_list, {char:i for i,char in enumerate(char_list)}

In [28]:
char_list, char_index = index_chars([x[0] for x in train_sentences + test_sentences])

In [125]:
char_list[:10]

[0, '<START>', '<END>', '嘎', '靜', '今', '肌', '胺', '澀', '腐']

In [126]:
import itertools
dict(itertools.islice(char_index.items(), 10))

{0: 0,
 '<START>': 1,
 '<END>': 2,
 '嘎': 3,
 '靜': 4,
 '今': 5,
 '肌': 6,
 '胺': 7,
 '澀': 8,
 '腐': 9}

In [29]:
def convert_sentence(sentence, index_dict, add_tags=True): # turn sentence to [int]
    indexed_sentence = [index_dict[x] for x in sentence]
    if add_tags:
        start_idx, end_idx = index_dict['<START>'], index_dict['<END>']
        return [start_idx]+indexed_sentence+[end_idx]
    return indexed_sentence

In [134]:
convert_sentence(train_sentences[0][0], char_index)[:10]

[1, 2435, 2749, 2037, 2005, 2028, 684, 2420, 1160, 581]

In [30]:
def pad_lengths(sentences, max_length, padding=0):
    return [x + ([padding] * (max_length - len(x))) for x in sentences]

In [130]:
[ len(x) for x in pad_lengths([convert_sentence(xy[0], char_index) for xy in train_sentences[:3]], 50) ]


[58, 50, 50]

In [135]:
from torch.utils.data import DataLoader, Dataset
# Dataset()
batching = DataLoader(train_sentences, shuffle=True, batch_size=5)

In [31]:
def create_dataset(x, device="cpu"):
    converted = [(convert_sentence(x1[0], char_index), x1[1]) for x1 in x] # change (sen,labels) to (idxs, labels)
    X, y = zip(*converted)
    lengths = [len(x2) for x2 in X] # list of sen_lengths
    padded_X = pad_lengths(X, max(lengths)) # pad all sentences to same length
    Xt = torch.LongTensor(padded_X).to(device)
    padded_y = pad_lengths(y, max(lengths), padding=-1)
    yt = torch.LongTensor(padded_y).to(device)
    lengths_t = torch.LongTensor(lengths).to(device)
    return Xt, lengths_t, yt

In [32]:
train_X_tensor, train_lengths_tensor, train_y_tensor = create_dataset(train_sentences, "cuda:2")
test_X_tensor, test_lengths_tensor, test_y_tensor = create_dataset(test_sentences, "cuda:2")

In [33]:
# train_X_tensor[0]
train_X_tensor.shape

torch.Size([3997, 184])

In [34]:
train_y_tensor.shape

torch.Size([3997, 184])

In [35]:
import numpy as np
np.array([2])

array([2])

## Packing the sequences for RNN

In [7]:
testtensor = torch.randn((10,100,200))

In [8]:
testtensor.shape

torch.Size([10, 100, 200])

In [36]:
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
# import torch

In [4]:
testlengths = torch.randint(1, 100, (10,)) # 10 ints from 0~99

In [15]:
testlengths.size(), testlengths # 10 ints from 0~99

(torch.Size([10]), tensor([93, 22, 67, 76, 17, 38, 38,  2, 60, 41]))

In [11]:
packed = pack_padded_sequence(testtensor, testlengths, batch_first=True, enforce_sorted=False)

In [12]:
testtensor.shape

torch.Size([10, 100, 200])

In [14]:
packed

PackedSequence(data=tensor([[ 0.2843, -0.2642,  0.2152,  ..., -0.0950, -0.2541,  0.6031],
        [ 0.6809, -1.9295,  0.7601,  ..., -1.3251, -0.4201,  1.0243],
        [ 0.7649,  0.5703, -0.6670,  ...,  0.1672, -1.3409,  1.5025],
        ...,
        [ 0.5266, -1.7738, -0.5618,  ..., -2.0935,  0.1094, -1.1434],
        [ 2.0874, -1.2844,  2.5851,  ...,  0.0093,  0.4601,  1.3961],
        [ 0.2843,  1.4861,  0.9259,  ...,  0.9705,  2.1365,  0.1034]]), batch_sizes=tensor([10, 10,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  8,
         8,  8,  8,  8,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
         7,  7,  5,  5,  5,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
         4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,  2,
         2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1]), sorted_indices=tensor([0, 3, 2, 8, 9, 5, 6, 1, 4, 7]), unsorted_indices=tensor([0, 7, 2, 1, 8, 5, 6, 9, 3, 4]))

In [16]:
len(packed.batch_sizes)

93

In [17]:
unpacked = pad_packed_sequence(packed, batch_first=True, total_length=100)

In [18]:
unpacked

(tensor([[[ 0.2843, -0.2642,  0.2152,  ..., -0.0950, -0.2541,  0.6031],
          [-0.6319,  0.2654,  1.3908,  ...,  2.2848, -0.5198,  1.5425],
          [ 0.0650,  0.3719, -0.8636,  ..., -0.8656, -1.6473, -0.6849],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[-1.8083,  0.4517, -0.1655,  ..., -0.1614,  1.3763,  3.0522],
          [-0.0526, -1.0759, -1.0354,  ...,  1.8745,  0.5190, -1.7398],
          [-0.3062, -0.7792, -1.8397,  ..., -2.2383, -0.6125, -0.6285],
          ...,
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
          [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],
 
         [[ 0.7649,  0.5703, -0.6670,  ...,  0.1672, -1.3409,  1.5025],
          [-0.2136, -1.6033,

In [55]:
unpacked[0]

tensor([[[ 0.7038,  1.4047, -0.3232,  ...,  0.0143,  0.6232, -0.3649],
         [ 0.0178,  0.7354, -0.0055,  ..., -0.0820,  1.0416, -0.1642],
         [ 1.2808,  0.5234,  0.1960,  ..., -1.0231,  1.1244, -0.3035],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.4254,  1.8586,  0.1914,  ..., -0.2758,  0.7993, -0.2507],
         [-0.6537, -1.6943,  0.9580,  ...,  0.4435, -1.3842, -1.4496],
         [-2.2641, -0.3484,  0.3794,  ...,  0.3705, -1.2520,  0.1982],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.8197, -0.7112,  0.0086,  ..., -1.0629,  0.9276, -0.5575],
         [-0.1425,  0.4942,  1.7464,  ...,  0

In [56]:
unpacked[0].size()

torch.Size([10, 100, 200])

## Batching (based on 1.0, 1.1, 1.2)

In [37]:
class Batcher:
    def __init__(self, X, lengths, y, device, batch_size=50, max_iter=None):
        self.X = X
        self.lengths = lengths # We need the lengths to efficiently use the padding.
        self.y = y
        self.device = device
        self.batch_size=batch_size
        self.max_iter = max_iter
        self.curr_iter = 0
        
    def __iter__(self):
        return self
    
    def __next__(self):
        if self.curr_iter == self.max_iter:
            raise StopIteration
        permutation = torch.randperm(self.X.size()[0], device=self.device)
        permX = self.X[permutation]
        permlengths = self.lengths[permutation]
        permy = self.y[permutation]
        splitX = torch.split(permX, self.batch_size)
        splitlengths = torch.split(permlengths, self.batch_size)
        splity = torch.split(permy, self.batch_size)
        
        self.curr_iter += 1
        return zip(splitX, splitlengths, splity)

In [38]:
b = Batcher(train_X_tensor, train_lengths_tensor, train_y_tensor, torch.device('cuda:2'), max_iter=100)

In [59]:
testbatching = next(b)

In [60]:
testbatching

<zip at 0x7faa79a98cc0>

In [61]:
testbatch = next(testbatching)

In [62]:
testbatch

(tensor([[3525, 3592,  344,  ...,    0,    0,    0],
         [2548, 1419,  123,  ...,    0,    0,    0],
         [3469, 1349,  336,  ...,    0,    0,    0],
         ...,
         [2190, 1001, 1618,  ...,    0,    0,    0],
         [2080,  177,  916,  ...,    0,    0,    0],
         [2875, 1109, 1346,  ...,    0,    0,    0]], device='cuda:2'),
 tensor([ 90,  30,  70,  25,  30,  26,  22,  40,  26,  64,  24,  47, 103,  31,
          34,  14,  55,  30,  17,  22,  53,  83,  63,  14,  37,  29,  43,  47,
          42,  88,  29,  16,  50,  47,  35,  33,  40,  32,  25,  55,  75,  27,
         110,  59,  46,  20,  16,  19,  14,  24], device='cuda:2'),
 tensor([[ 1,  1,  0,  ..., -1, -1, -1],
         [ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  0,  0,  ..., -1, -1, -1],
         ...,
         [ 1,  0,  1,  ..., -1, -1, -1],
         [ 1,  1,  1,  ..., -1, -1, -1],
         [ 1,  1,  0,  ..., -1, -1, -1]], device='cuda:2'))

## Modeling

In [39]:
import torch.nn as nn

In [64]:
emb = nn.Embedding(len(int_index), 200, 0).to("cuda:2") # vocab_size, embed_dim_size, pad_id

In [65]:
testX, testlengths, testy = testbatch

In [66]:
testembs = emb(testX)

In [67]:
testembs

tensor([[[ 0.1196, -0.6549,  0.5564,  ...,  0.6592, -0.0433, -1.9007],
         [-1.2655,  1.5405,  0.8641,  ...,  0.5380,  1.4060,  2.9751],
         [ 2.4744, -0.6471, -1.0599,  ...,  1.8156, -0.0497, -0.0740],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-1.0716, -0.9499,  1.1424,  ...,  0.6893, -0.2926,  0.9506],
         [ 0.6275,  0.3448, -0.8052,  ..., -1.3687,  0.8800,  0.0881],
         [-1.2560, -0.3070, -2.1011,  ..., -0.6712,  0.4612,  1.4226],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[-0.0117,  0.5996,  1.0459,  ..., -0.2355, -0.8195,  1.8119],
         [ 1.5911, -0.7417, -0.6687,  ..., -1

In [68]:
testembs.size()  

torch.Size([50, 182, 200])

In [69]:
testembs.device

device(type='cuda', index=2)

In [70]:
testlstm = nn.LSTM(200, 150, batch_first=True).to("cuda:2")

In [71]:
testembspadded = pack_padded_sequence(testembs, testlengths.to("cpu"), batch_first=True, enforce_sorted=False)

In [72]:
testoutput, teststate = testlstm(testembspadded)

In [73]:
testoutput

PackedSequence(data=tensor([[-6.9929e-02, -6.1790e-02,  2.9183e-01,  ..., -7.7959e-02,
         -3.3203e-02, -8.4488e-02],
        [ 1.4346e-01,  2.5605e-01, -1.4306e-01,  ..., -1.5494e-02,
         -2.7943e-02, -6.4576e-02],
        [-2.0854e-01,  4.3622e-02, -9.9771e-03,  ...,  2.9854e-01,
         -2.8373e-02,  1.4944e-01],
        ...,
        [-2.6879e-03, -6.5062e-02,  4.6864e-02,  ...,  4.8320e-02,
         -5.8132e-02,  8.3431e-02],
        [ 1.3940e-01,  6.9773e-04,  1.5035e-01,  ..., -2.8966e-05,
          3.8662e-04, -5.3611e-02],
        [ 1.1099e-01, -2.7968e-01, -6.9197e-02,  ..., -4.4968e-02,
          3.8688e-02,  5.1162e-03]], device='cuda:2',
       grad_fn=<CudnnRnnBackward>), batch_sizes=tensor([50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 47, 47, 45, 44,
        44, 43, 42, 42, 40, 40, 38, 36, 34, 33, 33, 31, 28, 27, 26, 25, 24, 23,
        23, 22, 22, 22, 20, 20, 19, 18, 18, 18, 17, 14, 14, 14, 13, 13, 13, 12,
        12, 10, 10, 10, 10,  9,  9,  9,  9,

In [74]:
testunpacked = pad_packed_sequence(testoutput, batch_first=True)

In [75]:
testunpacked[0].size()

torch.Size([50, 110, 150])

In [76]:
testsigm = nn.Sigmoid().to("cuda:2")

In [77]:
testoutput2 = testsigm(testunpacked[0])

In [78]:
testoutput2.size()

torch.Size([50, 110, 150])

In [79]:
testlin = nn.Linear(150, 2).to("cuda:2")

In [80]:
testoutput3 = testlin(testoutput2)

In [81]:
testoutput3.size()

torch.Size([50, 110, 2])

In [82]:
testsoft = nn.LogSoftmax(2).to("cuda:2")

In [83]:
testoutput4 = testsoft(testoutput3)

In [84]:
testoutput4

tensor([[[-0.4815, -0.9620],
         [-0.4881, -0.9513],
         [-0.4777, -0.9682],
         ...,
         [-0.4883, -0.9510],
         [-0.4883, -0.9510],
         [-0.4883, -0.9510]],

        [[-0.4839, -0.9580],
         [-0.4787, -0.9665],
         [-0.4856, -0.9554],
         ...,
         [-0.4883, -0.9510],
         [-0.4883, -0.9510],
         [-0.4883, -0.9510]],

        [[-0.4918, -0.9456],
         [-0.4912, -0.9465],
         [-0.4770, -0.9693],
         ...,
         [-0.4883, -0.9510],
         [-0.4883, -0.9510],
         [-0.4883, -0.9510]],

        ...,

        [[-0.4923, -0.9448],
         [-0.4785, -0.9668],
         [-0.5048, -0.9254],
         ...,
         [-0.4883, -0.9510],
         [-0.4883, -0.9510],
         [-0.4883, -0.9510]],

        [[-0.5023, -0.9293],
         [-0.5004, -0.9322],
         [-0.4846, -0.9570],
         ...,
         [-0.4883, -0.9510],
         [-0.4883, -0.9510],
         [-0.4883, -0.9510]],

        [[-0.4824, -0.9605],
       

In [85]:
testy_short = testy[:, :max(testlengths)]

In [86]:
testy_short

tensor([[ 1,  1,  0,  ..., -1, -1, -1],
        [ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  0,  0,  ..., -1, -1, -1],
        ...,
        [ 1,  0,  1,  ..., -1, -1, -1],
        [ 1,  1,  1,  ..., -1, -1, -1],
        [ 1,  1,  0,  ..., -1, -1, -1]], device='cuda:2')

In [87]:
testy_short.size()

torch.Size([50, 110])

In [88]:
max(testlengths)

tensor(110, device='cuda:2')

In [89]:
testpermuted = testoutput4.permute(0, 2, 1)

In [90]:
testpermuted

tensor([[[-0.4815, -0.4881, -0.4777,  ..., -0.4883, -0.4883, -0.4883],
         [-0.9620, -0.9513, -0.9682,  ..., -0.9510, -0.9510, -0.9510]],

        [[-0.4839, -0.4787, -0.4856,  ..., -0.4883, -0.4883, -0.4883],
         [-0.9580, -0.9665, -0.9554,  ..., -0.9510, -0.9510, -0.9510]],

        [[-0.4918, -0.4912, -0.4770,  ..., -0.4883, -0.4883, -0.4883],
         [-0.9456, -0.9465, -0.9693,  ..., -0.9510, -0.9510, -0.9510]],

        ...,

        [[-0.4923, -0.4785, -0.5048,  ..., -0.4883, -0.4883, -0.4883],
         [-0.9448, -0.9668, -0.9254,  ..., -0.9510, -0.9510, -0.9510]],

        [[-0.5023, -0.5004, -0.4846,  ..., -0.4883, -0.4883, -0.4883],
         [-0.9293, -0.9322, -0.9570,  ..., -0.9510, -0.9510, -0.9510]],

        [[-0.4824, -0.4865, -0.4796,  ..., -0.4883, -0.4883, -0.4883],
         [-0.9605, -0.9540, -0.9650,  ..., -0.9510, -0.9510, -0.9510]]],
       device='cuda:2', grad_fn=<PermuteBackward>)

In [91]:
nllloss = nn.NLLLoss(ignore_index=-1).to("cuda:2")

In [92]:
nllloss(testpermuted, testy_short)

tensor(0.7849, device='cuda:2', grad_fn=<NllLoss2DBackward>)

In [40]:
class Segmenter(nn.Module):
    def __init__(self, vocab_size, emb_size):
        super().__init__()
        
        self.vocab_size = vocab_size
        self.emb_size = emb_size
        
        self.emb = nn.Embedding(self.vocab_size, self.emb_size, 0)
        self.lstm = nn.LSTM(self.emb_size, 150, batch_first=True)
        self.sig1 = nn.Sigmoid()
        self.lin = nn.Linear(150, 2)
        self.softmax = nn.LogSoftmax(2)
        
    def forward(self, x, lengths):
        embs = self.emb(x)
        packed = pack_padded_sequence(embs, lengths.to("cpu"), batch_first=True, enforce_sorted=False)
        output1, _ = self.lstm(packed)
        unpacked, _ = pad_packed_sequence(output1, batch_first=True)
        output2 = self.sig1(unpacked)
        output3 = self.lin(output2)
        return self.softmax(output3)
        

In [41]:
import torch.optim as optim

In [55]:
def train(X, lengths, y, vocab_size, emb_size, batch_size, epochs, device, model=None):
    b = Batcher(X, lengths, y, device, batch_size=batch_size, max_iter=epochs)
    if not model:
        m = Segmenter(vocab_size, emb_size).to(device)
    else:
        m = model
    loss = nn.NLLLoss(ignore_index=-1)
    optimizer = optim.Adam(m.parameters(), lr=0.005)
    epoch = 0
    for split in b:
        tot_loss = 0
        for batch in split:
            optimizer.zero_grad()
            print('input:', batch[0].shape, batch[1].shape, 'y:', batch[2].shape)
            o = m(batch[0], batch[1])
#             print('output:', o.permute(0,2,1).shape)
#             print('expects:', batch[2][:, :max(batch[1])].shape)
            l = loss(o.permute(0,2,1), batch[2][:, :max(batch[1])])
            tot_loss += l
            l.backward()
            optimizer.step()
            break
        print("Total loss in epoch {} is {}.".format(epoch, tot_loss))
        break
        epoch += 1
    return m

In [56]:
model = train(train_X_tensor, train_lengths_tensor, train_y_tensor, len(char_index), 200, 50, 30, "cuda:2")

input: torch.Size([50, 184]) torch.Size([50])
Total loss in epoch 0 is 0.7423067688941956.


## Evaluation

In [64]:
model.eval()

Segmenter(
  (emb): Embedding(3648, 200, padding_idx=0)
  (lstm): LSTM(200, 150, batch_first=True)
  (sig1): Sigmoid()
  (lin): Linear(in_features=150, out_features=2, bias=True)
  (softmax): LogSoftmax(dim=2)
)

In [65]:
with torch.no_grad():
    rawpredictions = model(test_X_tensor, test_lengths_tensor)

In [66]:
rawpredictions.size()

torch.Size([500, 156, 2])

In [67]:
rawpredictions

tensor([[[-5.5307e+00, -3.9711e-03],
         [-4.9722e-04, -7.6067e+00],
         [-1.7355e+01,  0.0000e+00],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-1.1877e+01, -6.9141e-06],
         [-2.7702e-02, -3.6001e+00],
         [-7.6969e+00, -4.5432e-04],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-6.5656e+00, -1.4089e-03],
         [-1.6689e-06, -1.3334e+01],
         [-6.5181e+00, -1.4776e-03],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        ...,

        [[-5.2260e+00, -5.3896e-03],
         [-5.4596e-05, -9.8150e+00],
         [-1.3359e+01, -1.5497e-06],
         ...,
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02],
         [-3.2601e+00, -3.9142e-02]],

        [[-1.4282e+01, -5.9605e-07

In [68]:
import math
math.log2(0.9), math.log2(0.8)

(-0.15200309344504995, -0.3219280948873623)

In [69]:
predictions = torch.argmax(rawpredictions, 2)

In [70]:
predictions

tensor([[1, 0, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1],
        ...,
        [1, 0, 1,  ..., 1, 1, 1],
        [1, 1, 1,  ..., 1, 1, 1],
        [1, 0, 1,  ..., 1, 1, 1]], device='cuda:2')

In [71]:
predictions.size()

torch.Size([500, 156])

In [72]:
predictions[0]

tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:2')

In [73]:
test_sentences[0]

('然而，這樣的處理也衍生了一些問題。', [1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1])

In [74]:
test_y_tensor[0]

tensor([ 1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  1,  0,  1,  0,  1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1], device='cuda:2')

In [75]:
test_lengths_tensor[0]

tensor(17, device='cuda:2')

In [76]:
collectpreds = []
collecty = []

In [77]:
for i in range(test_X_tensor.size(0)):
    collectpreds.append(predictions[i][:test_lengths_tensor[i]])
    collecty.append(test_y_tensor[i][:test_lengths_tensor[i]])

In [78]:
collecty

[tensor([1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
         0, 1, 1, 1, 0, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0,
         1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1,
         1, 1, 0, 1, 1, 1, 1, 1], device='cuda:2'),
 tensor([1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0,
         1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
         1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
         0, 0, 0, 1, 1], device='c

In [79]:
allpreds = torch.cat(collectpreds)

In [80]:
allpreds.size()

torch.Size([19206])

In [81]:
classes = torch.cat(collecty)

In [82]:
allpreds, classes

(tensor([1, 0, 1,  ..., 1, 0, 1], device='cuda:2'),
 tensor([1, 0, 1,  ..., 1, 0, 1], device='cuda:2'))

In [83]:
classes.size()

torch.Size([19206])

In [84]:
classes = classes.float()
allpreds = allpreds.float()

In [85]:
tp = sum(classes * allpreds)
fp = sum(classes * (~allpreds.bool()).float())
tn = sum((~classes.bool()).float() * (~allpreds.bool()).float())
fn = sum((~classes.bool()).float() * allpreds)

tp, fp, tn, fn

(tensor(11339., device='cuda:2'),
 tensor(673., device='cuda:2'),
 tensor(6418., device='cuda:2'),
 tensor(776., device='cuda:2'))

In [86]:
accuracy = (tp + tn) / (tp + fp + tn + fn)
accuracy

tensor(0.9246, device='cuda:2')

In [87]:
recall = tp / (tp + fn)
recall

tensor(0.9359, device='cuda:2')

In [88]:
precision = tp / (tp + fp)
precision

tensor(0.9440, device='cuda:2')

In [89]:
f1 = (2 * recall * precision) / (recall + precision)
f1

tensor(0.9399, device='cuda:2')