In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import os
from tqdm import tqdm
import torch.nn.functional as F
from gensim.models import Word2Vec
from sklearn import metrics
from sklearn.metrics import classification_report
import itertools

In [18]:
import torch
import torch.nn as nn

In [19]:
class BertModel(nn.Module):
    def __init__(self, voc_size:int=30000, seq_len: int=512, d_model: int=384, d_ff:int=3072, pad_idx: int=1,
                num_encoder: int=12, num_heads: int=12, n_layers=6, dropout: float=0.1):
        super(BertModel, self).__init__()
        self.pad_idx = pad_idx
        self.layers = nn.ModuleList([nn.TransformerEncoderLayer(d_model=d_model, nhead=8, batch_first=True) for _ in range(n_layers)])
        self.fc = nn.Linear(d_model, d_model)
        self.activ1 = nn.Tanh()
        self.classifier = nn.Linear(d_model, 1)
        
    def forward(self, input: torch.Tensor) -> torch.Tensor:
        output = input
        for layer in self.layers:
            output = layer(output)
        output = output[0][0].unsqueeze(0)
        h_pooled = self.activ1(self.fc(output)) # [batch_size, d_model]
        logits_clsf = self.classifier(h_pooled) # [batch_size, 2]
        return logits_clsf # [B, S, D_model]

In [20]:
def log_metrics(preds, labels):
    preds = torch.stack(preds)
    preds = preds.cpu().detach().numpy()
    labels = torch.stack(labels)
    labels = labels.cpu().detach().numpy()
    
    
    ## Method 2 using ravel()
    fpr_micro, tpr_micro, _ = metrics.roc_curve(labels.ravel(), preds.ravel())
    auc_micro = metrics.auc(fpr_micro, tpr_micro)
    n=10
    preds = [round(i) for i in preds.ravel()]
    acc = metrics.accuracy_score(labels.ravel(), preds)
    f1 = metrics.f1_score(labels.ravel(), preds)
    preds = [preds[i*n:(i+1)*n] for i in range(len(preds)//6)]
    #target_names = list(genre_list.keys())
    #print(classification_report(labels,preds, target_names=target_names))
    return {"auc_micro": auc_micro, "acc" : acc, "f1" : f1, 'pred' : preds}

In [21]:
device='cuda'

In [22]:
def train_one_epoch(dataloader, model, optimizer, device, loss_fn):
    model.train()
    tk0 = tqdm(dataloader, total=len(dataloader))
    total_loss = 0.0
    
    for source_tensor, label in tk0:
        optimizer.zero_grad()
        output = model(source_tensor.to(device))
        loss = loss_fn(output.view(-1, output.size(-1)), torch.FloatTensor([label]).to(device))
        
        total_loss += loss
        loss.backward()
        optimizer.step()
        
    avg_train_loss = total_loss / len(dataloader) 
    print(" Average training loss: {0:.2f}".format(avg_train_loss))  

def test_one_epoch(testset, model, loss_fn):
    model.eval()
    tk0 = tqdm(testset, total=len(testset))
    targets = []
    outputs = []
    total_loss = 0.0
    with torch.no_grad():
        for source_tensor, label in tk0:
            label = torch.FloatTensor([label])
            output = model(source_tensor.to(device))
            loss = loss_fn(output.view(-1, output.size(-1)), label.to(device))
            total_loss += loss
            targets.extend(label)
            outputs.extend(torch.sigmoid(output[0]).to('cpu'))
    avg_valid_loss = total_loss / len(testset) 
    score = log_metrics(outputs, targets)
    print(" Average valid loss: {0:.2f}".format(avg_valid_loss))  
    print('AUC_SCORE: ', score["auc_micro"], " acc: ", score["acc"], "f1: ", score["f1"])
    return score, avg_valid_loss
    
def fit(model, train_dataloader, valid_dataloader=None,test_dataloader=None, EPOCHS=3, lr=0.000002):
    loss_fn = nn.BCEWithLogitsLoss() #ignore padding
    optimizer = torch.optim.AdamW(model.parameters(),lr=lr)
    for i in range(EPOCHS):
        print(f"EPOCHS:{i+1}")
        print('TRAIN')
        train_one_epoch(train_dataloader, model, optimizer, device, loss_fn)
        print('Valid')
        auc, loss = test_one_epoch(valid_dataloader,model, loss_fn)
        if i == 0:
            torch.save(model,'rnn_best.model')
            best_loss = loss
        if  loss<best_loss:
            best_loss = loss
            print(i,' EPOCH BEST MODEL!')
            torch.save(model,'rnn_best.model')
        print('Test')
        auc, loss = test_one_epoch(test_dataloader,model, loss_fn)
    best_model = torch.load('rnn_best.model')
    auc, loss  = test_one_epoch(test_dataloader,best_model, loss_fn)
    print('BEST MODEL')
    print('AUC_SCORE: ', auc["auc_micro"], " acc: ", auc["acc"], "f1: ", auc["f1"])
    return auc

In [40]:
original = []
reverse = []
path1 = 'word2vec_model_text3/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])
    embs = torch.zeros(1,scene_num,384)
    k = 0
    for i in range(1,max_scene+1):
        emb_name = base + str(i)
        if emb_name in w2v.wv.index_to_key:
            emb = w2v.wv[emb_name]
            emb = torch.from_numpy(emb)
            embs[0][k] = emb
            k += 1
    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,scene_num,384)
    for i,emb in enumerate(embs[0]):
#        if i == 0:
#            continue
        reverse_emb[0][-(i+1)] = emb

    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    
    

In [41]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [42]:
bert = BertModel(d_model=384).to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.77it/s]


 Average training loss: 0.68
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 859.90it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.7107000000000001  acc:  0.59 f1:  0.6893939393939394
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 862.44it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.7144  acc:  0.565 f1:  0.6765799256505576
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.70it/s]


 Average training loss: 0.65
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 877.85it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.7438  acc:  0.64 f1:  0.7000000000000001
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 874.58it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.7567999999999999  acc:  0.65 f1:  0.7107438016528925
EPOCHS:3
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.63it/s]


 Average training loss: 0.61
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 867.91it/s]


 Average valid loss: 0.60
AUC_SCORE:  0.7637  acc:  0.675 f1:  0.7210300429184548
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 873.88it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7823  acc:  0.7 f1:  0.7435897435897435
EPOCHS:4
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.29it/s]


 Average training loss: 0.58
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 858.39it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7676  acc:  0.68 f1:  0.7264957264957265
3  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 868.32it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.796  acc:  0.71 f1:  0.7521367521367521
EPOCHS:5
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.09it/s]


 Average training loss: 0.55
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 863.55it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.774  acc:  0.69 f1:  0.7327586206896552
4  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 882.61it/s]


 Average valid loss: 0.56
AUC_SCORE:  0.8082  acc:  0.735 f1:  0.7685589519650655
EPOCHS:6
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.07it/s]


 Average training loss: 0.53
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 834.50it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7756000000000001  acc:  0.7 f1:  0.736842105263158
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 870.69it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8153  acc:  0.73 f1:  0.7610619469026547
EPOCHS:7
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.23it/s]


 Average training loss: 0.50
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 855.03it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7729  acc:  0.7 f1:  0.7345132743362832
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 862.67it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8188000000000001  acc:  0.725 f1:  0.7555555555555556
EPOCHS:8
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.49it/s]


 Average training loss: 0.48
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 828.57it/s]


 Average valid loss: 0.61
AUC_SCORE:  0.7698999999999999  acc:  0.675 f1:  0.7186147186147186
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 841.85it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8227000000000001  acc:  0.73 f1:  0.7672413793103449
EPOCHS:9
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.56it/s]


 Average training loss: 0.46
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 874.35it/s]


 Average valid loss: 0.62
AUC_SCORE:  0.771  acc:  0.675 f1:  0.7186147186147186
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 890.42it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8252  acc:  0.76 f1:  0.7876106194690266
EPOCHS:10
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 114.93it/s]


 Average training loss: 0.44
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 798.68it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.7696999999999999  acc:  0.68 f1:  0.7241379310344828
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 861.40it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8293  acc:  0.755 f1:  0.7822222222222222
EPOCHS:11
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.13it/s]


 Average training loss: 0.42
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 831.19it/s]


 Average valid loss: 0.65
AUC_SCORE:  0.7676999999999999  acc:  0.695 f1:  0.7336244541484717
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 822.09it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8295  acc:  0.75 f1:  0.7767857142857143
EPOCHS:12
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.08it/s]


 Average training loss: 0.39
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 861.50it/s]


 Average valid loss: 0.65
AUC_SCORE:  0.7675  acc:  0.685 f1:  0.72
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 902.54it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8278  acc:  0.745 f1:  0.7713004484304934
EPOCHS:13
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 114.54it/s]


 Average training loss: 0.37
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 877.96it/s]


 Average valid loss: 0.68
AUC_SCORE:  0.7646999999999999  acc:  0.68 f1:  0.7192982456140351
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 882.58it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8287000000000001  acc:  0.745 f1:  0.7692307692307692
EPOCHS:14
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.65it/s]


 Average training loss: 0.34
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 881.44it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.7656000000000001  acc:  0.685 f1:  0.7248908296943233
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 883.08it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.8259  acc:  0.745 f1:  0.7692307692307692
EPOCHS:15
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.38it/s]


 Average training loss: 0.32
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 817.18it/s]


 Average valid loss: 0.71
AUC_SCORE:  0.7613  acc:  0.685 f1:  0.72
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 884.81it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.8243999999999999  acc:  0.74 f1:  0.7592592592592593


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 880.78it/s]

 Average valid loss: 0.56
AUC_SCORE:  0.8082  acc:  0.735 f1:  0.7685589519650655
BEST MODEL
AUC_SCORE:  0.8082  acc:  0.735 f1:  0.7685589519650655
{'auc_micro': 0.8082, 'acc': 0.735, 'f1': 0.7685589519650655, 'pred': [[1, 0, 1, 0, 1, 0, 1, 1, 0, 1], [0, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 0, 1], [1, 1, 1, 1, 1, 1, 0, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 0], [1, 1, 0, 1, 1, 1, 0, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1], [0, 0, 0, 1, 1, 0, 1, 0, 0, 1], [0, 1, 0, 1, 1, 0, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 0, 0, 0], [0, 0, 0, 0, 1, 1, 0, 1, 1, 0], [0, 0, 1, 0, 0, 0, 1, 0, 0, 0], [1, 1, 0, 1, 1, 1, 0, 1, 0, 1], [1, 0, 0, 1, 0, 0, 1, 0, 1, 1], [0, 1, 0, 1, 0, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 1, 0, 0, 0, 1], [0, 0, 1, 0, 0, 0, 1, 1, 0, 0], [], [], [], [], [], [], [], [], [], [], [], [], []]}





In [7]:
import random

In [8]:
original = []
reverse = []
path1 = 'word2vec_model_char/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])
    embs = torch.zeros(1,scene_num,384)
    k = 0
    for i in range(1,max_scene+1):
        emb_name = base + str(i)
        if emb_name in w2v.wv.index_to_key:
            emb = w2v.wv[emb_name]
            emb = torch.from_numpy(emb)
            embs[0][k] = emb
            k += 1
    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,scene_num,384)
    for i,emb in enumerate(embs[0]):
#        if i == 0:
#            continue
        reverse_emb[0][-(i+1)] = emb

    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    
    

  emb = torch.from_numpy(emb)


In [9]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [10]:
bert = BertModel(d_model=384).to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:27<00:00, 72.04it/s]


 Average training loss: 0.69
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 823.10it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.7131  acc:  0.6 f1:  0.6946564885496184
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 820.83it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.7031000000000001  acc:  0.62 f1:  0.696
EPOCHS:2
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 75.20it/s]


 Average training loss: 0.65
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 507.24it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.7371  acc:  0.675 f1:  0.7186147186147186
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 515.94it/s]


 Average valid loss: 0.61
AUC_SCORE:  0.7733  acc:  0.685 f1:  0.72
EPOCHS:3
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 74.22it/s]


 Average training loss: 0.60
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 509.99it/s]


 Average valid loss: 0.60
AUC_SCORE:  0.75  acc:  0.645 f1:  0.6728110599078341
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 516.00it/s]


 Average valid loss: 0.56
AUC_SCORE:  0.8007  acc:  0.715 f1:  0.7348837209302327
EPOCHS:4
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:25<00:00, 79.81it/s]


 Average training loss: 0.56
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 516.25it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7576999999999999  acc:  0.66 f1:  0.6761904761904762
3  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 521.32it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8121  acc:  0.715 f1:  0.7348837209302327
EPOCHS:5
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 76.08it/s]


 Average training loss: 0.52
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 496.73it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7669999999999999  acc:  0.665 f1:  0.6854460093896715
4  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 882.95it/s]


 Average valid loss: 0.53
AUC_SCORE:  0.8186  acc:  0.73 f1:  0.7476635514018691
EPOCHS:6
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:27<00:00, 71.53it/s]


 Average training loss: 0.50
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 519.14it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7682999999999999  acc:  0.665 f1:  0.6883720930232557
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 514.73it/s]


 Average valid loss: 0.53
AUC_SCORE:  0.8209  acc:  0.725 f1:  0.7441860465116279
EPOCHS:7
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:24<00:00, 80.35it/s]


 Average training loss: 0.47
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 504.00it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7686999999999999  acc:  0.67 f1:  0.7000000000000001
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 510.11it/s]


 Average valid loss: 0.53
AUC_SCORE:  0.8201999999999999  acc:  0.73 f1:  0.75
EPOCHS:8
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 75.50it/s]


 Average training loss: 0.44
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 495.76it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7705  acc:  0.67 f1:  0.6944444444444444
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 752.20it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8179  acc:  0.73 f1:  0.75
EPOCHS:9
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:27<00:00, 73.80it/s]


 Average training loss: 0.43
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 790.01it/s]


 Average valid loss: 0.60
AUC_SCORE:  0.7718  acc:  0.67 f1:  0.6915887850467289
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 690.57it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8187  acc:  0.74 f1:  0.7592592592592593
EPOCHS:10
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 76.21it/s]


 Average training loss: 0.40
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 766.53it/s]


 Average valid loss: 0.61
AUC_SCORE:  0.7678  acc:  0.66 f1:  0.6822429906542056
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 508.64it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8175999999999999  acc:  0.745 f1:  0.7627906976744185
EPOCHS:11
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 74.73it/s]


 Average training loss: 0.38
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 866.48it/s]


 Average valid loss: 0.61
AUC_SCORE:  0.7687000000000002  acc:  0.67 f1:  0.6886792452830188
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 849.85it/s]


 Average valid loss: 0.56
AUC_SCORE:  0.8154  acc:  0.73 f1:  0.7452830188679247
EPOCHS:12
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 76.59it/s]


 Average training loss: 0.35
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 737.96it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.7687999999999999  acc:  0.675 f1:  0.7058823529411764
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 745.32it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.8132  acc:  0.73 f1:  0.7476635514018691
EPOCHS:13
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 75.28it/s]


 Average training loss: 0.32
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 840.47it/s]


 Average valid loss: 0.64
AUC_SCORE:  0.7718999999999999  acc:  0.665 f1:  0.6995515695067266
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 849.18it/s]


 Average valid loss: 0.60
AUC_SCORE:  0.8120999999999999  acc:  0.72 f1:  0.7407407407407408
EPOCHS:14
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:26<00:00, 76.08it/s]


 Average training loss: 0.30
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 712.74it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.7693  acc:  0.67 f1:  0.7053571428571428
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 709.30it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.8076  acc:  0.725 f1:  0.7441860465116279
EPOCHS:15
TRAIN


100%|███████████████████████████████████████| 2000/2000 [00:27<00:00, 72.12it/s]


 Average training loss: 0.27
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 509.28it/s]


 Average valid loss: 0.71
AUC_SCORE:  0.7667999999999999  acc:  0.66 f1:  0.6991150442477876
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 445.54it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.802  acc:  0.72 f1:  0.7407407407407408


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 511.58it/s]

 Average valid loss: 0.53
AUC_SCORE:  0.8186  acc:  0.73 f1:  0.7476635514018691
BEST MODEL
AUC_SCORE:  0.8186  acc:  0.73 f1:  0.7476635514018691
{'auc_micro': 0.8186, 'acc': 0.73, 'f1': 0.7476635514018691, 'pred': [[1, 1, 0, 1, 1, 0, 0, 1, 0, 1], [0, 0, 1, 1, 1, 1, 0, 1, 0, 0], [1, 1, 1, 1, 1, 1, 1, 0, 0, 1], [1, 1, 1, 0, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1], [1, 0, 0, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 0, 0, 1, 1, 1, 0, 1, 0], [1, 1, 1, 1, 1, 1, 1, 1, 0, 1], [0, 1, 0, 1, 0, 0, 0, 1, 0, 1], [0, 0, 1, 0, 1, 0, 0, 1, 0, 0], [0, 1, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 1, 1, 0], [1, 1, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 0, 1, 1, 1, 1, 0, 1, 0], [0, 0, 0, 1, 0, 0, 0, 0, 1, 1], [0, 1, 1, 0, 1, 0, 1, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 1, 1], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1], [], [], [], [], [], [], [], [], [], [], [], [], []]}





In [139]:
original = []
reverse = []
path1 = 'word2vec_model_extra+emb/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])
    embs = torch.zeros(1,scene_num,384)
    k = 0
    for i in range(1,max_scene+1):
        emb_name = base + str(i)
        if emb_name in w2v.wv.index_to_key:
            emb = w2v.wv[emb_name]
            emb = torch.from_numpy(emb)
            embs[0][k] = emb
            k += 1
    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,scene_num,384)
    for i,emb in enumerate(embs[0]):
#        if i == 0:
#            continue
        reverse_emb[0][-(i+1)] = emb

    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    
    

In [140]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [141]:
bert = BertModel(d_model=384).to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 114.06it/s]


 Average training loss: 0.69
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 857.94it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.7226  acc:  0.555 f1:  0.6763636363636364
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 851.85it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.7558000000000001  acc:  0.63 f1:  0.7218045112781953
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 113.89it/s]


 Average training loss: 0.66
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 820.07it/s]


 Average valid loss: 0.63
AUC_SCORE:  0.7595999999999999  acc:  0.665 f1:  0.7172995780590716
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 783.47it/s]


 Average valid loss: 0.61
AUC_SCORE:  0.8038  acc:  0.725 f1:  0.7619047619047619
EPOCHS:3
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 113.57it/s]


 Average training loss: 0.62
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 824.88it/s]


 Average valid loss: 0.60
AUC_SCORE:  0.7699  acc:  0.685 f1:  0.7096774193548387
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 762.76it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.8196  acc:  0.745 f1:  0.7713004484304934
EPOCHS:4
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.02it/s]


 Average training loss: 0.58
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 805.56it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7769999999999999  acc:  0.705 f1:  0.7203791469194314
3  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 860.23it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8224999999999999  acc:  0.76 f1:  0.7777777777777777
EPOCHS:5
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.81it/s]


 Average training loss: 0.55
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 844.50it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.7794  acc:  0.715 f1:  0.7246376811594203
4  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 836.01it/s]


 Average valid loss: 0.52
AUC_SCORE:  0.8305  acc:  0.77 f1:  0.780952380952381
EPOCHS:6
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.98it/s]


 Average training loss: 0.53
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 843.20it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.7799000000000001  acc:  0.72 f1:  0.7227722772277227
5  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 853.59it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8358999999999999  acc:  0.775 f1:  0.784688995215311
EPOCHS:7
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.14it/s]


 Average training loss: 0.51
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 826.36it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.7798  acc:  0.72 f1:  0.7227722772277227
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 828.36it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8364  acc:  0.76 f1:  0.7692307692307692
EPOCHS:8
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.24it/s]


 Average training loss: 0.49
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 847.43it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7796  acc:  0.705 f1:  0.7064676616915423
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 849.09it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8371000000000001  acc:  0.755 f1:  0.7655502392344498
EPOCHS:9
TRAIN


 28%|███████████                            | 568/2000 [00:05<00:12, 113.06it/s]


KeyboardInterrupt: 

In [121]:
original = []
reverse = []
path1 = 'word2vec_model_extra+pos/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])
    embs = torch.zeros(1,scene_num,400)
    k = 0
    for i in range(1,max_scene+1):
        emb_name = base + str(i)
        if emb_name in w2v.wv.index_to_key:
            emb = w2v.wv[emb_name]
            emb = torch.from_numpy(emb)
            embs[0][k] = emb
            k += 1
    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,scene_num,400)
    for i,emb in enumerate(embs[0]):
#        if i == 0:
#            continue
        reverse_emb[0][-(i+1)] = emb

    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    
    

In [124]:
print(len(original))
print(len(reverse))

1202
1202


In [125]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [126]:
bert = BertModel(d_model=400).to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.69it/s]


 Average training loss: 0.64
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 848.56it/s]


 Average valid loss: 0.56
AUC_SCORE:  0.8585999999999999  acc:  0.745 f1:  0.7605633802816901
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 846.62it/s]


 Average valid loss: 0.52
AUC_SCORE:  0.8965000000000001  acc:  0.775 f1:  0.7945205479452055
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.21it/s]


 Average training loss: 0.49
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 848.17it/s]


 Average valid loss: 0.46
AUC_SCORE:  0.882  acc:  0.785 f1:  0.7981220657276995
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 848.98it/s]


 Average valid loss: 0.39
AUC_SCORE:  0.9269999999999999  acc:  0.83 f1:  0.8411214953271027
EPOCHS:3
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.81it/s]


 Average training loss: 0.40
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 804.53it/s]


 Average valid loss: 0.43
AUC_SCORE:  0.8862  acc:  0.775 f1:  0.7867298578199052
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 854.55it/s]


 Average valid loss: 0.35
AUC_SCORE:  0.9328  acc:  0.845 f1:  0.8558139534883721
EPOCHS:4
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.90it/s]


 Average training loss: 0.36
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 816.98it/s]


 Average valid loss: 0.43
AUC_SCORE:  0.8837  acc:  0.77 f1:  0.780952380952381
3  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 835.86it/s]


 Average valid loss: 0.34
AUC_SCORE:  0.9362  acc:  0.845 f1:  0.8571428571428572
EPOCHS:5
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.57it/s]


 Average training loss: 0.33
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 782.15it/s]


 Average valid loss: 0.43
AUC_SCORE:  0.8835999999999999  acc:  0.77 f1:  0.7788461538461539
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 845.97it/s]


 Average valid loss: 0.33
AUC_SCORE:  0.9368000000000001  acc:  0.845 f1:  0.8558139534883721
EPOCHS:6
TRAIN


 40%|███████████████▋                       | 806/2000 [00:06<00:10, 116.14it/s]


KeyboardInterrupt: 

In [148]:
original = []
reverse = []
path1 = 'word2vec_model_edge/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])
    embs = torch.zeros(1,scene_num,400)
    k = 0
    for i in range(1,max_scene+1):
        emb_name = base + str(i)
        if emb_name in w2v.wv.index_to_key:
            emb = w2v.wv[emb_name]
            emb = torch.from_numpy(emb)
            embs[0][k] = emb
            k += 1
    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,scene_num,400)
    for i,emb in enumerate(embs[0]):
#        if i == 0:
#            continue
        reverse_emb[0][-(i+1)] = emb

    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    
    

In [149]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [150]:
bert = BertModel(d_model=400).to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.22it/s]


 Average training loss: 0.68
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 812.67it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.6969000000000002  acc:  0.635 f1:  0.6995884773662552
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 848.63it/s]


 Average valid loss: 0.64
AUC_SCORE:  0.7909999999999999  acc:  0.72 f1:  0.7723577235772358
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.93it/s]


 Average training loss: 0.64
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 834.78it/s]


 Average valid loss: 0.61
AUC_SCORE:  0.7545000000000001  acc:  0.675 f1:  0.7085201793721974
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 832.23it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.8299000000000002  acc:  0.755 f1:  0.7841409691629956
EPOCHS:3
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.67it/s]


 Average training loss: 0.59
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 832.42it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7715  acc:  0.705 f1:  0.7230046948356806
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 836.02it/s]


 Average valid loss: 0.52
AUC_SCORE:  0.8419  acc:  0.77 f1:  0.790909090909091
EPOCHS:4
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.02it/s]


 Average training loss: 0.55
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 823.98it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.778  acc:  0.7 f1:  0.719626168224299
3  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 817.07it/s]


 Average valid loss: 0.49
AUC_SCORE:  0.8526  acc:  0.78 f1:  0.8
EPOCHS:5
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.88it/s]


 Average training loss: 0.52
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 843.25it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.7777999999999999  acc:  0.69 f1:  0.7019230769230769
4  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 826.83it/s]


 Average valid loss: 0.47
AUC_SCORE:  0.8575  acc:  0.785 f1:  0.7981220657276995
EPOCHS:6
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.56it/s]


 Average training loss: 0.49
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 828.69it/s]


 Average valid loss: 0.57
AUC_SCORE:  0.7798999999999999  acc:  0.685 f1:  0.7042253521126761
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 835.81it/s]


 Average valid loss: 0.47
AUC_SCORE:  0.8609  acc:  0.785 f1:  0.8036529680365297
EPOCHS:7
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.50it/s]


 Average training loss: 0.47
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 831.49it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7793  acc:  0.695 f1:  0.7109004739336493
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 840.11it/s]


 Average valid loss: 0.46
AUC_SCORE:  0.8646000000000001  acc:  0.775 f1:  0.7887323943661971
EPOCHS:8
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.87it/s]


 Average training loss: 0.45
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 830.60it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7768  acc:  0.695 f1:  0.7162790697674418
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 806.31it/s]


 Average valid loss: 0.47
AUC_SCORE:  0.8643000000000001  acc:  0.785 f1:  0.7981220657276995
EPOCHS:9
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.11it/s]


 Average training loss: 0.42
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 837.11it/s]


 Average valid loss: 0.60
AUC_SCORE:  0.7782000000000001  acc:  0.715 f1:  0.7272727272727273
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 841.70it/s]


 Average valid loss: 0.46
AUC_SCORE:  0.8635999999999999  acc:  0.8 f1:  0.8076923076923077
EPOCHS:10
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.36it/s]


 Average training loss: 0.39
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 832.71it/s]


 Average valid loss: 0.62
AUC_SCORE:  0.7767000000000001  acc:  0.7 f1:  0.7321428571428572
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 839.86it/s]


 Average valid loss: 0.48
AUC_SCORE:  0.8659  acc:  0.775 f1:  0.7945205479452055
EPOCHS:11
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.57it/s]


 Average training loss: 0.37
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 788.66it/s]


 Average valid loss: 0.64
AUC_SCORE:  0.7779  acc:  0.7 f1:  0.7297297297297298
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 826.96it/s]


 Average valid loss: 0.48
AUC_SCORE:  0.8651  acc:  0.77 f1:  0.787037037037037
EPOCHS:12
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.23it/s]


 Average training loss: 0.35
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 827.35it/s]


 Average valid loss: 0.65
AUC_SCORE:  0.7724000000000001  acc:  0.68 f1:  0.709090909090909
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 826.15it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8581  acc:  0.76 f1:  0.7777777777777777
EPOCHS:13
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.52it/s]


 Average training loss: 0.31
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 840.06it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.776  acc:  0.69 f1:  0.7207207207207207
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 848.68it/s]


 Average valid loss: 0.51
AUC_SCORE:  0.8594999999999999  acc:  0.77 f1:  0.787037037037037
EPOCHS:14
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 119.40it/s]


 Average training loss: 0.29
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 850.62it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.7708  acc:  0.7 f1:  0.7222222222222223
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 848.29it/s]


 Average valid loss: 0.52
AUC_SCORE:  0.8549000000000001  acc:  0.78 f1:  0.7884615384615384
EPOCHS:15
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.82it/s]


 Average training loss: 0.26
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 835.98it/s]


 Average valid loss: 0.73
AUC_SCORE:  0.7702  acc:  0.675 f1:  0.7085201793721974
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 845.38it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8533999999999999  acc:  0.78 f1:  0.7904761904761904


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 845.12it/s]

 Average valid loss: 0.47
AUC_SCORE:  0.8575  acc:  0.785 f1:  0.7981220657276995
BEST MODEL
AUC_SCORE:  0.8575  acc:  0.785 f1:  0.7981220657276995
{'auc_micro': 0.8575, 'acc': 0.785, 'f1': 0.7981220657276995, 'pred': [[1, 1, 1, 1, 1, 1, 0, 1, 0, 1], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 0, 1, 1, 1, 0, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 0, 1, 0, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 0], [1, 0, 0, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 0, 1, 1, 1, 1, 1, 0], [1, 1, 1, 1, 0, 1, 1, 1, 0, 1], [0, 1, 0, 1, 0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 1, 0, 0, 1, 0], [0, 0, 0, 1, 1, 0, 1, 0, 1, 0], [1, 0, 1, 1, 0, 0, 1, 0, 0, 0], [0, 1, 1, 0, 1, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1], [], [], [], [], [], [], [], [], [], [], [], [], []]}





# 평균

In [46]:
import pickle
with open('emb_avg2.pickle','rb') as fw:
    movie_emb = pickle.load(fw)

In [127]:
original = []
reverse = []
path1 = 'word2vec_model_char/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    if movie_name not in movie_emb.keys():
        continue
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])

    k = 0

    real_embs = []
    for emb in movie_emb[movie_name]:
        if str(emb[0]) != 'nan':
            real_embs.append(emb)
    embs = torch.FloatTensor(real_embs).unsqueeze(0)

    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,len(embs[0]),384)
    for i,emb in enumerate(embs[0]):
        reverse_emb[0][-(i+1)] = emb
    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    

In [129]:
embs

tensor([[[ 0.0946,  0.4989,  0.2511,  ..., -0.0374,  0.1700,  0.5408],
         [-0.1516,  0.2858,  0.4151,  ...,  0.1799,  0.0222,  0.2225],
         [ 0.1228,  0.0312,  0.2744,  ...,  0.1853,  0.2074,  0.0189],
         ...,
         [ 0.1141,  0.3026,  0.3107,  ..., -0.1369, -0.1973,  0.2539],
         [ 0.3033,  0.3994,  0.3760,  ..., -0.0968,  0.0949,  0.3761],
         [ 0.1520,  0.1358,  0.3401,  ..., -0.1170, -0.0191,  0.3754]]])

In [130]:
reverse_emb

tensor([[[ 0.1520,  0.1358,  0.3401,  ..., -0.1170, -0.0191,  0.3754],
         [ 0.3033,  0.3994,  0.3760,  ..., -0.0968,  0.0949,  0.3761],
         [ 0.1141,  0.3026,  0.3107,  ..., -0.1369, -0.1973,  0.2539],
         ...,
         [ 0.1228,  0.0312,  0.2744,  ...,  0.1853,  0.2074,  0.0189],
         [-0.1516,  0.2858,  0.4151,  ...,  0.1799,  0.0222,  0.2225],
         [ 0.0946,  0.4989,  0.2511,  ..., -0.0374,  0.1700,  0.5408]]])

In [131]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [132]:
bert = BertModel().to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.30it/s]


 Average training loss: 0.68
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 764.86it/s]


 Average valid loss: 0.64
AUC_SCORE:  0.7629  acc:  0.66 f1:  0.6666666666666666
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 874.82it/s]


 Average valid loss: 0.64
AUC_SCORE:  0.7462  acc:  0.685 f1:  0.6956521739130436
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.89it/s]


 Average training loss: 0.63
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 870.19it/s]


 Average valid loss: 0.58
AUC_SCORE:  0.7889  acc:  0.7 f1:  0.7
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 819.26it/s]


 Average valid loss: 0.59
AUC_SCORE:  0.7777  acc:  0.695 f1:  0.6871794871794872
EPOCHS:3
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.94it/s]


 Average training loss: 0.59
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 855.75it/s]


 Average valid loss: 0.56
AUC_SCORE:  0.8002000000000001  acc:  0.715 f1:  0.7135678391959798
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 835.40it/s]


 Average valid loss: 0.56
AUC_SCORE:  0.8018  acc:  0.715 f1:  0.7192118226600985
EPOCHS:4
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.48it/s]


 Average training loss: 0.55
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 861.10it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.806  acc:  0.73 f1:  0.7326732673267328
3  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 858.90it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8154  acc:  0.715 f1:  0.7164179104477613
EPOCHS:5
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.14it/s]


 Average training loss: 0.53
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 854.98it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8067000000000001  acc:  0.735 f1:  0.7389162561576353
4  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 820.56it/s]


 Average valid loss: 0.52
AUC_SCORE:  0.8228  acc:  0.755 f1:  0.7609756097560977
EPOCHS:6
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.47it/s]


 Average training loss: 0.51
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 840.43it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8075  acc:  0.745 f1:  0.7512195121951218
5  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 832.44it/s]


 Average valid loss: 0.52
AUC_SCORE:  0.8287  acc:  0.775 f1:  0.784688995215311
EPOCHS:7
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 113.72it/s]


 Average training loss: 0.48
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 832.51it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8073  acc:  0.735 f1:  0.7336683417085427
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 849.07it/s]


 Average valid loss: 0.51
AUC_SCORE:  0.8333  acc:  0.755 f1:  0.7609756097560977
EPOCHS:8
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 113.93it/s]


 Average training loss: 0.47
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 842.03it/s]


 Average valid loss: 0.54
AUC_SCORE:  0.8071  acc:  0.745 f1:  0.7487684729064039
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 822.52it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8376  acc:  0.775 f1:  0.784688995215311
EPOCHS:9
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 112.83it/s]


 Average training loss: 0.45
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 806.42it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8066000000000001  acc:  0.74 f1:  0.7425742574257426
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 807.84it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8418  acc:  0.755 f1:  0.7632850241545893
EPOCHS:10
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 115.48it/s]


 Average training loss: 0.43
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 834.00it/s]


 Average valid loss: 0.55
AUC_SCORE:  0.8065  acc:  0.75 f1:  0.7474747474747474
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 841.39it/s]


 Average valid loss: 0.50
AUC_SCORE:  0.8424999999999999  acc:  0.76 f1:  0.7669902912621359
EPOCHS:11
TRAIN


 49%|███████████████████▏                   | 987/2000 [00:08<00:08, 115.60it/s]


KeyboardInterrupt: 

# Sum

In [8]:
import pickle
with open('emb_sum.pickle','rb') as fw:
    movie_emb = pickle.load(fw)

In [20]:
original = []
reverse = []
path1 = 'word2vec_model_char/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    if movie_name not in movie_emb.keys():
        continue
    pre_embs = embs
    
    w2v=Word2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.wv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])

    k = 0

    real_embs = []
    for emb in movie_emb[movie_name]:
        if str(emb[0]) != 'nan':
            real_embs.append(emb)
    embs = torch.FloatTensor(real_embs).unsqueeze(0).squeeze(2)

    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,len(embs[0]),384)
    for i,emb in enumerate(embs[0]):
        reverse_emb[0][-(i+1)] = emb
    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    

In [21]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [22]:
embs.shape

torch.Size([1, 63, 384])

In [23]:
bert = BertModel().to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 120.97it/s]


 Average training loss: 0.70
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 867.05it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.54135  acc:  0.525 f1:  0.6494464944649446
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 897.52it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.5998345935727789  acc:  0.5217391304347826 f1:  0.639344262295082
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 121.24it/s]


 Average training loss: 0.68
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 879.16it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.5612499999999999  acc:  0.535 f1:  0.6324110671936759
1  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 864.55it/s]


 Average valid loss: 0.68
AUC_SCORE:  0.6333884688090736  acc:  0.5380434782608695 f1:  0.632034632034632
EPOCHS:3
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 122.00it/s]


 Average training loss: 0.67
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 867.55it/s]


 Average valid loss: 0.68
AUC_SCORE:  0.5706  acc:  0.505 f1:  0.5857740585774058
2  EPOCH BEST MODEL!
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 890.04it/s]


 Average valid loss: 0.68
AUC_SCORE:  0.6437854442344046  acc:  0.5706521739130435 f1:  0.6488888888888888
EPOCHS:4
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 122.60it/s]


 Average training loss: 0.65
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 881.32it/s]


 Average valid loss: 0.68
AUC_SCORE:  0.57435  acc:  0.505 f1:  0.5714285714285714
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 893.91it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.652882797731569  acc:  0.5760869565217391 f1:  0.6320754716981132
EPOCHS:5
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 120.60it/s]


 Average training loss: 0.63
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 823.67it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.5717500000000001  acc:  0.515 f1:  0.5610859728506787
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 817.42it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.6584357277882799  acc:  0.5978260869565217 f1:  0.6407766990291262
EPOCHS:6
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 119.24it/s]


 Average training loss: 0.62
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 900.72it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.5711499999999999  acc:  0.515 f1:  0.5488372093023256
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 906.63it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.6658790170132326  acc:  0.5978260869565217 f1:  0.6336633663366337
EPOCHS:7
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 121.66it/s]


 Average training loss: 0.60
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 901.09it/s]


 Average valid loss: 0.70
AUC_SCORE:  0.57065  acc:  0.515 f1:  0.5402843601895734
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 904.83it/s]


 Average valid loss: 0.65
AUC_SCORE:  0.6685964083175804  acc:  0.6195652173913043 f1:  0.6428571428571429
EPOCHS:8
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 123.24it/s]


 Average training loss: 0.59
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 891.37it/s]


 Average valid loss: 0.71
AUC_SCORE:  0.5745  acc:  0.53 f1:  0.5392156862745098
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 837.35it/s]


 Average valid loss: 0.65
AUC_SCORE:  0.6710775047258979  acc:  0.625 f1:  0.6461538461538462
EPOCHS:9
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 118.42it/s]


 Average training loss: 0.57
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 882.95it/s]


 Average valid loss: 0.72
AUC_SCORE:  0.57355  acc:  0.53 f1:  0.5480769230769231
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 894.36it/s]


 Average valid loss: 0.65
AUC_SCORE:  0.6776937618147447  acc:  0.6086956521739131 f1:  0.6399999999999999
EPOCHS:10
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 119.49it/s]


 Average training loss: 0.56
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 877.80it/s]


 Average valid loss: 0.73
AUC_SCORE:  0.5693499999999999  acc:  0.515 f1:  0.5314009661835749
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 882.60it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.6754489603024575  acc:  0.6141304347826086 f1:  0.6467661691542288
EPOCHS:11
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 120.94it/s]


 Average training loss: 0.53
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 891.98it/s]


 Average valid loss: 0.74
AUC_SCORE:  0.5676  acc:  0.525 f1:  0.5365853658536585
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 859.83it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.678875236294896  acc:  0.6141304347826086 f1:  0.64321608040201
EPOCHS:12
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 117.35it/s]


 Average training loss: 0.51
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 880.62it/s]


 Average valid loss: 0.76
AUC_SCORE:  0.55945  acc:  0.515 f1:  0.5221674876847291
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 873.13it/s]


 Average valid loss: 0.66
AUC_SCORE:  0.6828922495274102  acc:  0.6304347826086957 f1:  0.6458333333333334
EPOCHS:13
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 116.64it/s]


 Average training loss: 0.48
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 899.70it/s]


 Average valid loss: 0.78
AUC_SCORE:  0.55915  acc:  0.52 f1:  0.5294117647058824
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 903.94it/s]


 Average valid loss: 0.67
AUC_SCORE:  0.6812381852551985  acc:  0.6467391304347826 f1:  0.6666666666666667
EPOCHS:14
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 119.80it/s]


 Average training loss: 0.45
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 870.12it/s]


 Average valid loss: 0.80
AUC_SCORE:  0.5527500000000001  acc:  0.525 f1:  0.5365853658536585
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 878.85it/s]


 Average valid loss: 0.69
AUC_SCORE:  0.6795841209829867  acc:  0.625 f1:  0.6461538461538462
EPOCHS:15
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 121.67it/s]


 Average training loss: 0.42
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 893.39it/s]


 Average valid loss: 0.83
AUC_SCORE:  0.5546500000000001  acc:  0.52 f1:  0.5428571428571428
Test


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 893.06it/s]


 Average valid loss: 0.72
AUC_SCORE:  0.6759215500945179  acc:  0.6195652173913043 f1:  0.6428571428571429


100%|████████████████████████████████████████| 184/184 [00:00<00:00, 889.46it/s]

 Average valid loss: 0.68
AUC_SCORE:  0.6437854442344046  acc:  0.5706521739130435 f1:  0.6488888888888888
BEST MODEL
AUC_SCORE:  0.6437854442344046  acc:  0.5706521739130435 f1:  0.6488888888888888
{'auc_micro': 0.6437854442344046, 'acc': 0.5706521739130435, 'f1': 0.6488888888888888, 'pred': [[1, 1, 0, 1, 1, 1, 1, 1, 1, 0], [1, 1, 1, 0, 1, 1, 1, 1, 0, 1], [1, 1, 1, 0, 1, 1, 1, 0, 1, 1], [1, 1, 1, 1, 1, 0, 1, 1, 1, 1], [1, 0, 1, 1, 0, 1, 1, 1, 1, 1], [1, 0, 0, 1, 1, 1, 1, 1, 1, 1], [0, 1, 1, 1, 1, 1, 1, 0, 1, 1], [1, 1, 1, 0, 1, 1, 1, 1, 1, 0], [0, 1, 0, 1, 1, 1, 1, 1, 1, 0], [0, 1, 0, 0, 1, 1, 1, 0, 1, 1], [1, 1, 0, 1, 0, 1, 0, 1, 1, 1], [0, 0, 1, 1, 1, 1, 0, 0, 1, 1], [1, 1, 0, 1, 1, 0, 1, 1, 1, 1], [1, 0, 0, 1, 0, 0, 1, 1, 1, 0], [1, 0, 1, 1, 1, 1, 1, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1, 0, 0], [0, 0, 0, 1, 0, 1, 1, 1, 0, 1], [1, 1, 0, 1, 1, 1, 1, 1, 1, 0], [1, 0, 1, 1], [], [], [], [], [], [], [], [], [], [], []]}





# Doc2Vec

In [134]:
from gensim.models.doc2vec import Doc2Vec, TaggedDocument

In [142]:
original = []
reverse = []
path1 = 'doc2vec/'

filenames = os.listdir(path1)
train_data = []
valid_data = []
test_data = []
embs = []
for num, filename in enumerate(filenames):
    movie_name = filename[:-14]
    
    pre_embs = embs
    
    w2v=Doc2Vec.load(path1 + filename)
    base = 'scene_'
    scene_num = 0
    passs = 0
    max_scene = 0
    for node in w2v.dv.index_to_key:
        if base in node:
            scene_num += 1
            if int(node[6:])>max_scene:
                max_scene = int(node[6:])
    embs = torch.zeros(1,scene_num,384)
    k = 0
    for i in range(1,max_scene+1):
        emb_name = base + str(i)
        if emb_name in w2v.dv.index_to_key:
            emb = w2v.dv[emb_name]
            emb = torch.from_numpy(emb)
            embs[0][k] = emb
            k += 1
    if len(embs[0]>5):        
        original.append([embs,[1]])
    reverse_emb = torch.zeros(1,scene_num,384)
    for i,emb in enumerate(embs[0]):
        reverse_emb[0][(-i)] = emb
    if len(reverse_emb[0]>5):
        reverse.append([reverse_emb,[0]])
    
    

In [143]:
train_data = original[:1000]+reverse[:1000]
valid_data = original[1000:1100]+reverse[1000:1100]
test_data = original[1100:1200]+reverse[1100:1200]
random.Random(4).shuffle(train_data)

In [144]:
bert = BertModel().to('cuda')
score = fit(bert, train_data,valid_data,test_data,EPOCHS=15)
print(score)

EPOCHS:1
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:17<00:00, 114.35it/s]


 Average training loss: 0.70
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 890.62it/s]


 Average valid loss: 0.70
AUC_SCORE:  0.4998  acc:  0.5 f1:  0.6575342465753424
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 393.04it/s]


 Average valid loss: 0.70
AUC_SCORE:  0.5  acc:  0.5 f1:  0.6644295302013423
EPOCHS:2
TRAIN


100%|██████████████████████████████████████| 2000/2000 [00:16<00:00, 117.95it/s]


 Average training loss: 0.70
Valid


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 852.14it/s]


 Average valid loss: 0.70
AUC_SCORE:  0.49970000000000003  acc:  0.5 f1:  0.6644295302013423
Test


100%|████████████████████████████████████████| 200/200 [00:00<00:00, 888.87it/s]


 Average valid loss: 0.70
AUC_SCORE:  0.4998  acc:  0.5 f1:  0.6666666666666666
EPOCHS:3
TRAIN


 15%|█████▋                                 | 294/2000 [00:02<00:15, 112.91it/s]


KeyboardInterrupt: 