In [1]:
import numpy as np
import torch
import string
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pad_packed_sequence as unpack
from torch.nn.utils.rnn import pack_padded_sequence as pack
import math
import torch.utils.data as data
import json
import os
import pandas as pd
import random
import copy
import torch.utils.data.sampler as sampler


In [2]:
all_letters = string.printable
n_letters = len(all_letters)
def letterToIndex(letter):
    return all_letters.find(letter)
def linesToTensor(lines):
    line_length = 15000
    if max([ len(line) for line in lines]) < line_length:
        line_length = max( [len(line) for line in lines] )
    #line_length = max( [len(line) for line in lines] )
    #xx = [max(len(line)-15000,0) for line in lines]
    #print float(np.sum(xx)) / float(np.sum([len(line) for line in lines]))
    tensor = torch.zeros(len(lines), line_length, n_letters)
    for b, line in enumerate(lines): 
        line = line[:15000]
        for li, letter in enumerate(line):
            tensor[b][li + line_length - len(line)][letterToIndex(letter)] = 1 #뒤로 맞춰줌

    return tensor

In [3]:
class LangModel(nn.Module):
    def __init__(self, preTrained='True', input=100):
        super(LangModel, self).__init__()

        # Language Model
        self.lang = nn.LSTM(input, 128, 3, batch_first=True) 
 
        # Output 
        self.output = nn.Linear(128, 2)
        n = self.output.in_features * self.output.out_features
        self.output.weight.data.normal_(0, math.sqrt(2. / n))
        self.output.bias.data.zero_()

    def forward(self, text):
        text.cuda()
        h0 = ( Variable(torch.zeros(3, text.size(0), 128)).cuda(),  Variable(torch.zeros(3, text.size(0), 128)).cuda())

        lang_feature, hn = self.lang(text, h0 )
        lang_feature = lang_feature[:,-1,:]

        pred = self.output(lang_feature)
        return pred


In [4]:
import torch.utils.data as data

class chat_ds(data.Dataset):
    def __init__(self,d_type):
        self.gt_range =  1- 0.25
        self.d_type=d_type
        with open('./label.json') as f1:  
            self.gt=json.load(f1)
        with open('./chat.json') as f2:  
            self.text=json.load(f2)
        self.sample=list(map(int,list(self.gt.keys())))
        if d_type=='train':
            self.sample=self.sample[:60]
        if d_type=='val':
            self.sample=self.sample[60:90]
        if d_type=='test':
            self.sample=self.sample[90:]
            
        self.WeightedSampling=[]
        for i in self.sample:
            self.WeightedSampling.extend(copy.copy(self.gt[str(i)]))
        
        sampling = np.array(self.WeightedSampling)
        neg_idx = np.where(sampling == 0)[0]
        pos_idx = np.where(sampling == 1)[0]
        sampling = sampling.astype(np.float32)

        begin_pos = 0 
        hl_frames = []
        for it, cur_pos in enumerate(pos_idx):
            if it+1 < len(pos_idx): 
                if((pos_idx[it+1] - cur_pos) > 1):
                    begin = int((it+1 - begin_pos) * self.gt_range) + begin_pos
                    hl_frames.extend( pos_idx[begin: it] ) 
                    begin_pos = it+1


        sampling.fill(0)
        sampling[neg_idx] = len(sampling) / float(len(neg_idx))
       # self.WeightedSampling[pos_idx] = len(self.WeightedSampling) / float(len(pos_idx))
        sampling[hl_frames] = len(sampling) / float(len(hl_frames))
        self.WeightedSampling = sampling

        self.sum=np.insert(np.cumsum([len(self.gt[str(i)]) for i in self.sample]),0,0)
        print("data load fin")

        
    def __len__(self):
        return len(self.WeightedSampling)
    def __getitem__(self,index):
        if self.d_type=='train':
            vid=np.histogram(index,self.sum)
            vid = np.where(vid[0]>0)[0][0]
            vframe=index-self.sum[vid]

            win_text=''
            for idx in range(7): #7 : window size
                if vframe+idx<len(self.text[str(vid)]):
                    win_text+=self.text[str(vid)][vframe+idx]
            label=self.gt[str(vid)][vframe]
            return win_text,label
        if self.d_type=='val':
            vid=np.histogram(index,self.sum)
            vid = np.where(vid[0]>0)[0][0]+60
            vframe=index-self.sum[vid-60]

            win_text=''
            for idx in range(7): #7 : window size
                if vframe+idx<len(self.text[str(vid)]):
                    win_text+=self.text[str(vid)][vframe+idx]
            label=self.gt[str(vid)][vframe]
            return win_text,label
        if self.d_type=='test':
            vid=np.histogram(index,self.sum)
            vid = np.where(vid[0]>0)[0][0]+90
            vframe=index-self.sum[vid-90]

            win_text=''
            for idx in range(7): #7 : window size
                if vframe+idx<len(self.text[str(vid)]):
                    win_text+=self.text[str(vid)][vframe+idx]
            label=self.gt[str(vid)][vframe]
            return win_text,label
        
            
            



In [5]:
class SampleSequentialSampler(sampler.Sampler):
    """Samples elements sequentially, always in the same order.
    Arguments:
        data_source (Dataset): dataset to sample from
        offset (int): offset between the samples
    """

    def __init__(self, data_source, offset=10):
        self.num_samples = len(data_source) 
        self.offset = offset

    def __iter__(self):
        return iter(np.arange(0, self.num_samples, self.offset ))

    def __len__(self):
        return len(np.arange(0, self.num_samples, self.offset ))


In [6]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = 0.1 * (0.5 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return lr

In [7]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [8]:
def fmeasure(output, target):
    _, pred = output.topk(1, 1, True, True)
    pred = pred.view(-1,1)
    target = target.view(-1,1)

    #overlap = ((pred== 1) + (target == 1)).gt(1)
    #overlap = overlap.view(-1,1)
    TP = len(np.where((pred==1)&(target==1)==True)[0]) # True positive
    FP = len(np.where((pred==1)&(target==0)==True)[0]) # Condition positive = TP + FN
    TN = len(np.where((pred==0)&(target==0)==True)[0])
    FN = len(np.where((pred==0)&(target==1)==True)[0])

    
    #overlap_len = overlap.data.long().sum()
    pred_len = pred.data.long().sum()
    gt_len   =  target.data.long().sum()

    return TP,FP,TN,FN,pred_len, gt_len

In [9]:
def clip_gradient(model, clip_norm):
    """Computes a gradient clipping coefficient based on gradient norm."""
    totalnorm = 0
    for pm in model.parameters():
        if pm.requires_grad:
            #print(pm.size())
            if pm.grad is not None:
                modulenorm = pm.grad.data.norm()
                totalnorm += modulenorm ** 2
    totalnorm = np.sqrt(totalnorm)

    norm = clip_norm / max(totalnorm, clip_norm)
    for p in model.parameters():
        if p.requires_grad:
            if p.grad is not None:
                p.grad.mul_(norm)

In [10]:
#game_id='102844212428895431'
train=chat_ds('train')
val=chat_ds('val')

data load fin
data load fin


In [11]:
sampler1 = torch.utils.data.sampler.WeightedRandomSampler(weights=train.WeightedSampling.tolist(), num_samples=10000)
train_loader=torch.utils.data.DataLoader(train,batch_size=32,sampler=sampler1)
sampler2 =  SampleSequentialSampler(val, 30)
val_loader=torch.utils.data.DataLoader(val,batch_size=32,sampler= sampler2)

In [44]:
for i,j in enumerate(val_loader):
    print(j[1])

60 0
60 30
60 60
60 90
60 120
60 150
60 180
60 210
60 240
60 270
60 300
60 330
60 360
60 390
60 420
60 450
60 480
60 510
60 540
60 570
60 600
60 630
60 660
60 690
60 720
60 750
60 780
60 810
60 840
60 870
60 900
60 930
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
        1, 0, 1, 0, 1, 0, 0, 0])
60 960
60 990
60 1020
60 1050
60 1080
60 1110
60 1140
60 1170
60 1200
60 1230
60 1260
60 1290
60 1320
60 1350
60 1380
60 1410
60 1440
60 1470
61 22
61 52
61 82
61 112
61 142
61 172
61 202
61 232
61 262
61 292
61 322
61 352
61 382
61 412
tensor([0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 1, 0, 0, 0, 0])
61 442
61 472
61 502
61 532
61 562
61 592
61 622
61 652
61 682
61 712
61 742
61 772
61 802
61 832
61 862
61 892
61 922
61 952
61 982
61 1012
61 1042
61 1072
61 1102
61 1132
61 1162
61 1192
61 1222
61 1252
61 1282
61 1312
61 1342
61 1372
tensor([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
 

76 945
76 975
76 1005
76 1035
76 1065
76 1095
76 1125
76 1155
76 1185
76 1215
76 1245
76 1275
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0])
76 1305
76 1335
76 1365
76 1395
76 1425
76 1455
76 1485
76 1515
76 1545
76 1575
76 1605
76 1635
76 1665
76 1695
76 1725
76 1755
76 1785
76 1815
76 1845
76 1875
76 1905
76 1935
76 1965
76 1995
76 2025
76 2055
76 2085
76 2115
76 2145
76 2175
76 2205
76 2235
tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0,
        1, 1, 0, 0, 0, 0, 0, 0])
76 2265
76 2295
76 2325
76 2355
76 2385
76 2415
77 24
77 54
77 84
77 114
77 144
77 174
77 204
77 234
77 264
77 294
77 324
77 354
77 384
77 414
77 444
77 474
77 504
77 534
77 564
77 594
77 624
77 654
77 684
77 714
77 744
77 774
tensor([0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 1, 0, 0])
77 804
77 834
77 864
77 894
77 924
77 954
77 984
77 1014
77 1044
77 1074
77 1104
77 

In [12]:
###### model load #####
model=LangModel().cuda()
criterion = nn.CrossEntropyLoss().cuda()

In [16]:
#train set
optimizer = torch.optim.SGD(model.parameters(), 0.1,momentum=0.9,weight_decay=1e-4)

for epoch in range(60):
    lr = adjust_learning_rate(optimizer, epoch)

    print(epoch)
    model.train()
    for text,labels in (train_loader):
        inputs=linesToTensor(text)
        inputs = inputs.cuda()
        labels = labels.cuda()
        optimizer.zero_grad()
        output=model(inputs)

        loss=criterion(output,labels)
        loss.backward()
        optimizer.step()

    #validation
    model.eval()
    pred_sum = 0#model output
    gt_sum = 0#label
    tp_sum=0
    fp_sum=0
    fn_sum=0
    with torch.no_grad():
        for it, (text,labels) in enumerate(val_loader):
            inputs=linesToTensor(text)
            inputs = inputs.cuda()
            labels = labels.cuda()
            output=model(inputs)

            TP,FP,TN,FN,pred_len, gt_len=fmeasure(output.cpu(),labels.cpu())
            print(TP,FP,TN,FN,pred_len, gt_len)
            tp_sum += TP
            fp_sum += FP
            fn_sum += FN
            pred_sum += pred_len
            gt_sum += gt_len
        if tp_sum>0 and fp_sum>0 and fn_sum>0:
            precision = tp_sum/(tp_sum+fp_sum)
            recall = tp_sum / (tp_sum+fn_sum)
            f1 = (2*precision*recall / (precision + recall)) * 100
            print( tp_sum, fp_sum, fn_sum)
            print('[{}/{}], prec:{}, recall:{}, f1:{}'.format(it, len(val_loader), precision, recall, f1))
            torch.save(model.state_dict(),'{}'.format(str(epoch)+"train"))
        else:
            torch.save(model.state_dict(),'{}'.format(str(epoch)+"train"))


0
0 0 24 8 tensor(0) tensor(8)
0 0 23 9 tensor(0) tensor(9)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 23 9 tensor(0) tensor(9)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 30 2 tensor(0) tensor(2)
0 0 24 8 tensor(0) tensor(8)
0 0 21 11 tensor(0) tensor(11)
0 0 20 12 tensor(0) tensor(12)
0 0 30 2 tensor(0) tensor(2)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 28 4 tensor(0) tensor(4)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 26 6 tensor(0) tensor(6)
0 0 31 1 tensor(0) tensor(1)
0 0 26 6

0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 23 9 tensor(0) tensor(9)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 30 2 tensor(0) tensor(2)
0 0 24 8 tensor(0) tensor(8)
0 0 21 11 tensor(0) tensor(11)
0 0 20 12 tensor(0) tensor(12)
0 0 30 2 tensor(0) tensor(2)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 28 4 tensor(0) tensor(4)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 26 6 tensor(0) tensor(6)
0 0 31 1 tensor(0) tensor(1)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 24 8 tensor(0) tensor(8)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 28 4 tensor(0) tensor(4)
0 0 29 3 tensor(0) tensor(3)
0 0 28 4 tensor(0) tensor(4)
0 0 22 10 tensor(0) tensor(10)
0 0 26 6

0 1 24 7 tensor(1) tensor(7)
1 0 29 2 tensor(1) tensor(3)
0 0 26 6 tensor(0) tensor(6)
0 0 31 1 tensor(0) tensor(1)
0 0 26 6 tensor(0) tensor(6)
0 1 23 8 tensor(1) tensor(8)
0 1 23 8 tensor(1) tensor(8)
0 1 25 6 tensor(1) tensor(6)
1 1 26 4 tensor(2) tensor(5)
0 1 27 4 tensor(1) tensor(4)
0 0 25 7 tensor(0) tensor(7)
1 0 28 3 tensor(1) tensor(4)
0 3 26 3 tensor(3) tensor(3)
0 0 28 4 tensor(0) tensor(4)
1 1 21 9 tensor(2) tensor(10)
0 0 26 6 tensor(0) tensor(6)
1 0 29 2 tensor(1) tensor(3)
0 1 26 5 tensor(1) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 24 8 tensor(0) tensor(8)
0 0 28 4 tensor(0) tensor(4)
0 1 24 7 tensor(1) tensor(7)
0 1 25 6 tensor(1) tensor(6)
0 0 29 3 tensor(0) tensor(3)
0 0 23 9 tensor(0) tensor(9)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 2 22 8 tensor(2) tensor(8)
0 1 24 7 tensor(1) tensor(7)
0 0 20 12 tensor(0) tensor(12)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 1 28 3 tensor(1) tensor(3)
2 0 22 5 te

6 26 0 0 tensor(32) tensor(6)
3 29 0 0 tensor(32) tensor(3)
5 27 0 0 tensor(32) tensor(5)
7 25 0 0 tensor(32) tensor(7)
3 29 0 0 tensor(32) tensor(3)
8 24 0 0 tensor(32) tensor(8)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
6 26 0 0 tensor(32) tensor(6)
3 29 0 0 tensor(32) tensor(3)
9 23 0 0 tensor(32) tensor(9)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
8 24 0 0 tensor(32) tensor(8)
7 25 0 0 tensor(32) tensor(7)
12 20 0 0 tensor(32) tensor(12)
5 27 0 0 tensor(32) tensor(5)
4 28 0 0 tensor(32) tensor(4)
3 29 0 0 tensor(32) tensor(3)
7 22 0 0 tensor(29) tensor(7)
13
1 2 22 7 tensor(3) tensor(8)
1 1 22 8 tensor(2) tensor(9)
0 0 27 5 tensor(0) tensor(5)
0 3 25 4 tensor(3) tensor(4)
1 3 24 4 tensor(4) tensor(5)
1 0 25 6 tensor(1) tensor(7)
1 1 27 3 tensor(2) tensor(4)
0 0 25 7 tensor(0) tensor(7)
2 2 25 3 tensor(4) tensor(5)
2 4 21 5 tensor(6) tensor(7)
3 2 25 2 tensor(5) tensor(5)
3 1 25 3 tensor(4) tensor(6)
0 2 25 5 tensor(2) tensor(5)
2 0 26 4 tensor(2)

0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 29 3 tensor(0) tensor(3)
0 0 22 7 tensor(0) tensor(7)
17
0 1 23 8 tensor(1) tensor(8)
0 0 23 9 tensor(0) tensor(9)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
1 1 26 4 tensor(2) tensor(5)
1 0 25 6 tensor(1) tensor(7)
0 1 27 4 tensor(1) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 1 26 5 tensor(1) tensor(5)
1 1 24 6 tensor(2) tensor(7)
2 0 27 3 tensor(2) tensor(5)
1 0 26 5 tensor(1) tensor(6)
0 1 26 5 tensor(1) tensor(5)
1 0 26 5 tensor(1) tensor(6)
0 2 25 5 tensor(2) tensor(5)
0 0 23 9 tensor(0) tensor(9)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 30 2 tensor(0) tensor(2)
0 0 24 8 tensor(0) tensor(8)
0 0 21 11 tensor(0) tensor(11)
0 0 20 12 tensor(0) tensor(12)
1 1 29 1 tensor(2) tensor(2)
0 1 27 4 tensor(1) tensor(4)
2 2 23 5 tensor(4) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
1 0 26 5 tensor(1) tensor(6)
0 0 28 4 tensor(0) tensor(4)
0 0 28 4 tensor(0) tensor(4)
0 1 24 

3 18 10 1 tensor(21) tensor(4)
5 19 8 0 tensor(24) tensor(5)
6 18 7 1 tensor(24) tensor(7)
2 19 9 2 tensor(21) tensor(4)
6 15 10 1 tensor(21) tensor(7)
4 16 11 1 tensor(20) tensor(5)
6 18 7 1 tensor(24) tensor(7)
5 17 10 0 tensor(22) tensor(5)
5 17 9 1 tensor(22) tensor(6)
2 16 11 3 tensor(18) tensor(5)
6 18 8 0 tensor(24) tensor(6)
3 18 9 2 tensor(21) tensor(5)
2 4 19 7 tensor(6) tensor(9)
6 21 5 0 tensor(27) tensor(6)
5 17 7 3 tensor(22) tensor(8)
2 19 11 0 tensor(21) tensor(2)
6 18 6 2 tensor(24) tensor(8)
10 18 3 1 tensor(28) tensor(11)
6 11 9 6 tensor(17) tensor(12)
1 16 14 1 tensor(17) tensor(2)
3 17 11 1 tensor(20) tensor(4)
7 16 9 0 tensor(23) tensor(7)
3 21 8 0 tensor(24) tensor(3)
5 19 8 0 tensor(24) tensor(5)
4 20 6 2 tensor(24) tensor(6)
4 23 5 0 tensor(27) tensor(4)
2 15 13 2 tensor(17) tensor(4)
6 20 5 1 tensor(26) tensor(7)
2 21 8 1 tensor(23) tensor(3)
3 17 9 3 tensor(20) tensor(6)
0 22 9 1 tensor(22) tensor(1)
5 17 9 1 tensor(22) tensor(6)
6 16 8 2 tensor(22) tensor(8)

6 26 0 0 tensor(32) tensor(6)
5 27 0 0 tensor(32) tensor(5)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
4 28 0 0 tensor(32) tensor(4)
3 29 0 0 tensor(32) tensor(3)
4 28 0 0 tensor(32) tensor(4)
10 22 0 0 tensor(32) tensor(10)
6 26 0 0 tensor(32) tensor(6)
3 29 0 0 tensor(32) tensor(3)
5 27 0 0 tensor(32) tensor(5)
7 25 0 0 tensor(32) tensor(7)
3 29 0 0 tensor(32) tensor(3)
8 24 0 0 tensor(32) tensor(8)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
6 26 0 0 tensor(32) tensor(6)
3 29 0 0 tensor(32) tensor(3)
9 23 0 0 tensor(32) tensor(9)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
8 24 0 0 tensor(32) tensor(8)
7 25 0 0 tensor(32) tensor(7)
12 20 0 0 tensor(32) tensor(12)
5 27 0 0 tensor(32) tensor(5)
4 28 0 0 tensor(32) tensor(4)
3 29 0 0 tensor(32) tensor(3)
7 22 0 0 tensor(29) tensor(7)
27
0 0 24 8 tensor(0) tensor(8)
0 0 23 9 tensor(0) tensor(9)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7

6 26 0 0 tensor(32) tensor(6)
3 29 0 0 tensor(32) tensor(3)
9 23 0 0 tensor(32) tensor(9)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
8 24 0 0 tensor(32) tensor(8)
7 25 0 0 tensor(32) tensor(7)
12 20 0 0 tensor(32) tensor(12)
5 27 0 0 tensor(32) tensor(5)
4 28 0 0 tensor(32) tensor(4)
3 29 0 0 tensor(32) tensor(3)
7 22 0 0 tensor(29) tensor(7)
31
0 0 24 8 tensor(0) tensor(8)
0 0 23 9 tensor(0) tensor(9)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 23 9 tensor(0) tensor(9)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 30 2 tensor(0) tensor(2)
0 0 24 8 tensor(0) tensor(8)
0 0 21 11 tensor(0) tensor(11)
0 0 20 12 tensor(0) tens

5 27 0 0 tensor(32) tensor(5)
7 25 0 0 tensor(32) tensor(7)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
5 27 0 0 tensor(32) tensor(5)
7 25 0 0 tensor(32) tensor(7)
5 27 0 0 tensor(32) tensor(5)
6 26 0 0 tensor(32) tensor(6)
5 27 0 0 tensor(32) tensor(5)
6 26 0 0 tensor(32) tensor(6)
5 27 0 0 tensor(32) tensor(5)
9 23 0 0 tensor(32) tensor(9)
6 26 0 0 tensor(32) tensor(6)
8 24 0 0 tensor(32) tensor(8)
2 30 0 0 tensor(32) tensor(2)
8 24 0 0 tensor(32) tensor(8)
11 21 0 0 tensor(32) tensor(11)
12 20 0 0 tensor(32) tensor(12)
2 30 0 0 tensor(32) tensor(2)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
3 29 0 0 tensor(32) tensor(3)
5 27 0 0 tensor(32) tensor(5)
6 26 0 0 tensor(32) tensor(6)
4 28 0 0 tensor(32) tensor(4)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
3 29 0 0 tensor(32) tensor(3)
6 26 0 0 tensor(32) tensor(6)
1 31 0 0 tensor(32) tensor(1)
6 26 0 0 tensor(32) tensor(6)
8 24 0 0 tensor(32) tensor(8)
8 24 0 0 tensor(32) tensor(8)
6 26 0

0 0 20 12 tensor(0) tensor(12)
0 0 30 2 tensor(0) tensor(2)
0 0 28 4 tensor(0) tensor(4)
1 0 25 6 tensor(1) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
1 0 26 5 tensor(1) tensor(6)
0 0 28 4 tensor(0) tensor(4)
0 0 28 4 tensor(0) tensor(4)
0 1 24 7 tensor(1) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 26 6 tensor(0) tensor(6)
0 0 31 1 tensor(0) tensor(1)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 24 8 tensor(0) tensor(8)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
1 0 28 3 tensor(1) tensor(4)
0 2 27 3 tensor(2) tensor(3)
0 0 28 4 tensor(0) tensor(4)
0 0 22 10 tensor(0) tensor(10)
0 0 26 6 tensor(0) tensor(6)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 24 8 tensor(0) tensor(8)
0 0 28 4 tensor(0) tensor(4)
0 1 24 7 tensor(1) tensor(7)
0 0 26 6 tensor(0) tensor(6)
0 0 29 3 tensor(0) tensor(3)
0 0 23 9 t

0 0 26 6 tensor(0) tensor(6)
0 0 31 1 tensor(0) tensor(1)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 24 8 tensor(0) tensor(8)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 28 4 tensor(0) tensor(4)
0 0 29 3 tensor(0) tensor(3)
0 0 28 4 tensor(0) tensor(4)
0 0 22 10 tensor(0) tensor(10)
0 0 26 6 tensor(0) tensor(6)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 24 8 tensor(0) tensor(8)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 26 6 tensor(0) tensor(6)
0 0 29 3 tensor(0) tensor(3)
0 0 23 9 tensor(0) tensor(9)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 24 8 tensor(0) tensor(8)
0 0 25 7 tensor(0) tensor(7)
0 0 20 12 tensor(0) tensor(12)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 29 3 tensor(0) tensor(3)
0 0 22 7 tensor(0) tensor(7)
44
0 0 24 8 tensor(0) tensor(8)
0 0 23 

3 29 0 0 tensor(32) tensor(3)
8 24 0 0 tensor(32) tensor(8)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
6 26 0 0 tensor(32) tensor(6)
3 29 0 0 tensor(32) tensor(3)
9 23 0 0 tensor(32) tensor(9)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
8 24 0 0 tensor(32) tensor(8)
7 25 0 0 tensor(32) tensor(7)
12 20 0 0 tensor(32) tensor(12)
5 27 0 0 tensor(32) tensor(5)
4 28 0 0 tensor(32) tensor(4)
3 29 0 0 tensor(32) tensor(3)
7 22 0 0 tensor(29) tensor(7)
48
0 0 24 8 tensor(0) tensor(8)
0 0 23 9 tensor(0) tensor(9)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 23 9 tensor(0) tensor(9)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) ten

2 16 12 2 tensor(18) tensor(4)
1 17 12 2 tensor(18) tensor(3)
5 11 11 2 tensor(16) tensor(7)
226 931 153
[64/65], prec:0.19533275713050993, recall:0.5963060686015831, f1:29.427083333333332
52
8 24 0 0 tensor(32) tensor(8)
9 23 0 0 tensor(32) tensor(9)
5 27 0 0 tensor(32) tensor(5)
4 28 0 0 tensor(32) tensor(4)
5 27 0 0 tensor(32) tensor(5)
7 25 0 0 tensor(32) tensor(7)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
5 27 0 0 tensor(32) tensor(5)
7 25 0 0 tensor(32) tensor(7)
5 27 0 0 tensor(32) tensor(5)
6 26 0 0 tensor(32) tensor(6)
5 27 0 0 tensor(32) tensor(5)
6 26 0 0 tensor(32) tensor(6)
5 27 0 0 tensor(32) tensor(5)
9 23 0 0 tensor(32) tensor(9)
6 25 1 0 tensor(31) tensor(6)
8 24 0 0 tensor(32) tensor(8)
2 29 1 0 tensor(31) tensor(2)
8 24 0 0 tensor(32) tensor(8)
11 21 0 0 tensor(32) tensor(11)
11 20 0 1 tensor(31) tensor(12)
2 30 0 0 tensor(32) tensor(2)
4 28 0 0 tensor(32) tensor(4)
7 25 0 0 tensor(32) tensor(7)
3 29 0 0 tensor(32) tensor(3)
5 27 0 0 tensor(32) tens

0 0 25 7 tensor(0) tensor(7)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 23 9 tensor(0) tensor(9)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 30 2 tensor(0) tensor(2)
0 0 24 8 tensor(0) tensor(8)
0 0 21 11 tensor(0) tensor(11)
0 0 20 12 tensor(0) tensor(12)
0 0 30 2 tensor(0) tensor(2)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 27 5 tensor(0) tensor(5)
0 0 26 6 tensor(0) tensor(6)
0 0 28 4 tensor(0) tensor(4)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 29 3 tensor(0) tensor(3)
0 0 26 6 tensor(0) tensor(6)
0 0 31 1 tensor(0) tensor(1)
0 0 26 6 tensor(0) tensor(6)
0 0 24 8 tensor(0) tensor(8)
0 0 24 8 tensor(0) tensor(8)
0 0 26 6 tensor(0) tensor(6)
0 0 27 5 tensor(0) tensor(5)
0 0 28 4 tensor(0) tensor(4)
0 0 25 7 tensor(0) tensor(7)
0 0 28 4 tensor(0) tensor(4)
0 0 29 3 tensor(0) tensor(3)
0 0 28 4 t

In [23]:
#test
test=chat_ds('test')
test_loader=torch.utils.data.DataLoader(test,batch_size=32)
checkpoint=torch.load('./21train',map_location='cuda:0')
model.load_state_dict(checkpoint)
model.cuda()
model.eval()
pred_sum = 0#model output
gt_sum = 0#label
tp_sum=0
fp_sum=0
fn_sum=0
with torch.no_grad():
    for it, (text,labels) in enumerate(test_loader):
        inputs=linesToTensor(text)
        inputs = inputs.cuda()
        labels = labels.cuda()
        output=model(inputs)

        TP,FP,TN,FN,pred_len, gt_len=fmeasure(output.cpu(),labels.cpu())
        print(TP,FP,TN,FN,pred_len, gt_len)
        tp_sum += TP
        fp_sum += FP
        fn_sum += FN
        pred_sum += pred_len
        gt_sum += gt_len
    if tp_sum>0 and fp_sum>0 and fn_sum>0:
        precision = tp_sum/(tp_sum+fp_sum)
        recall = tp_sum / (tp_sum+fn_sum)
        f1 = (2*precision*recall / (precision + recall)) * 100
        print( tp_sum, fp_sum, fn_sum)
        print('[{}/{}], prec:{}, recall:{}, f1:{}'.format(it, len(val_loader), precision, recall, f1))

data load fin
0 16 16 0 tensor(16) tensor(0)
0 22 10 0 tensor(22) tensor(0)
0 16 16 0 tensor(16) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 12 20 0 tensor(12) tensor(0)
0 20 12 0 tensor(20) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 14 18 0 tensor(14) tensor(0)
0 16 16 0 tensor(16) tensor(0)
0 18 14 0 tensor(18) tensor(0)
0 18 14 0 tensor(18) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 11 21 0 tensor(11) tensor(0)
0 9 23 0 tensor(9) tensor(0)
0 12 20 0 tensor(12) tensor(0)
0 9 23 0 tensor(9) tensor(0)
3 11 14 4 tensor(14) tensor(7)
0 12 20 0 tensor(12) tensor(0)
0 8 24 0 tensor(8) tensor(0)
0 16 16 0 tensor(16) tensor(0)
0 13 19 0 tensor(13) tensor(0)
0 19 13 0 tensor(19) tensor(0)
6 6 12 8 tensor(12) tensor(14)
7 12 7 6 tensor(19) tensor(13)
0 20 12 0 tensor(20) tensor(0)
6 14 9 3 tensor(20) tensor(9)
9 18 5 0 tensor(27) tensor(9)
0 18 14 0 tensor(18) tensor(0)
0 24 8 0 tensor(24) tensor(0)
0 11 21 0 tensor(11) tensor(0)
9 10 7 6 tensor(19) tensor(15)
0 19 13 0 tensor(19) tensor(0)
0 1

0 24 8 0 tensor(24) tensor(0)
19 12 0 1 tensor(31) tensor(20)
9 11 2 10 tensor(20) tensor(19)
0 20 12 0 tensor(20) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 25 7 0 tensor(25) tensor(0)
0 18 14 0 tensor(18) tensor(0)
0 12 20 0 tensor(12) tensor(0)
0 21 11 0 tensor(21) tensor(0)
7 10 13 2 tensor(17) tensor(9)
9 21 2 0 tensor(30) tensor(9)
0 21 11 0 tensor(21) tensor(0)
0 22 10 0 tensor(22) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 16 16 0 tensor(16) tensor(0)
0 14 18 0 tensor(14) tensor(0)
3 19 10 0 tensor(22) tensor(3)
23 0 0 9 tensor(23) tensor(32)
0 28 4 0 tensor(28) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 19 13 0 tensor(19) tensor(0)
20 6 3 3 tensor(26) tensor(23)
4 19 9 0 tensor(23) tensor(4)
2 27 3 0 tensor(29) tensor(2)
4 24 3 1 tensor(28) tensor(5)
22 0 0 10 tensor(22) tensor(32)
15 10 6 1 tensor(25) tensor(16)
0 23 9 0 tensor(23) tensor(0)
0 18 14 0 tensor(18) tensor(0)
8 10 13 1 tensor(18) tensor(9)
22 4 1 5 tensor(26) tensor(27)
0 24 8 0 tensor(24) tensor(0)
1 30 1 0 tens

0 18 14 0 tensor(18) tensor(0)
0 13 19 0 tensor(13) tensor(0)
0 22 10 0 tensor(22) tensor(0)
0 18 14 0 tensor(18) tensor(0)
8 10 7 7 tensor(18) tensor(15)
10 19 2 1 tensor(29) tensor(11)
0 23 9 0 tensor(23) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 14 18 0 tensor(14) tensor(0)
4 11 11 6 tensor(15) tensor(10)
0 20 12 0 tensor(20) tensor(0)
0 20 12 0 tensor(20) tensor(0)
3 12 12 5 tensor(15) tensor(8)
18 0 0 14 tensor(18) tensor(32)
20 0 0 12 tensor(20) tensor(32)
14 9 2 7 tensor(23) tensor(21)
0 22 10 0 tensor(22) tensor(0)
2 12 14 4 tensor(14) tensor(6)
24 0 0 8 tensor(24) tensor(32)
30 0 0 2 tensor(30) tensor(32)
18 12 2 0 tensor(30) tensor(18)
0 25 7 0 tensor(25) tensor(0)
0 25 7 0 tensor(25) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 25 7 0 tensor(25) tensor(0)
0 25 7 0 tensor(25) tensor(0)
0 21 11 0 tensor(21) tensor(0)
14 7 8 3 tensor(21) tensor(17)
0 29 3 0 tensor(29) tensor(0)
0 27 5 0 tensor(27) tensor(0)
18 11 3 0 

0 18 14 0 tensor(18) tensor(0)
6 21 3 2 tensor(27) tensor(8)
7 14 9 2 tensor(21) tensor(9)
7 21 4 0 tensor(28) tensor(7)
29 0 0 3 tensor(29) tensor(32)
7 16 8 1 tensor(23) tensor(8)
0 25 7 0 tensor(25) tensor(0)
0 23 9 0 tensor(23) tensor(0)
0 23 9 0 tensor(23) tensor(0)
0 16 16 0 tensor(16) tensor(0)
8 11 8 5 tensor(19) tensor(13)
18 10 1 3 tensor(28) tensor(21)
3 21 6 2 tensor(24) tensor(5)
0 20 12 0 tensor(20) tensor(0)
14 9 5 4 tensor(23) tensor(18)
0 25 7 0 tensor(25) tensor(0)
0 20 12 0 tensor(20) tensor(0)
0 16 16 0 tensor(16) tensor(0)
0 25 7 0 tensor(25) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 19 13 0 tensor(19) tensor(0)
1 22 8 1 tensor(23) tensor(2)
19 10 0 3 tensor(29) tensor(22)
5 15 7 5 tensor(20) tensor(10)
0 28 4 0 tensor(28) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 24 8 0 tensor(24) tensor(0)
3 22 5 2 tensor(25) tensor(5)
26 3 0 3 tensor(29) tensor(29)
0 26 6 0 tensor(26) tensor(0)
0 18 14 0 tensor(18) tensor(0)
0 21 11 0 tensor(21) tensor(0)
0 25 7 0 tensor(25) t

3 13 13 3 tensor(16) tensor(6)
0 26 6 0 tensor(26) tensor(0)
0 17 15 0 tensor(17) tensor(0)
0 15 17 0 tensor(15) tensor(0)
7 12 9 4 tensor(19) tensor(11)
9 18 3 2 tensor(27) tensor(11)
0 23 9 0 tensor(23) tensor(0)
1 16 13 2 tensor(17) tensor(3)
24 0 0 8 tensor(24) tensor(32)
9 16 4 3 tensor(25) tensor(12)
1 17 14 0 tensor(18) tensor(1)
9 10 6 7 tensor(19) tensor(16)
1 18 11 2 tensor(19) tensor(3)
3 16 13 0 tensor(19) tensor(3)
0 14 18 0 tensor(14) tensor(0)
0 12 20 0 tensor(12) tensor(0)
0 27 5 0 tensor(27) tensor(0)
0 24 8 0 tensor(24) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 13 19 0 tensor(13) tensor(0)
2 11 14 5 tensor(13) tensor(7)
6 11 8 7 tensor(17) tensor(13)
1 12 19 0 tensor(13) tensor(1)
2 21 8 1 tensor(23) tensor(3)
0 20 12 0 tensor(20) tensor(0)
0 26 6 0 tensor(26) tensor(0)
0 19 13 0 tensor(19) tensor(0)
0 15 17 0 tensor(15) tensor(0)
0 10 22 0 tensor(10) tensor(0)
13 8 5 6 tensor(21) tensor(19)
20 4 0 8 tensor(24) tensor(28)
0 18 14 0 tensor(18) tensor(0)
0 17 15 0 tens

In [18]:
len(test_loader)

1496

## Chat data .json & label.json data


In [43]:
import pandas as pd
metadata=pd.read_csv('../share_data/new_metadata.csv')


In [44]:
import os
import math
data1={}
for idx,gi in enumerate(os.listdir("./result")):
    print(gi)
    gameid=gi.split('_')[0]
    chat_data=pd.read_csv('./result/'+gi)
    delay=int(gi.split('_')[1])
    duration=float(gi.split('_')[2][:-4])
    gameid=int(gi.split('_')[0])
    meta=metadata.loc[metadata['gameid']==gameid]

    st=meta['new_st']
    et=meta['new_et']

    chat=[' ' for i in range(math.floor(duration)+math.floor(delay)+1)]


    init_time=int(st)-int(delay)
    for c_idx,c in enumerate(chat_data['duration']):
        chat[int(c)-init_time]=chat[int(c)-init_time]+str(chat_data['chat'][c_idx])+'\n'
    data1[idx]=chat
    


102844412704890154_0_2079.0.csv
102844224147717245_0_1836.000000000001.csv
102844235748703677_0_1829.0.csv
102844412723567946_0_2159.0.csv
102844412708953395_0_2060.0.csv
102844212429092040_179_2254.0.csv
102844341908026005_0_2917.999999999998.csv
102844235753749959_0_2218.0.csv
102844294674286796_0_2680.0.csv
102844412721339716_0_1821.0.csv
102844341906256529_0_1751.0000000000018.csv
102844412722519367_0_2072.0.csv
102844235747982779_0_2731.0.csv
102844412716686654_0_1647.0.csv
102844401154168486_0_1924.0.csv
102844412704496937_0_2632.0.csv
102844212430075086_0_2282.999999999999.csv
102844212429288649_0_2922.0.csv
102844235750997440_1_1675.0.csv
102844294670026952_0_1708.0.csv
102844224148896895_0_1807.0.csv
102844294669109446_0_1562.0.csv
102844212431516886_0_1821.0.csv
102844294674876621_0_1718.0.csv
102844401154430631_0_1655.0000000000036.csv
102844401151874719_0_2103.0.csv
102844412712164667_0_2088.0.csv
102844294670878922_0_2199.0.csv
102844412705545516_0_1541.0.csv
1028444127052

In [45]:
import json
with open('chat.json', 'a') as f:
    json.dump(data1, f, indent=2)

In [46]:
with open('../share_data/label.json','r') as f:
    labels=json.load(f)
data2={}
for idx,gi in enumerate(os.listdir("./result")):
    if '.csv' in gi:
        print(gi)
        gameid=gi.split('_')[0]
        chat_data=pd.read_csv('./result/'+gi)
        delay=int(gi.split('_')[1])
        duration=float(gi.split('_')[2][:-4])
        gameid=int(gi.split('_')[0])
        meta=metadata.loc[metadata['gameid']==gameid]

        st=meta['new_st']
        et=meta['new_et']

        gt=[0 for i in range(math.floor(duration)+math.floor(delay)+1)]    
        label=labels[str(gameid)]
        for (x,y) in label:
            start=int(x.split(':')[0])*60+int(x.split(':')[1])
            end=int(y.split(':')[0])*60+int(y.split(':')[1])
            for i in range(start,end+1):
                gt[i]=1  
        data2[idx]=gt


102844412704890154_0_2079.0.csv
102844224147717245_0_1836.000000000001.csv
102844235748703677_0_1829.0.csv
102844412723567946_0_2159.0.csv
102844412708953395_0_2060.0.csv
102844212429092040_179_2254.0.csv
102844341908026005_0_2917.999999999998.csv
102844235753749959_0_2218.0.csv
102844294674286796_0_2680.0.csv
102844412721339716_0_1821.0.csv
102844341906256529_0_1751.0000000000018.csv
102844412722519367_0_2072.0.csv
102844235747982779_0_2731.0.csv
102844412716686654_0_1647.0.csv
102844401154168486_0_1924.0.csv
102844412704496937_0_2632.0.csv
102844212430075086_0_2282.999999999999.csv
102844212429288649_0_2922.0.csv
102844235750997440_1_1675.0.csv
102844294670026952_0_1708.0.csv
102844224148896895_0_1807.0.csv
102844294669109446_0_1562.0.csv
102844212431516886_0_1821.0.csv
102844294674876621_0_1718.0.csv
102844401154430631_0_1655.0000000000036.csv
102844401151874719_0_2103.0.csv
102844412712164667_0_2088.0.csv
102844294670878922_0_2199.0.csv
102844412705545516_0_1541.0.csv
1028444127052

In [47]:
import json
with open('label.json', 'a') as f:
    json.dump(data2, f, indent=2)

## Old ver

In [None]:
data=[]
for idx,gi in enumerate(os.listdir("./result")):
        print(gi)
        gameid=gi.split('_')[0]
        chat_data=pd.read_csv('./result/'+gi)
        delay=int(gi.split('_')[1])
        duration=float(gi.split('_')[2][:-4])
        gameid=int(gi.split('_')[0])
        meta=metadata.loc[metadata['gameid']==gameid]
        
        st=meta['new_st']
        et=meta['new_et']
        
        data=['' for i in range(math.floor(duration)+math.floor(delay)+1)]


        init_time=int(st)-int(delay)
        for c_idx,c in enumerate(chat_data['duration']):
            data[int(c)-init_time]=data[int(c)-init_time]+str(chat_data['chat'][c_idx])+'\n'
        data=np.array(data)
        np.save('./exp_chat/{}'.format(gameid),data)


In [None]:
os.listdir("./result")
#metadata=pd.read_csv('new_metadata.csv')

In [None]:
metadata.loc[metadata['gameid']==102844212430271695]['new_st']

In [2]:
metadata

NameError: name 'metadata' is not defined

In [45]:
if __name__ == "__main__":
    chat_data='./exp_chat/102844412706659630.npy'
    chat_arr=np.load(chat_data)

In [49]:
chat_arr[4]

'Slow down kobe\nI love Kobe hahah\nCOPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime COPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime\nI CALLED IT ON R/RENGARMAINS\nRangar top Kreygasm Kreygasm Kreygasm\nMorphinTime COPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC !\nmonkaW HOLD ME CHAT\nQUICKSHOT PepeLaugh\nTREVOR 1\nHEY GUYS WHEN IS NA PLAYING? Pepega\nLULW\nHeHe\nCOPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC\nJebaited Jebaited\nCOPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime COPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime\n'

In [29]:
aa=linesToTensor(chat_arr)

In [35]:
aa.size()

torch.Size([2435, 642, 100])

In [42]:
aa[0][-2]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [53]:
data=[]
metadata=pd.read_csv('../share_data/new_metadata.csv')
for idx,gi in enumerate(['102844412706659630_0_1567.0.csv']):
        print(gi)
        gameid=gi.split('_')[0]
        chat_data=pd.read_csv('./result/'+gi)
        delay=int(gi.split('_')[1])
        duration=float(gi.split('_')[2][:-4])
        gameid=int(gi.split('_')[0])
        meta=metadata.loc[metadata['gameid']==gameid]
        
        st=meta['new_st']
        et=meta['new_et']
        
        data=['' for i in range(math.floor(duration)+math.floor(delay)+1)]


        init_time=int(st)-int(delay)
        for c_idx,c in enumerate(chat_data['duration']):
            data[int(c)-init_time]=data[int(c)-init_time]+str(chat_data['chat'][c_idx])+'\n'
        data=np.array(data)
        np.save('./exp_chat/{}'.format('test'),data)


102844412706659630_0_1567.0.csv


In [11]:
data1.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112])

In [15]:
data2[0]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [48]:
        with open('./label.json') as f1:  
            gt=json.load(f1)

In [36]:
gt[0]

KeyError: 0

In [34]:
        with open('./chat.json') as f1:  
            gt=json.load(f1)

In [36]:
len(gt['1'])

1837

In [42]:
os.listdir('./result/')

['102844412704890154_0_2079.0.csv',
 '102844224147717245_0_1836.000000000001.csv',
 '102844235748703677_0_1829.0.csv',
 '102844412723567946_0_2159.0.csv',
 '102844412708953395_0_2060.0.csv',
 '102844212429092040_179_2254.0.csv',
 '102844341908026005_0_2917.999999999998.csv',
 '102844235753749959_0_2218.0.csv',
 '102844294674286796_0_2680.0.csv',
 '102844412721339716_0_1821.0.csv',
 '102844341906256529_0_1751.0000000000018.csv',
 '102844412722519367_0_2072.0.csv',
 '102844235747982779_0_2731.0.csv',
 '102844412716686654_0_1647.0.csv',
 '102844401154168486_0_1924.0.csv',
 '102844412704496937_0_2632.0.csv',
 '102844212430075086_0_2282.999999999999.csv',
 '102844212429288649_0_2922.0.csv',
 '102844235750997440_1_1675.0.csv',
 '102844294670026952_0_1708.0.csv',
 '102844224148896895_0_1807.0.csv',
 '102844294669109446_0_1562.0.csv',
 '102844212431516886_0_1821.0.csv',
 '102844294674876621_0_1718.0.csv',
 '102844401154430631_0_1655.0000000000036.csv',
 '102844401151874719_0_2103.0.csv',
 '102