In [2]:
import numpy as np
import torch
import string
import torch.nn as nn
from torch.autograd import Variable
from torch.nn.utils.rnn import pad_packed_sequence as unpack
from torch.nn.utils.rnn import pack_padded_sequence as pack
import math
import torch.utils.data as data
import json
import os
import pandas as pd
import random
import copy
import torch.utils.data.sampler as sampler
import visdom


In [3]:
all_letters = string.printable
n_letters = len(all_letters)
def letterToIndex(letter):
    return all_letters.find(letter)
def linesToTensor(lines):
    line_length = 15000
    if max([ len(line) for line in lines]) < line_length:
        line_length = max( [len(line) for line in lines] )
    #line_length = max( [len(line) for line in lines] )
    #xx = [max(len(line)-15000,0) for line in lines]
    #print float(np.sum(xx)) / float(np.sum([len(line) for line in lines]))
    tensor = torch.zeros(len(lines), line_length, n_letters)
    for b, line in enumerate(lines): 
        line = line[:15000]
        for li, letter in enumerate(line):
            tensor[b][li + line_length - len(line)][letterToIndex(letter)] = 1 #뒤로 맞춰줌

    return tensor

In [4]:
class LangModel(nn.Module):
    def __init__(self, preTrained='True', input=100):
        super(LangModel, self).__init__()

        # Language Model
        self.lang = nn.LSTM(input, 128, 3, batch_first=True) 
 
        # Output 
        self.output = nn.Linear(128, 2)
        n = self.output.in_features * self.output.out_features
        self.output.weight.data.normal_(0, math.sqrt(2. / n))
        self.output.bias.data.zero_()

    def forward(self, text):
        text.cuda()
        h0 = ( Variable(torch.zeros(3, text.size(0), 128)).cuda(),  Variable(torch.zeros(3, text.size(0), 128)).cuda())

        lang_feature, hn = self.lang(text, h0 )
        lang_feature = lang_feature[:,-1,:]

        pred = self.output(lang_feature)
        return pred


In [None]:
import torch.utils.data as data

class chat_ds(data.Dataset):
    def __init__(self,d_type):
        self.gt_range =  1-0.25
        self.d_type=d_type
        with open('./label.json') as f1:  
            self.gt=json.load(f1)
        with open('./chat.json') as f2:  
            self.text=json.load(f2)
        self.sample=list(map(int,list(self.gt.keys())))
        if d_type=='train':
            self.sample=self.sample[:60]
        if d_type=='val':
            self.sample=self.sample[60:90]
        if d_type=='test':
            self.sample=self.sample[90:]
            
        self.WeightedSampling=[]
        for i in self.sample:
            self.WeightedSampling.extend(copy.copy(self.gt[str(i)]))
        
        sampling = np.array(self.WeightedSampling)
        neg_idx = np.where(sampling == 0)[0] #general
        pos_idx = np.where(sampling == 1)[0] #highlight
        sampling = sampling.astype(np.float32)

        begin_pos = 0 
        hl_frames = []
        for it, cur_pos in enumerate(pos_idx):
            if it+1 < len(pos_idx): 
                if((pos_idx[it+1] - cur_pos) > 1):#cur_pos와 cur_pos+1 간격이 1보다 크면, 즉 다른 구간이면
                    begin = int((it+1 - begin_pos) * self.gt_range) + begin_pos
                    hl_frames.extend( pos_idx[begin: it] ) #한구간의 하이라이트 25%만 사용하겠다.
                    begin_pos = it+1


        sampling.fill(0)
        sampling[neg_idx] = len(sampling) / float(len(neg_idx))
       # self.WeightedSampling[pos_idx] = len(self.WeightedSampling) / float(len(pos_idx))
        sampling[hl_frames] = len(sampling) / float(len(hl_frames))
        self.WeightedSampling = sampling

        self.sum=np.insert(np.cumsum([len(self.gt[str(i)]) for i in self.sample]),0,0)
        print("data load fin")

        
    def __len__(self):
        return len(self.WeightedSampling)
    def __getitem__(self,index):
        if self.d_type=='train':
            vid=np.histogram(index,self.sum)
            vid = np.where(vid[0]>0)[0][0]
            vframe=index-self.sum[vid]

            win_text=''
            for idx in range(7): #7 : window size
                if vframe+idx<len(self.text[str(vid)]):
                    win_text+=self.text[str(vid)][vframe+idx]+'\n'
            label=self.gt[str(vid)][vframe]
            return win_text,label
        if self.d_type=='val':
            vid=np.histogram(index,self.sum)
            vid = np.where(vid[0]>0)[0][0]+60
            vframe=index-self.sum[vid-60]

            win_text=''
            for idx in range(7): #7 : window size
                if vframe+idx<len(self.text[str(vid)]):
                    win_text+=self.text[str(vid)][vframe+idx]+'\n'
            label=self.gt[str(vid)][vframe]
            return win_text,label
        if self.d_type=='test':
            vid=np.histogram(index,self.sum)
            vid = np.where(vid[0]>0)[0][0]+90
            vframe=index-self.sum[vid-90]

            win_text=''
            for idx in range(7): #7 : window size
                if vframe+idx<len(self.text[str(vid)]):
                    win_text+=self.text[str(vid)][vframe+idx]+'\n'
            label=self.gt[str(vid)][vframe]
            return win_text,label
        
            
            



In [45]:
class SampleSequentialSampler(sampler.Sampler):
    """Samples elements sequentially, always in the same order.
    Arguments:
        data_source (Dataset): dataset to sample from
        offset (int): offset between the samples
    """

    def __init__(self, data_source, offset=10):
        self.num_samples = len(data_source) 
        self.offset = offset

    def __iter__(self):
        return iter(np.arange(0, self.num_samples, self.offset ))

    def __len__(self):
        return len(np.arange(0, self.num_samples, self.offset ))


In [46]:
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    if epoch<20:
        lr = 0.01
    else:
        lr = 0.001
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

    return lr

In [47]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

In [48]:
def fmeasure(output, target):
    _, pred = output.topk(1, 1, True, True)
    pred = pred.view(-1,1)
    target = target.view(-1,1)

    #overlap = ((pred== 1) + (target == 1)).gt(1)
    #overlap = overlap.view(-1,1)
    TP = len(np.where((pred==1)&(target==1)==True)[0]) # True positive
    FP = len(np.where((pred==1)&(target==0)==True)[0]) # Condition positive = TP + FN
    TN = len(np.where((pred==0)&(target==0)==True)[0])
    FN = len(np.where((pred==0)&(target==1)==True)[0])

    
    #overlap_len = overlap.data.long().sum()
    pred_len = pred.data.long().sum()
    gt_len   =  target.data.long().sum()

    return TP,FP,TN,FN,pred_len, gt_len

In [49]:
def clip_gradient(model, clip_norm):
    """Computes a gradient clipping coefficient based on gradient norm."""
    totalnorm = 0
    for pm in model.parameters():
        if pm.requires_grad:
            if pm.grad is not None:
                modulenorm = pm.grad.data.norm()
                totalnorm += modulenorm ** 2

    totalnorm = np.sqrt(totalnorm.cpu())
    norm = clip_norm / max(totalnorm, clip_norm)
    for p in model.parameters():
        if p.requires_grad:
            if p.grad is not None:
                p.grad.mul_(norm)

In [58]:
#game_id='102844212428895431'
train=chat_ds('train')
val=chat_ds('val')

data load fin
data load fin


In [59]:
sampler1 = torch.utils.data.sampler.WeightedRandomSampler(weights=train.WeightedSampling.tolist(), num_samples=10000)
train_loader=torch.utils.data.DataLoader(train,batch_size=32,sampler=sampler1)
sampler2 =  SampleSequentialSampler(val, 30)
val_loader=torch.utils.data.DataLoader(val,batch_size=32,sampler= sampler2)

In [60]:
###### model load #####
model=LangModel().cuda()
criterion = nn.CrossEntropyLoss().cuda()

In [61]:
weight_dir='./1e-2_1e-3_newdata2/'

In [None]:
#train set

if not os.path.exists(weight_dir):
    os.makedirs(weight_dir)
optimizer = torch.optim.SGD(model.parameters(), 0.01,momentum=0.9,weight_decay=1e-4)
with open(weight_dir+'train_result','a') as f:
    f.write('========= >20 0.01 / <20 0.001 ================\n')


best_losses=1000000
best_acc
for epoch in range(60):
    lr = adjust_learning_rate(optimizer, epoch)
    train_loss=0
    print(epoch)
    model.train()
    for i, (text,labels) in enumerate(train_loader):
        inputs=linesToTensor(text)
        inputs = Variable(inputs).cuda()
        labels = Variable(labels).cuda()
        optimizer.zero_grad()
        output=model(inputs)

        loss=criterion(output,labels)
        train_loss+=loss
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.) #rnn 계열 gradient 장치
        loss.backward()
        optimizer.step()


    #validation
    model.eval()
    val_loss=0
    acc=0
    gt_sum=0
    with open(weight_dir+'train_result','a') as f:
        with torch.no_grad():
            for it, (text,labels) in enumerate(val_loader):
                inputs=linesToTensor(text)
                inputs = Variable(inputs).cuda()
                labels = Variable(labels).cuda()
                output=model(inputs)
                loss=criterion(output,labels)
                val_loss+=loss
                TP,FP,TN,FN,pred_len, gt_len=fmeasure(output.cpu(),labels.cpu())
                acc=acc+TP+TN
                gt_sum+=len(output)
            val_acc=acc/gt_sum
            print("epoch {} train_loss : {} , val_loss : {}, val_acc : {}".format(epoch,train_loss,val_loss,val_acc))
            f.write("epoch {} train_loss : {} , val_loss : {}, val_acc : {}\n".format(epoch,train_loss,val_loss,val_acc))
            if best_losses>val_loss:
                best_losses=val_loss
                torch.save(model.state_dict(),'{}'.format(weight_dir+str(epoch)+"train_best"))
                f.write("epoch {} saved\n".format(epoch))
            else:
                torch.save(model.state_dict(),'{}'.format(weight_dir+str(epoch)+"train"))



0
epoch 0 train_loss : 217.11148071289062 , val_loss : 46.5375862121582, val_acc : 0.18247472315840155
1
epoch 1 train_loss : 217.239013671875 , val_loss : 48.0689582824707, val_acc : 0.18247472315840155
2
epoch 2 train_loss : 217.10658264160156 , val_loss : 46.23237228393555, val_acc : 0.18247472315840155
3
epoch 3 train_loss : 217.06063842773438 , val_loss : 44.341339111328125, val_acc : 0.8175252768415985
4
epoch 4 train_loss : 217.06613159179688 , val_loss : 46.230003356933594, val_acc : 0.18247472315840155
5
epoch 5 train_loss : 216.9745635986328 , val_loss : 45.27091598510742, val_acc : 0.18632643235435725
6
epoch 6 train_loss : 216.79965209960938 , val_loss : 41.953731536865234, val_acc : 0.8175252768415985
7
epoch 7 train_loss : 216.8076934814453 , val_loss : 46.055999755859375, val_acc : 0.18247472315840155
8
epoch 8 train_loss : 216.58135986328125 , val_loss : 46.48965835571289, val_acc : 0.18247472315840155
9
epoch 9 train_loss : 216.36630249023438 , val_loss : 47.4586601257

In [57]:
#test

test=chat_ds('test')
test_loader=torch.utils.data.DataLoader(test,batch_size=32)
dataset=weight_dir+'./19train_best'
checkpoint=torch.load(dataset,map_location='cuda:0')
model.load_state_dict(checkpoint)
model.cuda()
model.eval()
pred_sum = 0#model output
gt_sum = 0#label
tp_sum=0
fp_sum=0
fn_sum=0
acc=0
sum=0
with torch.no_grad():
    for it, (text,labels) in enumerate(test_loader):
        inputs=linesToTensor(text)
        inputs = inputs.cuda()
        labels = labels.cuda()
        output=model(inputs)

        TP,FP,TN,FN,pred_len, gt_len=fmeasure(output.cpu(),labels.cpu())
        print(TP,FP,TN,FN,pred_len, gt_len)
        tp_sum += TP
        fp_sum += FP
        fn_sum += FN
        pred_sum += pred_len
        gt_sum += gt_len
        acc=acc+TP+TN
        sum+=len(output)
    with open(weight_dir+'/train_result','a') as f:
        if tp_sum>0 and fp_sum>0 and fn_sum>0:
            precision = tp_sum/(tp_sum+fp_sum)
            recall = tp_sum / (tp_sum+fn_sum)
            f1 = (2*precision*recall / (precision + recall)) * 100
            accuracy=acc/sum
            print( tp_sum, fp_sum, fn_sum)
            print('[{}/{}], prec:{}, recall:{}, f1:{}, acc: {}'.format(it, len(val_loader), precision, recall, f1,accuracy))
            f.write('{}, prec:{}, recall:{}, f1:{}, acc : {}\n'.format(dataset, precision, recall, f1,accuracy))

data load fin
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 25 7 tensor(0) tensor(7)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 18 14 tensor(0) tensor(14)
0 0 19 13 tensor(0) tensor(13)
0 0 32 0 tensor(0) tensor(0)
0 0 23 9 tensor(0) tensor(9)
0 0 23 9 tensor(0) tensor(9)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 17 15 tensor(0) tensor(15)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tens

0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 4 28 tensor(0) tensor(28)
0 0 29 3 tensor(0) tensor(3)
0 0 29 3 tensor(0) tensor(3)
0 0 8 24 tensor(0) tensor(24)
0 0 32 0 tensor(0) tensor(0)
0 0 27 5 tensor(0) tensor(5)
0 0 13 19 tensor(0) tensor(19)
0 0 27 5 tensor(0) tensor(5)
0 0 17 15 tensor(0) tensor(15)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 7 25 tensor(0) tensor(25)
0 0 9 23 tensor(0) tensor(23)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 12 20 tensor(0) tensor(20)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 26 6 tensor(0) tensor(6)
0 0 10 22 tensor(0) tensor(22)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 22 10 tensor(0) tensor(10)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)


0 0 26 6 tensor(0) tensor(6)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 14 18 tensor(0) tensor(18)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 1 31 tensor(0) tensor(31)
0 0 22 10 tensor(0) tensor(10)
0 0 32 0 tensor(0) tensor(0)
0 0 26 6 tensor(0) tensor(6)
0 0 0 32 tensor(0) tensor(32)
0 0 0 32 tensor(0) tensor(32)
0 0 0 32 tensor(0) tensor(32)
0 0 24 8 tensor(0) tensor(8)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32

0 0 0 32 tensor(0) tensor(32)
0 0 29 3 tensor(0) tensor(3)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 9 23 tensor(0) tensor(23)
0 0 32 0 tensor(0) tensor(0)
0 0 20 12 tensor(0) tensor(12)
0 0 8 24 tensor(0) tensor(24)
0 0 25 7 tensor(0) tensor(7)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 13 19 tensor(0) tensor(19)
0 0 24 8 tensor(0) tensor(8)
0 0 32 0 tensor(0) tensor(0)
0 0 23 9 tensor(0) tensor(9)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 26 6 tensor(0) tensor(6)
0 0 4 28 tensor(0) tensor(28)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 24 8 tensor(0) tensor(8)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 17 15 tensor(0) tensor(15)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 

0 0 20 12 tensor(0) tensor(12)
0 0 30 2 tensor(0) tensor(2)
0 0 26 6 tensor(0) tensor(6)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 19 13 tensor(0) tensor(13)
0 0 0 32 tensor(0) tensor(32)
0 0 14 18 tensor(0) tensor(18)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 23 9 tensor(0) tensor(9)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 22 10 tensor(0) tensor(10)
0 0 14 18 tensor(0) tensor(18)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 22 10 tensor(0) tensor(10)
0 0 17 15 tensor(0) tensor(15)
0 0 0 32 tensor(0) tensor(32)
0 0 24 8 tensor(0) tensor(8)
0 0 32 0 tensor(0) tensor(0)
0 0 21 11 tensor(0) tensor(11)
0 0 6 26 tensor(0) tensor(26)
0 0 32 0 tensor(0) tenso

0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 24 8 tensor(0) tensor(8)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 22 10 tensor(0) tensor(10)
0 0 31 1 tensor(0) tensor(1)
0 0 32 0 tensor(0) tensor(0)
0 0 1 31 tensor(0) tensor(31)
0 0 11 21 tensor(0) tensor(21)
0 0 32 0 tensor(0) tensor(0)
0 0 32 0 tensor(0) tensor(0)
0 0 16 16 tensor(0) tensor(16)
0 0 17 15 tensor(0) tensor(15)
0 0 26 6 tensor(0) tensor(6)
0 0 32 0 tensor(0) tensor(0)
0 0 21 11 tensor(0) tensor(11)
0 0 9 23 tensor(0) tensor(23)
0 0 0 32 tensor(0) tensor(32)
0 0 28 4 tensor(0) tensor(4)
0 0 32 0 tensor(0) tensor(0)
0 0 26 6 tensor(0) tensor(6)
0 0 7 25 tensor(0) tensor(25)
0 0 16 16 tensor(0) tensor(16)
0 0 12 20 tensor(0) tensor(20)
0 0 13 19 tensor(0) tensor(19)
0 0 25 7 tensor(0) tensor(7)
0 0 32 0 tensor(0) tens

In [16]:
len(output)

27

## Chat data .json & label.json data


In [39]:
import pandas as pd
metadata=pd.read_csv('../share_data/new_metadata.csv')


In [40]:
import os
import math
data1={}
for idx,gi in enumerate(os.listdir("./result")):
    print(gi)
    gameid=gi.split('_')[0]
    chat_data=pd.read_csv('./result/'+gi)
    delay=int(gi.split('_')[1])
    duration=float(gi.split('_')[2][:-4])
    gameid=int(gi.split('_')[0])
    meta=metadata.loc[metadata['gameid']==gameid]

    st=meta['new_st']
    et=meta['new_et']

    chat=[' ' for i in range(math.floor(duration)+math.floor(delay)+1)]


    init_time=int(st)-int(delay)
    for c_idx,c in enumerate(chat_data['duration']):
        chat[int(c)-init_time]=chat[int(c)-init_time]+str(chat_data['chat'][c_idx])
    data1[idx]=chat
    


102844412704890154_0_2079.0.csv
102844224147717245_0_1836.000000000001.csv
102844235748703677_0_1829.0.csv
102844412723567946_0_2159.0.csv
102844412708953395_0_2060.0.csv
102844212429092040_179_2254.0.csv
102844341908026005_0_2917.999999999998.csv
102844235753749959_0_2218.0.csv
102844294674286796_0_2680.0.csv
102844412721339716_0_1821.0.csv
102844341906256529_0_1751.0000000000018.csv
102844412722519367_0_2072.0.csv
102844235747982779_0_2731.0.csv
102844412716686654_0_1647.0.csv
102844401154168486_0_1924.0.csv
102844412704496937_0_2632.0.csv
102844212430075086_0_2282.999999999999.csv
102844212429288649_0_2922.0.csv
102844235750997440_1_1675.0.csv
102844294670026952_0_1708.0.csv
102844224148896895_0_1807.0.csv
102844294669109446_0_1562.0.csv
102844212431516886_0_1821.0.csv
102844294674876621_0_1718.0.csv
102844401154430631_0_1655.0000000000036.csv
102844401151874719_0_2103.0.csv
102844412712164667_0_2088.0.csv
102844294670878922_0_2199.0.csv
102844412705545516_0_1541.0.csv
1028444127052

In [41]:
import json
with open('chat2.json', 'a') as f:
    json.dump(data1, f, indent=2)

In [46]:
with open('../share_data/label.json','r') as f:
    labels=json.load(f)
data2={}
for idx,gi in enumerate(os.listdir("./result")):
    if '.csv' in gi:
        print(gi)
        gameid=gi.split('_')[0]
        chat_data=pd.read_csv('./result/'+gi)
        delay=int(gi.split('_')[1])
        duration=float(gi.split('_')[2][:-4])
        gameid=int(gi.split('_')[0])
        meta=metadata.loc[metadata['gameid']==gameid]

        st=meta['new_st']
        et=meta['new_et']

        gt=[0 for i in range(math.floor(duration)+math.floor(delay)+1)]    
        label=labels[str(gameid)]
        for (x,y) in label:
            start=int(x.split(':')[0])*60+int(x.split(':')[1])
            end=int(y.split(':')[0])*60+int(y.split(':')[1])
            for i in range(start,end+1):
                gt[i]=1  
        data2[idx]=gt


102844412704890154_0_2079.0.csv
102844224147717245_0_1836.000000000001.csv
102844235748703677_0_1829.0.csv
102844412723567946_0_2159.0.csv
102844412708953395_0_2060.0.csv
102844212429092040_179_2254.0.csv
102844341908026005_0_2917.999999999998.csv
102844235753749959_0_2218.0.csv
102844294674286796_0_2680.0.csv
102844412721339716_0_1821.0.csv
102844341906256529_0_1751.0000000000018.csv
102844412722519367_0_2072.0.csv
102844235747982779_0_2731.0.csv
102844412716686654_0_1647.0.csv
102844401154168486_0_1924.0.csv
102844412704496937_0_2632.0.csv
102844212430075086_0_2282.999999999999.csv
102844212429288649_0_2922.0.csv
102844235750997440_1_1675.0.csv
102844294670026952_0_1708.0.csv
102844224148896895_0_1807.0.csv
102844294669109446_0_1562.0.csv
102844212431516886_0_1821.0.csv
102844294674876621_0_1718.0.csv
102844401154430631_0_1655.0000000000036.csv
102844401151874719_0_2103.0.csv
102844412712164667_0_2088.0.csv
102844294670878922_0_2199.0.csv
102844412705545516_0_1541.0.csv
1028444127052

In [47]:
import json
with open('label.json', 'a') as f:
    json.dump(data2, f, indent=2)

## Old ver

In [None]:
data=[]
for idx,gi in enumerate(os.listdir("./result")):
        print(gi)
        gameid=gi.split('_')[0]
        chat_data=pd.read_csv('./result/'+gi)
        delay=int(gi.split('_')[1])
        duration=float(gi.split('_')[2][:-4])
        gameid=int(gi.split('_')[0])
        meta=metadata.loc[metadata['gameid']==gameid]
        
        st=meta['new_st']
        et=meta['new_et']
        
        data=['' for i in range(math.floor(duration)+math.floor(delay)+1)]


        init_time=int(st)-int(delay)
        for c_idx,c in enumerate(chat_data['duration']):
            data[int(c)-init_time]=data[int(c)-init_time]+str(chat_data['chat'][c_idx])+'\n'
        data=np.array(data)
        np.save('./exp_chat/{}'.format(gameid),data)


In [None]:
os.listdir("./result")
#metadata=pd.read_csv('new_metadata.csv')

In [None]:
metadata.loc[metadata['gameid']==102844212430271695]['new_st']

In [2]:
metadata

NameError: name 'metadata' is not defined

In [45]:
if __name__ == "__main__":
    chat_data='./exp_chat/102844412706659630.npy'
    chat_arr=np.load(chat_data)

In [49]:
chat_arr[4]

'Slow down kobe\nI love Kobe hahah\nCOPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime COPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime\nI CALLED IT ON R/RENGARMAINS\nRangar top Kreygasm Kreygasm Kreygasm\nMorphinTime COPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC !\nmonkaW HOLD ME CHAT\nQUICKSHOT PepeLaugh\nTREVOR 1\nHEY GUYS WHEN IS NA PLAYING? Pepega\nLULW\nHeHe\nCOPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC\nJebaited Jebaited\nCOPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime COPY MorphinTime THIS MorphinTime STATIC MorphinTime TO MorphinTime HELP MorphinTime FNATIC MorphinTime\n'

In [29]:
aa=linesToTensor(chat_arr)

In [35]:
aa.size()

torch.Size([2435, 642, 100])

In [42]:
aa[0][-2]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [53]:
data=[]
metadata=pd.read_csv('../share_data/new_metadata.csv')
for idx,gi in enumerate(['102844412706659630_0_1567.0.csv']):
        print(gi)
        gameid=gi.split('_')[0]
        chat_data=pd.read_csv('./result/'+gi)
        delay=int(gi.split('_')[1])
        duration=float(gi.split('_')[2][:-4])
        gameid=int(gi.split('_')[0])
        meta=metadata.loc[metadata['gameid']==gameid]
        
        st=meta['new_st']
        et=meta['new_et']
        
        data=['' for i in range(math.floor(duration)+math.floor(delay)+1)]


        init_time=int(st)-int(delay)
        for c_idx,c in enumerate(chat_data['duration']):
            data[int(c)-init_time]=data[int(c)-init_time]+str(chat_data['chat'][c_idx])+'\n'
        data=np.array(data)
        np.save('./exp_chat/{}'.format('test'),data)


102844412706659630_0_1567.0.csv


In [11]:
data1.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112])

In [15]:
data2[0]

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [48]:
        with open('./label.json') as f1:  
            gt=json.load(f1)

In [36]:
gt[0]

KeyError: 0

In [34]:
        with open('./chat.json') as f1:  
            gt=json.load(f1)

In [36]:
len(gt['1'])

1837

In [42]:
os.listdir('./result/')

['102844412704890154_0_2079.0.csv',
 '102844224147717245_0_1836.000000000001.csv',
 '102844235748703677_0_1829.0.csv',
 '102844412723567946_0_2159.0.csv',
 '102844412708953395_0_2060.0.csv',
 '102844212429092040_179_2254.0.csv',
 '102844341908026005_0_2917.999999999998.csv',
 '102844235753749959_0_2218.0.csv',
 '102844294674286796_0_2680.0.csv',
 '102844412721339716_0_1821.0.csv',
 '102844341906256529_0_1751.0000000000018.csv',
 '102844412722519367_0_2072.0.csv',
 '102844235747982779_0_2731.0.csv',
 '102844412716686654_0_1647.0.csv',
 '102844401154168486_0_1924.0.csv',
 '102844412704496937_0_2632.0.csv',
 '102844212430075086_0_2282.999999999999.csv',
 '102844212429288649_0_2922.0.csv',
 '102844235750997440_1_1675.0.csv',
 '102844294670026952_0_1708.0.csv',
 '102844224148896895_0_1807.0.csv',
 '102844294669109446_0_1562.0.csv',
 '102844212431516886_0_1821.0.csv',
 '102844294674876621_0_1718.0.csv',
 '102844401154430631_0_1655.0000000000036.csv',
 '102844401151874719_0_2103.0.csv',
 '102

In [27]:
optimizer = torch.optim.SGD(model.parameters(), 0.01,momentum=0.9,weight_decay=1e-4)
lr = adjust_learning_rate(optimizer, 60)


In [28]:
optimizer.param_groups[0]['lr']

0.001

In [42]:
with open('./chat2.json') as f:
    z=json.load(f)

In [43]:
z['1'][1]

' OMEGALUL - 3OpieOP EU is so ba... OpieOP I need to take a breath OpieOP EU is s... OpieOP I need to take another one OpieOP fuc just give me another burger OpieOP OpieOP EU is so ba... OpieOP I need to take a breath OpieOP EU is s... OpieOP I need to take another one OpieOP fuc just give me another burger OpieOPshoe maker'