# AbstractGEN: PyTorch Seq2seq

이 포스트는 6개로 이루어진 "AbstractGEN"의 4번째 포스트입니다. 

이전의 포스트는 정확하게 이야기하면 title을 입력으로 받아 $\rightarrow$ abstract을 생성한다고 이야기하기 어렵습니다. 

따라서 이번 포스트에서는 입력과 출력의 관계를 구분하여 모델을 구현해보겠습니다. 
최종적인 아이디어는 title-abstract을 seq2seq으로 구분하고, abstract을 만드는 decoder에 LM을 또 적용하는 것입니다. 

앞으로의 포스트는 다음과 같습니다:
- #0. AbstractGEN: DataAnalysis
- #1. AbstractGEN: Char-level RNN
- #2. AbstractGEN: Word-level RNN
- **#3. AbstractGEN: Seq2seq**
- #4. AbstractGEN: Seq2seq with attention+teaching force
- #5. AbstractGEN: Applying gpt-2 


In [0]:
import torch
from torch import nn, optim
from torch.utils.data import TensorDataset, DataLoader, Dataset

import pandas as pd
import numpy as np

import re
import collections
import itertools

import tqdm

In [0]:
# 파일을 불러와서 원하는 만큼 잘라, 새로 파일을 만들기 

with open('/content/titles_and_abstracts.txt','r') as f:
  data = f.read()

data = [i for i in data.split('\n') if i != '']
data_dict = []
for i in range(0, len(data), 2):
  data_i = []
  data_i.append(data[i])
  data_i.append(data[i+1])
  data_dict.append(data_i)

df = pd.DataFrame(data_dict[:10000])
df.to_csv('abstract.txt', sep='\t', header=False, index=False)

In [0]:
df.head()

Unnamed: 0,0,1
0,evaluation technology from speaker identificat...,we propose a multi-step system for the analysi...
1,acquisition of semantic classes for adjectives...,"in this paper , we present a clustering experi..."
2,features for detecting hedge cues,we present a sequential labeling approach to h...
3,semantic extraction with wide-coverage lexical...,we report on results of combining graphical mo...
4,evaluating a crosslinguistic grammar resource ...,this paper evaluates the lingo grammar matrix ...


In [0]:
# cuda gpu 환경이 사용 가능한지 체크

print(torch.cuda.is_available())

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

True


## 전처리

In [0]:
# re 모듈을 통해 삭제할 목록들을 정리

FILTERS = "([~,!?\"':;)(])"
SHIFTS = re.compile("([?!.])")
CHANGE_FILTER = re.compile(FILTERS)

In [0]:
def normalize(text):
  text = text.lower()
  text = CHANGE_FILTER.sub("", text)
  text = re.sub('[0-9]+', "", text)
  text = SHIFTS.sub(r" \1", text)
  return text

def parse_line(line):
  line = normalize(line.strip())
  src, trg = line.split('\t')
  # print("title:", src)
  # print("abstract:", trg)
  src_tokens = src.strip().split()
  trg_tokens = trg.strip().split()
  #print(src_tokens)
  return src_tokens, trg_tokens

def build_vocab(words): 
  counts = collections.Counter(words)
  sort_words = sorted(counts.items(),
                      key=lambda x:x[1], reverse = True)
  word_list = ["<UNK>", "<SOS>", "<EOS>"]+ [x[0] for x in sort_words]
  word_dict = dict((w,i) for i,w in enumerate(word_list))
  return word_list, word_dict

def word2tensor(words, word_dict, max_len, padding = 0):
  #  print(words)
  words = words+['<EOS>']
  words = [word_dict.get(w,0) for w in words]
 
 # print(words)
  seq_len = len(words)
  if seq_len < max_len+1:
    words += [padding]*(max_len+1 -seq_len)
  return torch.tensor(words, dtype=torch.int64), seq_len

## DataLoader

In [0]:
class PairDataset(Dataset):
  def __init__(self, path, max_len=300):
    def filter_pair(p):
      return not (len(p[0]) > max_len or len(p[1]) > max_len)

    with open(path) as fp:
      pairs = map(parse_line, fp)
      pairs = filter(filter_pair, pairs)
      pairs = list(pairs)
    
    src = [p[0] for p in pairs]
    trg = [p[1] for p in pairs]

    self.src_word_list, self.src_word_dict = build_vocab(
        itertools.chain.from_iterable(src)
    )
    self.trg_word_list, self.trg_word_dict = build_vocab(
        itertools.chain.from_iterable(trg)
    )

    self.src_data = [word2tensor(
        words, self.src_word_dict, max_len) for words in src]
    self.trg_data = [word2tensor(
        words, self.trg_word_dict, max_len) for words in trg]
  
  def __len__(self):
      return len(self.src_data)
      
  def __getitem__(self, idx):
      src, lsrc = self.src_data[idx]
      trg, ltrg = self.trg_data[idx]
      return src, lsrc, trg, ltrg

In [0]:
batch_size = 64
max_len = 300
path = "/content/abstract.txt"
ds = PairDataset(path, max_len=max_len)
loader = DataLoader(ds, batch_size=batch_size, shuffle=True, num_workers=4)

## Encoder

In [0]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim=50, hid_dim=50, n_layers=1, dropout=0.2):
        super().__init__()

        self.embedding = nn.Embedding(input_dim, 
                                      emb_dim, 
                                      padding_idx=0)
        self.lstm = nn.LSTM(emb_dim, 
                            hid_dim, 
                            n_layers,
                            batch_first = True,
                            dropout = dropout)
        
    def forward(self,x, h0=None, l = None):
        x = self.embedding(x)
        if l is not None:
          x = nn.utils.rnn.pack_padded_sequence(x,l,batch_first=True)

        outputs, h = self.lstm(x, h0)
      
        #outputs are always from the top hidden layer
        return h

## Decoder

In [0]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim=50, hid_dim=50, n_layers=1, dropout=0.2):
        super().__init__()
        
        self.embedding = nn.Embedding(output_dim, 
                                      emb_dim, 
                                      padding_idx = 0)
        self.lstm = nn.LSTM(emb_dim, 
                            hid_dim, 
                            n_layers, 
                            batch_first = True,
                            dropout = dropout)
        
        self.linear = nn.Linear(hid_dim, output_dim)
        
    def forward(self, x, h,l=None):
        x = self.embedding(x)
        if l is not None:
          x = nn.utils.rnn.pack_padded_sequence(x,l,batch_first=True)
        x,h = self.lstm(x,h)
        if l is not None:
          x = nn.utils.rnn.pad_packed_sequence(x,batch_first=True)[0]
        
        prediction = self.linear(x)

        
        return prediction, h

### 문장을 생성해줄 함수 정의

In [0]:
def generateSEN(input_string, enc, dec, max_len=140, device='cpu'):
  words = normalize(input_string).split()
  input_tensor, seq_len = word2tensor(words, ds.src_word_dict, max_len)

  input_tensor=input_tensor.unsqueeze(0)
  seq_len = [seq_len]

  sos_inputs = torch.tensor(sos, dtype=torch.int64)
  input_tensor = input_tensor.to(device)
  sos_inputs = sos_inputs.to(device)

  ctx = enc(input_tensor, l=seq_len)
  
  # 시작 토큰과 콘텍스트 벡터를 디코더의 초기값으로!
  z = sos_inputs
  h = ctx
  results = []
  for i in range(max_len):
    o,h = dec(z.view(1,1),h)
    wi = o.detach().view(-1).max(0)[1]
    if wi.item() == eos:
      break
    results.append(wi.item())
    z= wi
  return " ".join(ds.trg_word_list[i] for i in results)

In [0]:
a = torch.randn(1,1)
print(a.size())
print(a)

torch.Size([1, 1])
tensor([[0.1331]])


In [0]:
# 함수 동작 확인 - 학습을 시키지 않았을 때의 결과

enc = Encoder(len(ds.src_word_list), 100, 100, 2)
dec = Decoder(len(ds.trg_word_list), 100,100,2)
generateSEN("the benefit of stochastic pp attachment to a rule-based parser", enc, dec, 140,"cpu")

'barge-in kyotoebmt kyotoebmt inuktitut clear-cut kyotoebmt supercomputer lgpl conjecture gist-it courses kyotoebmt aggregated analysers advantages higherorder conjecture fluctuations mouth mouth reliably mouth mouth childes mouth mouth learning/nlp mouth cutaneous conjecture mouth mouth grapples grapples grapples grapples grapples grapples grapples incident exclude concrete concrete declared kyotoebmt analysers icd--cm gist-it cutaneous conjecture conjecture conjecture inuktitut inuktitut inuktitut turkic happens characterization formation formation sensory-motor class-conditional sensory-motor setups defense lgpl lgpl class-conditional aspossible aspossible kyotoebmt aspossible adaboost adaboost agglomeratively kyotoebmt agglomeratively disinformation wit courses subphrase superfluous grapples grapples grapples grapples grapples grapples spring spring spring gist-it hapaxlegomena grapples grapples grapples grapples grapples grapples spring spring spring gist-it hapaxlegomena grapples

# 모델 학습

In [0]:
enc = Encoder(len(ds.src_word_list), 100, 100, 2)
dec = Decoder(len(ds.trg_word_list), 100,100,2)
enc.to("cuda:0")
dec.to("cuda:0")

opt_enc = optim.Adam(enc.parameters(), 0.002)
opt_dec = optim.Adam(dec.parameters(), 0.002)

loss_f = nn.CrossEntropyLoss()

In [0]:
print(ds.trg_word_dict)
print(ds.src_word_dict)

{'<UNK>': 0, '<SOS>': 1, '<EOS>': 2, 'for': 3, 'of': 4, 'and': 5, 'a': 6, 'in': 7, 'the': 8, 'using': 9, 'to': 10, 'with': 11, 'translation': 12, 'language': 13, 'learning': 14, 'semantic': 15, 'from': 16, 'machine': 17, 'word': 18, 'on': 19, 'an': 20, 'parsing': 21, 'model': 22, 'models': 23, 'text': 24, 'statistical': 25, 'extraction': 26, 'automatic': 27, 'system': 28, 'information': 29, 'based': 30, 'analysis': 31, 'approach': 32, 'dependency': 33, 'chinese': 34, 'unsupervised': 35, 'dialogue': 36, 'evaluation': 37, 'recognition': 38, 'classification': 39, 'by': 40, 'features': 41, 'corpus': 42, 'lexical': 43, 'data': 44, 'sense': 45, 'systems': 46, 'detection': 47, 'disambiguation': 48, 'annotation': 49, 'generation': 50, 'web': 51, 'entity': 52, 'speech': 53, 'syntactic': 54, 'alignment': 55, 'segmentation': 56, 'sentiment': 57, 'spoken': 58, 'named': 59, 'similarity': 60, 'summarization': 61, 'corpora': 62, 'discourse': 63, 'natural': 64, 'multilingual': 65, 'study': 66, 'toward

In [0]:
print(len(ds.src_data))

9999


In [0]:
print(ds.src_data[0])
print(len(ds.src_data))
p = []
for (a,b) in ds.src_data:
  p.append(b)
print(max(p))
print(p)

(tensor([  37,   95,   16, 1571,   75,   10, 1254,   31,    6, 2596,   28,    3,
         594, 1406, 1143,    2,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,  

In [0]:
o = []
for (a,b) in ds.trg_data:
  o.append(b)
  w = len(a)
print(max(o))
print(w)

292
301


In [0]:
def to2D(x):
  shapes = x.shape
  return x.reshape(shapes[0] * shapes[1], -1)

for epoch in range(1):
  enc.train(), dec.train()
  losses = []

  for x, lx, y, ly in tqdm.tqdm(loader):
    # x의 PackedSequence를 만들기 위해 번역 소스의 길이로 내림차순 정렬한다.
    lx, sort_idx = lx.sort(descending=True)
    x,y,ly = x[sort_idx], y[sort_idx], ly[sort_idx]
    x,y = x.to("cuda:0"), y.to("cuda:0")

    ctx = enc(x,l=lx)

    # y의 PackedSequence를 만들기 위해 번역 소스의 길이로 내림차순 정렬한다.
    ly, sort_idx = ly.sort(descending=True)
    y  = y[sort_idx]

    
    print("ctx shape:", ctx[0].shape)
    print()
    h0 = (ctx[0][:, sort_idx,:], ctx[1][:, sort_idx, :])

    z = y[:,:-1].detach()
    z[z==-100] = 0

    o,_ = dec(z, h0, l=ly-1)

    loss = loss_f(to2D(o[:]), to2D(y[:,1:max(ly)]).squeeze())

    enc.zero_grad(), dec.zero_grad()
    loss.backward()

    opt_enc.step(), opt_dec.step()
    losses.append(loss.item())

  enc.eval(), dec.eval()
  print(epoch)
  with torch.no_grad():
    sen = "cross-document coreference on a large scale corpus"
    print(sen)
    print(generateSEN(sen, enc, dec, max_len=140, device='cuda:0'))

  0%|          | 0/157 [00:00<?, ?it/s]

ctx shape: torch.Size([2, 64, 100])



  1%|          | 1/157 [00:00<01:42,  1.53it/s]

ctx shape: torch.Size([2, 64, 100])



  1%|▏         | 2/157 [00:01<01:36,  1.60it/s]

ctx shape: torch.Size([2, 64, 100])



  2%|▏         | 3/157 [00:01<01:26,  1.78it/s]

ctx shape: torch.Size([2, 64, 100])



  3%|▎         | 4/157 [00:02<01:22,  1.86it/s]

ctx shape: torch.Size([2, 64, 100])



  3%|▎         | 5/157 [00:02<01:18,  1.94it/s]

ctx shape: torch.Size([2, 64, 100])



  4%|▍         | 6/157 [00:02<01:13,  2.06it/s]

ctx shape: torch.Size([2, 64, 100])



  4%|▍         | 7/157 [00:03<01:10,  2.14it/s]

ctx shape: torch.Size([2, 64, 100])



  5%|▌         | 8/157 [00:03<01:07,  2.22it/s]

ctx shape: torch.Size([2, 64, 100])



  6%|▌         | 9/157 [00:04<01:06,  2.22it/s]

ctx shape: torch.Size([2, 64, 100])



  6%|▋         | 10/157 [00:04<01:02,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



  7%|▋         | 11/157 [00:05<01:02,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



  8%|▊         | 12/157 [00:05<01:01,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



  8%|▊         | 13/157 [00:05<01:01,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



  9%|▉         | 14/157 [00:06<01:01,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 10%|▉         | 15/157 [00:06<01:00,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 10%|█         | 16/157 [00:07<01:01,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 11%|█         | 17/157 [00:07<00:58,  2.39it/s]

ctx shape: torch.Size([2, 64, 100])



 11%|█▏        | 18/157 [00:08<00:58,  2.37it/s]

ctx shape: torch.Size([2, 64, 100])



 12%|█▏        | 19/157 [00:08<00:56,  2.43it/s]

ctx shape: torch.Size([2, 64, 100])



 13%|█▎        | 20/157 [00:08<00:56,  2.44it/s]

ctx shape: torch.Size([2, 64, 100])



 13%|█▎        | 21/157 [00:09<00:56,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 14%|█▍        | 22/157 [00:09<00:54,  2.49it/s]

ctx shape: torch.Size([2, 64, 100])



 15%|█▍        | 23/157 [00:10<00:54,  2.45it/s]

ctx shape: torch.Size([2, 64, 100])



 15%|█▌        | 24/157 [00:10<00:54,  2.42it/s]

ctx shape: torch.Size([2, 64, 100])



 16%|█▌        | 25/157 [00:10<00:52,  2.49it/s]

ctx shape: torch.Size([2, 64, 100])



 17%|█▋        | 26/157 [00:11<00:53,  2.45it/s]

ctx shape: torch.Size([2, 64, 100])



 17%|█▋        | 27/157 [00:11<00:52,  2.48it/s]

ctx shape: torch.Size([2, 64, 100])



 18%|█▊        | 28/157 [00:12<00:54,  2.38it/s]

ctx shape: torch.Size([2, 64, 100])



 18%|█▊        | 29/157 [00:12<00:50,  2.52it/s]

ctx shape: torch.Size([2, 64, 100])



 19%|█▉        | 30/157 [00:12<00:51,  2.46it/s]

ctx shape: torch.Size([2, 64, 100])



 20%|█▉        | 31/157 [00:13<00:50,  2.47it/s]

ctx shape: torch.Size([2, 64, 100])



 20%|██        | 32/157 [00:13<00:52,  2.39it/s]

ctx shape: torch.Size([2, 64, 100])



 21%|██        | 33/157 [00:14<00:55,  2.24it/s]

ctx shape: torch.Size([2, 64, 100])



 22%|██▏       | 34/157 [00:14<00:52,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 22%|██▏       | 35/157 [00:15<00:50,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 23%|██▎       | 36/157 [00:15<00:52,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 24%|██▎       | 37/157 [00:15<00:51,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 24%|██▍       | 38/157 [00:16<00:49,  2.39it/s]

ctx shape: torch.Size([2, 64, 100])



 25%|██▍       | 39/157 [00:16<00:48,  2.44it/s]

ctx shape: torch.Size([2, 64, 100])



 25%|██▌       | 40/157 [00:17<00:47,  2.47it/s]

ctx shape: torch.Size([2, 64, 100])



 26%|██▌       | 41/157 [00:17<00:45,  2.56it/s]

ctx shape: torch.Size([2, 64, 100])



 27%|██▋       | 42/157 [00:17<00:44,  2.57it/s]

ctx shape: torch.Size([2, 64, 100])



 27%|██▋       | 43/157 [00:18<00:45,  2.50it/s]

ctx shape: torch.Size([2, 64, 100])



 28%|██▊       | 44/157 [00:18<00:47,  2.38it/s]

ctx shape: torch.Size([2, 64, 100])



 29%|██▊       | 45/157 [00:19<00:46,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 29%|██▉       | 46/157 [00:19<00:46,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 30%|██▉       | 47/157 [00:20<00:48,  2.26it/s]

ctx shape: torch.Size([2, 64, 100])



 31%|███       | 48/157 [00:20<00:46,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 31%|███       | 49/157 [00:20<00:46,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 32%|███▏      | 50/157 [00:21<00:45,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 32%|███▏      | 51/157 [00:21<00:47,  2.21it/s]

ctx shape: torch.Size([2, 64, 100])



 33%|███▎      | 52/157 [00:22<00:46,  2.25it/s]

ctx shape: torch.Size([2, 64, 100])



 34%|███▍      | 53/157 [00:22<00:47,  2.21it/s]

ctx shape: torch.Size([2, 64, 100])



 34%|███▍      | 54/157 [00:23<00:45,  2.28it/s]

ctx shape: torch.Size([2, 64, 100])



 35%|███▌      | 55/157 [00:23<00:43,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 36%|███▌      | 56/157 [00:23<00:44,  2.29it/s]

ctx shape: torch.Size([2, 64, 100])



 36%|███▋      | 57/157 [00:24<00:42,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 37%|███▋      | 58/157 [00:24<00:42,  2.32it/s]

ctx shape: torch.Size([2, 64, 100])



 38%|███▊      | 59/157 [00:25<00:42,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 38%|███▊      | 60/157 [00:25<00:43,  2.24it/s]

ctx shape: torch.Size([2, 64, 100])



 39%|███▉      | 61/157 [00:26<00:41,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 39%|███▉      | 62/157 [00:26<00:40,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 40%|████      | 63/157 [00:27<00:40,  2.29it/s]

ctx shape: torch.Size([2, 64, 100])



 41%|████      | 64/157 [00:27<00:41,  2.25it/s]

ctx shape: torch.Size([2, 64, 100])



 41%|████▏     | 65/157 [00:27<00:40,  2.25it/s]

ctx shape: torch.Size([2, 64, 100])



 42%|████▏     | 66/157 [00:28<00:38,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 43%|████▎     | 67/157 [00:28<00:38,  2.32it/s]

ctx shape: torch.Size([2, 64, 100])



 43%|████▎     | 68/157 [00:29<00:37,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 44%|████▍     | 69/157 [00:29<00:38,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 45%|████▍     | 70/157 [00:30<00:37,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 45%|████▌     | 71/157 [00:30<00:36,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 46%|████▌     | 72/157 [00:30<00:36,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 46%|████▋     | 73/157 [00:31<00:34,  2.43it/s]

ctx shape: torch.Size([2, 64, 100])



 47%|████▋     | 74/157 [00:31<00:33,  2.47it/s]

ctx shape: torch.Size([2, 64, 100])



 48%|████▊     | 75/157 [00:32<00:32,  2.53it/s]

ctx shape: torch.Size([2, 64, 100])



 48%|████▊     | 76/157 [00:32<00:31,  2.55it/s]

ctx shape: torch.Size([2, 64, 100])



 49%|████▉     | 77/157 [00:32<00:32,  2.48it/s]

ctx shape: torch.Size([2, 64, 100])



 50%|████▉     | 78/157 [00:33<00:33,  2.37it/s]

ctx shape: torch.Size([2, 64, 100])



 50%|█████     | 79/157 [00:33<00:32,  2.37it/s]

ctx shape: torch.Size([2, 64, 100])



 51%|█████     | 80/157 [00:34<00:32,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 52%|█████▏    | 81/157 [00:34<00:32,  2.30it/s]

ctx shape: torch.Size([2, 64, 100])



 52%|█████▏    | 82/157 [00:35<00:32,  2.29it/s]

ctx shape: torch.Size([2, 64, 100])



 53%|█████▎    | 83/157 [00:35<00:34,  2.17it/s]

ctx shape: torch.Size([2, 64, 100])



 54%|█████▎    | 84/157 [00:35<00:32,  2.22it/s]

ctx shape: torch.Size([2, 64, 100])



 54%|█████▍    | 85/157 [00:36<00:34,  2.12it/s]

ctx shape: torch.Size([2, 64, 100])



 55%|█████▍    | 86/157 [00:36<00:33,  2.15it/s]

ctx shape: torch.Size([2, 64, 100])



 55%|█████▌    | 87/157 [00:37<00:32,  2.17it/s]

ctx shape: torch.Size([2, 64, 100])



 56%|█████▌    | 88/157 [00:37<00:31,  2.19it/s]

ctx shape: torch.Size([2, 64, 100])



 57%|█████▋    | 89/157 [00:38<00:31,  2.18it/s]

ctx shape: torch.Size([2, 64, 100])



 57%|█████▋    | 90/157 [00:38<00:29,  2.30it/s]

ctx shape: torch.Size([2, 64, 100])



 58%|█████▊    | 91/157 [00:39<00:30,  2.15it/s]

ctx shape: torch.Size([2, 64, 100])



 59%|█████▊    | 92/157 [00:39<00:29,  2.23it/s]

ctx shape: torch.Size([2, 64, 100])



 59%|█████▉    | 93/157 [00:40<00:27,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 60%|█████▉    | 94/157 [00:40<00:26,  2.36it/s]

ctx shape: torch.Size([2, 64, 100])



 61%|██████    | 95/157 [00:40<00:25,  2.42it/s]

ctx shape: torch.Size([2, 64, 100])



 61%|██████    | 96/157 [00:41<00:25,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 62%|██████▏   | 97/157 [00:41<00:25,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 62%|██████▏   | 98/157 [00:42<00:25,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 63%|██████▎   | 99/157 [00:42<00:26,  2.20it/s]

ctx shape: torch.Size([2, 64, 100])



 64%|██████▎   | 100/157 [00:43<00:24,  2.32it/s]

ctx shape: torch.Size([2, 64, 100])



 64%|██████▍   | 101/157 [00:43<00:23,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 65%|██████▍   | 102/157 [00:43<00:22,  2.47it/s]

ctx shape: torch.Size([2, 64, 100])



 66%|██████▌   | 103/157 [00:44<00:23,  2.25it/s]

ctx shape: torch.Size([2, 64, 100])



 66%|██████▌   | 104/157 [00:44<00:22,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 67%|██████▋   | 105/157 [00:45<00:22,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 68%|██████▊   | 106/157 [00:45<00:21,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 68%|██████▊   | 107/157 [00:46<00:21,  2.29it/s]

ctx shape: torch.Size([2, 64, 100])



 69%|██████▉   | 108/157 [00:46<00:20,  2.41it/s]

ctx shape: torch.Size([2, 64, 100])



 69%|██████▉   | 109/157 [00:46<00:21,  2.24it/s]

ctx shape: torch.Size([2, 64, 100])



 70%|███████   | 110/157 [00:47<00:22,  2.14it/s]

ctx shape: torch.Size([2, 64, 100])



 71%|███████   | 111/157 [00:47<00:21,  2.18it/s]

ctx shape: torch.Size([2, 64, 100])



 71%|███████▏  | 112/157 [00:48<00:21,  2.11it/s]

ctx shape: torch.Size([2, 64, 100])



 72%|███████▏  | 113/157 [00:48<00:19,  2.21it/s]

ctx shape: torch.Size([2, 64, 100])



 73%|███████▎  | 114/157 [00:49<00:18,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 73%|███████▎  | 115/157 [00:49<00:19,  2.20it/s]

ctx shape: torch.Size([2, 64, 100])



 74%|███████▍  | 116/157 [00:50<00:18,  2.26it/s]

ctx shape: torch.Size([2, 64, 100])



 75%|███████▍  | 117/157 [00:50<00:18,  2.19it/s]

ctx shape: torch.Size([2, 64, 100])



 75%|███████▌  | 118/157 [00:50<00:17,  2.25it/s]

ctx shape: torch.Size([2, 64, 100])



 76%|███████▌  | 119/157 [00:51<00:17,  2.19it/s]

ctx shape: torch.Size([2, 64, 100])



 76%|███████▋  | 120/157 [00:51<00:16,  2.19it/s]

ctx shape: torch.Size([2, 64, 100])



 77%|███████▋  | 121/157 [00:52<00:17,  2.03it/s]

ctx shape: torch.Size([2, 64, 100])



 78%|███████▊  | 122/157 [00:52<00:16,  2.13it/s]

ctx shape: torch.Size([2, 64, 100])



 78%|███████▊  | 123/157 [00:53<00:15,  2.13it/s]

ctx shape: torch.Size([2, 64, 100])



 79%|███████▉  | 124/157 [00:53<00:14,  2.23it/s]

ctx shape: torch.Size([2, 64, 100])



 80%|███████▉  | 125/157 [00:54<00:14,  2.26it/s]

ctx shape: torch.Size([2, 64, 100])



 80%|████████  | 126/157 [00:54<00:12,  2.44it/s]

ctx shape: torch.Size([2, 64, 100])



 81%|████████  | 127/157 [00:55<00:12,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 82%|████████▏ | 128/157 [00:55<00:13,  2.23it/s]

ctx shape: torch.Size([2, 64, 100])



 82%|████████▏ | 129/157 [00:55<00:11,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 83%|████████▎ | 130/157 [00:56<00:12,  2.25it/s]

ctx shape: torch.Size([2, 64, 100])



 83%|████████▎ | 131/157 [00:56<00:11,  2.24it/s]

ctx shape: torch.Size([2, 64, 100])



 84%|████████▍ | 132/157 [00:57<00:10,  2.28it/s]

ctx shape: torch.Size([2, 64, 100])



 85%|████████▍ | 133/157 [00:57<00:10,  2.29it/s]

ctx shape: torch.Size([2, 64, 100])



 85%|████████▌ | 134/157 [00:58<00:09,  2.39it/s]

ctx shape: torch.Size([2, 64, 100])



 86%|████████▌ | 135/157 [00:58<00:09,  2.39it/s]

ctx shape: torch.Size([2, 64, 100])



 87%|████████▋ | 136/157 [00:58<00:08,  2.45it/s]

ctx shape: torch.Size([2, 64, 100])



 87%|████████▋ | 137/157 [00:59<00:08,  2.42it/s]

ctx shape: torch.Size([2, 64, 100])



 88%|████████▊ | 138/157 [00:59<00:08,  2.24it/s]

ctx shape: torch.Size([2, 64, 100])



 89%|████████▊ | 139/157 [01:00<00:07,  2.30it/s]

ctx shape: torch.Size([2, 64, 100])



 89%|████████▉ | 140/157 [01:00<00:07,  2.28it/s]

ctx shape: torch.Size([2, 64, 100])



 90%|████████▉ | 141/157 [01:01<00:06,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 90%|█████████ | 142/157 [01:01<00:06,  2.26it/s]

ctx shape: torch.Size([2, 64, 100])



 91%|█████████ | 143/157 [01:01<00:06,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 92%|█████████▏| 144/157 [01:02<00:05,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



 92%|█████████▏| 145/157 [01:02<00:04,  2.46it/s]

ctx shape: torch.Size([2, 64, 100])



 93%|█████████▎| 146/157 [01:03<00:04,  2.48it/s]

ctx shape: torch.Size([2, 64, 100])



 94%|█████████▎| 147/157 [01:03<00:03,  2.51it/s]

ctx shape: torch.Size([2, 64, 100])



 94%|█████████▍| 148/157 [01:03<00:03,  2.54it/s]

ctx shape: torch.Size([2, 64, 100])



 95%|█████████▍| 149/157 [01:04<00:03,  2.40it/s]

ctx shape: torch.Size([2, 64, 100])



 96%|█████████▌| 150/157 [01:04<00:02,  2.39it/s]

ctx shape: torch.Size([2, 64, 100])



 96%|█████████▌| 151/157 [01:05<00:02,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 97%|█████████▋| 152/157 [01:05<00:02,  2.34it/s]

ctx shape: torch.Size([2, 64, 100])



 97%|█████████▋| 153/157 [01:06<00:01,  2.35it/s]

ctx shape: torch.Size([2, 64, 100])



 98%|█████████▊| 154/157 [01:06<00:01,  2.31it/s]

ctx shape: torch.Size([2, 64, 100])



 99%|█████████▊| 155/157 [01:06<00:00,  2.33it/s]

ctx shape: torch.Size([2, 64, 100])



100%|██████████| 157/157 [01:07<00:00,  2.32it/s]


ctx shape: torch.Size([2, 15, 100])

0
cross-document coreference on a large scale corpus
paper paper a a a the the the the the the the of of of of of the the the . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .


당연한 이야기겠지만, 기계번역과 같은 방식은 논문의 초록을 생성하는데 적합하지 않았다. 
