In [1]:
#----------------------
# english前処理
#----------------------
import torch.nn.functional as F
from torchtext import data
import re
import mojimoji
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 文字列処理定義
def preprocessing(text):
    # 改行、半角スペース、全角スペースを削除
    text = re.sub('\r', '', text)
    text = re.sub('\n', '', text)
    # 数字文字の一律「0」化
    # 全角化
    return text

# Tokenizer定義
def one_hot_preprocessing(text):
    ones = torch.sparse.torch.eye(5).to(device)
    trg=ones.index_select(0,trg).long()
# 文字列処理 + Tokenizer
def tokenizer_with_preprocessing(text):
    text = preprocessing(text)
    text = text.split(' ')
    return text

In [2]:
TEXT = data.Field(
    sequential=True, 
    init_token='<sos>', 
    eos_token='<eos>', 
    tokenize=tokenizer_with_preprocessing, 
    lower=True, 
    use_vocab=True, 
    batch_first=True,
    fix_length=50
)

TEXT_act = data.Field(
    sequential=False, 
    tokenize=tokenizer_with_preprocessing, 
    use_vocab=True, 
    batch_first=True
)


In [3]:
#----------------------
# データの読込
#----------------------
train_ds, test_ds = data.TabularDataset.splits(
    path='act_emo_text',
    train='train.csv',
    test='test.csv',
    format='csv',
    skip_header=False,
    fields=[('dial', TEXT), ('act', TEXT_act)]
)

# 確認
train_ds[0].__dict__.keys()
test_ds[0].__dict__.keys()
# for i in range(0, 10):
#     print(vars(train_ds[i]))
#     print(vars(test_ds[i]))

# 辞書作成
TEXT.build_vocab(train_ds, test_ds, min_freq=2)
TEXT_act.build_vocab(train_ds, test_ds, min_freq=2)
# 単語カウント
with open('vocab.txt','w') as file:

    file.write('\n'.join(TEXT.vocab.freqs))
# print(TEXT.vocab.freqs)
print('語彙数:{}'.format(len(TEXT.vocab)))

# イテレータの作成
# --> 事前に「ランタイムのタイプを変更」からGPUを選択しておく。
train_iter = data.Iterator(train_ds, batch_size=16, shuffle=True, device=device)
test_iter = data.Iterator(test_ds, batch_size=16, shuffle=False, device=device)

# 確認
batch = next(iter(train_iter))
# print(batch.dial)
# print(batch.act)

batch = next(iter(test_iter))
print(batch.dial[2])
print(batch.act.size())

語彙数:20915
tensor([ 2, 68, 23,  8,  3,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
         1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1],
       device='cuda:0')
torch.Size([16])


In [4]:
import torch.nn.functional as F
from torch import nn 
class Seq2Seq(nn.Module):
    def __init__(self, 
                 encoder, 
                 decoder, 
                 src_pad_idx, 
                 trg_pad_idx, 
                 device):
        super().__init__()

        self.encoder = encoder
        self.decoder = decoder
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        self.device = device

    def make_src_mask(self, src):

        #src = [batch size, src len]

        src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)

        #src_mask = [batch size, 1, 1, src len]

        return src_mask

    def make_trg_mask(self, trg):

        #trg = [batch size, trg len]

        trg_pad_mask = (trg != self.trg_pad_idx).unsqueeze(1).unsqueeze(3)

        #trg_pad_mask = [batch size, 1, trg len, 1]

        trg_len = trg.shape[1]

        trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len), device = self.device)).bool()

        #trg_sub_mask = [trg len, trg len]

        trg_mask = trg_pad_mask & trg_sub_mask

        #trg_mask = [batch size, 1, trg len, trg len]

        return trg_mask

    def forward(self, src, trg):

        #src = [batch size, src len]
        #trg = [batch size, trg len]

        src_mask = self.make_src_mask(src)
#          trg_mask = self.make_trg_mask(trg)

        #src_mask = [batch size, 1, 1, src len]
        #trg_mask = [batch size, 1, trg len, trg len]

        enc_src = self.encoder(src, src_mask)
        enc_src = enc_src.view(16,-1)
        output = self.decoder(enc_src)
        return F.softmax(output, dim=1)
        #enc_src = [batch size, src len, hid dim]

#         output, attention = self.decoder(trg, enc_src, trg_mask, src_mask)
#          output, attention = self.decoder(enc_src,)
        #output = [batch size, trg len, output dim]
        #attention = [batch size, n heads, trg len, src len]

#         return output, attention

In [5]:
import torch
from torch import nn
from transformer_model import Encoder,Decoder
#  パラメータの設定
INPUT_DIM = len(TEXT.vocab)
OUTPUT_DIM = 5
HID_DIM = 256
ENC_LAYERS = 3
DEC_LAYERS = 3
ENC_HEADS = 8
DEC_HEADS = 8
ENC_PF_DIM = 512
DEC_PF_DIM = 512
ENC_DROPOUT = 0.1
DEC_DROPOUT = 0.1

# Encoderの初期化
enc = Encoder(INPUT_DIM, 
              HID_DIM, 
              ENC_LAYERS, 
              ENC_HEADS, 
              ENC_PF_DIM, 
              ENC_DROPOUT, 
              device)

# Decoderの初期化
dec = Decoder(OUTPUT_DIM, 
              HID_DIM, 
              DEC_LAYERS, 
              DEC_HEADS, 
              DEC_PF_DIM, 
              DEC_DROPOUT, 
              device)

# padding用のIDの指定
PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]


linear = nn.Linear(HID_DIM*50, OUTPUT_DIM)

# モデルの初期化
# model = Seq2Seq(enc, dec, PAD_IDX, PAD_IDX, device).to(device)
model = Seq2Seq(enc, linear, PAD_IDX, PAD_IDX, device).to(device)



In [6]:
src_mask = (src != self.src_pad_idx).unsqueeze(1).unsqueeze(2)
src_mask = self.make_src_mask(src)
enc(batch.dial)

NameError: name 'src' is not defined

In [14]:
a=torch.randn(16,50).long().to(device)
b=torch.randn(16,5).long().to(device)
model(batch.dial,batch.act).size()
trg = F.one_hot(batch.act,num_classes=5)
print(trg.size())

torch.Size([16, 5])


In [9]:
from train import evaluate,epoch_time,translate_sentence
# オプティマイザーの設定

LEARNING_RATE = 0.0005
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# 損失関数の設定
criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)

# 重みの初期化
def initialize_weights(m):
    if hasattr(m, 'weight') and m.weight.dim() > 1:
        nn.init.xavier_uniform_(m.weight.data)
model.apply(initialize_weights)
print(1)

1


In [19]:
def train(model, iterator, optimizer, criterion, clip):

    model.train()

    epoch_loss = 0

    for i, batch in enumerate(iterator):

        src = batch.dial
        trg = F.one_hot(batch.act,num_classes=5)

        optimizer.zero_grad()

#         output, _ = model(src, trg[:,:-1])
        output = model(src, trg)
        #output = [batch size, trg len - 1, output dim]
        #trg = [batch size, trg len]
#         output_size = output.size()
#         output_dim = output.shape[-1]

#         output = output.contiguous().view(-1, output_dim)
#         trg = trg[:,1:].contiguous().view(-1)

        #output = [batch size * trg len - 1, output dim]
        #trg = [batch size * trg len - 1]
        
#         trg1 = trg.view(16,1,5)
#         trg2 = torch.cat(([trg1]*output_size[1]),dim=1)
#         assert (output.tolist() >= 0. & output.tolist() <= 1.).all()
#         assert (trg.tolist() >= 0. & trg.tolist() <= 1.).all()

        loss = criterion(output,torch.max(trg, 1)[1])
        
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)

        optimizer.step()

        epoch_loss += loss.item()

    return epoch_loss / len(iterator)


In [None]:
#----------------------
# モデルの学習
#----------------------
import time
import math

N_EPOCHS = 100
CLIP = 1

# サンプル1作品を取得
example_idx = 8
src_sample = vars(train_ds.examples[example_idx])['dial']
trg_sample = vars(train_ds.examples[example_idx])['act']

# タイトルと本文を表示
print(f'src = {src_sample}')
print(f'trg = {trg_sample}')

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):

    start_time = time.time()

    train_loss = train(model, train_iter, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, test_iter, criterion)

    end_time = time.time()

    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    #if valid_loss < best_valid_loss:
    #    best_valid_loss = valid_loss
    #    torch.save(model.state_dict(), 'drive/My Drive/trained_model.pt')
    # 参考スクリプトではバリデーションデータの精度を指標にしているが、今回の試行では学習が進むほどバリデーションデータの精度が低下したため、オーバーフィットは無視して最終エポック後のモデルを採用（途中終了も考慮して各エポック後に保存）
    torch.save(model.state_dict(), './Transformer_classify.pt')

    # エポックごとに学習用・バリデーション用データの精度を表示
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

    # 10エポックごとにサンプル1作品のタイトルから本文を生成した結果を表示
#     if epoch % 10 == 0:
#       translation, attention =  m(src_sample, TEXT, TEXT, model, device)
#       print(f'predicted trg = {translation}')

src = ['i', 'guess', 'you', 'are', 'right.but', 'what', 'shall', 'we', 'do', '?', 'i', "don't", 'feel', 'like', 'sitting', 'at', 'home', '.']
trg = 2


In [76]:
x=torch.randn(2,3)
list=x.tolist()
print(list)
for i in enumerate(list):
    print(2)

[[-0.34789544343948364, 1.4711216688156128, 0.7256169319152832], [-0.1784655898809433, 0.4272070527076721, -0.24276989698410034]]
2
2


In [None]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear1=nn.Linear(,10)
        self.linear2=nn.Linear(10,5)
    def forward(self, src):
        enc_src = self.linear1(src)
        
        output = self.linear2(enc_src)
        return F.softmax(output, dim=1)

In [None]:
model=Model().to(device)
model.zero_grad()

In [None]:
from torch import nn
# trc=batch.act
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

model.train()
epoch_loss = 0
for i, batch in enumerate(train_iter):
    trg=F.one_hot(batch.act)
    src=batch.dial
    optimizer.zero_grad()
    output = model(src)
    loss = criterion(output, trg)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1)
    optimizer.step()
    print(loss.item())