In [156]:
import getopt
import sys
import os
import math
import time
import argparse
import torch
import torch as T
from torch.autograd import Variable as var
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
from torch.nn.utils import clip_grad_norm_

#torch.manual_seed(1)


import os
from io import open
import inspect
import numpy as np
import pandas as pd

import torch
import torchtext
from torchtext.datasets import BABI20
from torchtext.data import Dataset, Field, Example, Iterator

import argparse
from torchtext import datasets
from torchtext.datasets.babi import BABI20Field
#from models.UTransformer import BabiUTransformer
#from models.common_layer import NoamOpt
import torch.nn as nn
from copy import deepcopy

ModuleNotFoundError: No module named 'pandas'

In [2]:
# Experiment configuration. Options use a single leading dash; `parse_args(args=[])`
# at the end ignores the notebook kernel's own argv so every option takes its default.
parser = argparse.ArgumentParser(description='PyTorch Differentiable Neural Computer')

# Controller / network shape
parser.add_argument('-input_size', type=int, default=6, help='dimension of input feature')
parser.add_argument('-rnn_type', type=str, default='lstm', help='type of recurrent cells to use for the controller')
parser.add_argument('-nhid', type=int, default=64, help='number of hidden units of the inner nn')
parser.add_argument('-dropout', type=float, default=0, help='controller dropout')
parser.add_argument('-memory_type', type=str, default='dnc', help='dense or sparse memory: dnc | sdnc | sam')

# Depth and optimisation
parser.add_argument('-nlayer', type=int, default=1, help='number of layers')
parser.add_argument('-nhlayer', type=int, default=2, help='number of hidden layers')
parser.add_argument('-lr', type=float, default=1e-4, help='initial learning rate')
parser.add_argument('-optim', type=str, default='adam', help='learning rule, supports adam|rmsprop')
parser.add_argument('-clip', type=float, default=50, help='gradient clipping')

# Batch and external-memory geometry
parser.add_argument('-batch_size', type=int, default=100, metavar='N', help='batch size')
parser.add_argument('-mem_size', type=int, default=20, help='memory dimension')
parser.add_argument('-mem_slot', type=int, default=16, help='number of memory slots')
parser.add_argument('-read_heads', type=int, default=4, help='number of read heads')
parser.add_argument('-sparse_reads', type=int, default=10, help='number of sparse reads per read head')
parser.add_argument('-temporal_reads', type=int, default=2, help='number of temporal reads')

# Curriculum and device
parser.add_argument('-sequence_max_length', type=int, default=4, metavar='N', help='sequence_max_length')
parser.add_argument('-curriculum_increment', type=int, default=0, metavar='N', help='sequence_max_length incrementor per 1K iterations')
# The help text used to be a verbatim copy of -curriculum_increment's; it now describes this option.
parser.add_argument('-curriculum_freq', type=int, default=1000, metavar='N', help='number of iterations between sequence_max_length increments')
parser.add_argument('-cuda', type=int, default=-1, help='Cuda GPU ID, -1 for CPU')

# Run length and reporting
parser.add_argument('-iterations', type=int, default=100000, metavar='N', help='total number of iteration')
parser.add_argument('-summarize_freq', type=int, default=100, metavar='N', help='summarize frequency')
parser.add_argument('-check_freq', type=int, default=100, metavar='N', help='check point frequency')
parser.add_argument('-visdom', action='store_true', help='plot memory content on visdom per -summarize_freq steps')
args = parser.parse_args(args=[])

In [102]:
class MyLSTMBABI(nn.Module):
    """Many-to-one LSTM classifier: reads a whole sequence and scores one answer class.

    The final LSTM hidden state is projected by a linear layer and normalised with
    log-softmax, so `forward` returns log-probabilities of shape (batch, tagset_size).
    """

    # Define every sub-network used by the model in the constructor.
    def __init__(self, input_dim, hidden_dim, tagset_size):
        super(MyLSTMBABI, self).__init__()
        # Hidden size is a free choice: it never appears in the output shape.
        self.hidden_dim = hidden_dim
        # Single-layer LSTM, sequence-first layout (batch_first is left at its default).
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        # Projects the final hidden state onto the answer classes.
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.softmax = nn.LogSoftmax(dim=1)
        # Kept for compatibility; it only guarded one-off debug printing.
        self.flag=True

    # Forward pass.
    def forward(self, sentences,hidden=None):
        """Score a batch of sequences.

        Args:
            sentences: float tensor laid out as (batch, seq_len, input_dim).
            hidden: optional initial `(h_0, c_0)` pair for the LSTM; `None` starts
                from zeros. Previously this argument was accepted but ignored.

        Returns:
            Log-probabilities of shape (batch, tagset_size).
        """
        # The LSTM expects (seq_len, batch, input_dim).
        sentences = sentences.permute(1,0,2)

        # Many-to-one task: only the second return value, (h_n, c_n), is needed.
        _, lstm_out = self.lstm(sentences, hidden)

        # h_n is (1, batch, hidden_dim); flatten it to (batch, hidden_dim) for the linear layer.
        tag_space = self.hidden2tag(lstm_out[0].view(sentences.size()[1], self.hidden_dim))
        tag_score =self.softmax(tag_space)
        return tag_score

In [43]:
def my_criterion(predictions, targets):
    """Mean element-wise binary cross-entropy between logits and 0/1 targets.

    Args:
        predictions: raw (pre-sigmoid) scores.
        targets: tensor broadcastable to `predictions`, with values in [0, 1].

    Returns:
        Scalar tensor: the mean over all elements of
        -t * log(sigmoid(x)) - (1 - t) * log(1 - sigmoid(x)).
    """
    # log(1 - sigmoid(x)) equals logsigmoid(-x). Using that identity avoids the old
    # `log(1 - sigmoid(x) + 1e-9)`, which saturated near 20.7 for large positive
    # logits, and drops the deprecated F.sigmoid call.
    positive_term = -F.logsigmoid(predictions) * targets
    negative_term = -F.logsigmoid(-predictions) * (1 - targets)
    return T.mean(positive_term + negative_term)

In [19]:
def get_babidata(config):
    """Load the bAbI task-2 (10k) splits and wrap them in batch iterators.

    `config` is accepted but never read: the task number (2), the data root
    ('.data') and the batch size (32) are fixed in the body.

    Returns:
        (train_iter, val_iter, test_iter, vocab_len, text) where `vocab_len` is the
        number of distinct tokens counted on the training split plus one, and
        `text` is the BABI20Field holding the built vocabulary.
    """
    # 50 is the field's constructor argument (presumably the story memory size -- confirm).
    text = BABI20Field(50)
    splits = datasets.BABI20.splits(
        text,
        root='.data',
        task=2,
        joint=False,
        tenK=True,
        only_supporting=False,
    )
    train, val, test = splits
    text.build_vocab(train)

    counted_tokens = len(text.vocab.freqs)
    print("VOCAB LEN:", counted_tokens)

    iterators = Iterator.splits((train, val, test), batch_size=32)
    train_iter, val_iter, test_iter = iterators
    return train_iter, val_iter, test_iter, counted_tokens + 1, text

In [103]:
def get_babidata_task(task):
    """Load one bAbI task (10k variant) and wrap its splits in batch iterators.

    Args:
        task: bAbI task number forwarded to `BABI20.splits`.

    Returns:
        (train_iter, val_iter, test_iter, vocab_lenplus, text) where `vocab_lenplus`
        is the number of distinct training tokens plus two. The last index
        (`vocab_lenplus - 1`) is reserved for an extra marker token (the "?" noted
        in the original code), and `text` is the BABI20Field holding the vocabulary.
    """
    # 50 is the field's constructor argument (presumably the story memory size -- confirm).
    text = BABI20Field(50)
    train, val, test = datasets.BABI20.splits(text, root='.data', task=task, joint=False,
                                            tenK=True, only_supporting=False)
    text.build_vocab(train)
    vocab_len = len(text.vocab.freqs) +1
    vocab_lenplus =vocab_len+1 # one extra slot for the "?" marker token
    print("VOCAB LEN PLUS:",vocab_lenplus )
    train_iter,val_iter,test_iter = Iterator.splits((train, val, test),batch_size=32)
    # A stray, unreachable `babi` expression used to follow this return; it was removed.
    return train_iter, val_iter, test_iter,vocab_lenplus,text

In [105]:
def arrangestory(batch_story): # (batch,length,size)
    """Trim unused story positions, reverse the remaining ones and flatten per sample.

    A position along axis 1 counts as used when its ids, summed over the batch and
    token axes, are non-zero. The first `n_used` positions are kept (this assumes the
    used positions come first), their order is flipped, and each sample is flattened.

    Returns:
        2D tensor of shape (batch, n_used * size).
    """
    position_totals = batch_story.sum(dim=(0, 2))  # 1D, one total per story position
    n_used = int((position_totals != 0).sum())
    reversed_story = batch_story[:, :n_used, :].flip(dims=[1])
    return reversed_story.view(batch_story.size(0), -1)

In [114]:
def prep_data(batch,vocab_size,device=None):
    """Turn a bAbI batch into one-hot model inputs and targets.

    The flattened story, the query and a final stop token (id `vocab_size - 1`) are
    one-hot encoded and concatenated along the sequence axis.

    Args:
        batch: object exposing integer tensors `story`, `query` and `answer`.
        vocab_size: number of one-hot classes.
        device: where to place the returned tensors. Defaults to CUDA when it is
            available (the previous hard-coded behaviour) and to the CPU otherwise.

    Returns:
        (x, y, ans_id): `x` is float (batch, seq_len, vocab_size), `y` is the float
        one-hot answer (batch, vocab_size), and `ans_id` is the 1D answer ids (batch,).
        Note that `y` is 2D while `ans_id` is 1D.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    batch_size=batch.story.size()[0]
    flat_story =arrangestory(batch.story)
    story_OH =torch.nn.functional.one_hot(flat_story,num_classes=vocab_size) #(batch, seq_len, vocab_len)
    query_OH=torch.nn.functional.one_hot(batch.query,num_classes=vocab_size) #(batch, que_len, vocab_len)
    answer_OH=torch.nn.functional.one_hot(batch.answer,num_classes=vocab_size) #(batch,1, vocab_len)

    # Create the stop token on the same device as the batch so torch.cat cannot hit a
    # CPU/GPU mismatch when the iterator already yields GPU tensors.
    stop_ids = torch.tensor([vocab_size-1]*batch_size, device=batch.query.device)
    querystop =torch.nn.functional.one_hot(stop_ids,num_classes=vocab_size)
    querystop=querystop.view(batch_size,1,-1) #2D -> 3D

    x =torch.cat((story_OH,query_OH,querystop),1)
    y =answer_OH.view(batch_size,vocab_size)

    # The old Variable wrapper is a no-op on current PyTorch, so plain tensors are returned.
    x = x.to(torch.float).to(device)
    y = y.to(torch.float).to(device)
    ans_id =batch.answer.view(-1).to(device) #1D (batch_size)

    return x,y,ans_id

In [66]:
def IDtensor2word(tns,text_field):
    """Map a tensor of vocabulary ids to a NumPy array of words with the same shape.

    Args:
        tns: integer tensor of ids (any shape, any device).
        text_field: object whose `vocab.itos` list maps id -> word.
    """
    vocab_words = text_field.vocab.itos
    ids = tns.detach().to('cpu').numpy().copy()
    words = [vocab_words[token_id] for token_id in ids.ravel()]
    return np.reshape(words, ids.shape)

In [97]:
def softmaxORonehot2word(tns,text_field):
    """Decode scores or one-hot vectors to words by taking argmax over the last axis.

    Args:
        tns: tensor whose last axis indexes the vocabulary.
        text_field: field object passed through to `IDtensor2word`.

    Returns:
        NumPy array of words with the shape of `tns` minus its last axis.
    """
    # A leftover `print(type(tns_dim))` debug statement was removed here.
    token_ids = torch.argmax(tns, tns.dim()-1)
    return IDtensor2word(token_ids,text_field)
    

In [183]:
# Load bAbI task 1: batch iterators for the three splits, the one-hot width
# (printed below as "VOCAB LEN PLUS") and the field that holds the vocabulary.
train_iter, val_iter, test_iter, vocab_size, text_field =get_babidata_task(1)



VOCAB LEN PLUS: 21




In [184]:
# Sanity check on a single training batch: print the raw story/query/answer, then
# repeat the trimming and one-hot steps inline and verify that they round-trip.
# Only the first batch is inspected (see the `break` at the end).
for epoch,batch in enumerate(train_iter):
    batch_size=batch.batch_size
    #print("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
    batch_story=batch.story
    # Decoded words and raw shapes for the first two samples.
    print(IDtensor2word(batch_story[:2],text_field))
    print(IDtensor2word(batch.query[:2],text_field))
    print(IDtensor2word(batch.answer[:2],text_field))
    print(batch_story.size())
    print(batch.query.size())
    print(batch.answer.size())

    # Same steps as arrangestory(): count story positions with a non-zero id sum,
    # keep that many leading positions, reverse them and flatten each sample.
    strysum =torch.sum(batch_story,(0,2)) #1D
    fill_length=len(strysum[torch.where(strysum!=0)])
    red_story=batch_story[:,:fill_length,:]
    red_story =torch.flip(red_story, dims=[1])
    flat_story =red_story.view(len(batch_story),-1)
    print(fill_length)
    print(red_story.size())
    print(flat_story.size())
    print(IDtensor2word(flat_story[:2],text_field))
    print(IDtensor2word(torch.cat((flat_story[:4],batch.query[:4]),1),text_field))

    # One-hot encode story, query and answer, and append a stop token (id vocab_size-1).
    story_OH =torch.nn.functional.one_hot(flat_story,num_classes=vocab_size) #(batch, seq_len, vocab_len)
    query_OH=torch.nn.functional.one_hot(batch.query,num_classes=vocab_size) #(batch, que_len=3, vocab_len)
    answer_OH=torch.nn.functional.one_hot(batch.answer,num_classes=vocab_size) #(batch,1, vocab_len)
    querystop =torch.nn.functional.one_hot(torch.tensor([vocab_size-1]*batch_size),num_classes=vocab_size)
    querystop=querystop.view(batch_size,1,-1) #2D -> 3D
    train_x =torch.cat((story_OH,query_OH,querystop),1)
    train_y =answer_OH.view(batch_size,-1)
    
    # Float copies on the GPU; this requires a CUDA device.
    train_x = var(train_x.to(torch.float).cuda() )
    train_y = var(train_y.to(torch.float).cuda() )

    print(train_x.size())
    print(train_y.size())
    # Check that ids are unchanged after a one-hot -> argmax round trip.
    print(flat_story[:2])
    print(torch.argmax(torch.nn.functional.one_hot(flat_story[:2],num_classes=vocab_size) ,2))
    
    # Check that decoding one-hot vectors via argmax recovers the expected words.
    print(softmaxORonehot2word(torch.cat((story_OH,query_OH),1)[:2],text_field))
    print(softmaxORonehot2word(torch.cat((story_OH,query_OH),1),text_field).shape)
    print(softmaxORonehot2word(train_y[:2],text_field))
    print(softmaxORonehot2word(train_y,text_field).shape)

    seq_len=train_x.size()[1]
    print(seq_len)

    print(querystop)
    break


[[['John' 'travelled' 'to' 'the' 'bedroom' '<pad>']
  ['Sandra' 'travelled' 'to' 'the' 'garden' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' '<pad>' '<pad>' '<pad>' '<pad>' '<pad>']
  ['<pad>' 



In [180]:
"""config = parse_config(argtext="--task 4 --batch_size 32 --cuda --verbose")

for t in range(1,21):
    config.task = t
    acc = []
    for i in range(config.run_avg):"""

def _evaluate_lstm(model, data_iter, criterion, vocab_size):
    """Score `model` on every batch of `data_iter` without tracking gradients.

    Returns:
        (losses, accuracies): two lists with one entry per batch.
    """
    losses = []
    accuracies = []
    with torch.no_grad():
        for batch in data_iter:
            inputs, _, answer_ids = prep_data(batch, vocab_size)
            output = model(inputs)
            losses.append(criterion(output, answer_ids).item())

            pred_id = torch.argmax(output, 1).cpu().numpy()  # 1D: (batch,)
            answer_np = answer_ids.cpu().numpy()
            # NumPy division already yields a float, so no explicit cast is needed.
            accuracies.append((pred_id == answer_np).sum() / len(answer_np))
    return losses, accuracies


def main_lstm(task):
    """Train the LSTM baseline on one bAbI task, keep the best checkpoint, then test it.

    Every `summarize_freq` epochs the training loss, validation loss and validation
    accuracy accumulated since the previous summary are averaged and printed. When
    that averaged accuracy beats the best so far, the current weights are saved to
    `lstm_param/task<task>.pth`. After training, the saved weights are reloaded and
    evaluated on the test split.

    Args:
        task: bAbI task number.

    Returns:
        dict with keys "task", "best_epoch", "best_val_loss", "best_val_acc",
        "test_loss" and "test_acc".
    """
    lr=0.005
    print("==========TASK: ",task," learning_rate:",lr)
    train_iter, val_iter, test_iter, vocab_size, text_field =get_babidata_task(task)
    # model generate, optimizer and criterion setting
    model= MyLSTMBABI(input_dim=vocab_size, hidden_dim=512, tagset_size=vocab_size).cuda()
    optimizer = optim.Adam(model.parameters(),lr=lr)
    criterion = nn.CrossEntropyLoss()
    save_dir="lstm_param"
    # torch.save does not create missing directories; without this a fresh run fails.
    os.makedirs(save_dir, exist_ok=True)
    save_path=os.path.join(save_dir,"task"+str(task)+".pth")

    iterations=40
    summarize_freq=5
    last_save_losses = []
    last_val_losses =[]
    val_acc=[]
    val_bestacc=0
    val_bestloss=0
    bestepoch =0
    checkpoint_saved = False

    for epoch in range(iterations):
        model.train()
        for batch in train_iter:
            train_x, _, ans_id = prep_data(batch,vocab_size)

            output = model(train_x)

            optimizer.zero_grad()
            loss = criterion(output,ans_id)
            loss.backward()
            optimizer.step()
            last_save_losses.append(loss.item())

        # Validation runs in eval mode; the next epoch switches back to train mode.
        model.eval()
        epoch_val_losses, epoch_val_acc = _evaluate_lstm(model, val_iter, criterion, vocab_size)
        last_val_losses.extend(epoch_val_losses)
        val_acc.extend(epoch_val_acc)

        summarize = ((epoch+1) % summarize_freq == 0)
        if summarize:
            loss = np.mean(last_save_losses)
            print("epoch:",epoch," loss:",loss)
            val_loss = np.mean(last_val_losses)
            print("    val_loss:",val_loss)
            mean_acc =np.mean(val_acc)
            print("    val_acc:",mean_acc)
            if mean_acc>val_bestacc:
                val_bestacc =mean_acc
                val_bestloss=val_loss
                bestepoch=epoch
                torch.save(model.state_dict(), save_path)
                checkpoint_saved = True
            last_save_losses = []
            last_val_losses =[]
            val_acc=[]
    print("\n val best accuracy: ",val_bestacc," epoch: ",bestepoch)

    #Test
    print("=====TEST=====")

    # Reload only a checkpoint written by this run. If validation accuracy never
    # improved, no file was written and the final weights are evaluated instead.
    if checkpoint_saved:
        model.load_state_dict(torch.load(save_path))
    model.eval()
    test_losses, accuracy_rates = _evaluate_lstm(model, test_iter, criterion, vocab_size)

    test_loss = np.mean(test_losses)
    test_acc =np.mean(accuracy_rates)
    print(" loss:",test_loss)
    print("accuracy_rate:",test_acc)

    return {"task":task,"best_epoch":bestepoch,
    "best_val_loss":val_bestloss,"best_val_acc":val_bestacc,
    "test_loss":test_loss,"test_acc":test_acc
    }

In [182]:
# Run the LSTM baseline on the selected bAbI task(s) and tabulate one row per task.
state = []
for task in range(5, 6):
    task_metrics = main_lstm(task)
    state.append(task_metrics)

print(pd.DataFrame(state))





VOCAB LEN PLUS: 41




epoch: 4  loss: 1.5878346913249781
    val_loss: 1.5246030598878861
    val_acc: 0.24453125
epoch: 9  loss: 1.4876092597947899
    val_loss: 1.5502733200788499
    val_acc: 0.2849609375
epoch: 14  loss: 1.3135680327178738
    val_loss: 1.3495812237262725
    val_acc: 0.3482421875
epoch: 19  loss: 1.2810428208493172
    val_loss: 1.3390481486916541
    val_acc: 0.350390625
epoch: 24  loss: 1.2577998370988994
    val_loss: 1.34085821993649
    val_acc: 0.3650390625
epoch: 29  loss: 1.2339473258519003
    val_loss: 1.3494028739631176
    val_acc: 0.3720703125
epoch: 34  loss: 1.2174526939155361
    val_loss: 1.3717475671321153
    val_acc: 0.3537109375
epoch: 39  loss: 1.2025334834629762
    val_loss: 1.3369005836546421
    val_acc: 0.3767578125

 val best accuracy:  0.3767578125  epoch:  39
=====TEST=====
 loss: 1.3201708868145943
accuracy_rate: 0.359375
   task  best_epoch  best_val_loss  best_val_acc  test_loss  test_acc
0     5          39       1.336901      0.376758   1.320171  0.35

In [58]:
# Leftover from the experiment that fed the LSTM one timestep at a time.
# NOTE(review): MyLSTMCopyFirst is not defined in the visible part of this notebook;
# it is called as if it returns (output, hidden) -- confirm before re-running.

# model generate, optimizer and criterion setting
model= MyLSTMCopyFirst(input_dim=vocab_size, hidden_dim=512, tagset_size=vocab_size).cuda()
optimizer = optim.Adam(model.parameters(),lr=0.001)

#Learn
summarize_freq=50
last_save_losses = []

model.train()
# `epoch` is really the batch index: this is a single pass over train_iter.
for epoch,batch in enumerate(train_iter):
    batch_size=batch.batch_size
    #print("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
    flat_story =arrangestory(batch.story)
    story_OH =torch.nn.functional.one_hot(flat_story,num_classes=vocab_size) #(batch, seq_len, vocab_len)
    query_OH=torch.nn.functional.one_hot(batch.query,num_classes=vocab_size) #(batch, que_len=3, vocab_len)
    answer_OH=torch.nn.functional.one_hot(batch.answer,num_classes=vocab_size) #(batch,1, vocab_len)
    querystop =torch.nn.functional.one_hot(torch.tensor([vocab_size-1]*batch_size),num_classes=vocab_size)
    querystop=querystop.view(batch_size,1,-1) #2D -> 3D
    train_x =torch.cat((story_OH,query_OH,querystop),1)
    train_y =answer_OH.view(batch_size,-1)

    train_x = var(train_x.to(torch.float).cuda() )
    train_y = var(train_y.to(torch.float).cuda() )
    seq_len=train_x.size()[1]

    hidden=None
    # The index is named `step`, not `time`, so the loop no longer overwrites the
    # `time` module imported at the top of the notebook.
    for step in range(seq_len):
        sentence =train_x[:,step,:]

        if step==0:
            output,hidden = model(sentence)
        else:
            output,hidden = model(sentence,hidden)

        # Only the final timestep contributes to the loss.
        if (step==seq_len-1):
            optimizer.zero_grad()
            loss = my_criterion((output),train_y)
            loss.backward()
            optimizer.step()
            loss_value = loss.item()
            last_save_losses.append(loss_value)
    hidden=None
    summarize = (epoch % summarize_freq == 0)
    if summarize:
        loss = np.mean(last_save_losses)
        print("epoch:",epoch," loss:",loss)
        last_save_losses = []




epoch: 0  loss: 0.6926040649414062
epoch: 50  loss: 0.17907388955354692
epoch: 100  loss: 0.12981969431042673
epoch: 150  loss: 0.13114346712827682
epoch: 200  loss: 0.13026186019182207
epoch: 250  loss: 0.13087139889597893


In [49]:
# Count how many batches the test iterator yields.
c = 0
for c, bb in enumerate(test_iter, start=1):
    pass
print(c)

32




In [59]:
#Test
# Evaluates the per-timestep model on the test split and reports the mean loss and
# the mean per-batch accuracy once every batch has been processed.

last_save_losses=[]
accuracy_rates =[]

model.eval()
with torch.no_grad():
    # `epoch` is really the batch index.
    for epoch,batch in enumerate(test_iter):
        batch_size=batch.batch_size
        #print("\rIteration {ep}/{tot}".format(ep=epoch, tot=iterations))
        flat_story =arrangestory(batch.story)
        story_OH =torch.nn.functional.one_hot(flat_story,num_classes=vocab_size) #(batch, seq_len, vocab_len)
        query_OH=torch.nn.functional.one_hot(batch.query,num_classes=vocab_size) #(batch, que_len=3, vocab_len)
        answer_OH=torch.nn.functional.one_hot(batch.answer,num_classes=vocab_size) #(batch,1, vocab_len)
        querystop =torch.nn.functional.one_hot(torch.tensor([vocab_size-1]*batch_size),num_classes=vocab_size)
        querystop=querystop.view(batch_size,1,-1) #2D -> 3D
        test_x =torch.cat((story_OH,query_OH,querystop),1)
        test_y =answer_OH.view(batch_size,-1) #3D->2D
        test_x = var(test_x.to(torch.float).cuda() )
        test_y = var(test_y.to(torch.float).cuda() )
        seq_len=test_x.size()[1]

        hidden=None
        # The index is named `step`, not `time`, so the loop no longer overwrites the
        # `time` module imported at the top of the notebook.
        for step in range(seq_len):
            sentence =test_x[:,step,:]

            if step==0:
                output,hidden = model(sentence)
            else:
                output,hidden = model(sentence,hidden)

            # Only the final timestep is scored.
            if (step==seq_len-1):
                loss = my_criterion((output),test_y)
                loss_value = loss.item()
                last_save_losses.append(loss_value)

                # Compare integer ids directly; no float cast is needed.
                ans_id =batch.answer.view(-1).cuda() #1D (batch_size)
                pred_id = torch.argmax(output,1)

                # Fraction of correct answers within this batch.
                accuracy_rates.append(((pred_id == ans_id).sum().float() / len(ans_id) ).item())

        hidden=None

# Summarise after the loop, so the report no longer depends on a hard-coded batch
# count of 32; it is skipped when the iterator produced no batches.
if last_save_losses:
    loss = np.mean(last_save_losses)
    print("epoch:",epoch," loss:",loss)
    print("accuracy_rate:",np.mean(accuracy_rates))
    last_save_losses = []



epoch: 31  loss: 0.13347184797748923
accuracy_rate: 0.1484375


hiddenとoutputのサイズが共通な以上、全結合層を挟まないとhidden_sizeが分類クラス数あるいはvocab_sizeに固定されてしまうよ　　
扱うのがコピーから文章になると、embedding層やvocab_size引数が必要になるよ

In [8]:
class MyLSTMClassifier(nn.Module):
    """Many-to-one LSTM classifier over pre-vectorised inputs (no embedding layer).

    `forward` returns log-probabilities, matching the original "log version of
    softmax" comment and the other LSTM classes in this notebook. Plain `nn.Softmax`
    was used before, which normalises twice when the output is fed to
    `nn.CrossEntropyLoss`.
    """

    # Define every sub-network used by the model in the constructor.
    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        """`vocab_size` is accepted for signature compatibility but is not used."""
        super(MyLSTMClassifier, self).__init__()
        # Hidden size is a free choice: it never appears in the output shape.
        self.hidden_dim = hidden_dim
        # Single LSTM layer; `embedding_dim` is the size of each input vector.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        # Projects the final hidden state onto the answer classes.
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        # Log-softmax over dim=1 (the class axis of the (1, tagset_size) output).
        self.softmax = nn.LogSoftmax(dim=1)

    # Forward pass.
    def forward(self, sentence):
        """Score one sequence.

        Args:
            sentence: 2D tensor with one row per timestep, (seq_len, embedding_dim).

        Returns:
            Log-probabilities of shape (1, tagset_size).
        """
        # Reshape to (seq_len, batch=1, embedding_dim) for the LSTM. This is a
        # many-to-one task, so only the second return value, (h_n, c_n), is used.
        _, lstm_out = self.lstm(sentence.view(len(sentence), 1, -1))
        # h_n is 3D; flatten it to (1, hidden_dim) before the linear layer.
        tag_space = self.hidden2tag(lstm_out[0].view(-1, self.hidden_dim))
        tag_scores = self.softmax(tag_space)
        return tag_scores
        # The initial hidden state does not need to be passed explicitly.
        #hiddenは明示的にforwardの入力にしなくても大丈夫

In [9]:
# model generate, optimizer and criterion setting
# NOTE(review): MyLSTMClassifier.__init__ requires embedding_dim, hidden_dim, vocab_size
# and tagset_size. Calling it with no arguments raises the TypeError recorded in this
# cell's output; the intended values are not visible in this notebook.

model= MyLSTMClassifier().cuda()
optimizer = optim.SGD(model.parameters(),lr=0.1)
criterion = nn.CrossEntropyLoss()

TypeError: __init__() missing 4 required positional arguments: 'embedding_dim', 'hidden_dim', 'vocab_size', and 'tagset_size'

In [None]:
#Learn
# Template mini-batch training loop.
# NOTE(review): `n` and `bs` are unfilled placeholders, so this cell does not parse as
# written; `xtrain` / `ytrain` are not defined in the visible part of the notebook.

n =     ##  size of the dataset
bs =    ##  batch size
itr=5

model.train()
for i in range(itr):
    # Fresh random sample order for every pass over the data.
    idx = np.random.permutation(n)
    for j in range(0,n,bs):
        # Clamp the slice end to n so the last (possibly shorter) batch stays in range.
        xtm = xtrain[idx[j:(j+bs) if (j+bs)<n else n]]
        ytm = ytrain[idx[j:(j+bs) if (j+bs) < n else n]]
        output = model(xtm)
        loss = criterion(output,ytm)
        print(i,j,loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [None]:
#Test
# Accuracy on the held-out set: fraction of argmax predictions that equal the labels.

model.eval()
with torch.no_grad():
    output1 = model(xtest)
    ans = output1.argmax(1)
    hit_fraction = (ytest == ans).sum().float() / len(ans)
    print(hit_fraction.item())

↓は文章をあつかうやつ　コピーの次に

In [None]:
# Define model

class MyLSTMClassifier(nn.Module):
    """Two-layer fully connected network: 4 inputs -> 6 sigmoid units -> 3 outputs.

    NOTE(review): despite the name this class contains no LSTM, and it is shadowed by
    the class of the same name defined right after it in this cell.
    """
    def __init__(self):
        # This used to call super(MyLSTM, self), which raises NameError because no
        # class named MyLSTM exists.
        super(MyLSTMClassifier,self).__init__()
        self.l1=nn.Linear(4,6)
        self.l2=nn.Linear(6,3)
    def forward(self,x):
        """Return raw (unnormalised) scores of shape (..., 3) for inputs of shape (..., 4)."""
        h1=torch.sigmoid(self.l1(x))
        h2=self.l2(h1)
        return h2

class MyLSTMClassifier(nn.Module):
    """Many-to-one LSTM classifier over word ids: embedding -> LSTM -> linear -> log-softmax."""

    def __init__(self, embedding_dim, hidden_dim, vocab_size, tagset_size):
        """Build the sub-networks.

        Args:
            embedding_dim: size of each word vector (also the LSTM input size).
            hidden_dim: LSTM hidden size; it does not appear in the output shape.
            vocab_size: number of rows in the embedding table.
            tagset_size: number of output classes.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        # Turns word ids into dense vectors.
        self.word_embeddings = nn.Embedding(vocab_size, embedding_dim)
        # A single LSTM layer reads the embedded sequence.
        self.lstm = nn.LSTM(embedding_dim, hidden_dim)
        # Linear projection of the final hidden state onto the classes.
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        # Log-softmax over dim=1, the class axis.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sentence):
        """Return log-probabilities of shape (1, tagset_size) for a 1D tensor of word ids."""
        n_tokens = len(sentence)
        # (seq_len, embedding_dim) -> (seq_len, batch=1, embedding_dim) for the LSTM.
        lstm_input = self.word_embeddings(sentence).view(n_tokens, 1, -1)
        # Many-to-one: only the final state pair (h_n, c_n) is needed.
        _, final_state = self.lstm(lstm_input)
        final_hidden = final_state[0].view(-1, self.hidden_dim)
        return self.softmax(self.hidden2tag(final_hidden))

In [None]:
# copyfirstをbabiに適用しようとした残骸 (leftover from an attempt to apply the copy-first model to bAbI)
class MyLSTMBABItest(nn.Module):
    """LSTM that is stepped one timestep at a time and returns raw class scores.

    Each call to `forward` consumes a single timestep for the whole batch and returns
    the updated LSTM state, so the caller can pass it back in on the next step.
    """

    # Define every sub-network used by the model in the constructor.
    def __init__(self, input_dim, hidden_dim, tagset_size):
        # This used to call super(MyLSTMBABI, self), which names a different class
        # and fails because this class does not inherit from it.
        super(MyLSTMBABItest, self).__init__()
        # Hidden size is a free choice: it never appears in the output shape.
        self.hidden_dim = hidden_dim
        # Single LSTM layer.
        self.lstm = nn.LSTM(input_dim, hidden_dim)
        # Projects the hidden state onto the answer classes.
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        # Kept for compatibility; it only guarded one-off debug printing.
        self.flag=True

    # Forward pass.
    def forward(self, sentence,hidden=None):
        """Advance the LSTM by one timestep.

        Args:
            sentence: 2D tensor with one row per sample, (batch, input_dim).
            hidden: `(h, c)` state returned by the previous call, or `None` to
                start from zeros.

        Returns:
            (tag_space, lstm_out): unnormalised scores of shape (batch, tagset_size)
            and the new `(h, c)` state.
        """
        # Add a leading length-1 sequence axis: (1, batch, input_dim).
        step_input = sentence.view(1,len(sentence),  -1)
        # nn.LSTM treats hidden=None as a zero initial state, so one call covers both
        # the first step and later steps (this replaces the `hidden==None` branch).
        _, lstm_out = self.lstm(step_input, hidden)

        # h is 3D; flatten it to (batch, hidden_dim) before the linear layer.
        tag_space = self.hidden2tag(lstm_out[0].view(-1, self.hidden_dim))
        return tag_space,lstm_out