In [None]:
# Following the seq2seq paper, the encoder is bidirectional; deeper stacks also performed better than shallow ones, so n_layers = 4.
import json
import numpy as np
import torch
import torch.nn.functional as F
from torch import nn 
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
from tqdm.notebook import tqdm
# Utilities for handling variable-length sequences
from torch.nn.utils.rnn import pad_sequence,pack_padded_sequence,pad_packed_sequence
from visdom import Visdom
import random
# --- Runtime ---------------------------------------------------------------
# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

# --- Experiment bookkeeping --------------------------------------------------
beatMod = 4
g_AttrIndex = []  # mapping of the information stored in the chart's notes

testId = 6  # full dataset
# testId = 7  # 10 samples, dropout removed, used to try to force over-fitting
# testId = 8  # 500 samples
modelId = 31
bVisdom = False

# --- Training schedule -------------------------------------------------------
splitPercent = 0.95
N_EPOCHS = 50
learning_rate = 0.00001
batch_size = 1
dropout = 0.1
teacher_forcing_ratio = 0.8

# --- Model dimensions --------------------------------------------------------
en_feature_size = 32   # size of the per-step audio feature array (encoder input)
de_feature_size = 256
hidden_dim = 512
enc_hid_dim = 512
dec_hid_dim = 512
n_layers = 4
bidirectional = True   # whether the encoder also reads the sequence reversed

# --- Sequence / vocabulary ---------------------------------------------------
seq_size = 64          # length of one "sentence" (sequence)
PosibleKind = 433
output_size = 433

# Load the JSON dataset selected by `testId`. The encoding is pinned to UTF-8
# (the JSON interchange encoding) so the read does not depend on the
# platform's default locale encoding.
with open("./datasetAll" + str(testId) + ".json", "r", encoding="utf-8") as f:
    dataset = json.load(f)
# 'X' holds the encoder inputs and 'Y' the decoder targets, index-aligned.
X = dataset['X']
Y = dataset['Y']

class GetLoader(Dataset):
    """Pair an indexable collection of samples with an indexable collection of labels."""

    def __init__(self, data_root, data_label):
        """Keep references to the samples (`data_root`) and labels (`data_label`)."""
        self.data, self.label = data_root, data_label

    def __len__(self):
        """Return the number of samples (taken from the data, not the labels)."""
        return len(self.data)

    def __getitem__(self, index):
        """Return the `(sample, label)` pair stored at `index`."""
        return self.data[index], self.label[index]

def get_Data():
    """Split the module-level `X` / `Y` into train and validation loaders.

    The first `splitPercent` fraction of the samples (by position) is used for
    training and the remainder for validation. Every sample is cut to its
    first `seq_size` steps; a sample shorter than that raises `IndexError`.

    Returns:
        A `(train_loader, valid_loader)` tuple of `DataLoader`s. Only the
        training loader shuffles.
    """
    # Index of the first validation sample.
    split_at = int(len(X) * splitPercent)

    def _first_steps(rows, start, stop):
        # Copy the first `seq_size` entries of rows[start:stop] into a float tensor.
        clipped = [[rows[r][c] for c in range(seq_size)] for r in range(start, stop)]
        return torch.FloatTensor(clipped)

    train_x = _first_steps(X, 0, split_at)
    valid_x = _first_steps(X, split_at, len(X))
    train_y = _first_steps(Y, 0, split_at)
    # The validation labels run to the end of Y (its own length, not X's).
    valid_y = _first_steps(Y, split_at, len(Y))

    train_loader = DataLoader(GetLoader(train_x, train_y), batch_size=batch_size, shuffle=True)
    valid_loader = DataLoader(GetLoader(valid_x, valid_y), batch_size=batch_size, shuffle=False)
    return (train_loader, valid_loader)

class EncoderRNN(nn.Module):
    """Multi-layer GRU encoder whose final state is projected for the decoder.

    The per-layer final hidden states of all directions are concatenated and
    passed through a linear layer + tanh so the result has `hidden_size`
    features per layer.
    """

    def __init__(self, feature_size, hidden_size, n_layers, dropout, bidirectional=None):
        """Build the encoder.

        Args:
            feature_size: Number of features per input time step.
            hidden_size: GRU hidden size; also the size of the projected state.
            n_layers: Number of stacked GRU layers.
            dropout: Probability for the `self.dropout` module (kept for
                compatibility; `forward` does not apply it).
            bidirectional: Whether the GRU is bidirectional. When omitted, the
                module-level `bidirectional` flag is used if it exists,
                otherwise True.
        """
        super(EncoderRNN, self).__init__()
        if bidirectional is None:
            # Honour the file-level configuration flag without requiring it.
            bidirectional = globals().get("bidirectional", True)
        self.bidirectional = bool(bidirectional)
        self.num_directions = 2 if self.bidirectional else 1

        self.hidden_size = hidden_size
        self.hid_dim = hidden_size
        self.n_layers = n_layers
        self.gru = nn.GRU(feature_size, hidden_size, n_layers, bidirectional=self.bidirectional)
        self.dropout = nn.Dropout(dropout)
        # Sized from this module's own hidden size and direction count rather
        # than from unrelated global constants.
        self.fc = nn.Linear(hidden_size * self.num_directions, hidden_size)

    def combine_bidir(self, outs, bsz: int):
        """Concatenate the per-direction final states of every layer.

        Args:
            outs: GRU `h_n`, shaped (n_layers * num_directions, bsz, hidden).
            bsz: Batch size.

        Returns:
            Tensor shaped (n_layers, bsz, num_directions * hidden).
        """
        out = outs.view(self.n_layers, self.num_directions, bsz, -1).transpose(1, 2).contiguous()
        return out.view(self.n_layers, bsz, -1)

    def forward(self, src):
        """Encode `src`.

        Args:
            src: Tensor shaped (seq_len, batch, feature_size); the GRU is built
                with the default `batch_first=False`.

        Returns:
            `(out, hidden)`: the GRU outputs for every step and the projected
            final state shaped (n_layers, batch, hidden_size).
        """
        out, hidden = self.gru(src)
        hidden = self.combine_bidir(hidden, src.shape[1])
        hidden = torch.tanh(self.fc(hidden))
        return out, hidden
    
class DecoderRNN(nn.Module):
    """Multi-layer GRU decoder that maps token ids to unnormalised class scores."""

    def __init__(self, embedding, hidden_size, output_size, n_layers, dropout):
        """Build the decoder.

        Args:
            embedding: `nn.Embedding` used to embed the input token ids; its
                `embedding_dim` determines the GRU input size.
            hidden_size: GRU hidden size.
            output_size: Number of output classes.
            n_layers: Number of stacked GRU layers.
            dropout: Dropout probability applied to the embedded input.
        """
        super(DecoderRNN, self).__init__()
        self.hid_dim = hidden_size
        self.n_layers = n_layers
        self.output_dim = output_size
        self.embedding = embedding
        # Take the input width from the embedding actually supplied instead of
        # a global constant, so the two can never disagree.
        self.gru = nn.GRU(embedding.embedding_dim, hidden_size, n_layers)
        self.fc_out = nn.Linear(hidden_size, output_size)
        # Kept as an attribute for compatibility; forward() returns raw scores.
        self.softmax = nn.LogSoftmax(dim=1)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden):
        """Run the decoder on `input` starting from `hidden`.

        Args:
            input: Long tensor of token ids shaped (seq, batch).
            hidden: Initial GRU state shaped (n_layers, batch, hidden_size).

        Returns:
            `(prediction, hidden)`: raw scores shaped (seq, batch, output_size)
            and the updated GRU state.
        """
        # The embedding output already lives on this module's device, so no
        # explicit device transfer is needed.
        embedded = self.dropout(self.embedding(input))
        embedded = F.relu(embedded)
        output, hidden = self.gru(embedded, hidden)
        # output: [seq, batch_size, hidden_size]
        prediction = self.fc_out(output)
        return prediction, hidden
        

def load_word_embeddings(file_name, dim):
    """Read a whitespace-separated embedding file (one `term v1 ... vN` per line).

    Id 0 is reserved for the synthetic 'NULL' term with an all-zero vector;
    terms from the file get ids 1, 2, ... in file order. Blank lines are
    skipped.

    Args:
        file_name: Path of the embedding text file.
        dim: Embedding width; the last `dim` fields of each line are parsed
            as the vector.

    Returns:
        `(term_ids, term_by_id, we_matrix)`: term -> id dict, id -> term list,
        and the list of vectors indexed by id.

    Raises:
        ValueError: If a vector field is not a valid float.
    """
    term_ids = {'NULL': 0}
    term_by_id = ['NULL']
    we_matrix = [[0] * dim]
    with open(file_name, encoding="utf-8") as file_obj:
        for line in file_obj:
            fields = line.split()
            if not fields:
                # A blank line carries no term; skip it instead of crashing.
                continue
            term = fields[0]
            term_ids[term] = len(term_by_id)
            term_by_id.append(term)
            # Slice by the requested `dim`, not by a module-level constant.
            we_matrix.append([float(value) for value in fields[-dim:]])
    return term_ids, term_by_id, we_matrix


def get_glove_embedding(classes, filename):
    """Build the initial embedding matrix for `classes` class ids.

    Rows start as uniform random values in [0, 1). Every class id whose
    decimal string appears as a term in the vector file is overwritten with
    that term's vector.

    Args:
        classes: Number of rows (class ids 0 .. classes - 1).
        filename: Name of the vector file inside the per-`testId` glove folder.

    Returns:
        A float tensor shaped (classes, de_feature_size).
    """
    vectors_path = "glove/jsonfile" + str(testId) + "/" + filename
    term_to_id, _id_to_term, vectors = load_word_embeddings(vectors_path, de_feature_size)

    matrix = np.random.rand(classes, de_feature_size)
    for class_id in range(classes):
        row = term_to_id.get(str(class_id))
        if row is None:
            # No pretrained vector for this class: keep the random row.
            continue
        matrix[class_id] = vectors[row]
    return torch.FloatTensor(matrix)

class Seq2Seq(nn.Module):
    """Encoder-decoder wrapper that decodes step by step with teacher forcing."""

    def __init__(self, encoder, decoder, device):
        """Store the sub-modules and the device used for intermediate tensors.

        Args:
            encoder: Module returning `(outputs, hidden)` for a source batch;
                must expose `hid_dim` and `n_layers`.
            decoder: Module mapping `(tokens, hidden)` to `(scores, hidden)`;
                must expose `hid_dim`, `n_layers` and `output_dim`.
            device: Device on which the output buffer and step inputs are placed.
        """
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
        assert encoder.hid_dim == decoder.hid_dim, "Hidden dimensions of encoder and decoder must be equal!"
        assert encoder.n_layers == decoder.n_layers, "Encoder and decoder must have equal number of layers!"

    def forward(self, src, trg, teacher_forcing_ratio = teacher_forcing_ratio):
        """Decode `trg_len - 1` steps conditioned on `src`.

        Args:
            src: Source batch passed unchanged to the encoder.
            trg: Target token ids shaped (trg_len, batch); `trg[0]` is the
                first decoder input.
            teacher_forcing_ratio: Probability, drawn once per step, of feeding
                the ground-truth token instead of the model's own argmax.

        Returns:
            Tensor shaped (trg_len, batch, output_dim). Slots 1.. hold the
            decoder scores; slot 0 is never written by the decoding loop and
            keeps its initial (orthogonally initialised) values.
        """
        trg_len, b_size = trg.shape[0], trg.shape[1]
        trg_vocab_size = self.decoder.output_dim

        # The projected final encoder state seeds the decoder.
        _, hidden = self.encoder(src)

        # Output buffer with orthogonal initialisation.
        outputs = torch.zeros(trg_len, b_size, trg_vocab_size).to(self.device)
        nn.init.orthogonal_(outputs)

        trgInput = trg[0]
        for t in range(1, trg_len):
            # Use the device this module was configured with, not a global.
            trgInput = trgInput.unsqueeze(0).to(self.device)

            pred, hidden = self.decoder(trgInput, hidden)
            pred = pred.squeeze(0).to(self.device)
            outputs[t] = pred
            teacher_force = random.random() < teacher_forcing_ratio
            trgInput = trg[t] if teacher_force else pred.argmax(1)

        return outputs

# Data loaders for the train / validation split.
train_iter, valid_iter = get_Data()

# Trainable embedding initialised from the pretrained vectors file.
embedding = nn.Embedding.from_pretrained(
    get_glove_embedding(PosibleKind, "vectors.txt"),
    freeze=False,
).to(device)

# Encoder, decoder and the seq2seq wrapper, all placed on `device`.
encoder = EncoderRNN(en_feature_size, hidden_dim, n_layers, dropout).to(device)
decoder = DecoderRNN(embedding, hidden_dim, output_size, n_layers, dropout).to(device)
model = Seq2Seq(encoder, decoder, device).to(device)

# Loss and optimiser.
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

def _sequence_loss(model, batch, criterion, teacher_forcing_ratio=None):
    """Compute the loss of one `(src, trg)` batch.

    `batch[0]` / `batch[1]` are batch-major; both are transposed to time-major
    before being passed to the model. Logits and targets are flattened from
    the SAME time-major layout so they stay aligned for any batch size, and
    step 0 is excluded because the decoder only consumes `trg[0]` as its
    first input and never predicts it.
    """
    run_device = next(model.parameters()).device
    src = batch[0].transpose(0, 1).to(run_device)
    trg = batch[1].long().transpose(0, 1).to(run_device)

    if teacher_forcing_ratio is None:
        output = model(src, trg)
    else:
        output = model(src, trg, teacher_forcing_ratio)

    output_dim = output.shape[-1]
    # reshape (not view): the sliced / transposed tensors may be non-contiguous.
    logits = output[1:].reshape(-1, output_dim)
    targets = trg[1:].reshape(-1)
    return criterion(logits, targets)


def train(model, iterator, optimizer, criterion):
    """Train `model` for one pass over `iterator`.

    Args:
        model: Callable as `model(src, trg)` returning scores shaped
            (trg_len, batch, classes).
        iterator: Iterable of `(src, trg)` batches, batch-major.
        optimizer: Optimiser stepping the model's parameters.
        criterion: Loss taking `(logits, targets)`.

    Returns:
        The mean loss per batch.

    Raises:
        ZeroDivisionError: If `iterator` yields no batches.
    """
    model.train()
    epoch_loss = 0
    n_batches = 0
    # Iterate the iterator that was passed in, not a module-level loader.
    for batch in iterator:
        loss = _sequence_loss(model, batch, criterion)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        n_batches += 1

    return epoch_loss / n_batches


def evaluate(model, iterator, criterion):
    """Compute the mean per-batch loss of `model` over `iterator` without updating it.

    Teacher forcing is switched off (ratio 0) so the result is deterministic
    and reflects free-running decoding.

    Args:
        model: Callable as `model(src, trg, teacher_forcing_ratio)`.
        iterator: Iterable of `(src, trg)` batches, batch-major.
        criterion: Loss taking `(logits, targets)`.

    Returns:
        The mean loss per batch.

    Raises:
        ZeroDivisionError: If `iterator` yields no batches.
    """
    model.eval()
    epoch_loss = 0
    n_batches = 0
    with torch.no_grad():
        for batch in iterator:
            loss = _sequence_loss(model, batch, criterion, teacher_forcing_ratio=0)
            epoch_loss += loss.item()
            n_batches += 1

    return epoch_loss / n_batches
    
import math
import time

# Lowest validation loss seen so far; a checkpoint is written whenever it improves.
best_valid_loss = float('inf')

if bVisdom:
    viz = Visdom()
    # One window with two traces (validation loss, training loss), seeded at x = 0.
    viz.line([[0., 0.]], [0], win='train_valid_loss' + str(modelId), opts=dict(title='train_valid_loss' + str(modelId)))

for epoch in range(N_EPOCHS):
    startTime = time.time()
    train_loss = train(model, train_iter, optimizer, criterion)
    valid_loss = evaluate(model, valid_iter, criterion)
    endTime = time.time()

    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), "tut1-model" + str(modelId) + ".pt")

    if bVisdom:
        # Y must be one row with one column per trace so it matches the single
        # x value, mirroring the [[0., 0.]] layout used to create the window.
        viz.line([[valid_loss, train_loss]], [epoch], win='train_valid_loss' + str(modelId), update='append')
    # The elapsed time covers the whole epoch (training + validation).
    print(f'\tTrain Loss: {train_loss:.5f} | Train PPL: {math.exp(train_loss):7.5f} | task runs {(endTime - startTime):.3f} seconds | epoch {epoch}')
    print(f'\t Val. Loss: {valid_loss:.5f} |  Val. PPL: {math.exp(valid_loss):7.5f} | task runs {(endTime - startTime):.3f} seconds | epoch {epoch}')

cuda:0
	Train Loss: 1.56845 | Train PPL: 4.79922 | task runs 20013.717 seconds | epoch 0
	 Val. Loss: 1.40822 |  Val. PPL: 4.08865 | task runs 20013.717 seconds | epoch 0
	Train Loss: 1.39011 | Train PPL: 4.01530 | task runs 20016.917 seconds | epoch 1
	 Val. Loss: 1.31343 |  Val. PPL: 3.71892 | task runs 20016.917 seconds | epoch 1
	Train Loss: 1.32654 | Train PPL: 3.76797 | task runs 20026.169 seconds | epoch 2
	 Val. Loss: 1.27523 |  Val. PPL: 3.57952 | task runs 20026.169 seconds | epoch 2
	Train Loss: 1.29855 | Train PPL: 3.66398 | task runs 20017.582 seconds | epoch 3
	 Val. Loss: 1.25757 |  Val. PPL: 3.51688 | task runs 20017.582 seconds | epoch 3
	Train Loss: 1.28155 | Train PPL: 3.60222 | task runs 20048.054 seconds | epoch 4
	 Val. Loss: 1.24651 |  Val. PPL: 3.47817 | task runs 20048.054 seconds | epoch 4
	Train Loss: 1.26890 | Train PPL: 3.55694 | task runs 13805.829 seconds | epoch 5
	 Val. Loss: 1.23706 |  Val. PPL: 3.44546 | task runs 13805.829 seconds | epoch 5
	Train Lo