In [3]:
from __future__ import unicode_literals,print_function,division
from io import open
import unicodedata
import string
import re
import random
import os
import librosa
import pickle
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import time
import math
import librosa.display
import numpy as np

# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device =torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [2]:
def music2note(data,rate):
    '''
    Summary:
        Convert a spectrogram into a list of note names, one per active frame.
        The input is transposed, divided by its global maximum, and every value
        below 0.75 is zeroed. For each row (frame) of the transposed array the
        first bin that is still non-zero is converted to a note name with
        librosa.hz_to_note. Sharps are rewritten so the sharp sign follows the
        octave ('C#5' -> 'C5#').
    Parameters:
        data - 2-D array-like; rows of the transposed array are scanned as frames
               (ChangeMuisc2Note passes the STFT magnitude in dB shifted by +80)
        rate - frequency scale: bin j of a frame is mapped to
               (j + 1) * rate / (n_bins - 1) Hz
    Return:
        Note - list of note-name strings; frames with no active bin are skipped
    '''
    # librosa >= 0.8 returns the Unicode sharp sign (U+266F) by default, older
    # releases return ASCII '#'; accept both so sharps are always normalised.
    sharp_signs = ('#', '\u266f')

    data = np.transpose(data)
    if data.size == 0:
        # Nothing to scan; avoids calling max() on an empty array.
        return []

    # Normalise to the global peak and drop everything below 75% of it.
    data = data / data.max()
    data[data < 0.75] = 0

    # Frequency step between neighbouring bins.
    # NOTE(review): the (j + 1) offset below shifts every bin one step up
    # compared with the usual j * step mapping - presumably to avoid asking
    # librosa for the note at 0 Hz; confirm before changing.
    unit_rate = rate / (data.shape[1] - 1)

    Note = []
    for frame in data:
        active_bins = np.flatnonzero(frame > 0)
        if active_bins.size == 0:
            continue
        # Only the lowest active bin of a frame is used.
        first_bin = int(active_bins[0])
        note_str = librosa.hz_to_note((first_bin + 1) * unit_rate)
        if len(note_str) > 1 and note_str[1] in sharp_signs:
            # Keep the whole octave part (it may be multi-digit or negative).
            note_str = note_str[0] + note_str[2:] + '#'
        Note.append(note_str)
    return Note

def ChangeMuisc2Note(filpath, sr=44100, n_fft=1024*2):
    '''
    Summary:
        Load an audio file and return its note list: the file is read with
        librosa, a short-time Fourier transform is taken, the magnitude is
        converted to decibels, shifted by +80 and handed to music2note.
    Parameters:
        filpath - path of the audio file to load
        sr      - sample rate the audio is resampled to on load (default 44100)
        n_fft   - FFT window size passed to librosa.stft (default 2048)
    Return:
        data - list of note-name strings produced by music2note
    '''
    # sr is passed by keyword: recent librosa releases accept it keyword-only,
    # and the keyword form also works on older releases.
    y, rate = librosa.load(filpath, sr=sr)
    # Short-time Fourier transform.
    fft = librosa.stft(y, n_fft=n_fft)
    # Magnitude in dB relative to the peak.
    D = librosa.amplitude_to_db(np.abs(fft), ref=np.max)
    # With ref=np.max and librosa's default top_db of 80 the values lie in
    # [-80, 0]; shift them so they are non-negative.
    D = D + 80

    # rate / 2 is the Nyquist frequency, i.e. the top of the STFT frequency axis.
    return music2note(D, rate / 2)



def getData(path='wavDatasets//'):
    '''
    Summary:
        Transcribe every .wav file in a directory into an encoded note sequence.
    Parameters:
        path - directory to scan (default 'wavDatasets//')
    Return:
        data - dict mapping the file name without its '.wav' extension to the
               id list returned by one_hot(ChangeMuisc2Note(file))
    '''
    data = {}
    # Sorted so the resulting order does not depend on the file system.
    for filename in sorted(os.listdir(path)):
        stem, ext = os.path.splitext(filename)
        # Match on the real extension; a plain substring test would also pick
        # up unrelated files whose name merely contains "wav".
        if ext != '.wav':
            continue
        notes = ChangeMuisc2Note(os.path.join(path, filename))
        data[stem] = one_hot(notes)
    return data

def getLabel(path='wavDatasets//music_dic.pkl'):
    '''
    Summary:
        Load the pickled label dictionary and encode every label sequence.
    Parameters:
        path - pickle file holding a dict of {name: sequence of note tokens}
               (default 'wavDatasets//music_dic.pkl')
    Return:
        label - dict mapping each name to the id list returned by one_hot
    '''
    # SECURITY: pickle.load can execute arbitrary code while unpickling; only
    # point this at label files you created yourself or otherwise trust.
    # The file must be opened in binary mode; `with` closes it even if
    # unpickling fails.
    with open(path, 'rb') as df:
        music_dic = pickle.load(df)

    label = {}
    for name, notes in music_dic.items():
        label[name] = one_hot(notes)
    return label

# Vocabulary of note tokens. A token's position in this list is the integer id
# that getDict() assigns to it.
#   '<S>' / '<E>'  - sequence start / end markers (added by one_hot()).
#   'C4' ... 'B6'  - pitch names for octaves 4-6; sharps are written after the
#                    octave digit ('C5#'), the form music2note() produces.
#   '-', '--', '---', '.' suffixes and '<..>' / '<<..>>' wrappers - variants of
#                    each pitch; presumably note durations - TODO confirm.
#   'other'        - fallback id used by one_hot() for any unknown token.
note_label_list = ['<S>','<E>','C5', 'C5#', 'C6', 'C4', 'C6#', 'C4#', 'D5', 'D5#', 'D6', 
            'D4', 'D6#', 'D4#', 'E5', 'E6', 'E4', 'F5', 'F5#', 'F6', 'F4', 'F6#',
            'F4#', 'G5', 'G5#', 'G6', 'G4', 'G6#', 'G4#', 'A5', 'A5#', 'A6', 'A4', 
            'A6#', 'A4#', 'B5', 'B6', 'B4', 'C5-', 'C5--', 'C5---', 'C5.', '<C5>',
            '<<C5>>', 'C5#-', 'C5#--', 'C5#---', 'C5#.', '<C5#>', '<<C5#>>', 'C6-',
            'C6--', 'C6---', 'C6.', '<C6>', '<<C6>>', 'C4-', 'C4--', 'C4---', 'C4.', 
            '<C4>', '<<C4>>', 'C6#-', 'C6#--', 'C6#---', 'C6#.', '<C6#>', '<<C6#>>',
            'C4#-', 'C4#--', 'C4#---', 'C4#.', '<C4#>', '<<C4#>>', 'D5-', 'D5--',
            'D5---', 'D5.', '<D5>', '<<D5>>', 'D5#-', 'D5#--', 'D5#---', 'D5#.', 
            '<D5#>', '<<D5#>>', 'D6-', 'D6--', 'D6---', 'D6.', '<D6>', '<<D6>>', 
            'D4-', 'D4--', 'D4---', 'D4.', '<D4>', '<<D4>>', 'D6#-', 'D6#--', 'D6#---',
            'D6#.', '<D6#>', '<<D6#>>', 'D4#-', 'D4#--', 'D4#---', 'D4#.', '<D4#>', 
            '<<D4#>>', 'E5-', 'E5--', 'E5---', 'E5.', '<E5>', '<<E5>>', 'E6-', 'E6--',
            'E6---', 'E6.', '<E6>', '<<E6>>', 'E4-', 'E4--', 'E4---', 'E4.', '<E4>', 
            '<<E4>>', 'F5-', 'F5--', 'F5---', 'F5.', '<F5>', '<<F5>>', 'F5#-', 'F5#--',
            'F5#---', 'F5#.', '<F5#>', '<<F5#>>', 'F6-', 'F6--', 'F6---', 'F6.',
            '<F6>', '<<F6>>', 'F4-', 'F4--', 'F4---', 'F4.', '<F4>', '<<F4>>', 
            'F6#-', 'F6#--', 'F6#---', 'F6#.', '<F6#>', '<<F6#>>', 'F4#-', 'F4#--',
            'F4#---', 'F4#.', '<F4#>', '<<F4#>>', 'G5-', 'G5--', 'G5---', 'G5.',
            '<G5>', '<<G5>>', 'G5#-', 'G5#--', 'G5#---', 'G5#.', '<G5#>', '<<G5#>>', 
            'G6-', 'G6--', 'G6---', 'G6.', '<G6>', '<<G6>>', 'G4-', 'G4--', 'G4---', 
            'G4.', '<G4>', '<<G4>>', 'G6#-', 'G6#--', 'G6#---', 'G6#.', '<G6#>', 
            '<<G6#>>', 'G4#-', 'G4#--', 'G4#---', 'G4#.', '<G4#>', '<<G4#>>', 'A5-',
            'A5--', 'A5---', 'A5.', '<A5>', '<<A5>>', 'A5#-', 'A5#--', 'A5#---', 'A5#.',
            '<A5#>', '<<A5#>>', 'A6-', 'A6--', 'A6---', 'A6.', '<A6>', '<<A6>>', 'A4-', 'A4--',
            'A4---', 'A4.', '<A4>', '<<A4>>', 'A6#-', 'A6#--', 'A6#---', 'A6#.', '<A6#>', 
            '<<A6#>>', 'A4#-', 'A4#--', 'A4#---', 'A4#.', '<A4#>', '<<A4#>>', 'B5-',
            'B5--', 'B5---', 'B5.', '<B5>', '<<B5>>', 'B6-', 'B6--', 'B6---', 'B6.',
            '<B6>', '<<B6>>', 'B4-', 'B4--', 'B4---', 'B4.', '<B4>', '<<B4>>','other']

# Build the token -> id and id -> token dictionaries.
def getDict():
    '''
    Summary:
        Derive two lookup tables from note_label_list.
    Return:
        note_dic - token -> index of its first occurrence in note_label_list
        num_dic  - the inverse mapping, index -> token
    '''
    note_dic = {}
    for index, token in enumerate(note_label_list):
        # setdefault keeps the id of the first occurrence if a token repeats.
        note_dic.setdefault(token, index)

    num_dic = {index: token for token, index in note_dic.items()}
    return note_dic, num_dic

# Build both lookup tables once at module level: note_dic (token -> id) is used
# by one_hot(), num_dic (id -> token) by evaluate().
note_dic,num_dic = getDict()

def one_hot(y):
    '''
    Summary:
        Encode a sequence of note tokens as integer ids, wrapped in the
        start ('<S>') and end ('<E>') markers. Despite its name, the function
        returns a list of indices from note_dic, not one-hot vectors.
    Parameters:
        y - iterable of note tokens
    Return:
        note - list of ids: [id('<S>'), ids of the tokens..., id('<E>')];
               tokens missing from note_dic are encoded as id('other')
    '''
    note = [note_dic['<S>']]
    for x in y:
        try:
            note.append(note_dic[x])
        except (KeyError, TypeError):
            # Unknown token (KeyError) or unhashable item (TypeError) -> 'other'.
            # Anything else, e.g. KeyboardInterrupt, is allowed to propagate.
            note.append(note_dic['other'])
    note.append(note_dic['<E>'])
    return note

def _ids_to_column(ids):
    # Turn a sequence of integer ids into an int64 tensor of shape (len, 1) on `device`.
    ids = list(ids)
    return torch.tensor(ids, dtype=torch.long, device=device).view(len(ids), 1)


# Calls the loaders above and returns the training set.
def getPair():
    '''
    Summary:
        Build the training pairs from getData() (encoded audio) and
        getLabel() (encoded labels), matched by name.
    Return:
        (data, label) - two parallel lists; data[i] is the audio id sequence
        and label[i] the label id sequence of the same name, each a
        torch.long tensor of shape (sequence_length, 1) placed on `device`
    Raises:
        KeyError - an audio file has no entry in the label dictionary
    '''
    notes_by_name = getData()
    labels_by_name = getLabel()

    data = []
    label = []
    for key, notes in notes_by_name.items():
        if key not in labels_by_name:
            raise KeyError("no label found for audio file '%s'" % key)
        # Built directly as int64: going through a float32 tensor first would
        # silently round ids above 2**24.
        data.append(_ids_to_column(notes))
        label.append(_ids_to_column(labels_by_name[key]))
    return data, label

In [5]:
# NOTE (original author): do not change this class.
class EncoderRNN(nn.Module):
    '''
    Summary:
        Encoder made of an embedding layer followed by a GRU; it is called
        once per input token.
    Parameters:
        input_size  - number of rows of the embedding table
        hidden_size - embedding width and GRU hidden size
    '''

    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size

        # The embedding is created before the GRU, as before, so parameter
        # initialisation under a fixed seed is unchanged.
        self.embedding = nn.Embedding(num_embeddings=input_size,
                                      embedding_dim=hidden_size)
        self.gru = nn.GRU(input_size=hidden_size, hidden_size=hidden_size)

    def forward(self, input, hidden):
        '''
        Embed `input`, reshape the embedding with view(1, 1, -1) and run it
        through the GRU together with `hidden`.
        Returns the GRU's (output, hidden) pair.
        '''
        step_input = self.embedding(input).view(1, 1, -1)
        return self.gru(step_input, hidden)

    def initHidden(self):
        '''Return an all-zero hidden state of shape (1, 1, hidden_size) on `device`.'''
        return torch.zeros((1, 1, self.hidden_size), device=device)

# NOTE (original author): do not change this class.
class AttnDecoderRNN(nn.Module):
    '''
    Summary:
        GRU decoder with attention over a fixed number (max_length) of encoder
        output rows; it is called once per output token.
    Parameters:
        hidden_size - embedding width and GRU hidden size
        output_size - number of rows of the embedding table and of output classes
        dropout_p   - dropout probability applied to the embedded input
        max_length  - number of attention slots (width of the attention layer)
    '''

    def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=100):
        super(AttnDecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.dropout_p = dropout_p
        self.max_length=max_length

        # Layers are created in the same order as before so parameter
        # initialisation under a fixed seed is unchanged.
        doubled = hidden_size * 2
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.attn = nn.Linear(doubled, max_length)
        self.attn_combine = nn.Linear(doubled, hidden_size)
        self.dropout = nn.Dropout(dropout_p)
        self.gru = nn.GRU(hidden_size, hidden_size)
        self.out = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden, encoder_outputs):
        '''
        Run one decoding step.
        Returns (log-probabilities over the output classes, new hidden state,
        attention weights over the max_length encoder slots).
        '''
        token_vec = self.dropout(self.embedding(input).view(1, 1, -1))

        # Attention weights are computed from the embedded token and the
        # current hidden state, then used to mix the encoder outputs.
        attn_scores = self.attn(torch.cat((token_vec[0], hidden[0]), dim=1))
        attn_weights = F.softmax(attn_scores, dim=1)
        context = torch.bmm(attn_weights.unsqueeze(0),
                            encoder_outputs.unsqueeze(0))

        # Merge token and context back to hidden_size before the GRU.
        merged = self.attn_combine(torch.cat((token_vec[0], context[0]), dim=1))
        gru_input = F.relu(merged.unsqueeze(0))
        gru_output, hidden = self.gru(gru_input, hidden)

        log_probs = F.log_softmax(self.out(gru_output[0]), dim=1)
        return log_probs, hidden, attn_weights

    def initHidden(self):
        '''Return an all-zero hidden state of shape (1, 1, hidden_size) on `device`.'''
        return torch.zeros((1, 1, self.hidden_size), device=device)

In [6]:
# NOTE (original author): do not change this function.
def evaluate(encoder, decoder, sentence, max_length=100):
    '''
    Summary:
        Greedily decode one input sequence without tracking gradients.
        The encoder is fed sentence[0], sentence[1], ... in turn; the decoder
        then starts from S_token and its own best prediction is fed back in
        until it emits E_token or max_length steps have run.
    Parameters:
        encoder    - encoder module (needs initHidden() and hidden_size)
        decoder    - attention decoder module
        sentence   - input tensor, iterated along its first dimension
        max_length - number of encoder-output rows kept and maximum number
                     of decoding steps
    Return:
        decoded_words      - list of decoded tokens (looked up in num_dic),
                             ending with '<E>' if the end token was produced
        decoder_attentions - attention weights of the steps actually run,
                             one row per step
    '''
    with torch.no_grad():
        source = sentence
        hidden = encoder.initHidden()

        # One row per input position; rows beyond the input length stay zero.
        encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)
        for position in range(source.size(0)):
            step_output, hidden = encoder(source[position], hidden)
            encoder_outputs[position] += step_output[0, 0]

        # Decoding starts from the start-of-sequence token and the encoder's
        # final hidden state.
        decoder_input = torch.tensor([[S_token]], device=device)

        decoded_words = []
        decoder_attentions = torch.zeros(max_length, max_length)

        for di in range(max_length):
            log_probs, hidden, attention = decoder(decoder_input, hidden, encoder_outputs)
            decoder_attentions[di] = attention.data

            _, best = log_probs.data.topk(1)
            token_id = best.item()
            if token_id == E_token:
                decoded_words.append('<E>')
                break
            decoded_words.append(num_dic[token_id])

            # Greedy decoding: the prediction becomes the next input.
            decoder_input = best.squeeze().detach()

        return decoded_words, decoder_attentions[:di + 1]