# 음악 생성 인공지능 제작

## Data: '나비야' 노래를 활용

## 0. 필요 라이브러리 호출

In [None]:
# 기본 연산 라이브러리 numpy 호출
import numpy as np
# 딥러닝 프레임워크 torch 호출
import torch

## 1. 노래 데이터(시계열 데이터)를 호출

- 소리 데이터를 활용하기 위해서는 음계-박자 페어를 컴퓨터가 이해 가능한 숫자로 바꿀 필요가 있음

In [None]:
# Note-code vocabulary. Each code is a pitch letter followed by a beat digit
# (4 or 8); codes are numbered 0-13 in the order they are generated here.
_CODES = [pitch + beat for beat in ('4', '8') for pitch in 'cdefgab']

# code -> integer index
code2idx = {code: idx for idx, code in enumerate(_CODES)}

# integer index -> code (inverse of code2idx)
idx2code = {idx: code for idx, code in enumerate(_CODES)}

In [None]:
# Sequence data for the song '나비야': 54 note codes in playing order.
# Written as whitespace-separated text and split into a list of codes.
seq = (
    'g8 e8 e4 f8 d8 d4 c8 d8 e8 f8 g8 g8 g4 '
    'g8 e8 e8 e8 f8 d8 d4 c8 e8 g8 g8 e8 e8 e4 '
    'd8 d8 d8 d8 d8 e8 f4 e8 e8 e8 e8 e8 f8 g4 '
    'g8 e8 e4 f8 d8 d4 c8 e8 g8 g8 e8 e8 e4'
).split()

## 2. Data Preprocessing

In [None]:
# Dataset-building function
def seq2dataset(seq, window_size, code_map=None):
    """Slice a code sequence into overlapping windows of integer indices.

    Each row holds ``window_size + 1`` consecutive items of ``seq`` mapped to
    integers: the first ``window_size`` entries serve as the model input and
    the last entry is the value to predict.

    Args:
        seq: Sequence of codes (keys of ``code_map``).
        window_size: Number of input steps per row; must be >= 0.
        code_map: Mapping from code to integer index. Defaults to the
            module-level ``code2idx``.

    Returns:
        ``numpy.ndarray`` of shape
        ``(max(len(seq) - window_size, 0), window_size + 1)``.

    Raises:
        ValueError: If ``window_size`` is negative.
        KeyError: If a windowed item of ``seq`` is missing from ``code_map``.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")
    if code_map is None:
        code_map = code2idx

    width = window_size + 1
    n_rows = len(seq) - window_size
    if n_rows <= 0:
        # Keep the result 2-D so callers can still slice columns on it.
        return np.empty((0, width), dtype=int)

    # Map every code once, then cut the overlapping windows from the indices.
    indices = [code_map[item] for item in seq]
    return np.array([indices[start:start + width] for start in range(n_rows)])

In [None]:
# Build the sliding-window dataset: each row holds WINDOW_SIZE input codes
# followed by the one code that comes next in the song.
WINDOW_SIZE = 4
dataset = seq2dataset(seq, window_size = WINDOW_SIZE)

In [None]:
# Inspect the dataset: its shape first, then the full matrix of code indices.
# With the 54-code sequence and WINDOW_SIZE = 4 the shape is (50, 5).
print(dataset.shape)
print(dataset)

In [None]:
# Split into input (X) and target (y). Every column except the last is model
# input; the last column is the code to predict. Slicing relative to the last
# column keeps the split correct for any window width.
X_train = dataset[:, :-1]
y_train = dataset[:, -1]

In [None]:
# Display the target code indices (notebook cell output)
y_train

In [None]:
# Largest code index (the vocabulary has max_idx_value + 1 codes).
# Derived from code2idx so it cannot drift from the code table; currently 13.
max_idx_value = max(code2idx.values())

# Scale the input indices into the range [0, 1]
X_train = X_train / float(max_idx_value)

# Convert to float32 and reshape (N, WINDOW_SIZE) -> (N, 1, WINDOW_SIZE):
# one time step per sample, with the whole window as that step's features.
X_train = torch.from_numpy(X_train).float()
X_train = X_train.view(-1, 1, WINDOW_SIZE)

# Labels stay as integer class indices (no one-hot encoding is performed);
# nn.CrossEntropyLoss takes class indices as int64 targets.
y_train = torch.from_numpy(y_train).long()

In [None]:
# Confirm the tensor shapes of the inputs and the labels
print('Training shape:', X_train.shape, y_train.shape)

In [None]:
# Wrap the tensors in a Dataset and serve them in mini-batches of 4
from torch.utils.data import DataLoader, TensorDataset

train_dataset = TensorDataset(X_train, y_train)

# shuffle is left at its default (False), so batches follow song order;
# the one-step prediction cell reads the predictions back in that order.
train_loader = DataLoader(dataset=train_dataset, batch_size=4)

## 3. Model Building

In [None]:
# 모델 정의

import torch.nn as nn

class Net(nn.Module):
    """Recurrent classifier: LSTM -> Linear -> ReLU -> Linear.

    Args:
        input_size: Number of features per time step.
        rnn_h_size: Hidden size of the recurrent layer.
        fnn_h_size: Width of the hidden fully-connected layer.
        num_classes: Number of output scores per sample. Defaults to 14,
            the number of entries in this file's code table.
    """

    def __init__(self, input_size, rnn_h_size, fnn_h_size, num_classes=14):
        super().__init__()

        # Recurrent layer. nn.RNN and nn.GRU take the same arguments and can
        # be swapped in here; forward() only uses the per-step outputs.
        self.rnn = nn.LSTM(input_size=input_size, hidden_size=rnn_h_size,
                           num_layers=1, batch_first=True)

        # Fully-connected layers
        self.fc1 = nn.Linear(rnn_h_size, fnn_h_size)
        self.fc2 = nn.Linear(fnn_h_size, num_classes)

        # Nonlinearity between the two fully-connected layers
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw class scores of shape (batch, num_classes).

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the recurrent
                layer is built with ``batch_first=True``.
        """
        # The recurrent layer returns (outputs, state); the state is unused.
        out, _ = self.rnn(x)

        # Classify from the output of the last time step only.
        out = self.fc1(out[:, -1])
        out = self.relu(out)
        return self.fc2(out)

In [None]:
# Model dimensions
input_size = X_train.shape[-1]  # features per time step (last dim of X_train)
rnn_h_size = input_size  # recurrent hidden size is set equal to the input size
fnn_h_size = 32  # width of the hidden fully-connected layer

# Instantiate the network that the following cells train
net = Net(input_size = input_size, rnn_h_size = rnn_h_size, fnn_h_size = fnn_h_size )

## 4. Define a Loss Function and Optimizer

In [None]:
# Hyperparameter settings

learning_rate = 1e-3  # learning rate handed to the Adam optimizer
num_epochs = 2000  # number of full passes over train_loader

In [None]:
# Loss function and optimizer setup

import torch.optim as optim

# Cross-entropy between the network's raw output scores and the integer labels
criterion = nn.CrossEntropyLoss()
# Adam over all trainable parameters of the network
optimizer = optim.Adam(net.parameters(), lr=learning_rate)

## 5. Train the Network

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Train the model for num_epochs passes over train_loader.
# Every 100th epoch, and on the final epoch, report the mean training loss
# of that epoch and the accuracy over the whole training set.

for epoch in range(num_epochs):
    running_loss = 0.0
    n_seen = 0

    for inputs, labels in train_loader:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size

    is_last_epoch = epoch == num_epochs - 1
    if epoch % 100 == 0 or is_last_epoch:
        pred_list = []
        labels_list = []

        # Evaluation only: no gradients needed
        with torch.no_grad():
            for sequences, labels in train_loader:
                pred_list.append(net(sequences))
                labels_list.append(labels)

        pred_list = torch.vstack(pred_list)
        labels_list = torch.hstack(labels_list)

        # accuracy_score takes (y_true, y_pred)
        train_acc = accuracy_score(labels_list.numpy(), pred_list.argmax(1).numpy())
        epoch_loss = running_loss / max(n_seen, 1)

        print('epoch: %d, loss: %.3f, train_acc: %.3f' % (epoch + 1, epoch_loss, train_acc))


print('Finished Training')

In [None]:
# Save the trained model's parameters (the state_dict only, not the module)

PATH = './butterfly.pth'
torch.save(net.state_dict(), PATH)

In [None]:
# Rebuild the network with the same dimensions and load the saved parameters

net = Net(input_size = X_train.shape[-1], rnn_h_size = rnn_h_size, fnn_h_size = fnn_h_size)
# NOTE(review): torch.load unpickles the file; only load checkpoints that
# come from a trusted source.
net.load_state_dict(torch.load(PATH))

## 6. Test the Network on the Test Data (here: the training sequence itself)

In [None]:
# Number of codes to predict: one per training window, so the generated
# sequence lines up with labels_list when accuracy is computed.
pred_count = len(seq) - WINDOW_SIZE  # 54 - 4 = 50

In [None]:
# One-step prediction: the output score starts with the song's first four codes
seq_out = ['g8', 'e8', 'e4', 'f8']

# Training is finished, so no gradients are needed while scoring
with torch.no_grad():
    scored = [(net(sequences), labels) for sequences, labels in train_loader]

# Stack the per-batch results into one score tensor and one label tensor
pred_list = torch.vstack([pred for pred, _ in scored])
labels_list = torch.hstack([labels for _, labels in scored])

In [None]:
# Decode the first pred_count one-step predictions into note codes.
# Each row of pred_list holds raw class scores, so argmax over the class axis
# gives the predicted code index. Slicing (rather than indexing row by row)
# cannot run past the end of pred_list.
predicted_idx = pred_list[:pred_count].argmax(1).tolist()
# seq_out is the final score, so indices are converted back to codes
seq_out.extend(idx2code[idx] for idx in predicted_idx)

print("one step prediction : ", seq_out)

# accuracy_score takes (y_true, y_pred)
one_step_acc = accuracy_score(labels_list.numpy(), pred_list.argmax(1).numpy())
print('accuracy:',one_step_acc)

In [None]:
# Whole-song prediction: seed with the song's first four codes.
# seq_out collects the codes of the generated score; seq_in is the model's
# current input window, holding the same codes as normalised indices.
seq_out = ['g8', 'e8', 'e4', 'f8']
seq_in = [code2idx[code] / float(max_idx_value) for code in seq_out]

In [None]:
pred_list = []

# Autoregressive generation: predict one code, append it to the score, then
# slide the input window forward by feeding the prediction back in.
# Inference only, so gradient tracking is switched off.
with torch.no_grad():
    for _ in range(pred_count):
        # (1, 1, window): one sample, one time step, the window as features
        sample_in = torch.tensor(seq_in).view(1, 1, -1)
        pred_out = net(sample_in)
        pred_list.append(pred_out)

        idx = pred_out.argmax(1).item()
        seq_out.append(idx2code[idx])

        # Slide the window: newest prediction in, oldest input out
        seq_in.append(idx / float(max_idx_value))
        seq_in.pop(0)

print("full song prediction : ", seq_out)

pred_list = torch.vstack(pred_list)
# accuracy_score takes (y_true, y_pred).
# NOTE(review): the variable keeps its original name, but here it holds the
# accuracy of the full-song (autoregressive) prediction.
one_step_acc = accuracy_score(labels_list.numpy(), pred_list.argmax(1).numpy())
print('accuracy:',one_step_acc)