<a href="https://colab.research.google.com/github/jikerbug/jibaek_project_generator/blob/master/Audio_Chord_Estimator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 개발 과정
1. 인공지능 알고리즘 개념 및 원리 학습
2. 코드 데이터 확보
3. 딥러닝 모델 생성
4. 딥러닝 모델 이용해 음악의 시간별 코드를 출력하는 프로그램 제작
5. 시간별 코드를 보여주는 GUI 제작

-

## **인공지능 모델 1차 개발 일지 ( 2020-07-03 ~ 2020-07-12 )**

### 1단계 : 인공지능 알고리즘 개념 및 원리 학습
1. 2020-07-03 ~ 2020-07-09 : [Deep Learning (for Audio) with Python ](https://www.youtube.com/playlist?list=PL-wATfeyAMNrtbkCNsLcpoAyBBRJZVlnf) 동영상 강의를 통해 신경망, CNN, RNN, LSTM 개념 학습과 음악 장르 분류 실습 진행
        오디오 신호에서 특성 값들을 추출해 신경망에 입력하여 음악장르를 예측하고 분류할 수 있다.


### 2단계 : 코드(chord) 데이터 확보
1. 2020-07-11 : [McGill-Billboard Songs and Chord Annotations
Chord Recognition with Chromagram Data](https://www.kaggle.com/jacobvs/mcgill-billboard)에서 데이터 확보 //주의! : code 설명을 보기전 데이터의 구조 파악 필수
        오디오 신호에서 추출한 Chromagram Data라는 24가지 특성 값을 통해 코드를 예측해보자

### 3단계 : 딥러닝 모델 생성 및 학습(code 설명)
1. 2020-07-11 ~ 2020-07-12 : 결과 : 43개 클래스(코드 42개 + None)의 코드 분류에 대해 약 31%의 정확도
       (Amaj, Amin, Abmaj, Abmin, A#maj, A#min, Bmaj .... G#min, None(코드 없는 audio)) : 7개 근음 × 6개 유형 = 42개 코드 + None 1개 (None의 정수 레이블은 48로 설정)

이상 1차 개발 내용은 아래의 코드에서 설명 : //주의! 설명이 다소 부정확할 수 있음




In [None]:
import csv # 데이터를 csv 파일에서 불러오기 위함
import numpy as np # numpy array로 데이터를 변환하여 선형대수 연산을 하기 위함
from sklearn.model_selection import train_test_split # 모델을 학습시킬 training data와 모델의 예측 성능을 검증할 validation data, test data를 나눠주기 위함
import tensorflow.keras as keras # 인공지능 모델을 학습시키기 위함
import matplotlib.pyplot as plt # 학습된 결과를 그래플 시각화하기 위함
import os.path # 존재하지 않는 파일을 불러와서 오류가 나는 경우를 방지하기 위함 (by os.path.isfile(file_path))



# 1. 코드(chord) 데이터와 chromagram 데이터를 불러오는 함수
def load_data():
    """Load chromagram frames and their chord labels from the dataset folders.

    For every song folder ``0003`` .. ``0059`` that contains a
    ``bothchroma.csv`` file, the chromagram rows are read from the metadata
    tree and the chord segments from the matching ``majmin.lab`` file.  Each
    chromagram frame is labelled with the chord whose ``[start, end)``
    interval contains the frame's timestamp (second CSV column).

    Frames are matched only against the segments of their own song, and a
    frame that falls into no labelled segment is dropped, so ``X`` and ``y``
    always have the same length.

    Label encoding: ``root_index * 6 + type_index`` (0-41) with roots
    ``A``-``G`` and types ``:maj, :min, b:maj, b:min, #:maj, #:min``;
    ``N`` / ``X`` (no chord) map to 48.

    :return: ``(X, y)``; ``X`` is a float array of shape
        ``(frames, features, 1)`` and ``y`` holds one integer label per
        frame.  Both are empty arrays when no song folder is found.
    :raises FileNotFoundError: if a chromagram file exists but the matching
        ``majmin.lab`` file does not.
    """
    # Path fragments shared by every song folder.
    path_metadata = './metadata/metadata'
    csv_metadata = '/bothchroma.csv'
    path_annotations = './annotations/annotations/'
    csv_annotations = '/majmin.lab'

    # Chord name -> integer label.  The multiplier must equal the number of
    # chord types; a smaller one makes different chords share a label.
    no_chord_label = 48
    root_chord_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    type_table = [':maj', ':min', 'b:maj', 'b:min', '#:maj', '#:min']
    chord_to_label = {
        root_chord + chord_type: chord_num * len(type_table) + type_num
        for chord_num, root_chord in enumerate(root_chord_list)
        for type_num, chord_type in enumerate(type_table)
    }
    chord_to_label['N'] = no_chord_label
    chord_to_label['X'] = no_chord_label

    chroma_frames = []  # one [[v0], [v1], ...] entry per labelled frame
    frame_labels = []   # integer chord label of the frame at the same index

    # Only a subset of the dataset (folders 0003-0059) is used for now.
    for folder_num in range(3, 60):
        path_variable = '/' + str(folder_num).zfill(4)
        complete_path_metadata = path_metadata + path_variable + csv_metadata
        complete_path_annotations = path_annotations + path_variable + csv_annotations

        # Folder numbers are not contiguous; skip the missing ones.
        if not os.path.isfile(complete_path_metadata):
            continue

        with open(complete_path_metadata, 'r', encoding='utf-8') as f:
            chroma_rows = list(csv.reader(f))

        # Each .lab line is "start<TAB>end<TAB>chord"; csv.reader yields it
        # as a single field because the line contains no comma.
        segments = []
        with open(complete_path_annotations, 'r', encoding='utf-8') as f:
            for row in csv.reader(f):
                if not row:
                    break  # a blank line ends the annotation list
                fields = row[0].split('\t')
                label = chord_to_label.get(fields[-1])
                if label is None:
                    continue  # chord name outside the label table
                segments.append((float(fields[0]), float(fields[1]), label))

        # Label the frames of this song against this song's segments only.
        for row in chroma_rows:
            if not row:
                continue
            frame_time = float(row[1])
            for start, end, label in segments:
                if start <= frame_time < end:
                    # Columns 0 and 1 are not features; each remaining value
                    # is wrapped in its own list to give the trailing axis.
                    chroma_frames.append([[float(value)] for value in row[2:]])
                    frame_labels.append(label)
                    break

    X = np.array(chroma_frames)
    y = np.array(frame_labels)
    return X, y




# 2. 인공지능 모델 학습결과를 시각화하는 함수
def plot_history(history):
    """Plot accuracy and loss per epoch for the training and validation sets.

    :param history: training history returned by ``model.fit``; its
        ``history`` dict must contain the ``accuracy``, ``val_accuracy``,
        ``loss`` and ``val_loss`` series.
    :return: None; the figure is shown with ``plt.show()``.
    """
    metrics = history.history
    _, (accuracy_ax, error_ax) = plt.subplots(2)

    # The ``val_*`` series are measured on the validation data given to
    # fit(), so they are labelled as validation curves, not test curves.
    accuracy_ax.plot(metrics["accuracy"], label="train accuracy")
    accuracy_ax.plot(metrics["val_accuracy"], label="validation accuracy")
    accuracy_ax.set_ylabel("Accuracy")
    accuracy_ax.legend(loc="lower right")
    accuracy_ax.set_title("Accuracy eval")

    error_ax.plot(metrics["loss"], label="train error")
    error_ax.plot(metrics["val_loss"], label="validation error")
    error_ax.set_ylabel("Error")
    error_ax.set_xlabel("Epoch")
    error_ax.legend(loc="upper right")
    error_ax.set_title("Error eval")

    plt.show()


# 3. 인공지능 모델에 입력할 데이터셋을 test, training, validation으로 구분하는 함수
def prepare_datasets(test_size, validation_size):
    """Load the data and split it into train, validation and test sets.

    :param test_size: share of all samples held out as the test set
    :param validation_size: share of the remaining (non-test) samples held
        out as the validation set
    :return: ``X_train, X_validation, X_test, y_train, y_validation, y_test``
    """
    features, labels = load_data()
    for line in ("Look at here", len(features), len(labels)):
        print(line)

    # First carve off the test set, then split what is left into
    # training and validation data.
    features_rest, X_test, labels_rest, y_test = train_test_split(
        features, labels, test_size=test_size)
    X_train, X_validation, y_train, y_validation = train_test_split(
        features_rest, labels_rest, test_size=validation_size)

    # Unlike the CNN variant, the recurrent model needs no extra trailing
    # channel axis, so the arrays are returned as they are.
    return X_train, X_validation, X_test, y_train, y_validation, y_test


# 4. 인공지능 모델을 구현하는 함수
def build_model(input_shape):
    """Assemble the (uncompiled) recurrent chord classifier.

    :param input_shape: shape of one input sample, passed to the first LSTM
    :return: ``keras.Sequential`` model ending in a 49-way softmax
    """
    layers = keras.layers
    layer_stack = [
        # return_sequences=True hands the whole output sequence to the
        # second LSTM.  Author's note: 256 vs. 64 units made no difference,
        # accuracy stayed at about 31%.
        layers.LSTM(256, input_shape=input_shape, return_sequences=True),
        layers.LSTM(256),
        # dense head with dropout
        layers.Dense(256, activation='relu'),
        layers.Dropout(0.3),
        # output layer: one probability per class index 0-48
        layers.Dense(49, activation='softmax'),
    ]

    model = keras.Sequential()
    for layer in layer_stack:
        model.add(layer)
    return model


# 5. 특정 데이터를 입력하여 결과값을 예측하는 함수
def predict(model, X, y):
    """Predict the class of a single sample and print it next to the target.

    :param model: object with a ``predict(batch)`` method returning one row
        of class scores per sample
    :param X: one input sample (no batch axis)
    :param y: expected class index of the sample
    :return: None; the result is printed
    """
    # The model works on batches, so wrap the sample in a batch of one.
    batch = np.expand_dims(X, axis=0)
    class_scores = model.predict(batch)

    # The winning class is the position of the highest score in each row.
    predicted_index = np.argmax(class_scores, axis=1)
    print(f"Expected index: {y}, Predicted index: {predicted_index}")

# 6. 메인 실행함수
if __name__ == "__main__":
    # Hold out 25% of the samples for testing, then 20% of the remainder
    # for validation.
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # One sample's shape: every axis of X_train except the leading sample axis.
    input_shape = (X_train.shape[1], X_train.shape[2])
    print(input_shape)
    model = build_model(input_shape)

    # compile model
    optimizer = keras.optimizers.Adam(learning_rate=0.0001)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])

    model.summary()

    # train model
    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

    # plot accuracy/error for training and validation
    plot_history(history)

    # evaluate model on test set
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Make a prediction on one test sample.  Index 100 is used when the test
    # set is large enough; otherwise fall back to the last available sample
    # instead of raising IndexError.
    if len(X_test) > 0:
        sample_index = min(100, len(X_test) - 1)
        X = X_test[sample_index]
        y = y_test[sample_index]

        predict(model, X, y)



## 인공지능 모델 2차 개발일지 (2020-07-17 ~ 2020-07-18)
* 좀 더 다양한 음악의 데이터를 넣어보았더니 예측 성능이 50% 정도가 나왔다. 그러다가 데이터 범위를 좀 더 다양하게 잡아보았는데, 결과가 다시 30% 정도로 좋지 않았다. 더 큰 문제는 학습이 진행됨에 따라 정확도가 개선되는 현상이 관측되지 않는다는 것이었다. 아무래도 데이터 입력에 문제가 있어서 인공지능 모델이 제대로 데이터를 학습하지 못하는 것 같았다. 따라서 chromagram data를 묶음으로 만들어줘서 다시 입력해주기로 했다. 다시 살펴보니 지금의 data의 shape는 (24,1)으로 잘못된 형태를 띠고 있었다. 현재 나의 코드는 유튜브 강의 코드를 참고하고 있는데 그곳에서는 (130,13)의 형태를 사용하고 있었다. 여기서 130은 오디오 트랙에서 특성 값(MFCC)을 추출한 시간 구간(slice)의 개수이다. 따라서 나의 data의 shape를 (130, 24)로 맞춰주는 작업을 진행하기로 했다.

* 다시한번 데이터를 살펴보니 코드는 보통 3~4초 사이동안 지속이 되었다. 그러나 하나의 코드를 학습하는 데에 주어진 데이터는 0.05초정도 뿐이었다. 따라서, 나는 코드의 지속시간 평균을 구해본 뒤에 해당 평균 정도의 시간에 맞춰서 데이터를 재구성하여 (50,24)정도의 수준으로 데이터 셋을 구성해보기로 하였다.

In [None]:
import csv
import sys
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import os.path
import pandas as pd


def load_data():
    """Build fixed-length chromagram bundles, one per run of equal chords.

    For every song folder ``0003`` .. ``0049`` that contains a
    ``bothchroma.csv`` file, the chromagram rows (minus the first 20 rows
    of each file) and the chord segments of the matching ``majmin.lab``
    file are read.  Each frame is labelled with the chord whose
    ``[start, end)`` interval contains the frame's timestamp; frames are
    matched only against segments of their own song, and frames that fall
    into no labelled segment are dropped so features and labels stay
    aligned.

    The labelled frames are then cut at every position where the label
    changes.  Each stretch between two consecutive change positions becomes
    one bundle of exactly 50 frames: longer stretches are truncated,
    shorter ones are padded by repeating the stretch's first frame.  The
    frames before the first change and after the last change are not
    bundled.

    Label encoding: ``root_index * 6 + type_index`` (0-41); ``N`` / ``X``
    (no chord) map to 42.

    Several exploratory tables are printed along the way.

    :return: ``(X_bundle, y_bundle)``; ``X_bundle`` has shape
        ``(bundles, 50, features)`` and ``y_bundle`` one integer label per
        bundle.  Both are empty arrays when nothing could be bundled.
    :raises FileNotFoundError: if a chromagram file exists but the matching
        ``majmin.lab`` file does not.
    """
    path_metadata = './metadata/metadata'
    csv_metadata = '/bothchroma.csv'
    path_annotations = './annotations/annotations/'
    csv_annotations = '/majmin.lab'

    # NOTE(review): the first 20 rows of every chroma file are skipped; the
    # reason is not documented in the notebook - confirm with the author.
    skipped_leading_rows = 20
    bundle_length = 50
    no_chord_label = 42

    root_chord_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
    type_table = [':maj', ':min', 'b:maj', 'b:min', '#:maj', '#:min']
    chord_to_label = {
        root_chord + chord_type: chord_num * len(type_table) + type_num
        for chord_num, root_chord in enumerate(root_chord_list)
        for type_num, chord_type in enumerate(type_table)
    }
    chord_to_label['N'] = no_chord_label
    chord_to_label['X'] = no_chord_label

    chroma_frames = []  # one list of float features per labelled frame
    frame_labels = []   # integer chord label of the frame at the same index

    for folder_num in range(3, 50):
        path_variable = '/' + str(folder_num).zfill(4)
        complete_path_metadata = path_metadata + path_variable + csv_metadata
        complete_path_annotations = path_annotations + path_variable + csv_annotations

        # Folder numbers are not contiguous; skip the missing ones.
        if not os.path.isfile(complete_path_metadata):
            continue

        with open(complete_path_metadata, 'r', encoding='utf-8') as f:
            # Slicing (rather than calling next() 20 times) keeps files with
            # fewer than 20 rows from raising StopIteration.
            chroma_rows = list(csv.reader(f))[skipped_leading_rows:]

        # Each .lab line is "start<TAB>end<TAB>chord"; csv.reader yields it
        # as a single field because the line contains no comma.
        segments = []
        with open(complete_path_annotations, 'r', encoding='utf-8') as f:
            for row in csv.reader(f):
                if not row:
                    break  # a blank line ends the annotation list
                fields = row[0].split('\t')
                label = chord_to_label.get(fields[-1])
                if label is None:
                    continue  # chord name outside the label table
                segments.append((float(fields[0]), float(fields[1]), label))

        # Label the frames of this song against this song's segments only.
        for row in chroma_rows:
            if not row:
                continue
            frame_time = float(row[1])
            for start, end, label in segments:
                if start <= frame_time < end:
                    # Columns 0 and 1 are not features.
                    chroma_frames.append([float(value) for value in row[2:]])
                    frame_labels.append(label)
                    break

    # Positions whose label differs from the label of the previous frame.
    chord_data_changing_idx = [
        position
        for position in range(1, len(frame_labels))
        if frame_labels[position] != frame_labels[position - 1]
    ]

    print("this is x data")
    print(pd.DataFrame(chroma_frames))
    print("this is y data")
    print(pd.DataFrame(frame_labels, columns=['chord']))

    # start position of every chord change
    print(pd.DataFrame(chord_data_changing_idx))

    # number of frames between consecutive chord changes
    interval_info = [
        later - earlier
        for earlier, later in zip(chord_data_changing_idx, chord_data_changing_idx[1:])
    ]
    df_interval = pd.DataFrame(interval_info)
    print(df_interval)
    if interval_info:
        # describe() raises on a frame without columns, so only summarise
        # when at least one interval exists.
        print(df_interval.describe())

    chroma_bundle_dataset = []
    chord_bundle_dataset = []
    for run_start, run_end in zip(chord_data_changing_idx, chord_data_changing_idx[1:]):
        run_frames = chroma_frames[run_start:run_end][:bundle_length]
        padding = [chroma_frames[run_start]] * (bundle_length - len(run_frames))
        chroma_bundle_dataset.append(run_frames + padding)
        chord_bundle_dataset.append(frame_labels[run_start])

    X_bundle = np.array(chroma_bundle_dataset)
    y_bundle = np.array(chord_bundle_dataset)

    # Spot check: first feature and label of every 200th frame (at most 10
    # samples), bounded by the number of frames actually loaded.
    for position in range(0, min(len(chroma_frames), 2000), 200):
        print(chroma_frames[position][0])
        print(frame_labels[position])
    return X_bundle, y_bundle


def plot_history(history):
    """Plot accuracy and loss per epoch for the training and validation sets.

    :param history: training history returned by ``model.fit``; its
        ``history`` dict must contain the ``accuracy``, ``val_accuracy``,
        ``loss`` and ``val_loss`` series.
    :return: None; the figure is shown with ``plt.show()``.
    """
    curves = history.history
    _, axes = plt.subplots(2)
    top, bottom = axes[0], axes[1]

    # accuracy subplot; the ``val_*`` series are measured on the validation
    # data given to fit(), so they are labelled as validation curves.
    top.plot(curves["accuracy"], label="train accuracy")
    top.plot(curves["val_accuracy"], label="validation accuracy")
    top.set_ylabel("Accuracy")
    top.legend(loc="lower right")
    top.set_title("Accuracy eval")

    # error subplot
    bottom.plot(curves["loss"], label="train error")
    bottom.plot(curves["val_loss"], label="validation error")
    bottom.set_ylabel("Error")
    bottom.set_xlabel("Epoch")
    bottom.legend(loc="upper right")
    bottom.set_title("Error eval")

    plt.show()


def prepare_datasets(test_size, validation_size):
    """Load the bundled data and split it into train/validation/test sets.

    :param test_size: share of all samples held out as the test set
    :param validation_size: share of the remaining (non-test) samples held
        out as the validation set
    :return: ``X_train, X_validation, X_test, y_train, y_validation, y_test``
    """
    samples, targets = load_data()
    print("How many dataset : ")
    print(len(samples))
    print(len(targets))

    # The test set is split off first; the validation set is then taken
    # from what remains.
    samples_rest, X_test, targets_rest, y_test = train_test_split(
        samples, targets, test_size=test_size)
    X_train, X_validation, y_train, y_validation = train_test_split(
        samples_rest, targets_rest, test_size=validation_size)

    # Unlike the CNN variant, the recurrent model needs no extra trailing
    # channel axis, so the arrays are returned as they are.
    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    """Assemble the (uncompiled) recurrent chord classifier.

    :param input_shape: shape of one input sample, passed to the first LSTM
    :return: ``keras.Sequential`` model ending in a 43-way softmax
    """
    layers = keras.layers
    layer_stack = [
        # return_sequences=True hands the whole output sequence to the
        # second LSTM.  Author's note: adding a third LSTM layer brought no
        # real benefit.
        layers.LSTM(64, input_shape=input_shape, return_sequences=True),
        layers.LSTM(64),
        # dense head with dropout
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        # output layer: one probability per class index 0-42
        layers.Dense(43, activation='softmax'),
    ]

    model = keras.Sequential()
    for layer in layer_stack:
        model.add(layer)
    return model


def predict(model, X, y):
    """Predict the class of a single sample and print it next to the target.

    :param model: object with a ``predict(batch)`` method returning one row
        of class scores per sample
    :param X: one input sample (no batch axis)
    :param y: expected class index of the sample
    :return: None; the result is printed
    """
    # Add the leading batch axis the model expects.
    single_batch = np.expand_dims(X, axis=0)
    scores = model.predict(single_batch)  # one row of softmax outputs

    # Index of the largest score in each row = predicted class.
    predicted_index = np.argmax(scores, axis=1)
    print(f"Expected index: {y}, Predicted index: {predicted_index}")


if __name__ == "__main__":
    # Hold out 25% of the samples for testing, then 20% of the remainder
    # for validation.
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # One sample's shape: every axis of X_train except the leading sample axis.
    input_shape = (X_train.shape[1], X_train.shape[2])

    print(input_shape)
    model = build_model(input_shape)

    # compile model
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(optimizer=optimizer,
                  loss="sparse_categorical_crossentropy",
                  metrics=['accuracy'])

    model.summary()

    # train model
    history = model.fit(X_train, y_train, validation_data=(X_validation, y_validation), batch_size=32, epochs=30)

    # plot accuracy/error for training and validation
    plot_history(history)

    # evaluate model on test set
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Make a prediction on one test sample.  Index 100 is used when the test
    # set is large enough; otherwise fall back to the last available sample
    # instead of raising IndexError.
    if len(X_test) > 0:
        sample_index = min(100, len(X_test) - 1)
        X = X_test[sample_index]
        y = y_test[sample_index]

        predict(model, X, y)




In [None]:
###################################### 딕셔너리로 데이터 제대로 정돈하여 약 60%의 예측률 보임! input : (1,24)

import csv
import sys
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import os.path
import pandas as pd

#Chromagram (# time steps, # of coefficient)

# Dataset root and JSON output path.  Neither constant is referenced by the
# code visible in this cell: the loader uses hard-coded ./metadata and
# ./annotations paths.
# NOTE(review): presumably left over from the tutorial template - confirm
# before removing.
DATASET_PATH = 'data'
JSON_PATH = 'data.json'


def chord_to_int(chord_name):
    """Translate a chord name into its integer class label.

    ``N`` and ``X`` (no chord) give 42.  Any other name is looked up among
    the 42 combinations of a root ``A``-``G`` and one of the six suffixes
    below; the label is ``root_index * 6 + suffix_index``.

    :param chord_name: chord name such as ``'A:maj'`` or ``'Bb:min'``
    :return: integer label, or ``None`` when the name is not recognised
    """
    if chord_name in ('N', 'X'):
        return 42

    roots = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
    suffixes = (':maj', ':min', 'b:maj', 'b:min', '#:maj', '#:min')

    # Enumerating the names root by root, suffix by suffix, makes each
    # name's position in the list equal to its label.
    known_names = [root + suffix for root in roots for suffix in suffixes]
    for label, known_name in enumerate(known_names):
        if known_name == chord_name:
            return label
    return None




def prepare_dataset(dataset_path, json_path):
    """Load single-frame chromagram samples and their chord labels.

    For every song folder ``0003`` .. ``0099`` that contains a
    ``bothchroma.csv`` file, the chromagram rows (minus the first 20 rows
    of each file) and the chord segments of the matching ``majmin.lab``
    file are read.  Each frame is labelled, within its own song, with the
    chord whose ``[start, end)`` interval contains the frame's timestamp.

    Frames that fall into no labelled segment are skipped, as are segments
    whose chord ``chord_to_int`` does not recognise, so every returned
    sample has a valid integer label and the same number of features.

    Several diagnostic counts and shapes are printed along the way.

    :param dataset_path: unused; the data is read from the hard-coded
        ``./metadata`` and ``./annotations`` trees
    :param json_path: unused
    :return: ``(X, y)``; ``X`` has shape ``(frames, 1, features)`` and ``y``
        holds one integer label per frame.  Both are empty arrays when no
        song folder is found.
    :raises FileNotFoundError: if a chromagram file exists but the matching
        ``majmin.lab`` file does not.
    """
    # metadata holds the chromagram input, annotations the chord output
    path_metadata = './metadata/metadata'
    file_metadata = '/bothchroma.csv'
    path_annotations = './annotations/annotations/'
    file_annotations = '/majmin.lab'

    # NOTE(review): the first 20 rows of every chroma file are skipped; the
    # reason is not documented in the notebook - confirm with the author.
    skipped_leading_rows = 20

    data = {
        "labels": [],
        "Chromagram_bundle": [],
    }
    song_count = 0

    for folder_num in range(3, 100):
        common_path_variable = '/' + str(folder_num).zfill(4)
        complete_path_metadata = path_metadata + common_path_variable + file_metadata
        complete_path_annotations = path_annotations + common_path_variable + file_annotations

        # Folder numbers are not contiguous; skip the missing ones.
        if not os.path.isfile(complete_path_metadata):
            continue
        song_count += 1

        with open(complete_path_metadata, 'r', encoding='utf-8') as f:
            # Slicing (rather than calling next() 20 times) keeps files with
            # fewer than 20 rows from raising StopIteration.
            chroma_rows = list(csv.reader(f))[skipped_leading_rows:]
        # Column 0 is dropped; what remains is the timestamp followed by
        # the chroma values, all converted from str to float.
        timed_frames = [list(map(float, row[1:])) for row in chroma_rows if row]

        # Each .lab line is "start<TAB>end<TAB>chord"; csv.reader yields it
        # as a single field because the line contains no comma.
        segments = []
        with open(complete_path_annotations, 'r', encoding='utf-8') as f:
            for row in csv.reader(f):
                if not row:
                    continue
                fields = row[0].split('\t')
                label = chord_to_int(fields[2])
                if label is None:
                    continue  # chord name outside the label table
                segments.append((float(fields[0]), float(fields[1]), label))

        for frame_time, *chroma_values in timed_frames:
            for start, end, label in segments:
                if start <= frame_time < end:
                    data["labels"].append(label)
                    # Wrap the frame in a list so each sample is one time
                    # step of chroma values.
                    data["Chromagram_bundle"].append([chroma_values])
                    break

    # Number of songs with annotations / with chroma data (always equal).
    print(song_count)
    print(song_count)

    print(len(data["labels"]))
    if data["Chromagram_bundle"]:
        print(data["Chromagram_bundle"][0])
    print(len(data["Chromagram_bundle"]))

    X = np.array(data["Chromagram_bundle"])
    y = np.array(data["labels"])

    print(X.shape)
    print(y.shape)
    if len(X) > 0:
        print(X[0].shape)
    return X, y




def plot_history(history):
    """Plot accuracy and loss per epoch for the training and validation sets.

    :param history: training history returned by ``model.fit``; its
        ``history`` dict must contain the ``accuracy``, ``val_accuracy``,
        ``loss`` and ``val_loss`` series.
    :return: None; the figure is shown with ``plt.show()``.
    """
    series = history.history
    _, (accuracy_plot, error_plot) = plt.subplots(2)

    # accuracy subplot; the ``val_*`` series are measured on the validation
    # data given to fit(), so they are labelled as validation curves.
    accuracy_plot.plot(series["accuracy"], label="train accuracy")
    accuracy_plot.plot(series["val_accuracy"], label="validation accuracy")
    accuracy_plot.set_ylabel("Accuracy")
    accuracy_plot.legend(loc="lower right")
    accuracy_plot.set_title("Accuracy eval")

    # error subplot
    error_plot.plot(series["loss"], label="train error")
    error_plot.plot(series["val_loss"], label="validation error")
    error_plot.set_ylabel("Error")
    error_plot.set_xlabel("Epoch")
    error_plot.legend(loc="upper right")
    error_plot.set_title("Error eval")

    plt.show()


def prepare_datasets(test_size, validation_size):
    """Load the dataset and split it into train/validation/test sets.

    :param test_size: share of all samples held out as the test set
    :param validation_size: share of the remaining (non-test) samples held
        out as the validation set
    :return: ``X_train, X_validation, X_test, y_train, y_validation, y_test``
    """
    # Both arguments of prepare_dataset are placeholders here.
    inputs, outputs = prepare_dataset('s', 's')
    print("How many dataset : ")
    for size in (len(inputs), len(outputs)):
        print(size)

    # The test set is split off first; the validation set is then taken
    # from what remains.
    inputs_rest, X_test, outputs_rest, y_test = train_test_split(
        inputs, outputs, test_size=test_size)
    X_train, X_validation, y_train, y_validation = train_test_split(
        inputs_rest, outputs_rest, test_size=validation_size)

    # Unlike the CNN variant, the recurrent model needs no extra trailing
    # channel axis, so the arrays are returned as they are.
    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    """Assemble the (uncompiled) recurrent chord classifier.

    :param input_shape: shape of one input sample, passed to the first LSTM
    :return: ``keras.Sequential`` model ending in a 43-way softmax
    """
    layers = keras.layers

    # return_sequences=True hands the whole output sequence to the second
    # LSTM.  Author's note: adding a third LSTM layer brought no real benefit.
    recurrent_part = [
        layers.LSTM(64, input_shape=input_shape, return_sequences=True),
        layers.LSTM(64),
    ]
    # Dense head with dropout, then one probability per class index 0-42.
    classifier_part = [
        layers.Dense(64, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(43, activation='softmax'),
    ]

    model = keras.Sequential()
    for layer in recurrent_part + classifier_part:
        model.add(layer)
    return model


def predict(model, X, y):
    """Print the expected label next to the model's prediction for one sample.

    :param model: Object exposing ``predict(batch)`` that returns one row of
        class scores per sample in the batch.
    :param X: A single input sample (NumPy array, no batch axis).
    :param y: Expected label of the sample; only used in the printed message.
    :return: None
    """
    # The model works on batches, so wrap the sample in a batch of size one.
    batch = np.expand_dims(X, axis=0)

    # Scores come back as one row per sample, e.g. [[0.1, 0.2, ...]].
    scores = model.predict(batch)

    # Position of the highest score in each row, e.g. [4].
    predicted_index = np.argmax(scores, axis=1)
    print("Expected index: {}, Predicted index: {}".format(y, predicted_index))


if __name__ == "__main__":
    # Hold out 25% of all samples for testing and 20% of the rest for validation.
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # Shape of one sample: axes 1 and 2 of the training array (axis 0 is the sample axis).
    input_shape = (X_train.shape[1], X_train.shape[2])
    print(input_shape)

    model = build_model(input_shape)

    # Labels are plain integers, hence the sparse categorical cross-entropy loss.
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy'],
    )
    model.summary()

    # Fit, monitoring the validation split after every epoch.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        batch_size=32,
        epochs=30,
    )

    # Show the accuracy/error curves recorded during training.
    plot_history(history)

    # Final score on the held-out test split.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Spot-check a single test sample.
    X = X_test[100]
    y = y_test[100]
    predict(model, X, y)


In [None]:
#######코드 재배열 & input(30,24) 로 성공!!!!!! 80%의 성능!!!!!

import csv
import sys
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import os.path
import pandas as pd

#Chromagram (# time steps, # of coefficient)

# Dataset folder and JSON file names. Neither constant is referenced by the
# code of this cell (prepare_dataset is called with the literal 's' instead).
DATASET_PATH = 'data'
JSON_PATH = 'data.json'


def chord_to_int(chord_name):
    """Translate a chord label such as ``'A:min'`` into its integer class id.

    Ids are ``root_index * 6 + suffix_index`` with roots ordered A..G and the
    suffixes ordered as listed below; the labels ``'N'`` and ``'X'`` both map
    to 42.

    :param chord_name: Chord label text.
    :return: Integer id in ``0..42``, or ``None`` if the label is not one of
        the known root/suffix combinations.
    """
    if chord_name in ('N', 'X'):
        return 42

    roots = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
    suffixes = (':maj', ':min', 'b:maj', 'b:min', '#:maj', '#:min')

    # Lazily enumerate every root/suffix pair and stop at the first match.
    matching_ids = (
        root_index * 6 + suffix_index
        for root_index, root in enumerate(roots)
        for suffix_index, suffix in enumerate(suffixes)
        if root + suffix == chord_name
    )
    return next(matching_ids, None)




def prepare_dataset(dataset_path, json_path):
    """Build the chord-recognition training arrays from the files on disk.

    Every song folder ``0003`` .. ``0199`` that contains both a
    ``bothchroma.csv`` (chroma frames) and a ``majmin.lab`` (chord intervals)
    file is read. Each annotated interval ``[start, end)`` becomes one sample:
    the first (at most) 30 chroma frames whose timestamp lies inside the
    interval, padded to exactly 30 frames by repeating the last one.
    Intervals that contain no frame are dropped.

    The previous implementation removed frames from the list it was iterating
    over, which silently skipped every second frame of an interval. All frames
    of an interval are now considered.

    :param dataset_path: Not used; the input locations are hard-coded below.
    :param json_path: Not used by this version.
    :return: ``(X, y)`` -- ``X`` is the array of frame bundles (expected shape
        ``(N, 30, 24)``) and ``y`` the array of integer chord labels.
    :raises ValueError: If no sample at all could be built, e.g. because the
        data files are not where this function looks for them.
    """
    from bisect import bisect_left  # local import: only needed here

    frames_per_bundle = 30
    skipped_rows = 20  # leading rows of every chroma CSV that are ignored

    data = {
        "labels": [],
        "Chromagram_bundles": [],
    }
    chroma_and_start_time = {}    # song id -> [[time, chroma values...], ...]
    chord_and_interval_time = {}  # song id -> [[start, end, label], ...]

    # metadata holds the chromagram input, annotations the chord output
    path_metadata = './metadata/metadata'
    file_metadata = '/bothchroma.csv'
    path_annotations = './annotations/annotations/'
    file_annotations = '/majmin.lab'

    for folder_num in range(3, 200):
        # Folder names are the song number zero-padded to four digits.
        common_path_variable = '/' + str(folder_num).zfill(4)
        complete_path_metadata = path_metadata + common_path_variable + file_metadata
        complete_path_annotations = path_annotations + common_path_variable + file_annotations

        # Skip songs for which either of the two files is missing.
        if not (os.path.isfile(complete_path_metadata)
                and os.path.isfile(complete_path_annotations)):
            continue

        song_id = str(folder_num)

        with open(complete_path_metadata, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            for _ in range(skipped_rows):
                next(reader, None)  # tolerate files shorter than the skip
            # Column 0 is discarded; what remains is the timestamp followed
            # by the chroma values, all converted from text to float.
            chroma_and_start_time[song_id] = [
                list(map(float, row[1:])) for row in reader if len(row) > 1
            ]

        intervals = []
        with open(complete_path_annotations, 'r', encoding='utf-8') as f:
            for row in csv.reader(f):
                if not row:
                    continue
                # Each line is "<start>\t<end>\t<chord name>".
                fields = row[0].split('\t')
                intervals.append(
                    [float(fields[0]), float(fields[1]), chord_to_int(fields[2])])
        chord_and_interval_time[song_id] = intervals

    print(len(chord_and_interval_time))
    print(len(chroma_and_start_time))

    for song_id, frames in chroma_and_start_time.items():
        # Timestamps are expected in ascending order (the early ``break`` of
        # the previous implementation relied on the same property), which
        # allows a binary search for the interval borders.
        times = [frame[0] for frame in frames]

        for start, end, label in chord_and_interval_time[song_id]:
            first = bisect_left(times, start)
            last = min(bisect_left(times, end), first + frames_per_bundle)
            chroma_bundle = [frame[1:] for frame in frames[first:last]]
            if not chroma_bundle:
                continue

            # Pad short bundles by repeating their last frame.
            missing = frames_per_bundle - len(chroma_bundle)
            chroma_bundle.extend([chroma_bundle[-1]] * missing)

            data["Chromagram_bundles"].append(chroma_bundle)
            data["labels"].append(label)

    if not data["Chromagram_bundles"]:
        raise ValueError(
            "No training sample could be built; check that the data exists "
            "under {!r} and {!r}".format(path_metadata, path_annotations))

    print(len(data["labels"]))
    print(data["Chromagram_bundles"][0])
    print(len(data["Chromagram_bundles"]))

    print(len(data["Chromagram_bundles"]))
    print(len(data["Chromagram_bundles"][0]))
    print(len(data["Chromagram_bundles"][0][0]))

    X = np.array(data["Chromagram_bundles"])
    y = np.array(data["labels"])

    # Author's note: X is expected to be (N, 30, 24). A shape of (N,) means
    # that some bundle deviates from 30 x 24 and the data must be rebuilt.
    print(X.shape)
    print(y.shape)
    return X, y




def plot_history(history):
    """Plot accuracy and loss of the training and validation sets per epoch.

    The ``val_*`` series are labelled as validation curves; they were
    previously mislabelled "test" in the legends.

    :param history: Training history of the model; its ``history`` dict must
        hold the keys ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
    :return: None
    """
    fig, (accuracy_axis, error_axis) = plt.subplots(2)

    # upper panel: accuracy
    accuracy_axis.plot(history.history["accuracy"], label="train accuracy")
    accuracy_axis.plot(history.history["val_accuracy"], label="validation accuracy")
    accuracy_axis.set_ylabel("Accuracy")
    accuracy_axis.legend(loc="lower right")
    accuracy_axis.set_title("Accuracy eval")

    # lower panel: error (loss)
    error_axis.plot(history.history["loss"], label="train error")
    error_axis.plot(history.history["val_loss"], label="validation error")
    error_axis.set_ylabel("Error")
    error_axis.set_xlabel("Epoch")
    error_axis.legend(loc="upper right")
    error_axis.set_title("Error eval")

    plt.show()


def prepare_datasets(test_size, validation_size):
    """Load the chord dataset and split it into train / validation / test parts.

    :param test_size: Share of all samples held out as the test set; forwarded
        to ``train_test_split`` as ``test_size``.
    :param validation_size: Share of the remaining (non-test) samples held out
        for validation; forwarded to ``train_test_split`` as ``test_size``.
    :return: ``X_train, X_validation, X_test, y_train, y_validation, y_test``
    """
    # The loader is called with the literal 's' for both of its arguments.
    features, targets = prepare_dataset('s', 's')
    print("How many dataset : ")
    print(len(features))
    print(len(targets))

    # First carve the test set off the full data ...
    X_rest, X_test, y_rest, y_test = train_test_split(
        features, targets, test_size=test_size)

    # ... then divide what is left into training and validation sets.
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_rest, y_rest, test_size=validation_size)

    # Unlike the CNN variant, the RNN input gets no extra trailing axis, so
    # the arrays are returned exactly as split.
    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    """Assemble the (uncompiled) stacked-LSTM chord classifier.

    :param input_shape: Shape of a single sample, handed to the first LSTM
        layer as ``input_shape``.
    :return: ``keras.Sequential`` model ending in a 43-way softmax layer
        (chord ids 0..41 plus id 42 for 'N'/'X', as produced by chord_to_int).
    """
    return keras.Sequential([
        # return_sequences=True so the second LSTM receives the whole sequence.
        # (Author's note: stacking one more LSTM layer here brought little benefit.)
        keras.layers.LSTM(64, input_shape=input_shape, return_sequences=True),
        keras.layers.LSTM(64),

        # Fully connected head with dropout.
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dropout(0.3),

        # One output unit per class.
        keras.layers.Dense(43, activation='softmax'),
    ])


def predict(model, X, y):
    """Print the expected label next to the model's prediction for one sample.

    :param model: Object exposing ``predict(batch)`` that returns one row of
        class scores per sample in the batch.
    :param X: A single input sample (NumPy array, no batch axis).
    :param y: Expected label of the sample; only used in the printed message.
    :return: None
    """
    # The model works on batches, so wrap the sample in a batch of size one.
    batch = np.expand_dims(X, axis=0)

    # Scores come back as one row per sample, e.g. [[0.1, 0.2, ...]].
    scores = model.predict(batch)

    # Position of the highest score in each row, e.g. [4].
    predicted_index = np.argmax(scores, axis=1)
    print("Expected index: {}, Predicted index: {}".format(y, predicted_index))


if __name__ == "__main__":
    # Hold out 25% of all samples for testing and 20% of the rest for validation.
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # Shape of one sample: axes 1 and 2 of the training array (axis 0 is the sample axis).
    input_shape = (X_train.shape[1], X_train.shape[2])
    print(input_shape)

    model = build_model(input_shape)

    # Labels are plain integers, hence the sparse categorical cross-entropy loss.
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy'],
    )
    model.summary()

    # Fit, monitoring the validation split after every epoch.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        batch_size=32,
        epochs=30,
    )

    # Show the accuracy/error curves recorded during training.
    plot_history(history)

    # Final score on the held-out test split.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Spot-check a single test sample.
    X = X_test[100]
    y = y_test[100]
    predict(model, X, y)



#인공지능 3차 개발일지(2020-07-23 ~ 2020-07-24)

In [None]:
### prepare_dataset



import csv
import numpy as np
import os.path
import json

# Arguments handed to prepare_dataset() by the __main__ guard of this cell:
# the dataset folder name and the JSON file the prepared data is written to.
DATASET_PATH = 'data'
JSON_PATH = 'data.json'


def chord_to_int(chord_name):
    """Translate a chord label such as ``'A:min7'`` into its integer class id.

    Ids are ``root_index * 15 + suffix_index`` with roots ordered A..G and the
    suffixes ordered as listed below; the labels ``'N'`` and ``'X'`` both map
    to 105.

    :param chord_name: Chord label text.
    :return: Integer id in ``0..105``, or ``None`` if the label is not one of
        the known root/suffix combinations.
    """
    if chord_name in ('N', 'X'):
        return 105

    roots = ('A', 'B', 'C', 'D', 'E', 'F', 'G')
    suffixes = (
        ':maj', ':min', 'b:maj', 'b:min', '#:maj', '#:min',
        ':maj7', ':min7', 'b:maj7', 'b:min7', '#:maj7', '#:min7',
        ':7', 'b:7', '#:7',
    )

    # Lazily enumerate every root/suffix pair and stop at the first match.
    matching_ids = (
        root_index * 15 + suffix_index
        for root_index, root in enumerate(roots)
        for suffix_index, suffix in enumerate(suffixes)
        if root + suffix == chord_name
    )
    return next(matching_ids, None)


def prepare_dataset(dataset_path, json_path):
    """Build the chord-recognition dataset from disk and store it as JSON.

    Every song folder ``0003`` .. ``1300`` that contains both a
    ``bothchroma.csv`` (chroma frames) and a ``majmin.lab`` (chord intervals)
    file is read. Each annotated interval ``[start, end)`` becomes one sample:
    the first (at most) 30 chroma frames whose timestamp lies inside the
    interval, padded to exactly 30 frames by repeating the last one.
    Intervals that contain no frame are dropped.

    The previous implementation removed frames from the list it was iterating
    over, which silently skipped every second frame of an interval. All frames
    of an interval are now considered.

    :param dataset_path: Not used; the input locations are hard-coded below.
    :param json_path: Path of the JSON file to write. It receives a dict with
        the keys ``"labels"`` and ``"Chromagram_bundles"``.
    :return: None
    :raises ValueError: If no sample at all could be built, e.g. because the
        data files are not where this function looks for them.
    """
    from bisect import bisect_left  # local import: only needed here

    frames_per_bundle = 30
    skipped_rows = 20  # leading rows of every chroma CSV that are ignored

    data = {
        "labels": [],
        "Chromagram_bundles": [],
    }
    chroma_and_start_time = {}    # song id -> [[time, chroma values...], ...]
    chord_and_interval_time = {}  # song id -> [[start, end, label], ...]

    # metadata holds the chromagram input, annotations the chord output
    path_metadata = './data/metadata/metadata'
    file_metadata = '/bothchroma.csv'
    path_annotations = './data/annotations/annotations/'
    file_annotations = '/majmin.lab'

    for folder_num in range(3, 1301):
        # Folder names are the song number zero-padded to four digits.
        common_path_variable = '/' + str(folder_num).zfill(4)
        complete_path_metadata = path_metadata + common_path_variable + file_metadata
        complete_path_annotations = path_annotations + common_path_variable + file_annotations

        # Skip songs for which either of the two files is missing.
        if not (os.path.isfile(complete_path_metadata)
                and os.path.isfile(complete_path_annotations)):
            continue

        song_id = str(folder_num)

        with open(complete_path_metadata, 'r', encoding='utf-8') as f:
            reader = csv.reader(f)
            for _ in range(skipped_rows):
                next(reader, None)  # tolerate files shorter than the skip
            # Column 0 is discarded; what remains is the timestamp followed
            # by the chroma values, all converted from text to float.
            chroma_and_start_time[song_id] = [
                list(map(float, row[1:])) for row in reader if len(row) > 1
            ]

        intervals = []
        with open(complete_path_annotations, 'r', encoding='utf-8') as f:
            for row in csv.reader(f):
                if not row:
                    continue
                # Each line is "<start>\t<end>\t<chord name>".
                fields = row[0].split('\t')
                intervals.append(
                    [float(fields[0]), float(fields[1]), chord_to_int(fields[2])])
        chord_and_interval_time[song_id] = intervals

    print(len(chord_and_interval_time))
    print(len(chroma_and_start_time))

    for song_id, frames in chroma_and_start_time.items():
        # Timestamps are expected in ascending order (the early ``break`` of
        # the previous implementation relied on the same property), which
        # allows a binary search for the interval borders.
        times = [frame[0] for frame in frames]

        for start, end, label in chord_and_interval_time[song_id]:
            first = bisect_left(times, start)
            last = min(bisect_left(times, end), first + frames_per_bundle)
            chroma_bundle = [frame[1:] for frame in frames[first:last]]
            if not chroma_bundle:
                continue

            # Pad short bundles by repeating their last frame.
            missing = frames_per_bundle - len(chroma_bundle)
            chroma_bundle.extend([chroma_bundle[-1]] * missing)

            data["Chromagram_bundles"].append(chroma_bundle)
            data["labels"].append(label)

    if not data["Chromagram_bundles"]:
        raise ValueError(
            "No training sample could be built; check that the data exists "
            "under {!r} and {!r}".format(path_metadata, path_annotations))

    print(len(data["labels"]))
    print(data["Chromagram_bundles"][0])
    print(len(data["Chromagram_bundles"]))

    print(len(data["Chromagram_bundles"]))
    print(len(data["Chromagram_bundles"][0]))
    print(len(data["Chromagram_bundles"][0][0]))

    # The arrays are only built to report their shapes.
    X = np.array(data["Chromagram_bundles"])
    y = np.array(data["labels"])

    # Author's note: X is expected to be (N, 30, 24). A shape of (N,) means
    # that some bundle deviates from 30 x 24 and the data must be rebuilt.
    print(X.shape)
    print(y.shape)

    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)


if __name__ == "__main__":
    # Build the dataset and write it to JSON_PATH when run as a script.
    prepare_dataset(DATASET_PATH, JSON_PATH)


In [None]:
#### train model


import json
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt



#Chromagram (# time steps, # of coefficient)


# JSON dataset file read by prepare_datasets() via load_dataset().
JSON_PATH = 'data.json'
# File the trained model is saved to at the end of the __main__ block.
SAVED_MODEL_PATH = "model_256.h5"


def load_dataset(data_path):
    """Read the JSON dataset file and return its inputs and targets as arrays.

    :param data_path: Path of a JSON file holding the keys
        ``"Chromagram_bundles"`` (inputs) and ``"labels"`` (targets).
    :return: ``(X_array, y_array)`` as NumPy arrays.
    """
    with open(data_path, 'r') as json_file:
        dataset = json.load(json_file)

    # Convert the plain Python lists to arrays: inputs first, then targets.
    return np.array(dataset["Chromagram_bundles"]), np.array(dataset["labels"])


def plot_history(history):
    """Plot accuracy and loss of the training and validation sets per epoch.

    The ``val_*`` series are labelled as validation curves; they were
    previously mislabelled "test" in the legends.

    :param history: Training history of the model; its ``history`` dict must
        hold the keys ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``.
    :return: None
    """
    fig, (accuracy_axis, error_axis) = plt.subplots(2)

    # upper panel: accuracy
    accuracy_axis.plot(history.history["accuracy"], label="train accuracy")
    accuracy_axis.plot(history.history["val_accuracy"], label="validation accuracy")
    accuracy_axis.set_ylabel("Accuracy")
    accuracy_axis.legend(loc="lower right")
    accuracy_axis.set_title("Accuracy eval")

    # lower panel: error (loss)
    error_axis.plot(history.history["loss"], label="train error")
    error_axis.plot(history.history["val_loss"], label="validation error")
    error_axis.set_ylabel("Error")
    error_axis.set_xlabel("Epoch")
    error_axis.legend(loc="upper right")
    error_axis.set_title("Error eval")

    plt.show()



def prepare_datasets(test_size, validation_size):
    """Load the JSON dataset and split it into train / validation / test parts.

    :param test_size: Share of all samples held out as the test set; forwarded
        to ``train_test_split`` as ``test_size``.
    :param validation_size: Share of the remaining (non-test) samples held out
        for validation; forwarded to ``train_test_split`` as ``test_size``.
    :return: ``X_train, X_validation, X_test, y_train, y_validation, y_test``
    """
    features, targets = load_dataset(JSON_PATH)
    print("How many dataset : ")
    print(len(features))
    print(len(targets))

    # First carve the test set off the full data ...
    X_rest, X_test, y_rest, y_test = train_test_split(
        features, targets, test_size=test_size)

    # ... then divide what is left into training and validation sets.
    X_train, X_validation, y_train, y_validation = train_test_split(
        X_rest, y_rest, test_size=validation_size)

    # Unlike the CNN variant, the RNN input gets no extra trailing axis, so
    # the arrays are returned exactly as split.
    return X_train, X_validation, X_test, y_train, y_validation, y_test


def build_model(input_shape):
    """Assemble the (uncompiled) stacked-LSTM chord classifier.

    :param input_shape: Shape of a single sample, handed to the first LSTM
        layer as ``input_shape``.
    :return: ``keras.Sequential`` model ending in a 106-way softmax layer.
    """
    return keras.Sequential([
        # return_sequences=True so the second LSTM receives the whole sequence.
        # (Author's note: stacking one more LSTM layer here brought little benefit.)
        keras.layers.LSTM(128, input_shape=input_shape, return_sequences=True),
        keras.layers.LSTM(128),

        # Fully connected head with dropout.
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dropout(0.3),

        # One output unit per class.
        keras.layers.Dense(106, activation='softmax'),
    ])


def predict(model, X, y):
    """Print the expected label next to the model's prediction for one sample.

    :param model: Object exposing ``predict(batch)`` that returns one row of
        class scores per sample in the batch.
    :param X: A single input sample (NumPy array, no batch axis).
    :param y: Expected label of the sample; only used in the printed message.
    :return: None
    """
    # The model works on batches, so wrap the sample in a batch of size one.
    batch = np.expand_dims(X, axis=0)

    # Scores come back as one row per sample, e.g. [[0.1, 0.2, ...]].
    scores = model.predict(batch)

    # Position of the highest score in each row, e.g. [4].
    predicted_index = np.argmax(scores, axis=1)
    print("Expected index: {}, Predicted index: {}".format(y, predicted_index))


if __name__ == "__main__":
    # Hold out 25% of all samples for testing and 20% of the rest for validation.
    X_train, X_validation, X_test, y_train, y_validation, y_test = prepare_datasets(0.25, 0.2)

    # Shape of one sample: axes 1 and 2 of the training array (axis 0 is the sample axis).
    input_shape = (X_train.shape[1], X_train.shape[2])
    print(input_shape)

    model = build_model(input_shape)

    # Labels are plain integers, hence the sparse categorical cross-entropy loss.
    optimizer = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=['accuracy'],
    )
    model.summary()

    # Fit, monitoring the validation split after every epoch.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_validation, y_validation),
        batch_size=32,
        epochs=10,
    )

    # Show the accuracy/error curves recorded during training.
    plot_history(history)

    # Final score on the held-out test split.
    test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
    print("Accuracy on test set is: {}".format(test_accuracy))

    # Spot-check a single test sample.
    X = X_test[100]
    y = y_test[100]
    predict(model, X, y)

    # Persist the trained model for later use.
    model.save(SAVED_MODEL_PATH)

In [None]:
#### audio classification service class

import tensorflow.keras as keras
import numpy as np
import librosa

# File the Keras model is loaded from by Keyword_Sporring_Service().
# NOTE(review): the training cell saves its model to "model_256.h5" --
# confirm that this path points at the intended model file.
MODEL_PATH = "model.h5"
# Multiplied by SEGMENTATION_INTERVAL in preprocess() to obtain the segment
# length in samples; the author notes this value as one second of audio.
NUM_SAMPLES_TO_CONSIDER = 22050 # = 1 sec
# Factor applied in preprocess() to both the segment length and the hop length.
SEGMENTATION_INTERVAL = 0.365

class _Keyword_Spotting_Service:




    #singleton을 기초로 만든다....!

    model = None
    _instance = None
    _mappings = []

    type1 = ':maj'
    type2 = ':min'
    type3 = 'b:maj'
    type4 = 'b:min'
    type5 = '#:maj'
    type6 = '#:min'
    type7 = ':maj7'
    type8 = ':min7'
    type9 = 'b:maj7'
    type10 = 'b:min7'
    type11 = '#:maj7'
    type12 = '#:min7'
    type13 = ':7'
    type14 = 'b:7'
    type15 = '#:7'
    type_table = [type1, type2, type3, type4, type5, type6, type7, type8, type9, type10, type11, type12, type13, type14,
                  type15]

    root_chord_list = ['A', 'B', 'C', 'D', 'E', 'F', 'G']


    for chord_num, root_chord in enumerate(root_chord_list):
        for type_num, type in enumerate(type_table):
            chord_name = root_chord + type
            _mappings.append(chord_name)
    _mappings.append('None')




    def predict(self, file_path):

        # extract MFCCs
        chromagram_list = self.preprocess(file_path) # (# segments, # coefficients)  (# : number of를 의미)

        # convert 2d chromagram array into 3d array -> (# samples, # segments, # coefficients)
        # make prediction

        prediction_list = []
        for chromagram in chromagram_list:
            chromagram = chromagram[np.newaxis, ...]
            predictions = self.model.predict(chromagram)  # [  [0.1, 0.6, 0.1, ...]  ]
            predicted_index = np.argmax(predictions)
            predicted_chord = self._mappings[predicted_index]
            prediction_list.append(predicted_chord)

        return prediction_list


    def preprocess(self, file_path, n_chroma=24, n_fft=2048, hop_length=760): # hop length 380으로 30,24의 input shape 맞춰줌

        num_samples_to_consider = int(SEGMENTATION_INTERVAL * NUM_SAMPLES_TO_CONSIDER)

        segmentation_list = []

        # load audio file
        signal, sr = librosa.load(file_path)

        # ensure consistency in the audio file length
        length = 1

        while(len(signal) > length * num_samples_to_consider):

            temp_signal = signal[num_samples_to_consider * (length - 1):num_samples_to_consider * length]
            segmentation_list.append(temp_signal)
            length += 1

        # extract chromagram

        chromagram_list = []
        for segment in segmentation_list:
            chromagram = librosa.feature.chroma_stft(segment, n_chroma=n_chroma, n_fft=n_fft, hop_length=int(hop_length*SEGMENTATION_INTERVAL))
            chromagram = chromagram.T
            chromagram_list.append(chromagram)



        return chromagram_list


def Keyword_Sporring_Service():
    """Return the shared service instance, creating it on first use.

    On the first call the Keras model is loaded from ``MODEL_PATH`` and stored
    on the class; later calls return the same instance without reloading.

    :return: The single ``_Keyword_Spotting_Service`` instance.
    """
    # ensure that we only have 1 instance of KSS
    if _Keyword_Spotting_Service._instance is None:
        # Load the model before publishing the instance: if loading fails,
        # ``_instance`` stays None and the next call retries, instead of
        # handing out an instance whose model is still None.
        _Keyword_Spotting_Service.model = keras.models.load_model(MODEL_PATH)
        _Keyword_Spotting_Service._instance = _Keyword_Spotting_Service()
    return _Keyword_Spotting_Service._instance

if __name__ == "__main__":

    kss = Keyword_Sporring_Service()

    chord_list = kss.predict("test/Myself_edited.wav")

    for chord in chord_list:
        print(f'Predicted keywords : {chord}')

    # Number and share of segments classified as "no chord".
    none_num = chord_list.count('None')

    print(none_num)
    if chord_list:
        print(none_num/len(chord_list))
    else:
        # Without any segment the ratio is undefined; avoid a ZeroDivisionError.
        print("No segment could be classified (audio shorter than one segment).")
