# Model Training for PianoBear Transcriber

In [8]:
import glob
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 데이터 제네레이터
def data_generator(file_pattern, batch_size=32):
    files = sorted(glob.glob(file_pattern))
    while True:
        np.random.shuffle(files)
        for file in files:
            data = np.load(file, allow_pickle=True)
            spectrograms = [item['audio_spectrogram'] for item in data]
            midi_notes = [item['midi_notes'] for item in data]
            
            # 데이터 패딩
            spectrograms_padded = pad_sequences(spectrograms, dtype='float32', padding='post')
            midi_notes_padded = pad_sequences(midi_notes, dtype='float32', padding='post', value=-1)
            
            # MIDI 노트 원핫 인코딩 (88개 키)
            midi_notes_encoded = [tf.keras.utils.to_categorical(m, num_classes=88) for m in midi_notes_padded]
            
            # 배치 생성
            for start in range(0, len(spectrograms_padded), batch_size):
                end = start + batch_size
                yield spectrograms_padded[start:end], np.array(midi_notes_encoded[start:end])

# 데이터 분할
train_gen = data_generator('maestro_data/processed_maestro_data_0_*.npy', batch_size=32)
val_gen = data_generator('maestro_data/processed_maestro_data_50_*.npy', batch_size=32)
test_gen = data_generator('maestro_data/processed_maestro_data_100_*.npy', batch_size=32)

# LSTM 모델 구성
model = Sequential([
    LSTM(128, input_shape=(None, None), return_sequences=True),
    Dropout(0.5),
    TimeDistributed(Dense(88, activation='sigmoid'))
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(train_gen, steps_per_epoch=30, epochs=20, validation_data=val_gen, validation_steps=10)

# 모델 평가
test_loss, test_acc = model.evaluate(test_gen, steps=10)
print('Test accuracy:', test_acc)

# 모델 저장
model.save('piano_to_sheet_music_model.h5')


In [9]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import glob

# 전체 데이터 로드
def load_data(directory_pattern):
    files = glob.glob(directory_pattern)
    all_data = []
    for file in files:
        data = np.load(file, allow_pickle=True)
        all_data.extend(data)
    return all_data

# 데이터 분할
def split_data(data, train_ratio=0.8, val_ratio=0.1):
    np.random.shuffle(data)
    total_length = len(data)
    train_end = int(total_length * train_ratio)
    val_end = train_end + int(total_length * val_ratio)
    
    train_data = data[:train_end]
    val_data = data[train_end:val_end]
    test_data = data[val_end:]
    
    return train_data, val_data, test_data

# 데이터 제네레이터
def data_generator(data, batch_size=32):
    while True:
        np.random.shuffle(data)
        for start in range(0, len(data), batch_size):
            end = start + batch_size
            batch = data[start:end]
            
            spectrograms = [item['audio_spectrogram'] for item in batch]
            midi_notes = [item['midi_notes'] for item in batch]

            # 패딩
            spectrograms_padded = pad_sequences(spectrograms, dtype='float32', padding='post')
            midi_notes_padded = pad_sequences(midi_notes, dtype='float32', padding='post', value=-1)

            # MIDI 노트 원핫 인코딩
            midi_notes_encoded = np.array([tf.keras.utils.to_categorical(m, num_classes=88) for m in midi_notes_padded])

            yield spectrograms_padded, midi_notes_encoded

# 파일 로드 및 분할 실행
all_data = load_data('maestro_data/*.npy')
train_data, val_data, test_data = split_data(all_data, train_ratio=0.8, val_ratio=0.1)

# 제네레이터 생성
train_gen = data_generator(train_data, batch_size=32)
val_gen = data_generator(val_data, batch_size=32)
test_gen = data_generator(test_data, batch_size=32)


In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, TimeDistributed

# LSTM 모델 구성
model = Sequential([
    LSTM(128, input_shape=(None, None), return_sequences=True),
    Dropout(0.5),
    TimeDistributed(Dense(88, activation='sigmoid'))
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
model.fit(train_gen, steps_per_epoch=100, epochs=20, validation_data=val_gen, validation_steps=10)

# 모델 평가
test_loss, test_acc = model.evaluate(test_gen, steps=10)
print('Test accuracy:', test_acc)

# 모델 저장
model.save('piano_to_sheet_music_model.h5')


  super().__init__(**kwargs)


ValueError: Shapes used to initialize variables must be fully-defined (no `None` dimensions). Received: shape=(None, 512) for variable path='sequential/lstm/lstm_cell/kernel'