In [None]:
import os
import numpy as np
from pydub import AudioSegment
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
   

# Data loading
def _parse_lrc_timestamps(lrc_path):
    """Return the timestamps, in seconds, tagged at the start of each line of an LRC file.

    A line contributes a timestamp only when it starts with a ``[...]`` tag whose
    colon-separated fields are all numeric: two fields are read as
    ``minutes:seconds``, three as ``hours:minutes:seconds``. Blank lines, lines
    without a leading tag and non-numeric tags (e.g. ``[ar:Artist]``) are skipped.
    """
    timestamps = []
    # utf-8-sig drops a leading BOM; errors='replace' keeps undecodable lyric
    # bytes from aborting the load (only the ASCII tag is parsed anyway).
    with open(lrc_path, 'r', encoding='utf-8-sig', errors='replace') as f:
        for line in f:
            line = line.strip()
            if not line.startswith('[') or ']' not in line:
                continue
            parts = line.split(']')[0][1:].split(':')
            try:
                values = [float(part) for part in parts]
            except ValueError:
                # Metadata tag such as [ar:...], [ti:...], [offset:...]
                continue
            if len(values) == 3:
                timestamps.append(values[0] * 3600 + values[1] * 60 + values[2])
            elif len(values) == 2:
                timestamps.append(values[0] * 60 + values[1])
    return timestamps


def load_data(data_dir, max_length=5000):
    """Load every ``.lrc`` file in ``data_dir`` that has a same-named ``.mp3`` next to it.

    Args:
        data_dir: Directory that is scanned (non-recursively) for ``.lrc``/``.mp3`` pairs.
        max_length: Minimum padded length. It is raised to the longest clip
            (in samples) found in the directory.

    Returns:
        A tuple ``(X, y)`` where, with ``L`` the final padded length:
        ``X`` is a float32 array of shape ``(n_pairs * L, 1, 1)`` holding the
        zero-padded audio samples of all clips back to back, and ``y`` is an
        array of shape ``(n_pairs * L,)`` holding each clip's LRC timestamps
        (seconds) followed by zero padding.

    Raises:
        ValueError: If ``data_dir`` contains no ``.lrc``/``.mp3`` pair.
    """
    X, y = [], []
    for file_index, filename in enumerate(os.listdir(data_dir)):
        print(file_index)
        if not filename.endswith('.lrc'):
            continue
        audio_path = os.path.join(data_dir, filename[:-4] + '.mp3')
        if not os.path.exists(audio_path):
            continue
        lrc_path = os.path.join(data_dir, filename)

        audio = AudioSegment.from_file(audio_path)
        # NOTE(review): for multi-channel audio the sample array presumably
        # interleaves the channels - confirm whether a mono mix-down is wanted.
        audio_data = np.array(audio.get_array_of_samples(), dtype=np.float32)
        audio_data = np.expand_dims(audio_data, axis=1)

        timestamps = _parse_lrc_timestamps(lrc_path)

        print("audio_data: ", audio_data)
        print("timestamps: ", timestamps)

        X.append(audio_data)
        y.append(timestamps)
        # Also cover len(timestamps) so the label padding width below can never be negative.
        max_length = max(max_length, len(audio_data), len(timestamps))

    if not X:
        raise ValueError(f"No matching .lrc/.mp3 pairs found in {data_dir!r}")

    # Zero-pad every clip and every timestamp list to the common length
    print("PREPARING DATA 1")
    X_padded = [np.pad(audio_data, ((0, max_length - len(audio_data)), (0, 0)), mode='constant') for audio_data in X]
    y_padded = [np.pad(timestamps, (0, max_length - len(timestamps)), mode='constant') for timestamps in y]
    X, y = np.array(X_padded), np.array(y_padded)

    # Flatten the clips into one long run of single-sample rows.
    # Every padded clip has exactly max_length rows, so cutting it in steps of
    # max_length yields a single window per clip; stacking those windows is the
    # same as reshaping.
    # NOTE(review): fixed-size windowing looks intended here, but max_length has
    # already grown to the longest clip - confirm the desired window size.
    print("PREPARING DATA 2")
    X = X.reshape(-1, 1, 1)
    y = y.reshape(-1)
    return X, y

# Load the dataset
data_dir = '../../data/short_dataset'
X, y = load_data(data_dir)

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

# Build the model: three stacked sequence-returning LSTMs, each followed by
# dropout, then a small dense head producing one value per time step.
# NOTE(review): 'accuracy' is tracked next to an MSE loss; for a continuous
# target that metric is unlikely to be informative - confirm it is wanted.
model = Sequential([
    LSTM(256, input_shape=(None, 1), return_sequences=True),
    Dropout(0.8),
    LSTM(128, return_sequences=True),
    Dropout(0.8),
    LSTM(64, return_sequences=True),
    Dropout(0.8),
    Dense(32, activation='relu'),
    Dropout(0.5),
    Dense(1),
])
model.compile(optimizer=Adam(), loss='mse', metrics=['accuracy'])

def data_generator(X, y, batch_size=32):
    """Yield random ``(X, y)`` mini-batches forever.

    Each batch is built from ``batch_size`` row indices drawn uniformly, with
    replacement, from ``[0, len(X))`` using NumPy's global random state; the
    same indices select the rows of ``X`` and of ``y``. The generator never
    terminates, so the consumer decides how many batches to pull.
    """
    while True:
        batch_rows = np.random.randint(0, len(X), size=batch_size)
        features = X[batch_rows]
        targets = y[batch_rows]
        yield features, targets

# Train on randomly sampled batches: 10 epochs of 3000 steps each, validating
# on 10 batches drawn from the held-out split after every epoch
history = model.fit(
    data_generator(X_train, y_train, batch_size=32),
    steps_per_epoch=3000,
    epochs=10,
    validation_data=data_generator(X_test, y_test, batch_size=32),
    validation_steps=10,
)

# Evaluate once on the held-out data. Because the model is compiled with a
# metric, evaluate() returns [loss, metric]; the result is unpacked here rather
# than formatted directly (formatting the list with ':.4f' raises TypeError and
# would stop the notebook before the model is saved).
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Save the trained model
model.save('my_model.h5')


0
audio_data:  [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]
1
2
3
audio_data:  [[ 0.]
 [ 0.]
 [ 0.]
 ...
 [ 1.]
 [-1.]
 [-2.]]
4
audio_data:  [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]
5
6
audio_data:  [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]
7
8
9
audio_data:  [[2.]
 [1.]
 [5.]
 ...
 [9.]
 [7.]
 [8.]]
10
audio_data:  [[1428.]
 [1252.]
 [1826.]
 ...
 [   0.]
 [   0.]
 [   0.]]
11
audio_data:  [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]
12
13
14
audio_data:  [[ 0.]
 [-1.]
 [ 0.]
 ...
 [ 0.]
 [ 0.]
 [ 0.]]
15
audio_data:  [[ 0.]
 [ 4.]
 [-6.]
 ...
 [-4.]
 [ 1.]
 [ 8.]]
16
audio_data:  [[ 0.]
 [ 0.]
 [ 0.]
 ...
 [-1.]
 [ 0.]
 [ 0.]]
17
audio_data:  [[ 0.]
 [ 0.]
 [-1.]
 ...
 [-1.]
 [-1.]
 [-1.]]
18
19
audio_data:  [[-1.]
 [-2.]
 [ 0.]
 ...
 [ 0.]
 [ 0.]
 [ 0.]]
20
21
audio_data:  [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]
22
audio_data:  [[ -6.]
 [  2.]
 [  5.]
 ...
 [-27.]
 [-10.]
 [-29.]]
23
audio_data:  [[ 0.]
 [ 0.]
 [ 0.]
 ...
 [-2.]
 [ 0.]
 [ 1.]]
24


In [16]:
!pip install scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Collecting scikit-learn
  Obtaining dependency information for scikit-learn from https://files.pythonhosted.org/packages/ae/54/e70102a9c12d27d985ba659f336851732415e5a02864bef2ead36afaf15d/scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading scikit_learn-1.5.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting scipy>=1.6.0 (from scikit-learn)
  Obtaining dependency information for scipy>=1.6.0 from https://files.pythonhosted.org/packages/88/ab/6ecdc526d509d33814835447bbbeedbebdec7cca46ef495a61b00a35b4bf/scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata
  Downloading scipy-1.13.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m610.2 kB/s[0m eta [36m0:00:00[0m1m454.4 kB/s