In [229]:
import numpy as np
import pandas as pd
import pydub
from keras.layers import Dense, LSTM, LeakyReLU
from keras.models import Sequential, load_model
from scipy.io.wavfile import read, write
from tensorflow import keras

## Train the model

In [96]:
# Convert the mp3 recording to wav so that it can be read with scipy below
sound = pydub.AudioSegment.from_mp3(r"..\scripts\assets\data\recordings\eminem.mp3")
sound.export(r"..\scripts\assets\data\recordings\eminem.wav", format="wav")

# First recording: compute the cut points, then keep only the first quarter as a DataFrame
rate, music1 = read(r'..\scripts\assets\data\recordings\recording_08-10-2020_19-01-20.wav')
music1_limit = len(music1) // 4
music1_beginlimit = len(music1) // 8
music1 = pd.DataFrame(music1[:music1_limit])

# Second recording, trimmed the same way (note: `rate` is overwritten by this read)
rate, music2 = read(r'..\scripts\assets\data\recordings\eminem.wav')
music2_limit = len(music2) // 4
music2_beginlimit = len(music2) // 8
music2 = pd.DataFrame(music2[:music2_limit])

music2

60000
1438704


Unnamed: 0,0,1
0,0,0
1,0,0
2,0,0
3,0,0
4,0,0
...,...,...
2877403,-6374,-2511
2877404,-8590,-3918
2877405,-8178,-2730
2877406,-6648,-2086


In [97]:
# function to create training data by shifting the music data
def create_train_dataset(df, look_back, train=True):
    """Build sliding-window samples from the first two columns of ``df``.

    Window ``i`` holds rows ``i .. i + look_back - 1`` of a column; its target
    (only produced when ``train`` is true) is the value at row ``i + look_back``.
    ``len(df) - look_back - 1`` windows are produced, or none when that count
    is not positive.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns at positions 0 and 1 are windowed independently.
    look_back : int
        Number of consecutive rows in each window.
    train : bool, default True
        If true, also return the target value that follows each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X1, X2, y1, y2)`` when ``train`` is true, otherwise ``(X1, X2)``.
        ``X*`` have shape ``(n_windows, look_back)`` and ``y*`` have shape
        ``(n_windows,)``, also when ``n_windows`` is 0.
    """
    # Same window count as a row-by-row loop over range(len(df) - look_back - 1);
    # clamped so that inputs that are too short yield empty arrays.
    n_windows = max(len(df) - look_back - 1, 0)

    # Row positions of every window at once, shape (n_windows, look_back).
    # One fancy-indexing step replaces millions of per-row df.iloc calls.
    window_idx = np.arange(n_windows)[:, None] + np.arange(look_back)[None, :]

    col1 = np.asarray(df.iloc[:, 0])
    col2 = np.asarray(df.iloc[:, 1])
    dataX1 = col1[window_idx]
    dataX2 = col2[window_idx]
    if not train:
        return dataX1, dataX2

    # The target of window i is the sample right after it.
    target_idx = np.arange(n_windows) + look_back
    return dataX1, dataX2, col1[target_idx], col2[target_idx]

In [98]:
# Training windows: the leading part of each recording, stacked one after the other
X1, X2, y1, y2 = create_train_dataset(
    pd.concat([music1.iloc[:music1_beginlimit], music2.iloc[:music2_beginlimit]], axis=0),
    look_back=3,
    train=True,
)

In [99]:
# Test windows: the part of each recording after its own training cut, up to its kept limit.
# Each recording must be sliced with its own begin limit; using music1_beginlimit for music2
# would start the music2 test slice at the wrong sample.
test1, test2 = create_train_dataset(pd.concat([music1.iloc[music1_beginlimit+1 : music1_limit, :],
                                               music2.iloc[music2_beginlimit+1 : music2_limit, :]], axis=0), look_back=3, train=False)

In [100]:
# Give every array the 3-D layout (samples, 1, 3) that is passed to the LSTM models
X1, X2, test1, test2 = (
    windows.reshape((-1, 1, 3)) for windows in (X1, X2, test1, test2)
)

In [101]:
# LSTM model for channel 1 of the music data
rnn1 = Sequential()
rnn1.add(LSTM(units=100, activation='relu', input_shape=(None, 3)))
rnn1.add(Dense(units=50, activation='relu'))
rnn1.add(Dense(units=25, activation='relu'))
rnn1.add(Dense(units=12, activation='relu'))
# Linear output layer: the targets are signed audio samples, and a ReLU output
# can never produce a negative value.
rnn1.add(Dense(units=1, activation='linear'))
rnn1.compile(optimizer='adam', loss='mean_squared_error')
rnn1.fit(X1, y1, epochs=20, batch_size=100)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x1dd9e753488>

In [102]:
# LSTM model for channel 2 of the music data
rnn2 = Sequential()
rnn2.add(LSTM(units=100, activation='relu', input_shape=(None, 3)))
rnn2.add(Dense(units=50, activation='relu'))
rnn2.add(Dense(units=25, activation='relu'))
rnn2.add(Dense(units=12, activation='relu'))
# Linear output layer: the targets are signed audio samples, and a ReLU output
# can never produce a negative value.
rnn2.add(Dense(units=1, activation='linear'))
rnn2.compile(optimizer='adam', loss='mean_squared_error')
rnn2.fit(X2, y2, epochs=20, batch_size=100)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x1ddb069eec8>

In [103]:
# Run each channel's model on that channel's test windows
pred_rnn1, pred_rnn2 = rnn1.predict(test1), rnn2.predict(test2)

In [104]:
# saving the LSTM predictions in wav format
# Put the two channel predictions side by side as columns of one array.
pred_stereo = np.hstack([pred_rnn1, pred_rnn2])
# Clip to the int16 range before casting: converting an out-of-range float to
# int16 is not well defined and gives corrupted (typically wrapped) samples.
int16_info = np.iinfo(np.int16)
pred_stereo = np.clip(pred_stereo, int16_info.min, int16_info.max).astype('int16')
write('pred_rnn.wav', rate, pred_stereo)

# saving the original music in wav format
write('original.wav',rate, pd.concat([music1.iloc[music1_beginlimit+1 : music1_limit, :],
                                      music2.iloc[music2_beginlimit+1 : music2_limit, :]], axis=0).values)

In [227]:
# Save models for reuse
# Call save() on the Sequential instances themselves: `Sequential.model` is a
# deprecated alias in standalone Keras and does not exist on tf.keras models.
rnn1.save(r'..\scripts\models\rnn1.h5')
rnn2.save(r'..\scripts\models\rnn2.h5')

## Backend: inference with the saved models (this part goes into the backend)

In [230]:
# Load models
# Use the load_model imported from keras.models, the same package the Sequential
# models in this notebook are built with, instead of going through tensorflow.keras.
rnn1 = load_model(r'..\scripts\models\rnn1.h5')
rnn2 = load_model(r'..\scripts\models\rnn2.h5')

In [231]:
# Load the recording that the saved models are applied to
rate, input_audio = read(r'..\scripts\assets\data\recordings\YAF_death_ps.wav')

# Wrap the samples in a DataFrame
input_audioframe = pd.DataFrame(input_audio)

# Split points: the first quarter of the rows, and the total number of rows
input_audioframe_maxlimit = int(len(input_audioframe))
input_audioframe_startlimit = int(input_audioframe_maxlimit / 4)

# Copy column 0 into column 1 whenever the frame does not have exactly two columns
if input_audioframe.shape[1] != 2:
    input_audioframe[1] = input_audioframe[0]

input_audioframe

Unnamed: 0,0,1
0,1,1
1,6,6
2,4,4
3,-3,-3
4,-14,-14
...,...,...
58448,-3,-3
58449,-5,-5
58450,0,0
58451,2,2


In [232]:
# Windows with targets over the first quarter of the recording; the slice is stacked twice
X1, X2, y1, y2 = create_train_dataset(
    pd.concat([input_audioframe.iloc[:input_audioframe_startlimit]] * 2, axis=0),
    look_back=3,
    train=True,
)

In [233]:
# Windows without targets over the rest of the recording; the slice is stacked twice
test1, test2 = create_train_dataset(
    pd.concat([input_audioframe.iloc[input_audioframe_startlimit + 1:input_audioframe_maxlimit]] * 2, axis=0),
    look_back=3,
    train=False,
)

In [234]:
# Give every array the 3-D layout (samples, 1, 3)
X1, X2, test1, test2 = (
    windows.reshape((-1, 1, 3)) for windows in (X1, X2, test1, test2)
)

In [235]:
# Run each loaded model on the windows of its channel
pred_rnn1, pred_rnn2 = rnn1.predict(test1), rnn2.predict(test2)

In [236]:
# saving the LSTM predictions in wav format
# Put the two channel predictions side by side as columns of one array.
pred_stereo = np.hstack([pred_rnn1, pred_rnn2])
# Clip to the int16 range before casting: converting an out-of-range float to
# int16 is not well defined and gives corrupted (typically wrapped) samples.
int16_info = np.iinfo(np.int16)
pred_stereo = np.clip(pred_stereo, int16_info.min, int16_info.max).astype('int16')
# Raw-string paths: '\s', '\o' and '\p' are invalid escape sequences in a normal
# string literal. The resulting path text is unchanged.
write(r'..\scripts\output\pred_rnn3.wav', rate, pred_stereo)

# saving the original music in wav format
write(r'..\scripts\output\original3.wav',rate, pd.concat([input_audioframe.iloc[input_audioframe_startlimit+1 : input_audioframe_maxlimit, :],
                                      input_audioframe.iloc[input_audioframe_startlimit+1 : input_audioframe_maxlimit, :]], axis=0).values)