# Preparation: Imports, functions and Audio Trim


In [1]:
import os
import timeit

import essentia.standard as es
import numpy as np
import soundfile as sf
from joblib import load, dump
from keras.models import load_model
from matplotlib import pyplot as plt
from scipy.interpolate import interp1d
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV

In [2]:
def trim_wave(wave, idx, sample_rate=44100):
    """Cut one pitch period out of ``wave``, starting at a rising zero crossing.

    The pitch is estimated with Essentia's ``PitchYin`` using the whole signal
    as a single frame. The period length in samples is derived from that pitch
    and ``sample_rate``.

    Args:
        wave: 1-D mono audio signal.
        idx: Unused; kept so existing ``trim_wave(wave, i)`` calls keep working.
        sample_rate: Sampling rate of ``wave`` in Hz.

    Returns:
        A slice of ``wave`` that is one period long. It starts at the first
        rising zero crossing that leaves room for a full period, or at sample 0
        when no such crossing exists.

    Raises:
        ValueError: If the pitch confidence is below 0.8 or the estimated
            pitch is not positive.
    """
    pitch_detector = es.PitchYin(frameSize=len(wave), sampleRate=sample_rate)
    pitch, pitchconf = pitch_detector(wave)
    print(pitch, pitchconf)
    if pitchconf < 0.8:
        raise ValueError("Pitch confidence too low")
    if pitch <= 0:
        raise ValueError("Pitch must be positive to derive a period")
    period = 1.0 / pitch
    period_samples = int(period * sample_rate)

    # Indices i where the sign increases between wave[i] and wave[i + 1].
    rising_crossings = np.where(np.diff(np.sign(wave)) > 0)[0]
    # Only accept a start that leaves a complete period; a crossing too close
    # to the end would yield a shorter slice and ragged rows once the trimmed
    # waves are stacked into one array.
    usable = rising_crossings[rising_crossings + period_samples <= len(wave)]
    start = usable[0] if len(usable) > 0 else 0

    return wave[start : start + period_samples]


def frequency(waves):
    """Print the PitchYin pitch estimate of every wave in ``waves``.

    Each wave is analysed as a single frame spanning its full length. The
    confidence returned by the detector is ignored. Nothing is returned.
    """
    for signal in waves:
        detector = es.PitchYin(frameSize=len(signal))
        estimate, _confidence = detector(signal)
        print(estimate)

In [3]:
audio_dir = "def samples"
audio_files = sorted(os.listdir(audio_dir))
print(audio_files)

# Load every .wav file as 44.1 kHz mono; other directory entries (for example
# .DS_Store) are skipped. The recordings stay in a plain list: they are only
# iterated over, and stacking recordings of different lengths with np.array()
# fails on recent NumPy versions.
audio_data = []
for file in audio_files:
    if file.endswith(".wav"):
        loader = es.MonoLoader(filename=os.path.join(audio_dir, file), sampleRate=44100)
        audio_data.append(loader())

# Reduce each recording to a single pitch period.
audio_data_trimmed = [trim_wave(wave, i) for i, wave in enumerate(audio_data)]

# Every later cell indexes `audios` as a 2-D (recording, sample) array, so fail
# here with a clear message if the trimmed periods do not share one length.
trimmed_lengths = {len(wave) for wave in audio_data_trimmed}
if len(trimmed_lengths) != 1:
    raise ValueError(
        "Expected all trimmed waves to share one length, got lengths "
        f"{sorted(trimmed_lengths)}"
    )

audios = np.array(audio_data_trimmed)
# One pot value per recording, evenly spaced over [0, 4] in file-name order.
pot_values = np.linspace(0, 4, len(audio_data_trimmed))

['.DS_Store', '1.1.wav', '1.2.wav', '1.3.wav', '1.4.wav', '1.5.wav', '1.6.wav', '2.1.wav', '2.2.wav', '2.3.wav', '2.4.wav', '2.5.wav', '2.6.wav', '3.1.wav', '3.2.wav', '3.3.wav', '3.4.wav', '3.5.wav', '3.6.wav', '4.1.wav', '4.2.wav']
440.05621337890625 0.9994421601295471
440.03936767578125 0.9994345307350159
440.11334228515625 0.9992413520812988
440.09478759765625 0.9993952512741089
440.1029968261719 0.9989595413208008
440.0918884277344 0.9986041188240051
440.1206970214844 0.9982636570930481
440.1067810058594 0.9974650144577026
440.218017578125 0.9980720281600952
440.10198974609375 0.9990507960319519
440.08587646484375 0.9987959861755371
440.09356689453125 0.9973345398902893
440.0983581542969 0.9991087317466736
440.05987548828125 0.9990563988685608
440.07025146484375 0.9989929795265198
440.05804443359375 0.9983398914337158
440.056640625 0.995542049407959
440.0585021972656 0.995547354221344
440.052734375 0.99550861120224
440.06170654296875 0.9955691695213318
[0.         0.21052632 0.421

# Linear Interpolation


In [4]:
def predicted_output_lininter(pot_value, pot_values=pot_values, audios=audios):
    """Linearly interpolate the waveform for ``pot_value`` between recorded waves.

    Args:
        pot_value: Scalar pot position to predict for.
        pot_values: 1-D array of pot positions, one per row of ``audios``.
            Defaults to the notebook-level ``pot_values`` at definition time.
        audios: 2-D array with one trimmed wave per row. Defaults to the
            notebook-level ``audios`` at definition time.

    Returns:
        The interpolated wave as a NumPy array (interpolation runs along axis 0).

    Raises:
        ValueError: If ``pot_value`` is NaN or lies outside the range spanned
            by ``pot_values``.
    """
    # Derive the valid range from the data actually passed in rather than
    # hard-coding 0..4, so custom `pot_values` are validated correctly. With
    # the default 0..4 grid the message text is unchanged.
    lower, upper = np.min(pot_values), np.max(pot_values)
    if not lower <= pot_value <= upper:
        raise ValueError(f"Pot value must be between {lower:g} and {upper:g}")
    interpolator = interp1d(pot_values, audios, axis=0, kind="linear")
    return interpolator(pot_value)

# Random Forest


In [5]:
param_grid = {
    "n_estimators": [100, 200, 300, 400, 500],
    "max_depth": [None, 5, 10, 15, 20, 25, 30],
    "min_samples_split": [2, 5, 10, 15, 20],
    "min_samples_leaf": [1, 2, 5, 10, 15],
}

# Fixed seed so the grid search and the fitted forests are reproducible
# across kernel restarts.
RF_SEED = 42

# scikit-learn expects a 2-D feature matrix: one column holding the pot value.
pot_features = pot_values.reshape(-1, 1)

# Hyper-parameters are tuned on the first sample position only and then reused
# for every other position.
# NOTE(review): cv=5 uses unshuffled folds on sorted pot values, so each fold
# is a contiguous pot range - confirm this is the intended validation scheme.
rf = RandomForestRegressor(random_state=RF_SEED)
rf_grid = GridSearchCV(rf, param_grid, cv=5, n_jobs=-1)
rf_grid.fit(pot_features, audios[:, 0])
best_params = rf_grid.best_params_
print(best_params)

# One regressor per sample position of the trimmed wave: pot value -> amplitude.
models = [
    RandomForestRegressor(random_state=RF_SEED, **best_params).fit(
        pot_features, audios[:, i]
    )
    for i in range(audios.shape[1])
]


def predicted_RF(pot_value):
    """Predict a full wave for ``pot_value`` with the per-sample forests.

    Every regressor in the notebook-level ``models`` list is queried with the
    single feature ``pot_value``; the scalar predictions are collected, in
    list order, into one NumPy array.
    """
    query = [[pot_value]]
    samples = []
    for forest in models:
        samples.append(forest.predict(query)[0])
    return np.array(samples)

In [None]:
# Random-forest prediction for every recorded pot value, stacked row by row.
predicted_output_RF = np.array(
    [predicted_RF(pot_value) for pot_value in pot_values]
)

In [None]:
# Persist the list of fitted per-sample random forests with joblib.
# NOTE(review): the NameError shown in this cell's saved output refers to
# `model`, which this line does not use - the output looks stale; re-run to confirm.
dump(models, "models_RF.joblib")

NameError: name 'model' is not defined

# RNN


In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, Bidirectional, BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler

# Standardize the pot_values
scaler = StandardScaler()
pot_values_scaled = scaler.fit_transform(pot_values.reshape(-1, 1))

# Define the model: a bidirectional LSTM over a length-1 sequence, followed by
# two dense layers and a linear output with one unit per wave sample.
model = Sequential()
model.add(Bidirectional(LSTM(64, input_shape=(None, 1))))
model.add(Dense(128, activation="relu"))
model.add(Dense(128, activation="relu"))
model.add(Dense(audios.shape[1]))  # Adjusted to match the number of output samples

# Compile the model. `learning_rate` is the supported keyword; the old `lr`
# alias is deprecated and rejected by recent Keras releases.
opt = Adam(learning_rate=0.001)
model.compile(loss="mean_squared_error", optimizer=opt)

# Shrink the learning rate by 5% whenever the training loss has not improved
# by at least 5e-7 for 25 epochs.
reduce_lr = ReduceLROnPlateau(monitor="loss", factor=0.95, patience=25, min_delta=5e-7)

# Reshape the data to fit the model: (samples, timesteps=1, features=1)
X = pot_values_scaled.reshape(len(pot_values_scaled), 1, 1)
Y = audios

# Train the model with the ReduceLROnPlateau callback
model.fit(X, Y, epochs=3000, batch_size=128, callbacks=[reduce_lr], verbose=0)


# Define a function to make predictions
def predicted_RNN(pot_value):
    """Predict a full wave for ``pot_value`` with the trained network.

    The value is standardized with the notebook-level ``scaler``, reshaped to
    a batch of shape (1, 1, 1) and passed to ``model.predict``; the single
    row of the result is returned.
    """
    scaled = scaler.transform(np.array([[pot_value]]))
    batch = scaled.reshape(1, 1, 1)
    prediction = model.predict(batch)
    return prediction[0]

In [None]:
# Network prediction for every recorded pot value, stacked row by row.
predicted_RNN_values = np.array(list(map(predicted_RNN, pot_values)))

In [None]:
# Save the trained Keras model to disk under the file name "model_RNN.h5".
model.save("model_RNN.h5")

# Graphs


## MSE


### Random Forest


In [None]:
# Per-recording MSE between the random-forest prediction and the trimmed wave,
# shown as a bar chart and summarised by its mean.
assert len(predicted_output_RF) == len(audio_data_trimmed)
mse_vals_rf = [
    mean_squared_error(rf_wave, recorded_wave)
    for rf_wave, recorded_wave in zip(predicted_output_RF, audio_data_trimmed)
]

bar_positions = range(len(mse_vals_rf))
plt.bar(bar_positions, mse_vals_rf)
plt.xlabel("Sample number")
plt.ylabel("MSE")
plt.show()
print(np.mean(mse_vals_rf))

In [None]:
# Overlay the random-forest prediction and the recorded wave for recording 1.
fig, ax = plt.subplots()
ax.plot(predicted_output_RF[1], label="RF")
ax.plot(audios[1], label="Original")
ax.set_title("Comparison for index 1, pot_value = 0.21052632")
ax.legend()
plt.show()

In [None]:
# Overlay the random-forest prediction and the recorded wave for recording 12.
fig, ax = plt.subplots()
ax.plot(predicted_output_RF[12], label="RF")
ax.plot(audios[12], label="Original")
ax.set_title("Comparison for index 12, pot_value =  2.52631579")
ax.legend()
plt.show()

### RNN


In [None]:
# Per-recording MSE between the network prediction and the trimmed wave,
# shown as a bar chart and summarised by its mean.
assert len(predicted_RNN_values) == len(audio_data_trimmed)
mse_vals_rnn = [
    mean_squared_error(rnn_wave, recorded_wave)
    for rnn_wave, recorded_wave in zip(predicted_RNN_values, audio_data_trimmed)
]

bar_positions = range(len(mse_vals_rnn))
plt.bar(bar_positions, mse_vals_rnn)
plt.xlabel("Sample number")
plt.ylabel("MSE")
plt.show()
print(np.mean(mse_vals_rnn))

In [None]:
# Overlay the network prediction and the recorded wave for recording 16.
fig, ax = plt.subplots()
ax.plot(predicted_RNN_values[16], label="RNN")
ax.plot(audios[16], label="Original")
ax.set_title("Comparison for index 16, pot_value =  3.36842105")
ax.legend()
plt.show()

In [None]:
# Overlay the network prediction and the recorded wave for recording 18.
fig, ax = plt.subplots()
ax.plot(predicted_RNN_values[18], label="RNN")
ax.plot(audios[18], label="Original")
ax.set_title("Comparison for index 18, pot_value =  3.78947368")
ax.legend()
plt.show()

### Linear Interpolation


In [None]:
# Midpoint between each pair of neighbouring recorded pot values; these are
# positions for which no recording exists.
pot_values_new = [
    (left + right) / 2 for left, right in zip(pot_values, pot_values[1:])
]
print(pot_values_new)

In [None]:
# Predictions of all three approaches at the midpoint pot values.
inter_pred_LI = [predicted_output_lininter(midpoint) for midpoint in pot_values_new]
inter_pred_RNN = [predicted_RNN(midpoint) for midpoint in pot_values_new]
inter_pred_RF = [predicted_RF(midpoint) for midpoint in pot_values_new]

In [None]:
# Per-midpoint MSE between the linear-interpolation and RNN predictions.
# Neither side is ground truth here, so this measures how much the two
# approaches disagree.
assert len(inter_pred_LI) == len(inter_pred_RNN)
mse_vals_rnn_LI = [
    mean_squared_error(li_wave, rnn_wave)
    for li_wave, rnn_wave in zip(inter_pred_LI, inter_pred_RNN)
]

plt.bar(range(len(mse_vals_rnn_LI)), mse_vals_rnn_LI)
plt.xlabel("Sample number")
plt.ylabel("MSE")
plt.title("MSE for Linear Interpolation and RNN")
plt.show()
print(np.mean(mse_vals_rnn_LI))

In [None]:
# Per-midpoint MSE between the linear-interpolation and random-forest
# predictions. Neither side is ground truth here, so this measures how much
# the two approaches disagree.
# The length check must cover the list that is actually zipped below (RF).
assert len(inter_pred_LI) == len(inter_pred_RF)
mse_vals_RF_LI = [
    mean_squared_error(li_wave, rf_wave)
    for li_wave, rf_wave in zip(inter_pred_LI, inter_pred_RF)
]

plt.bar(range(len(mse_vals_RF_LI)), mse_vals_RF_LI)
plt.xlabel("Sample number")
plt.ylabel("MSE")
plt.title("MSE for Linear Interpolation and Random Forest")
plt.show()
print(np.mean(mse_vals_RF_LI))

In [None]:
# Compare the three predictions at midpoint index 1.
fig, ax = plt.subplots()
ax.plot(inter_pred_LI[1], label="linear interpolation")
ax.plot(inter_pred_RNN[1], label="RNN")
ax.plot(inter_pred_RF[1], label="RF")
ax.set_title("Comparison for index 1, pot_value =  0.3157894736842105")
ax.legend()
plt.show()

In [None]:
# Show the recorded pot values (one per trimmed wave).
print(pot_values)

In [None]:
# Midpoint index 15 lies between recordings 15 and 16: draw both neighbours,
# shade the band between them, and overlay the three predictions.
fig, ax = plt.subplots()
previous_wave = audios[15]
next_wave = audios[16]
ax.plot(previous_wave, label="wave previous to pred", color="red")
ax.plot(next_wave, label="wave next to pred", color="black")
ax.fill_between(
    range(len(previous_wave)), previous_wave, next_wave, color="red", alpha=0.1
)
ax.plot(inter_pred_LI[15], label="linear interpolation", color="blue")
ax.plot(inter_pred_RNN[15], label="RNN", color="green")
ax.plot(inter_pred_RF[15], label="RF", color="orange")
ax.set_title("Comparison for index 15, pot_value =  3.263157894736842")
ax.legend()
plt.show()

## Full Graphs


### Pot_values


In [None]:
# One figure per recorded pot value: recorded wave against both model outputs.
for i, pot in enumerate(pot_values):
    fig, ax = plt.subplots()
    ax.plot(audios[i], label="original", color="red")
    ax.plot(predicted_RNN_values[i], label="RNN", color="green")
    ax.plot(predicted_output_RF[i], label="RF", color="orange")
    ax.set_title(f"Comparison for index {i}, pot_value = {pot!s}")
    ax.legend()

    # plt.savefig("comparison_" + str(i) + ".png")
    plt.show()

### inter_pot_values


In [None]:
# One figure per midpoint pot value comparing the three predictions.
for i in range(len(pot_values_new)):
    plt.plot(inter_pred_LI[i], label="LI", color="red")
    # Labels now match the plotted series (they were swapped), with the same
    # colours as the recorded-pot-value figures: RNN green, RF orange.
    plt.plot(inter_pred_RNN[i], label="RNN", color="green")
    plt.plot(inter_pred_RF[i], label="RF", color="orange")
    # The title reports the midpoint that was actually predicted, not the
    # recorded pot value that happens to share the index.
    plt.title(
        "Comparison for index " + str(i) + ", pot_value_new = " + str(pot_values_new[i])
    )
    plt.legend()

    # plt.savefig("comparison_" + str(i) + ".png")
    plt.show()

# Audio


In [None]:
def create_and_save_as_sound(audio, output_filename, length, sample_rate=44100):
    """Loop ``audio`` to about ``length`` seconds and write it to a sound file.

    Args:
        audio: 1-D array holding the wave to repeat.
        output_filename: Path of the file written with ``soundfile.write``.
        length: Target duration in seconds.
        sample_rate: Sampling rate in Hz, used both to size the output and as
            the rate stored in the file.

    The repetition count is truncated to a whole number, so the result can be
    slightly shorter than ``length`` and is empty when ``length`` is shorter
    than one copy of ``audio``.
    """
    num_rep = int(length * sample_rate / len(audio))
    audio_stretch = np.tile(audio, num_rep)
    sf.write(output_filename, audio_stretch, sample_rate)

In [None]:
# for i in range(len(pot_values)):
#     create_and_save_as_sound(audios[i], "original" + str(i) + ".wav", 2)
#     create_and_save_as_sound(predicted_output_RF[i], "RF" + str(i) + ".wav", 2)
#     create_and_save_as_sound(predicted_RNN_values[i], "RNN" + str(i) + ".wav", 2)

In [None]:
# for i in range(len(pot_values_new)):
#     create_and_save_as_sound(inter_pred_LI[i], "LI" + str(i) + ".wav", 2)
#     create_and_save_as_sound(inter_pred_RF[i], "RF" + str(i) + ".wav", 2)
#     create_and_save_as_sound(inter_pred_RNN[i], "RNN" + str(i) + ".wav", 2)

# Timers


## Lin Inter


In [None]:
# Wall-clock time of one linear-interpolation prediction. `timeit` comes from
# the notebook's import cell.
start = timeit.default_timer()
a = predicted_output_lininter(0.5)
elapsed = timeit.default_timer() - start
print("Time: ", elapsed)

## RF


In [None]:
# Wall-clock time of one random-forest prediction (queries every per-sample
# model once). `timeit` comes from the notebook's import cell.
start = timeit.default_timer()
a = predicted_RF(0.5)
elapsed = timeit.default_timer() - start
print("Time: ", elapsed)

## RNN


In [None]:
# Wall-clock time of one network prediction. `timeit` comes from the
# notebook's import cell.
start = timeit.default_timer()
a = predicted_RNN(0.5)
elapsed = timeit.default_timer() - start
print("Time: ", elapsed)