# Making it a bit harder

We are going to increase the complexity in two ways:

* We are going to add chords and bass instruments
* We are going to let the lead synthesizer play multiple notes at once

In [None]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from music_generator.prefabs.random_walk_track import generate_dataset, make_lead_instrument, generate_lead_track
from music_generator.music.timing import Tempo, Signature, Duration
from music_generator.signalproc.signalproc import SamplingInfo, mix_at
from music_generator.analysis.plots import set_style
from music_generator.music.scales import GenericScale
from music_generator.music.songs import Track, Measure

from scipy.signal import stft

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib
import datetime as dt
%matplotlib inline

from IPython.display import Audio

In [None]:
# Call the style helper imported from music_generator.analysis.plots
# (presumably configures matplotlib defaults — confirm in that module).
set_style()

In [None]:
# --- Audio settings ---------------------------------------------------------
sr = 44100  # sample rate handed to SamplingInfo and reused for playback
sampling_info = SamplingInfo(sr)

# --- Musical settings -------------------------------------------------------
# Seven-step scale rooted on 'C'; the integers are presumably semitone offsets
# from the root — confirm against GenericScale.
scale = GenericScale('C', [0, 1, 4, 5, 7, 8, 10])
signature = Signature(4, 4)
n_measures = 128

# `t` is a short alias for the same Tempo object; later cells use both names.
tempo = Tempo(120)
t = tempo

In [None]:
# Build the instrument that a later cell uses to render the lead track
# (via lead_instrument.generate_track).
lead_instrument = make_lead_instrument(sampling_info)

In [None]:
# Generate the dataset: a tuple of tracks, a tuple of per-track parts and the
# combined mix.
# Pass the shared `tempo` rather than building a second Tempo(120) here, so the
# tempo is defined in exactly one place and cannot drift from the value the
# other cells use.
tracks, parts, mix = generate_dataset(n_measures, tempo, scale=scale)

In [None]:
# Split the dataset into its three voices.
# trk_* are the note-level tracks (trk_lead.generate_notes() is used below);
# y_* are the corresponding parts that get combined with mix_at() later on.
trk_bass, trk_chord, trk_lead = tracks
y_bass, y_chord, y_lead = parts

## Add harmony

In [None]:
# NOTE(review): `lead_track` is not referenced again in the visible cells; the
# harmony below is built from `trk_lead` instead. Confirm whether this call is
# still needed. The meaning of the trailing `4` is not visible here — TODO confirm.
lead_track = generate_lead_track(scale, tempo, signature, n_measures, 4)

In [None]:
# Lookup table pairing each generated scale note with the note two positions
# further along the generated sequence (a third above, assuming scale.generate
# returns the notes in ascending order — confirm).
# The scale is generated once and sliced twice instead of being generated twice.
scale_notes = scale.generate(4, 8)
terts = pd.DataFrame({'note': scale_notes[:-2], 'harm': scale_notes[2:]})

# Materialise the lead notes once; the original walked generate_notes() four
# separate times, once per column.
lead_notes = list(trk_lead.generate_notes())
df_lead = pd.DataFrame({
    'note': [n.note for n in lead_notes],
    'offset': [n.offset for n in lead_notes],
    'duration': [n.duration for n in lead_notes],
    'velocity': [n.velocity for n in lead_notes],
})

# Default (inner) merge: lead notes without an entry in `terts` are dropped.
df_harm = pd.merge(df_lead, terts, on='note')

In [None]:
# Rebuild the lead track so that every note sounds together with its harmony
# note: both are added with the same start, length and velocity.
measure = Measure(t, Signature(4, 4))
for voiced in df_harm.itertuples(index=False):
    start_beats = voiced.offset.beats(t)
    length_beats = voiced.duration.beats(t)
    for pitch in (voiced.harm, voiced.note):
        measure.add_note(pitch, start_beats, length_beats, voiced.velocity)

# All notes live in this single Measure; it replaces the original lead track.
trk_lead = Track([measure])

# Render the harmonised lead with the lead instrument.
y_lead = lead_instrument.generate_track(trk_lead)

In [None]:
# Combine the three parts into one signal: bass and chords first, then the
# (harmonised) lead on top of that.
mix = mix_at(mix_at(y_bass, y_chord), y_lead)

## End mix harmony

In [None]:
# Build a table with one row per distinct (offset, end) interval in seconds and
# one count column per note name, telling how many lead notes of that name
# occupy the interval.
# The notes are materialised once; the original walked generate_notes() three
# separate times.
lead_notes = list(trk_lead.generate_notes())

df = pd.DataFrame({
    'offset': [n.offset.seconds for n in lead_notes],
    'duration': [n.duration.seconds for n in lead_notes],
})
df['end'] = df['offset'] + df['duration']
df['note'] = [str(n.note) for n in lead_notes]

# size() counts notes per (offset, end, note); unstack turns the note names
# into columns, and intervals where a note is absent become 0.
df = df.groupby(['offset', 'end', 'note']).size().unstack('note').fillna(0).astype(int)
note_cols = df.columns  # note-name columns, captured before the index is reset
df = df.reset_index()

In [None]:
# Replace `note_cols` with the note names ordered by the notes' own sort order
# (rather than the string-sorted column index of df).
# NOTE(review): the trailing [1:] drops the first (lowest-sorting) note, so that
# note gets no target column. The reason is not visible here — confirm it is
# intentional.
distinct_notes = pd.Series(
    [n.note for n in trk_lead.generate_notes()]
).sort_values().drop_duplicates()
note_cols = [str(n) for n in distinct_notes][1:]

In [None]:
# Evaluate df without showing it: the trailing semicolon suppresses the
# notebook's output for this cell.
df;

In [None]:
# Play back the first 10 seconds of the mix (10 * sr samples at rate sr).
Audio(mix[0:10*sr] , rate=sr)

## The Fourier spectrum shows that the problem is much harder now!

In [None]:
# Short-time Fourier transform of the full mix; f_vec / t_vec are the frequency
# and time axes of Zxx and are reused by later cells.
f_vec, t_vec, Zxx = stft(mix, fs=sampling_info.sample_rate, nperseg=2048)

# Plot the magnitude spectrogram, zoomed to the first 10 s and 0-3 kHz.
stft_magnitude = np.abs(Zxx)
plt.pcolormesh(t_vec, f_vec, stft_magnitude, vmin=0, vmax=1e-1)
plt.title('STFT Magnitude')
plt.xlabel('Time [sec]')
plt.ylabel('Frequency [Hz]')
plt.xlim(0, 10)
plt.ylim(0, 3000);

In [None]:
# Frame-level targets: for every STFT frame time, sum the note-count rows of all
# intervals that contain that time (offset <= time < end).
# Done as one broadcast comparison plus a matrix product instead of running a
# pandas mask/select/sum per frame, which is far slower for thousands of
# frames. It also avoids reusing the name `t` (the tempo alias) as a loop
# variable.
frame_times = np.asarray(t_vec)[:, np.newaxis]        # (n_frames, 1)
interval_starts = df['offset'].values[np.newaxis, :]  # (1, n_intervals)
interval_ends = df['end'].values[np.newaxis, :]       # (1, n_intervals)
is_sounding = (interval_starts <= frame_times) & (frame_times < interval_ends)

# (n_frames, n_intervals) @ (n_intervals, n_notes) -> (n_frames, n_notes)
y_train = is_sounding.astype(int) @ df[note_cols].values

In [None]:
# Show the first four rows of the interval/note table as a sanity check.
df.head(4)

In [None]:
n_steps = 1      # time steps per input sequence
batch_size = 32  # fixed batch size (the model below is built with batch_shape)

# Features are the STFT magnitudes, transposed to (n_frames, n_frequencies).
X_train = np.abs(np.array(Zxx)).T

# Keep only as many frames as fill whole sequences of n_steps frames, then fold
# them into (n_sequences, n_steps, n_frequencies).
# The original kept just `n_frames // n_steps` frames before reshaping, which
# for n_steps > 1 discards most of the data and makes the reshape fail unless
# the remainder happens to divide evenly. For n_steps == 1 the result is
# unchanged.
# NOTE(review): with n_steps > 1, y_train must be reduced to one target per
# sequence as well; that is not handled here.
n_sequences = X_train.shape[0] // n_steps
X_train = X_train[:n_sequences * n_steps]
X_train = X_train.reshape(n_sequences, n_steps, X_train.shape[1])

In [None]:
# Trim features and targets to the same whole number of batches; the model is
# built with a fixed batch size, so a partial final batch is not allowed.
n_usable = batch_size * (len(X_train) // batch_size)
X_train = X_train[:n_usable]
y_train = y_train[:n_usable]

In [None]:
from tensorflow.keras import Model
from tensorflow.keras.layers import GRU, Input, Dense

In [None]:
# Stateful layers need the full batch shape up front:
# (batch_size, n_steps, n_frequencies).
inp = Input(batch_shape=(batch_size, n_steps, f_vec.shape[0]))

gru = GRU(14, stateful=True)
# One independent sigmoid per note: several notes can sound at the same time,
# so this is a multi-label problem rather than a single-class one.
dense = Dense(len(note_cols), activation='sigmoid')

x = inp
x = gru(x)
out = dense(x)

model = Model(inp, out)
# binary_crossentropy scores every note output independently, matching the
# sigmoid activations and the multi-hot targets. categorical_crossentropy
# assumes the outputs form one probability distribution over mutually exclusive
# classes, which does not hold when several notes are active at once.
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
model.summary()

In [None]:
# Display the transposed training array for inspection; nothing is assigned.
X_train.T

In [None]:
# Start from the desired validation fraction and shrink it so that the number
# of validation samples is a whole multiple of batch_size (the model is built
# with a fixed batch size).
val_split = 0.125
n_requested = len(X_train) * val_split
n_too_much = n_requested % batch_size   # samples beyond the last full batch
n_val_split = n_requested - n_too_much  # validation samples actually used
val_split = n_val_split / len(X_train)  # back to a fraction for model.fit
val_split

In [None]:
# First training run. The GRU is stateful, so its state is carried from one
# batch to the next; shuffle=False keeps the samples in their original order,
# as Keras recommends for stateful RNNs (the default shuffle=True would
# reorder them every epoch and make the carried-over state meaningless).
history = model.fit(X_train,
                    y_train,
                    batch_size=batch_size,
                    epochs=300,
                    validation_split=val_split,
                    shuffle=False,
                    verbose=0)

Get some coffee

In [None]:
# Compare predictions (left) with targets (right) for the first 200 frames.
y_pred = model.predict(X_train, batch_size=batch_size)

fig, ax = plt.subplots(1, 2)
sel = np.arange(200)

# Transposed so that notes run along the y-axis and frames along the x-axis.
ax[0].pcolormesh(y_pred[sel].T, vmin=0, vmax=1)
ax[1].pcolormesh(y_train[sel].T, vmin=0, vmax=1)

In [None]:
# Open-ended training: keep fitting in rounds of 600 epochs and report the
# losses after each round. The loop never exits by itself; stop it by
# interrupting the kernel.
nit = 0
while True:
    print(f"{dt.datetime.now().strftime('%H:%M:%S')}: iteration {nit}")
    # shuffle=False: the GRU is stateful, so samples must keep their order for
    # the state carried across batches to be meaningful (Keras recommendation
    # for stateful RNNs; the default would shuffle every epoch).
    history = model.fit(X_train, y_train, batch_size=batch_size, epochs=600,
                        validation_split=val_split, shuffle=False, verbose=0)

    # Losses of the last epoch of this round.
    train_loss = history.history['loss'][-1]
    val_loss = history.history['val_loss'][-1]
    print(f"Training loss: {train_loss}, Validation loss: {val_loss}")

    nit += 1

## After training for a night

In [None]:
# Same comparison as before the long training run: predictions on the left,
# targets on the right, first 200 frames.
y_pred = model.predict(X_train, batch_size=batch_size)

fig, ax = plt.subplots(1, 2)
sel = np.arange(200)

ax_pred, ax_true = ax
ax_pred.pcolormesh(y_pred[sel].T, vmin=0, vmax=1)
ax_true.pcolormesh(y_train[sel].T, vmin=0, vmax=1)