# Data Generation Notebook
---

## Import Statements

In [1]:
import glob
import pickle
import numpy as np
import pandas as pd
from music21 import converter, instrument, note, chord
import tensorflow as tf
from keras.utils.np_utils import to_categorical

2022-11-28 16:14:15.923200: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Parsing all the notes in the MIDI files

In [2]:
# Collect every note and chord from the sample MIDI files into one flat list
# of string tokens, then pickle that list to ../data/notes.
notes = []

# glob.glob returns paths in arbitrary order; sorting keeps the token order
# (and therefore every sequence built from it) identical across runs.
for file in sorted(glob.glob("../lofi-samples/samples/*.mid")):
    midi = converter.parse(file)

    try:
        # Prefer the first part produced by splitting the score by instrument.
        s2 = instrument.partitionByInstrument(midi)
        notes_to_parse = s2.parts[0].recurse()
    except Exception:
        # Best-effort fallback when partitioning yields nothing usable
        # (e.g. `s2` is None or has no parts). Catching Exception rather than
        # using a bare `except` lets KeyboardInterrupt/SystemExit propagate.
        # NOTE(review): newer music21 releases appear to deprecate `.flat` in
        # favour of `.flatten()` - confirm against the installed version.
        notes_to_parse = midi.flat.notes

    for parsed_note in notes_to_parse:
        if isinstance(parsed_note, note.Note):
            # A single note is stored as the string form of its pitch.
            notes.append(str(parsed_note.pitch))
        elif isinstance(parsed_note, chord.Chord):
            # A chord is stored as its `normalOrder` values joined by '.'.
            notes.append('.'.join(str(n) for n in parsed_note.normalOrder))

with open("../data/notes", "wb") as notes_file:
    pickle.dump(notes, notes_file)

## Creating the Input Sequences and Output Sequences

In [4]:
# Turn the flat `notes` token list into fixed-length training windows:
# each input is SEQUENCE_LENGTH consecutive tokens (as scaled integer ids)
# and the matching output is the one-hot id of the token that follows.
SEQUENCE_LENGTH = 50

if len(notes) <= SEQUENCE_LENGTH:
    # Without this guard no window can be built and the failure would only
    # surface later as an opaque error from the encoding step.
    raise ValueError(
        f"Need more than {SEQUENCE_LENGTH} notes to build sequences, "
        f"got {len(notes)}"
    )

# Sorted vocabulary of distinct tokens; the position in this list is the id.
pitch_names = sorted(set(notes))
num_vocab = len(pitch_names)

# Token -> integer id lookup.
note_dict = {pitch: number for number, pitch in enumerate(pitch_names)}

network_input = []
network_output = []

# Slide a window of SEQUENCE_LENGTH tokens over `notes`, one step at a time.
for i in range(len(notes) - SEQUENCE_LENGTH):
    sequence_in = notes[i:i + SEQUENCE_LENGTH]
    sequence_out = notes[i + SEQUENCE_LENGTH]
    network_input.append([note_dict[token] for token in sequence_in])
    network_output.append(note_dict[sequence_out])

num_patterns = len(network_input)

# Shape (num_patterns, SEQUENCE_LENGTH, 1), then scale ids into [0, 1).
network_input = np.reshape(network_input, (num_patterns, SEQUENCE_LENGTH, 1))
network_input = network_input / float(num_vocab)

# Pass num_classes explicitly: otherwise the one-hot width is inferred from
# the largest target id and comes out narrower than the vocabulary whenever
# the highest-id token never occurs as a target.
network_output = to_categorical(network_output, num_classes=num_vocab)

# Persist each artifact under ../data/<name>.
artifacts = {
    "network_input": network_input,
    "network_output": network_output,
    "note_dict": note_dict,
    "pitch_names": pitch_names,
}
for artifact_name, artifact in artifacts.items():
    with open(f"../data/{artifact_name}", "wb") as out_file:
        pickle.dump(artifact, out_file)
