In [1]:
# import numpy as np
# from pydub import AudioSegment
# from pydub.playback import play

# # Function to generate a sine wave
# def generate_sine_wave(frequency, duration, sample_rate=44100, amplitude=0.5):
#     t = np.linspace(0, duration, int(sample_rate * duration), False)
#     wave = amplitude * np.sin(2 * np.pi * frequency * t)
#     return wave

# # Function to create an 8-bit music note
# def create_note(frequency, duration, sample_rate=44100):
#     wave = generate_sine_wave(frequency, duration, sample_rate)
#     audio = np.int16(wave * 32767)  # Convert to 16-bit PCM
#     return AudioSegment(audio.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)

# # Define notes frequencies (C4, E4, G4, C5)
# notes = {
#     'C4': 261.63,
#     'E4': 329.63,
#     'G4': 392.00,
#     'C5': 523.25,
# }

# # Create a simple melody
# melody = [
#     ('C4', 0.5),
#     ('E4', 0.5),
#     ('G4', 0.5),
#     ('C5', 1.0),
#     ('G4', 0.5),
#     ('E4', 0.5),
#     ('C4', 1.0),
# ]

# # Generate the music
# music = AudioSegment.silent(duration=0)
# for note, duration in melody:
#     music += create_note(notes[note], duration * 0.5)

# # Export the music to a .wav file
# music.export("8bit_music.wav", format="wav")

# # Play the music (optional)
# play(music)


ML dataset generation and generating clips

In [2]:
import numpy as np
import random
import csv

# C major scale from C4 up to C5: note name -> frequency in Hz.
# Key order is kept as-is because callers draw from list(notes.values()).
notes = dict(
    C4=261.63,
    D4=293.66,
    E4=329.63,
    F4=349.23,
    G4=392.00,
    A4=440.00,
    B4=493.88,
    C5=523.25,
)

# Note lengths a generated element may take, in seconds.
durations = [0.25, 0.5, 1.0, 2.0]

# Common progressions in C major. Each chord of a progression is represented
# by a single note name from `notes` (the vi chord is written as plain A4).
chord_progressions = [
    'C4 F4 G4 C5'.split(),  # I-IV-V-I
    'C4 G4 A4 F4'.split(),  # I-V-vi-IV
    'C4 A4 F4 G4'.split(),  # I-vi-IV-V
    'C4 F4 C5 G4'.split(),  # I-IV-I-V
]

def generate_scale_based_melody(min_length, max_length):
    """Build a random melody whose pitches all come from the `notes` scale.

    The melody length is drawn uniformly from [min_length, max_length]
    (both inclusive). Each position then gets a random frequency from
    `notes` and a random length from `durations`.

    Returns:
        A list of (frequency, duration) tuples.
    """
    scale_frequencies = list(notes.values())
    note_count = random.randint(min_length, max_length)
    return [
        (random.choice(scale_frequencies), random.choice(durations))
        for _ in range(note_count)
    ]

def generate_chord_from_progression():
    """Pick one entry of `chord_progressions` and turn it into a chord element.

    The chosen note names are looked up in `notes`, and a single random
    duration from `durations` is attached to the whole group.

    Returns:
        A (frequencies, duration) tuple where `frequencies` is a list.
    """
    chosen_names = random.choice(chord_progressions)
    frequencies = [notes[name] for name in chosen_names]
    return (frequencies, random.choice(durations))

def generate_theory_based_sequence(min_length, max_length):
    """Produce a random sequence that mixes single notes and chords.

    The number of elements is drawn uniformly from [min_length, max_length].
    Each element is a single (frequency, duration) note with probability 0.7,
    otherwise a (frequencies, duration) chord.

    Returns:
        A list of note and chord tuples.
    """
    def _next_element():
        # A draw at or above 0.7 (the remaining 30%) yields a chord.
        if random.random() >= 0.7:
            return generate_chord_from_progression()
        # Otherwise take the only note of a one-note melody.
        return generate_scale_based_melody(1, 1)[0]

    element_count = random.randint(min_length, max_length)
    return [_next_element() for _ in range(element_count)]

# Generate an extensive dataset with music-theory-based sequences.
RANDOM_SEED = 42  # Fixed seed so Restart & Run All regenerates the same dataset
num_sequences = 1000  # Number of sequences to generate
min_sequence_length = 4  # Minimum number of elements per sequence
max_sequence_length = 16  # Maximum number of elements per sequence

# Seed immediately before sampling so this cell gives the same dataset each run.
random.seed(RANDOM_SEED)
dataset = [
    generate_theory_based_sequence(min_sequence_length, max_sequence_length)
    for _ in range(num_sequences)
]

# Write the dataset to disk as CSV, one row per sequence element.
# Sequences are flattened back to back; rows carry no sequence identifier.
csv_file = 'theory_based_8bit_music_dataset.csv'
header = ['Element_Type', 'Notes', 'Duration']

with open(csv_file, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    all_elements = (element for sequence in dataset for element in sequence)
    for pitches, length in all_elements:
        if isinstance(pitches, list):
            # Chord: frequencies are joined with '-' into a single cell.
            writer.writerow(['Chord', "-".join(map(str, pitches)), length])
        else:
            # Single note: the frequency is written as-is.
            writer.writerow(['Note', pitches, length])

print(f"Generated a music-theory-based dataset with {num_sequences} sequences, saved to {csv_file}.")


Generated a music-theory-based dataset with 1000 sequences, saved to theory_based_8bit_music_dataset.csv.


Data Prep

In [3]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Input
from keras.models import Model
from ast import literal_eval

# Load the dataset written by the generation cell. The path is relative, so
# the file is read from the current working directory. The columns used
# below are 'Notes' and 'Duration'.
csv_file = 'theory_based_8bit_music_dataset.csv'
data = pd.read_csv(csv_file)

def parse_notes(note_str):
    """Convert one 'Notes' cell into a float (note) or list of floats (chord).

    Args:
        note_str: Either a string such as "261.63" or "261.63-349.23-392.0",
            or an already-numeric value. pandas yields numbers instead of
            strings when the column contains no chord rows.

    Returns:
        A float for a single note, or a list of floats for a '-'-joined chord.

    Raises:
        ValueError: If any part of the cell cannot be parsed as a float.
    """
    # A numeric cell cannot be a chord; `'-' in <float>` would raise TypeError.
    if not isinstance(note_str, str):
        return float(note_str)

    parts = note_str.split('-')
    if len(parts) > 1:
        return [float(part) for part in parts]
    return float(note_str)

# Parse the raw columns: 'Notes' becomes a float (note) or list of floats
# (chord); 'Duration' becomes a float.
data['Notes'] = data['Notes'].apply(parse_notes)
data['Duration'] = data['Duration'].apply(float)

# Scale all frequencies to [0, 1] with a single scaler. Chords are unrolled so
# every frequency, from notes and chords alike, contributes to the fit.
flat_values = [
    value
    for entry in data['Notes']
    for value in (entry if isinstance(entry, list) else [entry])
]
notes_flat = np.array(flat_values).reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
notes_flat_scaled = scaler.fit_transform(notes_flat)

# Write the scaled values back row by row, consuming the flat array in the
# same order it was built. Iterate over a snapshot since the column is
# overwritten as we go.
cursor = 0
for row_label, entry in enumerate(data['Notes'].tolist()):
    if isinstance(entry, list):
        stop = cursor + len(entry)
        data.at[row_label, 'Notes'] = notes_flat_scaled[cursor:stop].flatten().tolist()
        cursor = stop
    else:
        data.at[row_label, 'Notes'] = notes_flat_scaled[cursor][0]
        cursor += 1

# Prepare sequences for the LSTM model.
sequence_length = 16  # Number of time steps fed to the LSTM per sample
max_notes_per_step = max(len(note) if isinstance(note, list) else 1 for note in data['Notes'])
n_features = max_notes_per_step  # Features per time step (note slots per step)

# Build one zero-padded feature row per dataset element, once. A single note
# fills slot 0; a chord fills as many slots as it has notes.
step_features = np.zeros((len(data), n_features))
for row, entry in enumerate(data['Notes']):
    values = entry if isinstance(entry, list) else [entry]
    step_features[row, :len(values)] = values

# Every run of `sequence_length` consecutive rows is one sample, and the row
# right after it is the target. Windows slide over the whole table, so a
# window can span rows that came from different generated sequences.
n_samples = max(len(data) - sequence_length, 0)
X = np.array([step_features[start:start + sequence_length] for start in range(n_samples)])

# Reshape X for LSTM input (samples, time steps, features); this also gives an
# empty dataset the right rank.
X = X.reshape((n_samples, sequence_length, n_features))

# Targets: slot 0 of the next row (the note, or the first note of a chord)
# and that row's duration. Durations are targets only, not inputs.
y_notes = step_features[sequence_length:, 0].copy()
y_durations = np.array(data['Duration'].iloc[sequence_length:], dtype=float)


Model

In [4]:
# Two stacked LSTM layers with dropout, followed by two single-unit regression
# heads that share the final LSTM state.
inputs = Input(shape=X.shape[1:])  # (time steps, features per step)

lstm_sequence = LSTM(256, return_sequences=True)(inputs)
lstm_sequence = Dropout(0.3)(lstm_sequence)
x = LSTM(256)(lstm_sequence)
x = Dropout(0.3)(x)

# One head per target: the next note value and the next duration.
output_notes = Dense(1, name='notes_output')(x)
output_duration = Dense(1, name='duration_output')(x)

model = Model(inputs=inputs, outputs=[output_notes, output_duration])
model.compile(optimizer='adam', loss='mean_squared_error')

# Train on both targets; targets are listed in the same order as the outputs.
model.fit(X, [y_notes, y_durations], batch_size=64, epochs=20)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x11ac09d7d30>

Generate Music

In [5]:
def generate_music(model, seed_sequence, num_notes, scaler, window_length=None):
    """Autoregressively generate `num_notes` notes from a seed window.

    Each step feeds the model the most recent window, reads one predicted
    note and one predicted duration, and appends the prediction to the window
    for the next step.

    The window fed back to the model stays in the scaler's transformed space,
    the same space the seed is in. Only the copy that is returned is converted
    with `scaler.inverse_transform`. Feeding the inverse-transformed values
    back would hand the model inputs on a different scale from the seed.

    Args:
        model: Object whose `predict(x, verbose=0)` returns a pair
            (notes, durations), each indexable as `[0][0]`.
        seed_sequence: 2-D array-like of shape (time steps, features) holding
            scaled note values.
        num_notes: Number of notes to generate.
        scaler: Fitted scaler exposing `inverse_transform`.
        window_length: Number of trailing time steps passed to the model.
            Defaults to the notebook-level `sequence_length`.

    Returns:
        (generated_sequence, generated_durations). `generated_sequence` is the
        seed rows, unchanged, followed by one row per generated note. Slot 0
        of each generated row holds the inverse-transformed note value and the
        other slots are zero. `generated_durations` is a list of floats, one
        per generated note.
    """
    if window_length is None:
        window_length = sequence_length  # notebook-level window size

    seed = np.asarray(seed_sequence, dtype=float)
    feature_count = seed.shape[1]

    model_window = seed[-window_length:]  # stays in scaled space
    generated_sequence = seed.copy()      # what the caller receives
    generated_durations = []

    for _ in range(num_notes):
        X_input = model_window.reshape((1, window_length, -1))
        prediction_notes, prediction_duration = model.predict(X_input, verbose=0)

        scaled_prediction = np.asarray(prediction_notes, dtype=float).reshape(-1, 1)
        filled = min(scaled_prediction.shape[0], feature_count)

        # Feed the prediction back on the same scale as the seed.
        scaled_row = np.zeros((feature_count,))
        scaled_row[:filled] = scaled_prediction[:filled, 0]
        model_window = np.vstack([model_window, scaled_row])[-window_length:]

        # Convert to original units only for the returned sequence.
        frequencies = np.asarray(scaler.inverse_transform(scaled_prediction)).flatten()
        frequency_row = np.zeros((feature_count,))
        frequency_row[:filled] = frequencies[:filled]
        generated_sequence = np.vstack([generated_sequence, frequency_row])

        generated_durations.append(float(prediction_duration[0][0]))

    return generated_sequence, generated_durations

# Generate a new music sequence
seed_sequence = X[0]  # Start with the first sequence in the dataset
note_duration = 0.5  # Nominal seconds per note; only used to compute num_notes below
total_duration = 20  # Target clip length in seconds; the real length follows the predicted durations
num_notes = int(total_duration / note_duration)

# generated_notes holds the seed rows followed by one row per generated note.
generated_notes, generated_durations = generate_music(model, seed_sequence, num_notes, scaler)


Convert sequence to audio

In [7]:
from pydub import AudioSegment  # Import the necessary class

def generate_sine_wave(frequency, duration, sample_rate=44100, amplitude=0.5):
    """Sample a sine tone.

    Args:
        frequency: Tone frequency in Hz.
        duration: Tone length in seconds.
        sample_rate: Samples per second.
        amplitude: Peak value of the returned wave.

    Returns:
        A 1-D NumPy array of int(sample_rate * duration) samples. Sampling
        starts at t=0 and excludes the endpoint t=duration.
    """
    sample_count = int(sample_rate * duration)
    timestamps = np.linspace(0, duration, sample_count, endpoint=False)
    angular_frequency = 2 * np.pi * frequency
    return amplitude * np.sin(angular_frequency * timestamps)

def create_note_sequence(notes, durations, sample_rate=44100):
    """Render paired frequencies and durations into one mono 16-bit segment.

    Args:
        notes: Iterable of frequencies in Hz.
        durations: Iterable of note lengths in seconds, paired with `notes`.
            Pairing stops at the shorter of the two iterables.
        sample_rate: Samples per second for the synthesized tones.

    Returns:
        An AudioSegment with the playable notes appended in order.

    Entries that cannot be rendered are skipped instead of raising. These are
    a non-finite frequency or duration, or a duration too short (or negative)
    to yield at least one sample. Durations predicted by a regression model
    are unconstrained, so such values can reach this function.
    """
    music = AudioSegment.silent(duration=0)
    for note, duration in zip(notes, durations):
        if not (np.isfinite(note) and np.isfinite(duration)):
            continue
        if int(sample_rate * duration) < 1:
            continue

        wave = generate_sine_wave(note, duration, sample_rate)
        audio = np.int16(wave * 32767)  # Scale the float wave to 16-bit PCM
        note_segment = AudioSegment(audio.tobytes(), frame_rate=sample_rate, sample_width=2, channels=1)
        music += note_segment
    return music

# Generate and save the audio.
# generated_notes holds the scaled seed rows first, then one row per generated
# note. Only slot 0 of the generated rows is a frequency in Hz; the other
# slots are zero padding. Flattening the whole array and zipping it with the
# durations would synthesize the seed values and padding instead, so select
# the trailing rows that line up with generated_durations.
n_generated = len(generated_durations)
generated_frequencies = generated_notes[generated_notes.shape[0] - n_generated:, 0]

# A separate name leaves generated_notes untouched, so the cell can be re-run.
music = create_note_sequence(generated_frequencies, generated_durations)
music.export("generated_8bit_music_20sec.wav", format="wav")




<_io.BufferedRandom name='generated_8bit_music_20sec.wav'>

######################## TEST 2 ##################