Define Piano Frequencies

In [2]:
import numpy as np
import random
import csv

# Define piano key frequencies (A0 to C8)
piano_keys = {
    'A0': 27.50, 'A#0': 29.14, 'B0': 30.87,
    'C1': 32.70, 'C#1': 34.65, 'D1': 36.71, 'D#1': 38.89, 'E1': 41.20, 'F1': 43.65, 'F#1': 46.25, 'G1': 49.00, 'G#1': 51.91, 'A1': 55.00, 'A#1': 58.27, 'B1': 61.74,
    'C2': 65.41, 'C#2': 69.30, 'D2': 73.42, 'D#2': 77.78, 'E2': 82.41, 'F2': 87.31, 'F#2': 92.50, 'G2': 98.00, 'G#2': 103.83, 'A2': 110.00, 'A#2': 116.54, 'B2': 123.47,
    'C3': 130.81, 'C#3': 138.59, 'D3': 146.83, 'D#3': 155.56, 'E3': 164.81, 'F3': 174.61, 'F#3': 185.00, 'G3': 196.00, 'G#3': 207.65, 'A3': 220.00, 'A#3': 233.08, 'B3': 246.94,
    'C4': 261.63, 'C#4': 277.18, 'D4': 293.66, 'D#4': 311.13, 'E4': 329.63, 'F4': 349.23, 'F#4': 369.99, 'G4': 392.00, 'G#4': 415.30, 'A4': 440.00, 'A#4': 466.16, 'B4': 493.88,
    'C5': 523.25, 'C#5': 554.37, 'D5': 587.33, 'D#5': 622.25, 'E5': 659.25, 'F5': 698.46, 'F#5': 739.99, 'G5': 783.99, 'G#5': 830.61, 'A5': 880.00, 'A#5': 932.33, 'B5': 987.77,
    'C6': 1046.50, 'C#6': 1108.73, 'D6': 1174.66, 'D#6': 1244.51, 'E6': 1318.51, 'F6': 1396.91, 'F#6': 1479.98, 'G6': 1567.98, 'G#6': 1661.22, 'A6': 1760.00, 'A#6': 1864.66, 'B6': 1975.53,
    'C7': 2093.00, 'C#7': 2217.46, 'D7': 2349.32, 'D#7': 2489.02, 'E7': 2637.02, 'F7': 2793.83, 'F#7': 2959.96, 'G7': 3135.96, 'G#7': 3322.44, 'A7': 3520.00, 'A#7': 3729.31, 'B7': 3951.07,
    'C8': 4186.01
}

# Convert the dictionary into a list of frequencies for easier access
piano_frequencies = list(piano_keys.values())


In [3]:
# Define note durations (quarter, half, whole notes, etc.)
durations = [0.25, 0.5, 1.0, 2.0]  # In seconds

# Function to generate a random melody sequence with piano notes
def generate_piano_melody(min_length, max_length):
    length = random.randint(min_length, max_length)
    melody = []
    for _ in range(length):
        note = random.choice(piano_frequencies)  # Choose from piano frequencies
        duration = random.choice(durations)
        melody.append((note, duration))
    return melody

# Function to generate a random chord (multiple notes played together)
def generate_piano_chord():
    chord_size = random.randint(2, 4)  # Number of notes in the chord
    chord = random.sample(piano_frequencies, chord_size)
    duration = random.choice(durations)
    return (chord, duration)

# Function to generate a random sequence with melody and chords
def generate_piano_sequence(min_length, max_length):
    sequence = []
    for _ in range(random.randint(min_length, max_length)):
        if random.random() < 0.7:  # 70% chance of a single note
            sequence.append(generate_piano_melody(1, 1)[0])
        else:  # 30% chance of a chord
            sequence.append(generate_piano_chord())
    return sequence

# Generate an extensive dataset with piano-based sequences
dataset = []
num_sequences = 1000  # Number of sequences to generate
min_sequence_length = 4  # Minimum length of each sequence
max_sequence_length = 16  # Maximum length of each sequence

for _ in range(num_sequences):
    sequence = generate_piano_sequence(min_sequence_length, max_sequence_length)
    dataset.append(sequence)

# Save the dataset to a CSV file
csv_file = 'piano_based_8bit_music_dataset.csv'
with open(csv_file, 'w', newline='') as f:
    writer = csv.writer(f)
    # Write a header row with column names (optional)
    header = ['Element_Type', 'Notes', 'Duration']
    writer.writerow(header)
    
    # Write each sequence element as a row in the CSV file
    for sequence in dataset:
        for element in sequence:
            if isinstance(element[0], list):  # Chord
                notes_str = "-".join([str(n) for n in element[0]])
                writer.writerow(['Chord', notes_str, element[1]])
            else:  # Single note
                writer.writerow(['Note', element[0], element[1]])

print(f"Generated a piano-based dataset with {num_sequences} sequences, saved to {csv_file}.")


Generated a piano-based dataset with 1000 sequences, saved to piano_based_8bit_music_dataset.csv.


In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Input
from keras.models import Model
from ast import literal_eval

# Load the dataset
csv_file = 'piano_based_8bit_music_dataset.csv'
data = pd.read_csv(csv_file)

# Function to parse notes and durations
def parse_notes(note_str):
    if '-' in note_str:
        return [float(n) for n in note_str.split('-')]
    else:
        return float(note_str)

data['Notes'] = data['Notes'].apply(parse_notes)
data['Duration'] = data['Duration'].apply(float)

# Normalize note frequencies
notes_flat = []
for note in data['Notes']:
    if isinstance(note, list):
        notes_flat.extend(note)
    else:
        notes_flat.append(note)

notes_flat = np.array(notes_flat).reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(0, 1))
notes_flat_scaled = scaler.fit_transform(notes_flat)

# Map back to original data
note_idx = 0
for i in range(len(data)):
    if isinstance(data['Notes'].iloc[i], list):
        note_count = len(data['Notes'].iloc[i])
        data.at[i, 'Notes'] = notes_flat_scaled[note_idx:note_idx + note_count].flatten().tolist()
        note_idx += note_count
    else:
        data.at[i, 'Notes'] = notes_flat_scaled[note_idx][0]
        note_idx += 1

# Prepare sequences for LSTM model
sequence_length = 16  # Use a fixed length for LSTM input
max_notes_per_step = max(len(note) if isinstance(note, list) else 1 for note in data['Notes'])

X = []
y_notes = []
y_durations = []

for i in range(len(data) - sequence_length):
    notes_sequence = []
    durations_sequence = []
    for j in range(sequence_length):
        note = data['Notes'].iloc[i + j]
        if isinstance(note, list):
            notes_sequence.extend(note + [0] * (max_notes_per_step - len(note)))  # Pad with zeros
        else:
            notes_sequence.extend([note] + [0] * (max_notes_per_step - 1))  # Pad with zeros for consistency
        durations_sequence.append(data['Duration'].iloc[i + j])
    
    X.append(notes_sequence)
    next_note = data['Notes'].iloc[i + sequence_length]
    y_notes.append(next_note[0] if isinstance(next_note, list) else next_note)
    y_durations.append(data['Duration'].iloc[i + sequence_length])

X = np.array(X)
y_notes = np.array(y_notes)
y_durations = np.array(y_durations)

# Reshape X for LSTM input (samples, time steps, features)
n_features = max_notes_per_step  # Number of features per time step (notes per step)
X = X.reshape((X.shape[0], sequence_length, n_features))


In [5]:
# Build the LSTM model
inputs = Input(shape=(X.shape[1], X.shape[2]))
x = LSTM(256, return_sequences=True)(inputs)
x = Dropout(0.3)(x)
x = LSTM(256)(x)
x = Dropout(0.3)(x)

output_notes = Dense(1, name='notes_output')(x)  # Predict the next note
output_duration = Dense(1, name='duration_output')(x)  # Predict the next duration

model = Model(inputs=inputs, outputs=[output_notes, output_duration])
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model.fit(X, [y_notes, y_durations], epochs=20, batch_size=64)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x26f9a54ac80>

In [8]:
# Function to generate a 20-second music sequence
def generate_music(model, seed_sequence, num_notes, scaler):
    generated_sequence = seed_sequence
    generated_durations = []
    
    for _ in range(num_notes):
        X_input = generated_sequence[-sequence_length:].reshape((1, sequence_length, -1))
        prediction_notes, prediction_duration = model.predict(X_input, verbose=0)
        
        # Rescale prediction to original note frequencies
        prediction_notes = scaler.inverse_transform(prediction_notes).flatten()
        
        # Ensure prediction_notes matches the number of features in generated_sequence
        prediction_notes_padded = np.zeros((generated_sequence.shape[1],))
        prediction_notes_padded[:len(prediction_notes)] = prediction_notes
        
        # Append predicted note and duration
        generated_sequence = np.vstack([generated_sequence, prediction_notes_padded])
        generated_durations.append(prediction_duration[0][0])
    
    return generated_sequence, generated_durations

# Generate a new music sequence
seed_sequence = X[0]  # Start with the first sequence in the dataset
note_duration = 0.5  # Default duration in seconds if prediction fails
total_duration = 20  # Total clip duration in seconds
num_notes = int(total_duration / note_duration)

generated_notes, generated_durations = generate_music(model, seed_sequence, num_notes, scaler)


In [9]:
# Function to generate a 20-second music sequence with debug prints
def generate_music(model, seed_sequence, num_notes, scaler):
    generated_sequence = seed_sequence
    generated_durations = []
    
    for i in range(num_notes):
        print(f"\nStep {i+1}/{num_notes}")
        X_input = generated_sequence[-sequence_length:].reshape((1, sequence_length, -1))
        prediction_notes, prediction_duration = model.predict(X_input, verbose=0)
        
        # Rescale prediction to original note frequencies
        prediction_notes = scaler.inverse_transform(prediction_notes).flatten()
        print(f"Predicted Notes (Rescaled): {prediction_notes}")
        print(f"Predicted Duration: {prediction_duration}")
        
        # Ensure prediction_notes matches the number of features in generated_sequence
        prediction_notes_padded = np.zeros((generated_sequence.shape[1],))
        prediction_notes_padded[:len(prediction_notes)] = prediction_notes
        
        # Append predicted note and duration
        generated_sequence = np.vstack([generated_sequence, prediction_notes_padded])
        generated_durations.append(prediction_duration[0][0])
        
        print(f"Generated Sequence Shape: {generated_sequence.shape}")
        print(f"Generated Durations: {generated_durations[-1]}")
    
    return generated_sequence, generated_durations

# Generate a new music sequence
seed_sequence = X[0]  # Start with the first sequence in the dataset
note_duration = 0.5  # Default duration in seconds if prediction fails
total_duration = 20  # Total clip duration in seconds
num_notes = int(total_duration / note_duration)

generated_notes, generated_durations = generate_music(model, seed_sequence, num_notes, scaler)

# Print final output
print("\nFinal Generated Notes:", generated_notes)
print("Final Generated Durations:", generated_durations)



Step 1/40
Predicted Notes (Rescaled): [767.1368]
Predicted Duration: [[0.94308233]]
Generated Sequence Shape: (17, 4)
Generated Durations: 0.943082332611084

Step 2/40
Predicted Notes (Rescaled): [841.1162]
Predicted Duration: [[0.9183592]]
Generated Sequence Shape: (18, 4)
Generated Durations: 0.9183592200279236

Step 3/40
Predicted Notes (Rescaled): [865.26654]
Predicted Duration: [[0.9354365]]
Generated Sequence Shape: (19, 4)
Generated Durations: 0.935436487197876

Step 4/40
Predicted Notes (Rescaled): [872.53217]
Predicted Duration: [[0.9424819]]
Generated Sequence Shape: (20, 4)
Generated Durations: 0.9424818754196167

Step 5/40
Predicted Notes (Rescaled): [876.8036]
Predicted Duration: [[0.94839543]]
Generated Sequence Shape: (21, 4)
Generated Durations: 0.9483954310417175

Step 6/40
Predicted Notes (Rescaled): [877.29144]
Predicted Duration: [[0.95003366]]
Generated Sequence Shape: (22, 4)
Generated Durations: 0.9500336647033691

Step 7/40
Predicted Notes (Rescaled): [878.6849

In [10]:
from pydub import AudioSegment
import numpy as np

# Function to create a sine wave for a note
def generate_sine_wave(frequency, duration, sample_rate=44100, amplitude=0.5):
    t = np.linspace(0, duration, int(sample_rate * duration), False)
    wave = amplitude * np.sin(2 * np.pi * frequency * t)
    return wave

# Create the audio file from the generated sequence
def create_note_sequence(notes, durations, sample_rate=44100):
    music = AudioSegment.silent(duration=0)  # Start with a silent segment
    for note, duration in zip(notes, durations):
        wave = generate_sine_wave(note, duration, sample_rate)
        audio = np.int16(wave * 32767)  # Convert to 16-bit PCM
        note_segment = AudioSegment(
            audio.tobytes(), 
            frame_rate=sample_rate, 
            sample_width=2, 
            channels=1
        )
        music += note_segment  # Append each note's segment to the music
    return music

# Generate and save the audio
generated_notes_flat = generated_notes.flatten()  # Flatten the array in case it's multi-dimensional
music = create_note_sequence(generated_notes_flat, generated_durations)
music.export("generated_8bit_music_20sec.wav", format="wav")

print("Music has been generated and saved as 'generated_8bit_music_20sec.wav'")


Music has been generated and saved as 'generated_8bit_music_20sec.wav'
