In [36]:
# Import necessary libraries
import numpy as np
import random
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from pydub import AudioSegment


In [37]:
# Step 1: Define Music Theory Elements
# Define frequencies for the C Major, G Major, and A Minor scales (in Hz)
c_major_scale = [261.63, 293.66, 329.63, 349.23, 392.00, 440.00, 493.88, 523.25]  # C4 to C5
g_major_scale = [392.00, 440.00, 493.88, 523.25, 587.33, 659.25, 739.99, 783.99]  # G4 to G5
a_minor_scale = [220.00, 246.94, 261.63, 293.66, 329.63, 349.23, 392.00, 440.00]  # A3 to A4

# Chord progressions, each chord spelled as a root-position triad (in Hz).
# All three progressions are written in the key of C major.
# Spelling used: C = C4-E4-G4, F = F4-A4-C5, G = G4-B4-D5, Dm = D4-F4-A4, Am = A3-C4-E4.
# The G triad needs D (587.33 Hz) on top and the Am triad is A-C-E; the earlier
# values spelled G-B-C and A-D-F, which are not those chords.
chord_progressions = {
    "I-IV-V": [[261.63, 329.63, 392.00], [349.23, 440.00, 523.25], [392.00, 493.88, 587.33]],  # C-F-G
    "ii-V-I": [[293.66, 349.23, 440.00], [392.00, 493.88, 587.33], [261.63, 329.63, 392.00]],  # Dm-G-C
    "vi-IV-I-V": [[220.00, 261.63, 329.63], [349.23, 440.00, 523.25], [261.63, 329.63, 392.00], [392.00, 493.88, 587.33]]  # Am-F-C-G
}

# Note lengths (in seconds) that the sequence generator samples from
durations = [0.25, 0.5, 0.75, 1.0]


In [38]:
# Step 2: Generate a Complex Music Theory-Inspired Dataset
def generate_complex_music_sequence(scales, chord_progressions, num_sequences=5, note_durations=None):
    """Build random sequences of notes, chords and rests.

    Each sequence runs for a random 16-32 steps. At every step one of four
    things happens:

    * 40%: a single note from the current scale is appended.
    * 30%: one chord progression is chosen and its chords are appended in
      order, all with the same randomly drawn duration.
    * 20%: the current scale is re-drawn from ``scales`` (modulation); nothing
      is appended.
    * 10%: either a scale note with a doubled duration or a rest
      (frequency ``0``) is appended, with equal probability.

    Args:
        scales: Sequence of scales; each scale is a sequence of frequencies (Hz).
        chord_progressions: Mapping of progression name to a list of chords,
            where each chord is a sequence of frequencies (Hz).
        num_sequences: Number of sequences to generate.
        note_durations: Durations (seconds) to sample from. When omitted, the
            notebook-level ``durations`` is used.

    Returns:
        A list of sequences. Each sequence is a list of ``(note, duration)``
        tuples where ``note`` is a frequency, ``0`` for a rest, or a flat list
        of frequencies for a chord.
    """
    if note_durations is None:
        note_durations = durations
    # Flatten to plain floats so a column-shaped ndarray is accepted as well,
    # in case the name `durations` has been rebound to a preprocessed array.
    duration_choices = [float(d) for d in np.ravel(note_durations)]
    progressions = list(chord_progressions.values())

    dataset = []
    for _ in range(num_sequences):
        sequence = []
        current_scale = random.choice(scales)
        for _ in range(random.randint(16, 32)):  # Longer sequences for more depth
            roll = random.random()
            if roll < 0.4:  # 40% chance of a scale note
                sequence.append((random.choice(current_scale), random.choice(duration_choices)))
            elif roll < 0.7:  # 30% chance of a chord progression
                progression = random.choice(progressions)
                duration = random.choice(duration_choices)
                # Emit the chords one by one: every stored chord must be a flat
                # list of frequencies, not the whole nested progression.
                for chord in progression:
                    sequence.append((list(chord), duration))
            elif roll < 0.9:  # 20% chance of modulation
                current_scale = random.choice(scales)
            elif random.random() < 0.5:  # 10% bucket: longer note for intensity ...
                sequence.append((random.choice(current_scale), random.choice(duration_choices) * 2))
            else:  # ... or a rest (0 frequency means silence)
                sequence.append((0, random.choice(duration_choices)))
        dataset.append(sequence)
    return dataset

# Scales the generator can start in or modulate to
scales = [c_major_scale, g_major_scale, a_minor_scale]

# Build the dataset with the generator's default number of sequences
complex_music_theory_dataset = generate_complex_music_sequence(
    scales=scales,
    chord_progressions=chord_progressions,
)


In [39]:
# Updated Preprocess Function with Chord Averaging
def preprocess_dataset(dataset):
    """Flatten a dataset into one column of pitches and one column of durations.

    Every element of every sequence is read as ``(note, duration)`` by index.
    A ``list`` note (a chord) is collapsed to the mean of its frequencies; an
    ``int`` or ``float`` note is kept unchanged.

    Args:
        dataset: Iterable of sequences of ``(note, duration)`` elements.

    Returns:
        A pair ``(notes, durations)`` of arrays, each reshaped to ``(-1, 1)``.

    Raises:
        ValueError: If a note is neither a list nor an int/float.
    """
    pitches = []
    lengths = []

    for element in (item for sequence in dataset for item in sequence):
        note, duration = element[0], element[1]

        if isinstance(note, list):
            # Chord: represent it by its average frequency
            pitch = np.mean(note)
        elif isinstance(note, (int, float)):
            # Single note (or a rest encoded as 0)
            pitch = note
        else:
            raise ValueError(f"Unexpected note type: {type(note)} with value {note}")

        pitches.append(pitch)
        lengths.append(duration)

    return np.array(pitches).reshape(-1, 1), np.array(lengths).reshape(-1, 1)

# Generate the notes and durations arrays.
# The durations column is bound to its own name: unpacking into `durations`
# would overwrite the module-level list of allowed note lengths that the
# sequence generator and the clip exporter sample from.
notes, dataset_durations = preprocess_dataset(complex_music_theory_dataset)

# Normalize the note frequencies to the [0, 1] range
scaler = MinMaxScaler(feature_range=(0, 1))
notes_scaled = scaler.fit_transform(notes)


In [40]:
# Step 4: Build and Train the LSTM Model
# Slide a fixed-size window over the scaled notes: each run of
# `sequence_length` values is one input, the value right after it is the target.
sequence_length = 16
window_starts = range(len(notes_scaled) - sequence_length)
X = np.array([notes_scaled[start:start + sequence_length] for start in window_starts])
y = np.array([notes_scaled[start + sequence_length] for start in window_starts])

# Two stacked LSTM layers, each followed by dropout, then one linear output unit
model = Sequential([
    LSTM(128, return_sequences=True, input_shape=(sequence_length, 1)),
    Dropout(0.2),
    LSTM(128),
    Dropout(0.2),
    Dense(1, activation='linear'),
])
model.compile(loss='mean_squared_error', optimizer='adam')

# Train the model
model.fit(X, y, epochs=100, batch_size=64)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100

<keras.callbacks.History at 0x213b32ba470>

In [41]:
# Step 5: Define Functions for Music Generation and Conversion
# Function to generate a square wave for 8-bit sound
def generate_square_wave(frequency, duration, sample_rate=44100, amplitude=0.5):
    """Return the samples of a square wave as a float array.

    The wave is the sign of a sine at ``frequency``, scaled by ``amplitude``,
    so samples are ``+amplitude``, ``-amplitude`` or ``0`` where the sine is
    exactly zero (a frequency of 0 therefore gives all zeros).

    Args:
        frequency: Tone frequency in Hz.
        duration: Length of the tone in seconds.
        sample_rate: Samples per second.
        amplitude: Peak value of the wave.

    Returns:
        Array with ``int(sample_rate * duration)`` samples.
    """
    sample_count = int(sample_rate * duration)
    # Sample instants start at 0 and stop one step short of `duration`
    timeline = np.linspace(0, duration, sample_count, endpoint=False)
    angular_frequency = 2 * np.pi * frequency
    carrier = np.sin(angular_frequency * timeline)
    return amplitude * np.sign(carrier)

# Create the audio file from the generated sequence
def create_8bit_music_clip(sequence, sample_rate=44100, apply_counter_melody=False):
    """Render a sequence of notes and chords as a mono 16-bit square-wave clip.

    Args:
        sequence: Iterable of ``(note, duration)`` elements. ``note`` is either
            one frequency in Hz (0 renders as silence) or a list/tuple of
            frequencies forming a chord; ``duration`` is in seconds.
        sample_rate: Samples per second used for every rendered segment.
        apply_counter_melody: When true, a second voice is built at 1.5 times
            the frequency of each note (a perfect fifth above; for chords, of
            one randomly chosen chord tone) and mixed over the main line.

    Returns:
        A pydub ``AudioSegment`` holding the segments in order, with the
        counter-melody overlaid when requested.
    """
    # Start from an empty segment at the target rate so nothing is resampled
    music = AudioSegment.silent(duration=0, frame_rate=sample_rate)
    counter_melody = []

    for element in sequence:
        note, duration = element[0], element[1]
        is_chord = isinstance(note, (list, tuple))
        if is_chord:
            # Average the chord voices so the mix keeps the single-voice amplitude
            wave = sum(generate_square_wave(n, duration, sample_rate) for n in note) / len(note)
        else:
            wave = generate_square_wave(note, duration, sample_rate)

        if apply_counter_melody:
            base_frequency = random.choice(note) if is_chord else note
            # A factor of 1.5 is a perfect fifth above the base tone (an octave would be 2.0)
            counter_melody.append((base_frequency * 1.5, duration))

        # Scale the float wave to the signed 16-bit sample range
        audio = np.int16(wave * 32767)
        note_segment = AudioSegment(
            audio.tobytes(),
            frame_rate=sample_rate,
            sample_width=2,
            channels=1
        )
        music += note_segment  # Append each note's segment to the music

    # Add counter-melody if applicable
    if apply_counter_melody:
        # Render the second voice at the same sample rate and without a counter
        # voice of its own. Passing sample_rate=False here rendered zero
        # samples, so the counter-melody was never audible.
        counter_melody_audio = create_8bit_music_clip(
            counter_melody,
            sample_rate=sample_rate,
            apply_counter_melody=False,
        )
        music = music.overlay(counter_melody_audio)

    return music

# Function to extend a seed sequence with model-predicted notes
def generate_music(model, seed_sequence, num_notes, scaler, window_length=None):
    """Autoregressively extend a seed with ``num_notes`` predicted values.

    At each step the last ``window_length`` values are fed to ``model.predict``
    and the prediction is appended, so later predictions build on earlier ones.

    Args:
        model: Object with ``predict(x, verbose=0)`` that takes an input of
            shape ``(1, window_length, 1)`` and returns one value per call.
        seed_sequence: Starting values, one per row, in the same scaled space
            the model was trained on. It is not modified.
        num_notes: Number of values to predict and append.
        scaler: Object whose ``inverse_transform`` maps the scaled column back
            to the original units.
        window_length: Number of trailing values fed to the model. When
            omitted, the notebook-level ``sequence_length`` is used.

    Returns:
        1-D array holding the seed followed by the ``num_notes`` predictions,
        all passed through ``scaler.inverse_transform``.

    Raises:
        ValueError: If the seed has fewer than ``window_length`` values.
    """
    if window_length is None:
        window_length = sequence_length

    generated_sequence = np.asarray(seed_sequence).reshape(-1, 1)
    if generated_sequence.shape[0] < window_length:
        raise ValueError(
            f"seed_sequence has {generated_sequence.shape[0]} values but the model "
            f"window needs {window_length}"
        )

    for _ in range(num_notes):
        X_input = generated_sequence[-window_length:].reshape(1, window_length, 1)
        prediction = model.predict(X_input, verbose=0)
        # vstack builds a new array each step, leaving the caller's seed untouched
        generated_sequence = np.vstack([generated_sequence, prediction])
    return scaler.inverse_transform(generated_sequence).flatten()


In [42]:
# Updated function to ensure different start sequences
def generate_and_save_nuanced_clips(model, dataset, num_clips=5, clip_duration=10, sample_rate=44100):
    """Generate ``num_clips`` clips from random seeds and export them as WAV files.

    For each clip a random dataset sequence is preprocessed, padded or trimmed
    to ``sequence_length`` values, scaled, and used to seed ``generate_music``.
    Only the newly predicted notes are rendered, all with one randomly chosen
    note length, so the clip lasts about ``clip_duration`` seconds.

    Relies on the notebook-level ``scaler``, ``sequence_length`` and
    ``durations``, and on ``preprocess_dataset``, ``generate_music`` and
    ``create_8bit_music_clip``.

    Args:
        model: Trained model passed through to ``generate_music``.
        dataset: List of sequences of ``(note, duration)`` elements to draw seeds from.
        num_clips: Number of clips to write.
        clip_duration: Target clip length in seconds.
        sample_rate: Samples per second of the rendered audio.

    Side effects:
        Writes ``nuanced_8bit_music_clip_<k>.wav`` to the working directory and
        prints one line per clip.
    """
    for i in range(num_clips):
        # Select a random sequence from the dataset as a seed (frequencies in Hz)
        seed_hz = preprocess_dataset([random.choice(dataset)])[0]

        # Ensure the seed length matches the LSTM input length; padding with 0 Hz adds rests
        if seed_hz.shape[0] < sequence_length:
            seed_hz = np.pad(seed_hz, ((0, sequence_length - seed_hz.shape[0]), (0, 0)), 'constant')
        else:
            seed_hz = seed_hz[:sequence_length]

        # The model was trained on scaled notes, so the seed must be scaled too;
        # generate_music maps everything back to Hz with inverse_transform.
        seed_sequence = scaler.transform(seed_hz)

        # `durations` may be the plain list or an (n, 1) array depending on
        # which cells have run; reduce the drawn value to one float either way.
        note_duration = float(np.ravel(random.choice(durations))[0])
        num_notes = int(clip_duration / note_duration)
        generated_notes = generate_music(model, seed_sequence, num_notes, scaler)

        # generate_music returns the seed followed by the predictions; keep only
        # the predictions so the clip is made of generated notes.
        new_notes = generated_notes[seed_sequence.shape[0]:]

        # Create an 8-bit music clip with more complexity
        clip = create_8bit_music_clip(
            list(zip(new_notes, [note_duration] * num_notes)),
            sample_rate=sample_rate,
            apply_counter_melody=True,
        )
        clip_name = f"nuanced_8bit_music_clip_{i+1}.wav"
        clip.export(clip_name, format="wav")
        print(f"Generated and saved '{clip_name}'")

# Render and export five clips seeded from the generated dataset
generate_and_save_nuanced_clips(
    model=model,
    dataset=complex_music_theory_dataset,
    num_clips=5,
)


Generated and saved 'nuanced_8bit_music_clip_1.wav'
Generated and saved 'nuanced_8bit_music_clip_2.wav'
Generated and saved 'nuanced_8bit_music_clip_3.wav'
Generated and saved 'nuanced_8bit_music_clip_4.wav'
Generated and saved 'nuanced_8bit_music_clip_5.wav'
