<a href="https://colab.research.google.com/github/brianellis1997/Music_Generation/blob/main/Model_Demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Model Demo
Here we will demo the model (XGBoost) and how it can give popularity predictions for our generative music.

In [1]:
!git clone https://github.com/brianellis1997/Music_Generation.git # Clone our repository

Cloning into 'Music_Generation'...
remote: Enumerating objects: 198, done.[K
remote: Counting objects: 100% (13/13), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 198 (delta 7), reused 6 (delta 6), pack-reused 185[K
Receiving objects: 100% (198/198), 30.98 MiB | 13.78 MiB/s, done.
Resolving deltas: 100% (93/93), done.


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
from music21 import *

# Load MIDI file
midi_file_path = '/content/drive/MyDrive/DS340/Generated_2nd_stage.mid'
midi_stream = converter.parse(midi_file_path)

In [4]:
# Extraction class
class Extraction:
    def __init__(self, midi_stream):
        self.midi_stream = midi_stream

    def key_signature_extract(self):
        key_signatures = self.midi_stream.recurse().getElementsByClass(key.KeySignature)
        if len(key_signatures) > 0:
            key_signature = key_signatures[0]
            key_name = key_signature.asKey().tonic.name  # Get the key name (e.g., 'C', 'G#')
        else:
            key_analysis = self.midi_stream.analyze('key')
            key_name = key_analysis.tonic.name  # Get the key name from the analysis

        # Define a mapping from key names to Spotify's numbers
        key_to_number = {
            'C': 0, 'C#': 1, 'D-': 1, 'D': 2, 'D#': 3, 'E-': 3, 'E': 4,
            'F': 5, 'F#': 6, 'G-': 6, 'G': 7, 'G#': 8, 'A-': 8,
            'A': 9, 'A#': 10, 'B-': 10, 'B': 11
        }

        # Account for both sharp and flat representations
        if key_name in key_to_number:
            return key_to_number[key_name]
        else:
            # Handle the case where the key might be represented differently (e.g., flats)
            # music21 might represent some keys differently, e.g., 'F#' could also be 'G-' (G flat)
            # This is a placeholder for handling such cases
            print("Key name not found in mapping:", key_name)
            return None


    def tempo_extract(self):
        tempos = []
        for event in self.midi_stream.flatten():
            if 'MetronomeMark' in event.classes:
                tempos.append(int(event.number))
        if tempos:
            return sum(tempos) / len(tempos)
        else:
            print("Tempo information not found in the MIDI file.")
            return None

    def duration_extract(self):
        quarter_lengths = self.midi_stream.duration.quarterLength
        tempo_value = self.tempo_extract()
        if tempo_value:  # Ensure tempo_value is not None
            duration_min = quarter_lengths / tempo_value
            duration_ms = duration_min * 60000  # Convert minutes to milliseconds
            return duration_ms
        else:
            return None

    def valence_extract(self):
        major_chords_count = 0
        minor_chords_count = 0
        chords = self.midi_stream.chordify()
        for chord in chords.recurse().getElementsByClass('Chord'):
            if chord.isMajorTriad():
                major_chords_count += 1
            elif chord.isMinorTriad():
                minor_chords_count += 1
        if major_chords_count > 0:
            return major_chords_count / (major_chords_count + minor_chords_count) if minor_chords_count > 0 else 1
        return 0

    def mode_extract(self):
        key = self.key_signature_extract()
        key_str = str(key)
        if 'major' in key_str:
            return 1  # Major mode
        else:
            return 0  # Minor or other modes

    def extract_danceability(self):
        # Assuming you've defined tempo_weight, mode_weight, and valence_weight previously
        tempo = self.tempo_extract()
        mode = self.mode_extract()
        valence = self.valence_extract()
        tempo_weight = 0.4
        mode_weight = 0.3
        valence_weight = 0.3
        normalized_tempo = min(max((tempo - 60) / (180 - 60), 0), 1) if tempo else 0
        danceability_score = (normalized_tempo * tempo_weight) + (mode * mode_weight) + (valence * valence_weight)
        return danceability_score

    def estimate_energy(self):
        avg_tempo = self.tempo_extract()
        notes_and_chords = self.midi_stream.recurse().notes
        total_duration = self.midi_stream.duration.quarterLength
        note_density = len(notes_and_chords) / total_duration if total_duration > 0 else 0
        velocities = [n.volume.velocityScalar for n in notes_and_chords if n.volume.velocityScalar is not None]
        avg_velocity = sum(velocities) / len(velocities) if velocities else 0.5
        energy_score = (avg_tempo / 120) + (note_density * 2) + (avg_velocity * 2)
        return min(energy_score / 10, 1.0)

    def loudness_extract(self):
        # Assuming you want to calculate the average velocity for the normalization process
        notes = self.midi_stream.recurse().notes
        velocities = [note.volume.velocity for note in notes if note.volume.velocity is not None]

        if velocities:
            avg_velocity = sum(velocities) / len(velocities)
        else:
            avg_velocity = 0  # Use a sensible default if no notes are found

        # Call the static method correctly using the class name
        avg_loudness = self.normalize_loudness(avg_velocity)
        return avg_loudness

    @staticmethod
    def normalize_loudness(velocity, min_loudness=-60, max_loudness=3.855):
        # Normalize MIDI velocity from 0-127 to 0-1
        normalized_velocity = velocity / 127

        # Scale to target loudness range
        scaled_loudness = (normalized_velocity * (max_loudness - min_loudness)) + min_loudness

        return scaled_loudness

In [None]:
# Initialize the Extraction class with the midi_stream
extraction = Extraction(midi_stream)

# Extract features
generated_key = extraction.key_signature_extract()
generated_tempo = extraction.tempo_extract()
generated_duration = extraction.duration_extract()
generated_valence = extraction.valence_extract()
generated_mode = extraction.mode_extract()
generated_danceability = extraction.extract_danceability()
generated_energy = extraction.estimate_energy()
generated_loudness = extraction.loudness_extract()

# Print extracted features
print("Key:", generated_key)
print("Tempo:", generated_tempo)
print("Duration:", generated_duration)
print("Valence:", generated_valence)
print("Mode:", generated_mode)
print("Danceability:", generated_danceability)
print("Energy:", generated_energy)
print("Loudness:", generated_loudness)

Key: 1
Tempo: 120.51982378854626
Duration: 246930.33116455883
Valence: 0.7735849056603774
Mode: 0
Danceability: 0.4338082176599341
Energy: 0.7516624991813022
Loudness: -29.48237006159789


In [5]:
# Load model
import xgboost as xgb

In [6]:
# Initialize a model instance
loaded_model = xgb.XGBRegressor()

# Load the saved model
loaded_model.load_model("/content/drive/MyDrive/DS340/best_xgb_model.json")  # Adjust path if necessary

In [7]:
import numpy as np
import joblib  # For loading the scaler

# Assuming 'Extraction' is your class for feature extraction
class PredictionPipeline:
    def __init__(self, model_path, scaler_path):
        # Load the XGBoost model
        self.model = xgb.XGBRegressor()
        self.model.load_model(model_path)

        # Load the scaler
        self.scaler = joblib.load(scaler_path)

        # Initialize the Extraction object (placeholder, need a midi_stream)
        self.extraction = None

    def extract_features(self, midi_path):
        # Load the MIDI file
        midi_stream = converter.parse(midi_path)
        self.extraction = Extraction(midi_stream)

        # Extract features
        features = np.array([
            self.extraction.valence_extract(),
            self.extraction.extract_danceability(),
            self.extraction.duration_extract(),
            self.extraction.estimate_energy(),
            self.extraction.key_signature_extract(),
            self.extraction.loudness_extract(),
            self.extraction.mode_extract(),
            self.extraction.tempo_extract()
        ]).reshape(1, -1)  # Reshape for a single sample

        return features

    def predict(self, midi_path):
        # Extract features
        features = self.extract_features(midi_path)

        # Scale features
        features_scaled = self.scaler.transform(features)

        # Make prediction
        prediction = self.model.predict(features_scaled)

        return prediction

# Example usage
pipeline = PredictionPipeline('/content/drive/MyDrive/DS340/best_xgb_model.json', '/content/drive/MyDrive/DS340/scaler.joblib')
prediction = pipeline.predict('/content/drive/MyDrive/DS340/Generated_2nd_stage.mid')
print("Prediction:", prediction)


Prediction: [25.183252]




In [8]:
# Prediction for my music
prediction = pipeline.predict('/content/drive/MyDrive/DS340/M6 - Full score - Flow 1.mid')
print("Prediction:", prediction)

Prediction: [25.573494]




In [23]:
# Bach popularity prediction
prediction = pipeline.predict('/content/drive/MyDrive/DS340/Johann Sebastian Bach/Minuet in G.mid')
print("Prediction:", prediction)

Prediction: [43.15239]




In [33]:
# Bach popularity prediction
prediction = pipeline.predict('/content/drive/MyDrive/DS340/Johann Sebastian Bach/Prelude in C Minor.mid')
print("Prediction:", prediction)

Prediction: [57.030502]




# Generate new music and make Popularity prediction

In [None]:
%ls

[0m[01;34mdrive[0m/  [01;34mMusic_Generation[0m/  [01;34msample_data[0m/


In [None]:
# Clone repository
!git clone https://github.com/slSeanWU/Compose_and_Embellish.git # Parent Paper's repo
%cd Compose_and_Embellish

# Install libraries
!pip install -r requirements.txt

# Install pre-trained transformers (15 min runtime)
!pip install git+https://github.com/cifkao/fast-transformers.git@39e726864d1a279c9719d33a95868a4ea2fb5ac5
!git clone https://huggingface.co/slseanwu/compose-and-embellish-pop1k7
!pip install miditoolkit

Cloning into 'Compose_and_Embellish'...
remote: Enumerating objects: 74, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (4/4), done.[K
remote: Total 74 (delta 9), reused 8 (delta 8), pack-reused 62[K
Receiving objects: 100% (74/74), 55.03 KiB | 955.00 KiB/s, done.
Resolving deltas: 100% (26/26), done.
/content/Compose_and_Embellish
Collecting miditoolkit (from -r requirements.txt (line 5))
  Downloading miditoolkit-1.0.1-py3-none-any.whl (24 kB)
[31mERROR: Could not find a version that satisfies the requirement torch==1.6.0 (from versions: 1.11.0, 1.12.0, 1.12.1, 1.13.0, 1.13.1, 2.0.0, 2.0.1, 2.1.0, 2.1.1, 2.1.2, 2.2.0, 2.2.1, 2.2.2)[0m[31m
[0m[31mERROR: No matching distribution found for torch==1.6.0[0m[31m
[0mCollecting git+https://github.com/cifkao/fast-transformers.git@39e726864d1a279c9719d33a95868a4ea2fb5ac5
  Cloning https://github.com/cifkao/fast-transformers.git (to revision 39e726864d1a279c9719d33a95868a4ea2fb5ac5) to /tmp

In [None]:
# Compose

# Generating a leadsheet
!python3 stage01_compose/inference.py \
  stage01_compose/config/pop1k7_finetune.yaml \
  generation/stage01 \
  1   # Generate one leadsheet

[nucleus parameters] t = 1.2, p = 0.97
[info] # params: 41331059
[global tempo] 83
 -- generating leadsheet #1 of 1
[info] generated 1 bars, #events = 11
[info] generated 2 bars, #events = 36
[info] generated 3 bars, #events = 50
[info] generated 4 bars, #events = 76
[info] generated 5 bars, #events = 94
[info] generated 6 bars, #events = 120
[info] generated 7 bars, #events = 143
[info] generated 8 bars, #events = 166
[info] generated 9 bars, #events = 185
[info] generated 10 bars, #events = 210
[info] generated 11 bars, #events = 233
[info] generated 12 bars, #events = 265
[info] generated 13 bars, #events = 294
[info] generated 14 bars, #events = 317
[info] generated 15 bars, #events = 336
[info] generated 16 bars, #events = 359
[info] generated 17 bars, #events = 382
[info] generated 18 bars, #events = 414
[info] generated 19 bars, #events = 440
[info] generated 20 bars, #events = 460
[info] generated 21 bars, #events = 479
[info] generated 22 bars, #events = 502
[info] generated 2

In [None]:
# Embellish

# We will embellish our generated leadsheet
!python3 stage02_embellish/inference.py \
  stage02_embellish/config/pop1k7_default.yaml \
  generation/stage01 \
  generation/stage02

[preparing data] now at #0
[info] model init completed
[info] model loaded
[# pieces] 1
The boolean parameter 'some' has been replaced with a string parameter 'mode'.
Q, R = torch.qr(A, some)
should be replaced with
Q, R = torch.linalg.qr(A, 'reduced' if some else 'complete') (Triggered internally at ../aten/src/ATen/native/BatchLinearAlgebra.cpp:2426.)
  Q, _ = torch.qr(block)
[info] generated 1 bars, #events = 58
[info] generated 2 bars, #events = 155
[info] generated 3 bars, #events = 215
[info] generated 4 bars, #events = 320
[info] generated 5 bars, #events = 394
[info] generated 6 bars, #events = 514
[info] generated 7 bars, #events = 618
[info] generated 8 bars, #events = 730
[info] generated 9 bars, #events = 845
[info] generated 10 bars, #events = 959
[info] generated 11 bars, #events = 1076
[info] generated 12 bars, #events = 1209
[info] generated 13 bars, #events = 1339
[info] generated 14 bars, #events = 1477
[info] generated 15 bars, #events = 1603
[info] generated 16 bars

# Listen and View Generation

In [None]:
# installs and imports to convert MIDI into audio
!pip install pretty_midi
!wget https://www.dropbox.com/s/4x27l49kxcwamp5/GeneralUser_GS_1.471.zip
!unzip GeneralUser_GS_1.471.zip
!apt install -y fluidsynth
from pretty_midi import PrettyMIDI
from IPython.display import Audio
from scipy.io.wavfile import write
import librosa

Collecting pretty_midi
  Downloading pretty_midi-0.2.10.tar.gz (5.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m19.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pretty_midi
  Building wheel for pretty_midi (setup.py) ... [?25l[?25hdone
  Created wheel for pretty_midi: filename=pretty_midi-0.2.10-py3-none-any.whl size=5592289 sha256=cb5901f2cdaf48f64670d5ab58c38d9b6dd446ba0735008bdac007039bc0f881
  Stored in directory: /root/.cache/pip/wheels/cd/a5/30/7b8b7f58709f5150f67f98fde4b891ebf0be9ef07a8af49f25
Successfully built pretty_midi
Installing collected packages: pretty_midi
Successfully installed pretty_midi-0.2.10
--2024-04-01 23:00:35--  https://www.dropbox.com/s/4x27l49kxcwamp5/GeneralUser_GS_1.471.zip
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.18, 2620:100:6016:18::a27d:112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.18|:443..

In [None]:
##########
# LISTEN #
##########

# render the first stage
# !fluidsynth -ni GeneralUser\ GS\ 1.471/GeneralUser\ GS\ v1.471.sf2 generation/stage01/samp_01.mid -F first_stage.wav -r 44100

# render the second stage
!fluidsynth -ni GeneralUser\ GS\ 1.471/GeneralUser\ GS\ v1.471.sf2 /content/Compose_and_Embellish/generation/stage02/samp_01_2stage_samp01.mid -F second_stage.wav -r 44100

# # uncomment if you want to hear the melody
# # generated in the first stage
# # hear the first stage
# x,sr=librosa.load('first_stage.wav')
# Audio(x,rate=sr)

# hear the second stage
x,sr=librosa.load('second_stage.wav')
Audio(x,rate=sr)

FluidSynth runtime version 2.2.5
Copyright (C) 2000-2022 Peter Hanappe and others.
Distributed under the LGPL license.
SoundFont(R) is a registered trademark of Creative Technology Ltd.

Rendering audio to file 'second_stage.wav'..


In [None]:
# Popularity prediction of newly generated piece
pipeline = PredictionPipeline('/content/drive/MyDrive/DS340/best_xgb_model.json', '/content/drive/MyDrive/DS340/scaler.joblib')
prediction = pipeline.predict('/content/Compose_and_Embellish/generation/stage02/samp_01_2stage_samp01.mid')
print("Prediction:", prediction)

Prediction: [19.75657]


