In [1]:
# -*- coding: utf-8 -*-
"""
@author: Giovanni Di Liberto
See description in the assignment instructions.
"""

import os
from mido import MidiFile, MidiTrack, Message

# Frequency in Hz for each of the twelve note names (this can also help
# determine pitch). An uppercase letter is the natural note; the matching
# lowercase letter is its sharp. Values are for the octave in which A = 440 Hz.
NOTE_FREQUENCIES = dict(
    C=261.63,
    c=277.18,  # C#
    D=293.66,
    d=311.13,  # D#
    E=329.63,
    F=349.23,
    f=369.99,  # F#
    G=392.00,
    g=415.30,  # G#
    A=440.00,
    a=466.16,  # A#
    B=493.88,
)

# Pitch class (MIDI note number modulo 12) -> note name, ignoring octaves.
# The position of each character in the string is its pitch class; lowercase
# letters are the sharps of the preceding uppercase note.
MIDI_NOTE_TO_NAME = dict(enumerate('CcDdEFfGgAaB'))

# General MIDI instrument names listed in program order. The position of a
# name in this tuple IS its 0-based program number (the value carried by a
# 'program_change' message), so the keys of the mapping below cannot drift out
# of step with the names. Each row is one General MIDI family of eight programs.
_GM_PROGRAM_NAMES = (
    # 0-7: Piano
    'Acoustic Grand Piano', 'Bright Acoustic Piano', 'Electric Grand Piano', 'Honky-Tonk Piano',
    'Electric Piano 1', 'Electric Piano 2', 'Harpsichord', 'Clavinet',
    # 8-15: Chromatic percussion
    'Celesta', 'Glockenspiel', 'Music Box', 'Vibraphone',
    'Marimba', 'Xylophone', 'Tubular Bells', 'Dulcimer',
    # 16-23: Organ
    'Drawbar Organ', 'Percussive Organ', 'Rock Organ', 'Church Organ',
    'Reed Organ', 'Accordion', 'Harmonica', 'Tango Accordion',
    # 24-31: Guitar
    'Acoustic Guitar (nylon)', 'Acoustic Guitar (steel)', 'Electric Guitar (jazz)', 'Electric Guitar (clean)',
    'Electric Guitar (muted)', 'Overdriven Guitar', 'Distorted Guitar', 'Guitar Harmonics',
    # 32-39: Bass
    'Acoustic Bass', 'Electric Bass (finger)', 'Electric Bass (pick)', 'Fretless Bass',
    'Slap Bass 1', 'Slap Bass 2', 'Synth Bass 1', 'Synth Bass 2',
    # 40-47: Strings
    'Violin', 'Viola', 'Cello', 'Contrabass',
    'Tremolo Strings', 'Pizzicato Strings', 'Orchestral Harp', 'Timpani',
    # 48-55: Ensemble
    'String Ensemble 1', 'String Ensemble 2', 'SynthStrings 1', 'SynthStrings 2',
    'Choir Aahs', 'Voice Oohs', 'Synth Voice', 'Orchestra Hit',
    # 56-63: Brass
    'Trumpet', 'Trombone', 'Tuba', 'Muted Trumpet',
    'French Horn', 'Brass Section', 'SynthBrass 1', 'SynthBrass 2',
    # 64-71: Reed
    'Soprano Sax', 'Alto Sax', 'Tenor Sax', 'Baritone Sax',
    'Oboe', 'English Horn', 'Bassoon', 'Clarinet',
    # 72-79: Pipe
    'Piccolo', 'Flute', 'Recorder', 'Pan Flute',
    'Blown Bottle', 'Shakuhachi', 'Whistle', 'Ocarina',
    # 80-87: Synth lead
    'Lead 1 (square)', 'Lead 2 (sawtooth)', 'Lead 3 (calliope)', 'Lead 4 (chiff)',
    'Lead 5 (charang)', 'Lead 6 (voice)', 'Lead 7 (fifths)', 'Lead 8 (bass + lead)',
    # 88-95: Synth pad
    'Pad 1 (new age)', 'Pad 2 (warm)', 'Pad 3 (polysynth)', 'Pad 4 (choir)',
    'Pad 5 (bowed)', 'Pad 6 (metallic)', 'Pad 7 (halo)', 'Pad 8 (sweep)',
    # 96-103: Synth effects
    'FX 1 (rain)', 'FX 2 (soundtrack)', 'FX 3 (crystal)', 'FX 4 (atmosphere)',
    'FX 5 (brightness)', 'FX 6 (goblins)', 'FX 7 (echoes)', 'FX 8 (sci-fi)',
    # 104-111: Ethnic
    'Sitar', 'Banjo', 'Shamisen', 'Koto',
    'Kalimba', 'Bag pipe', 'Fiddle', 'Shanai',
    # 112-119: Percussive
    'Tinkle Bell', 'Agogo', 'Steel Drums', 'Woodblock',
    'Taiko Drum', 'Melodic Tom', 'Synth Drum', 'Reverse Cymbal',
    # 120-127: Sound effects
    'Guitar Fret Noise', 'Breath Noise', 'Seashore', 'Bird Tweet',
    'Telephone', 'Helicopter', 'Applause', 'Gunshot',
)

# Mapping of MIDI program numbers (0-127) to instrument names.
# NOTE: the earlier hand-numbered table contained a stray 'Flugelhorn' at 60,
# which shifted every later instrument up by one and dropped 'Gunshot' (127).
PROGRAM_TO_INSTRUMENT = dict(enumerate(_GM_PROGRAM_NAMES))

# Directory containing the MIDI files
midi_dir = '../musicDatasetOriginal'

# Directory to store the resulting MIDI files
output_dir = 'musicDatasetSimplified'

# Create the output directory if it doesn't exist. exist_ok=True makes this a
# single call, with no gap between an existence check and the creation.
os.makedirs(output_dir, exist_ok=True)

# List to store the text sequences
text_sequences = []
# List to store the velocity sequences
velocity_sequences = []
# List to store the duration sequences
duration_sequences = []

In [2]:
# Distinct instrument names found so far, in order of first appearance.
instruments_list = []

def find_instruments(midi_path):
    """Record every instrument selected by a program change in a MIDI file.

    Each ``program_change`` message found in any track is translated to an
    instrument name through ``PROGRAM_TO_INSTRUMENT`` (falling back to
    "Unknown Instrument" for a program number missing from the table). The
    name is appended to the module-level ``instruments_list`` unless it is
    already there.

    Parameters
    ----------
    midi_path : str
        Path of the MIDI file to inspect.

    Returns
    -------
    None
        The result is accumulated in ``instruments_list``.
    """
    midi = MidiFile(midi_path)
    for track in midi.tracks:
        for msg in track:
            # Only a program change can introduce a new instrument, so the
            # membership test is needed only for those messages.
            if msg.type != 'program_change':
                continue
            instrument = PROGRAM_TO_INSTRUMENT.get(msg.program, "Unknown Instrument")
            if instrument not in instruments_list:
                instruments_list.append(instrument)

# Collect the instruments used by every '.mid' file found in midi_dir
for file_name in os.listdir(midi_dir):
    if not file_name.endswith('.mid'):
        continue  # skip anything that is not a MIDI file
    midi_path = os.path.join(midi_dir, file_name)
    find_instruments(midi_path)

In [3]:
# Show the distinct instrument names collected by find_instruments
instruments_list

['Acoustic Grand Piano']

In [4]:
# Function to find out the different message types used in the MIDI files

# Distinct message types found so far, in order of first appearance.
msgType_list = []

def find_msgType(midi_path):
    """Record every distinct message type that occurs in a MIDI file.

    The ``type`` of each message in every track is appended to the
    module-level ``msgType_list`` unless it is already there.

    Parameters
    ----------
    midi_path : str
        Path of the MIDI file to inspect.

    Returns
    -------
    None
        The result is accumulated in ``msgType_list``.
    """
    midi = MidiFile(midi_path)
    for track in midi.tracks:
        for msg in track:
            if msg.type not in msgType_list:
                msgType_list.append(msg.type)

# Collect the message types used by every '.mid' file found in midi_dir
for file_name in os.listdir(midi_dir):
    if not file_name.endswith('.mid'):
        continue  # skip anything that is not a MIDI file
    midi_path = os.path.join(midi_dir, file_name)
    find_msgType(midi_path)

In [5]:
# Show the distinct message types collected by find_msgType
msgType_list

['track_name',
 'time_signature',
 'key_signature',
 'set_tempo',
 'control_change',
 'program_change',
 'midi_port',
 'note_on',
 'end_of_track',
 'pitchwheel']

In [6]:
# Function to convert MIDI file to text sequence

def midi_to_text_sequence(midi_path):
    """Convert a MIDI file into a text sequence of notes and rests.

    Every completed note is emitted as ``"<name><octave>(<duration>) "``,
    where ``<name>`` comes from ``MIDI_NOTE_TO_NAME`` (pitch modulo 12) and
    ``<octave>`` is ``note // 12 - 1`` (so MIDI note 60 is ``C4``). A gap
    between the previous note event and the start of the next note is emitted
    as ``"R(<duration>) "``. Durations are differences of accumulated
    ``msg.time`` values, i.e. they are in the file's delta-time units.

    A note starts on a ``note_on`` with velocity > 0 and ends on either a
    ``note_on`` with velocity 0 or an explicit ``note_off``. Tokens are
    appended in the order the notes *end*, track after track.

    Parameters
    ----------
    midi_path : str
        Path of the MIDI file to convert.

    Returns
    -------
    str
        The concatenated tokens, or an empty string when the file contains
        no completed note and no rest.
    """
    midi = MidiFile(midi_path)
    sequence = []

    for track in midi.tracks:
        # Delta times are relative to the previous message of the SAME track,
        # so the clock and the note bookkeeping restart for every track.
        current_time = 0      # absolute time of the message being handled
        previous_time = 0     # absolute time of the last note start/end
        active_notes = {}     # MIDI note number -> absolute start time

        for msg in track:
            # Every message advances the clock, whatever its type; skipping
            # this for any message would shift all later timings.
            current_time += msg.time

            if msg.type == 'note_on' and msg.velocity > 0:
                # Note start: emit a rest for any silence since the last event.
                if current_time > previous_time:
                    sequence.append(f"R({current_time - previous_time}) ")
                active_notes[msg.note] = current_time
                previous_time = current_time

            elif msg.type in ('note_on', 'note_off'):
                # Note end: zero-velocity note_on or explicit note_off.
                start_time = active_notes.pop(msg.note, None)
                if start_time is None:
                    # End without a matching start: nothing to emit.
                    continue
                note = MIDI_NOTE_TO_NAME.get(msg.note % 12, '')
                octave = (msg.note // 12) - 1
                duration = current_time - start_time
                sequence.append(f"{note}{octave}({duration}) ")
                previous_time = current_time

    return ''.join(sequence)

In [7]:
# Process each MIDI file in the directory.
# The list is rebuilt from scratch so that re-running this cell does not
# append every melody a second time, and the files are visited in sorted
# order so the output file is identical from run to run.
text_sequences = []
for file_name in sorted(os.listdir(midi_dir)):
    if not file_name.endswith('.mid'):
        continue
    midi_path = os.path.join(midi_dir, file_name)
    text_sequence = midi_to_text_sequence(midi_path)
    if text_sequence:
        text_sequences.append(text_sequence)
    else:
        print(f"No notes found in {file_name}")

# Write the text sequences to a file, one melody per line
with open("inputMelodies.txt", "w", encoding="utf-8") as file:
    for sequence in text_sequences:
        file.write(sequence + "\n")

print("Text sequences have been written to inputMelodies.txt")

Text sequences have been written to inputMelodies.txt
