# POP909 Dataset Converter (ver. 1.0)

### https://github.com/music-x-lab/POP909-Dataset

***

Powered by tegridy-tools: https://github.com/asigalov61/tegridy-tools

***

#### Project Los Angeles

#### Tegridy Code 2023

***

# (SETUP ENVIRONMENT)

In [None]:
#@title Install all dependencies (run only once per session)
# Clone the POP909 dataset repository into the current working directory;
# the file-list cell expects to find it at /content/POP909-Dataset.
!git clone https://github.com/music-x-lab/POP909-Dataset
# Clone tegridy-tools; the import cell changes into
# /content/tegridy-tools/tegridy-tools before importing TMIDIX.
!git clone https://github.com/asigalov61/tegridy-tools
# tqdm supplies the progress bar wrapped around the conversion loop.
!pip install tqdm

In [None]:
#@title Import all needed modules

print('Loading needed modules. Please wait...')
import os
import copy
import math
import statistics
import random

from tqdm import tqdm

# Create the notebook's working directories when they are missing.
# /content/Out is where the conversion cell writes its MIDI files.
for work_dir in ('/content/Dataset', '/content/Out'):
    if not os.path.exists(work_dir):
        os.makedirs(work_dir)

# Switch into the cloned tegridy-tools checkout before importing TMIDIX,
# then return to /content/ once the import has completed.
print('Loading TMIDIX module...')
os.chdir('/content/tegridy-tools/tegridy-tools')

import TMIDIX

print('Done!')

os.chdir('/content/')
print('Enjoy! :)')

# (FILE LIST)

In [None]:
#@title Save file list
###########

print('Loading MIDI files...')
print('This may take a while on a large dataset in particular.')

# Root of the cloned POP909 repository. Every file found below it is
# collected; no extension filtering happens at this stage.
dataset_addr = "/content/POP909-Dataset"

filez = []
for root, _subdirs, names in os.walk(dataset_addr):
    filez.extend(os.path.join(root, name) for name in names)
print('=' * 70)

# An empty list only triggers a message; the cell still goes on to
# shuffle and save the (empty) list.
if not filez:
    print('Could not find any MIDI files. Please check Dataset dir...')
    print('=' * 70)

print('Randomizing file list...')
random.shuffle(filez)

# Persist the shuffled list so the "Load file list" cell can read it back.
TMIDIX.Tegridy_Any_Pickle_File_Writer(filez, '/content/filez')

In [None]:
#@title Load file list
# Read back the file list that the "Save file list" cell wrote to the same
# '/content/filez' path, replacing any `filez` currently in memory.
# NOTE(review): presumably this lets a restarted session reuse the same
# shuffled order -- confirm against TMIDIX's pickle reader/writer.
filez = TMIDIX.Tegridy_Any_Pickle_File_Reader('/content/filez')
print('Done!')

# (PROCESS)

In [None]:
#@title Convert

# Start-up banner.
print('=' * 70, 'TMIDIX MIDI Processor', '=' * 70, 'Starting up...', '=' * 70, sep='\n')

# Resume controls: the conversion loop starts at filez[START_FILE_NUMBER],
# and LAST_SAVED_BATCH_COUNT seeds the processed-files counter.
START_FILE_NUMBER = 0
LAST_SAVED_BATCH_COUNT = 0

# Running counters: files visited so far / files successfully converted.
input_files_count, files_count = START_FILE_NUMBER, LAST_SAVED_BATCH_COUNT

# Accumulators initialised here; the conversion loop in this notebook
# does not reference them.
melody_chords_f = list()
stats = [0] * 16

print('Processing MIDI files. Please wait...', '=' * 70, sep='\n')

# Convert every file in `filez` (starting at START_FILE_NUMBER) that is a
# reasonably small '.mid' file into a re-instrumented MIDI under /content/Out.
#
# Per file:
#   1. read the MIDI into a millisecond score with TMIDIX,
#   2. collect all note events, tagging tracks 2/3/4 with temporary channels,
#   3. drop zero-length notes and map the temporary channels to their final
#      instrument channels,
#   4. group notes that start together into chords and move a low bottom note
#      of each chord to the bass channel,
#   5. shift all start times so the song begins at 0 and write the result.
#
# A failure on one file is reported and the loop moves on to the next file;
# Ctrl-C stops the loop.

# Temporary channel assigned to notes by the score track they came from.
TRACK_TO_CHANNEL = {2: 10, 3: 11, 4: 12}

# Final channel for each temporary channel:
#   10 -> 3 (Melodies / Violin), 11 -> 7 (Bridges / Sax),
#   12 -> 0 (Accompaniment / Piano)
CHANNEL_REMAP = {10: 3, 11: 7, 12: 0}

for f in tqdm(filez[START_FILE_NUMBER:]):
    try:
        input_files_count += 1

        fn = os.path.basename(f)
        # splitext copes with names that contain no dot at all (the file list
        # holds every file under the dataset directory), where indexing the
        # result of split('.') would raise IndexError and be reported as a
        # bad MIDI.
        fn1, ext = os.path.splitext(fn)

        # Filtering out non-MIDI files and giant MIDIs
        if ext != '.mid' or os.path.getsize(f) >= 200000:
            continue

        #=======================================================
        # START PROCESSING

        # Converting MIDI to ms score with the TMIDIX module.
        # The context manager closes the file handle right after reading.
        with open(f, 'rb') as midi_file:
            score = TMIDIX.midi2ms_score(midi_file.read())

        # score[0] is skipped; score[1:] are iterated as tracks of events.
        events_matrix1 = []
        for itrack in range(1, len(score)):
            for event in score[itrack]:
                if event[0] == 'note':
                    if itrack in TRACK_TO_CHANNEL:
                        event[3] = TRACK_TO_CHANNEL[itrack]
                    events_matrix1.append(event)

        if not events_matrix1:
            # Reported by the generic handler below, with a clearer message
            # than the "min() arg is an empty sequence" error it replaces.
            raise ValueError('no note events found')

        # Files containing a negative start time or duration are skipped
        # silently (they are not counted as processed).
        if any(y[1] < 0 or y[2] < 0 for y in events_matrix1):
            continue

        # Two stable sorts: order by start time, highest pitch first within
        # the same start time.
        events_matrix1.sort(key=lambda x: x[4], reverse=True)
        events_matrix1.sort(key=lambda x: x[1])

        # Keep only notes with a positive duration and apply the final
        # channel mapping. Filtering preserves the ordering established
        # above, so no re-sort is needed.
        events_matrix2 = []
        for e in events_matrix1:
            if e[1] >= 0 and e[2] > 0:
                e[3] = CHANNEL_REMAP.get(e[3], e[3])
                events_matrix2.append(e)

        if not events_matrix2:
            # Replaces the bare IndexError the empty list would cause below.
            raise ValueError('no notes with a positive duration found')

        # Group consecutive notes sharing a start time into chords. Each
        # chord inherits the highest-pitch-first order from the sort above.
        chords = []
        for e in events_matrix2:
            if chords and chords[-1][0][1] == e[1]:
                chords[-1].append(e)
            else:
                chords.append([e])

        # The last note of a chord is its lowest; if it is at or below
        # pitch 53 it is moved to the bass channel.
        for c in chords:
            if c[-1][4] <= 53:
                c[-1][3] = 2 # Bass Melody / Fretless Bass

        # Shift every start time so that the first chord starts at 0.
        song_start = chords[0][0][1]
        SONG = []
        for c in chords:
            chord_time = c[0][1] - song_start
            for cc in c:
                cc[1] = chord_time
            SONG.extend(c)

        SONG.sort(key=lambda x: x[4], reverse=True)
        SONG.sort(key=lambda x: x[1])

        detailed_stats = TMIDIX.Tegridy_SONG_to_MIDI_Converter(SONG,
                                                  output_signature = 'POP909-'+fn1,
                                                  output_file_name = '/content/Out/POP909-'+fn1,
                                                  track_name='Project Los Angeles',
                                                  list_of_MIDI_patches=[0, 24, 35, 40, 42, 46, 56, 65, 73, 0, 53, 19, 0, 0, 0, 0],
                                                  number_of_ticks_per_quarter=500,
                                                  verbose=False)

        #=======================================================

        # Processed files counter
        files_count += 1

    except KeyboardInterrupt:
        print('Saving current progress and quitting...')
        break

    except Exception as ex:
        # Best-effort batch conversion: report the failing file and move on.
        print('WARNING !!!')
        print('=' * 70)
        print('Bad MIDI:', f)
        print('Error detected:', ex)
        print('=' * 70)
        continue

print('=' * 70)
print('Done!')
print('=' * 70)

# Congrats! You did it! :)