In [29]:
import pandas as pd
import math
import ast

In [52]:
# Lookup table from note spelling to pitch class (C = 0 ... B = 11).
# Each inner tuple lists the spellings that share one pitch class, so
# enharmonic names such as 'C#'/'Db' or 'B#'/'C' map to the same number.
note2number = {
    spelling: pitch_class
    for pitch_class, spellings in enumerate((
        ('B#', 'C'),
        ('C#', 'Db'),
        ('D',),
        ('D#', 'Eb'),
        ('E', 'Fb'),
        ('E#', 'F'),
        ('F#', 'Gb'),
        ('G',),
        ('G#', 'Ab'),
        ('A',),
        ('A#', 'Bb'),
        ('B', 'Cb'),
    ))
    for spelling in spellings
}

def key2number(key):
    """Split a key name into its tonic pitch class and tonality.

    Parameters
    ----------
    key : str
        Key name such as 'C', 'Db', 'Dm' or 'C#m'.

    Returns
    -------
    tuple
        ``(tonic, tonality)`` where ``tonic`` is the value looked up in
        ``note2number`` and ``tonality`` is ``'maj'`` or ``'min'``.

    Raises
    ------
    KeyError
        If the tonic part of ``key`` is not a spelling in ``note2number``.

    Notes
    -----
    Any key longer than two characters is treated as minor with a
    two-character tonic; the remaining characters are not inspected.
    """
    size = len(key)

    # Three or more characters, e.g. 'C#m': accidental tonic + minor marker.
    if size > 2:
        return note2number[key[:2]], 'min'

    # Two characters that are not themselves a note, e.g. 'Dm': the first
    # character is the tonic and the key is minor.
    if size == 2 and key not in note2number:
        return note2number[key[:1]], 'min'

    # Remaining cases are a bare note name, e.g. 'C' or 'Db': major.
    return note2number[key], 'maj'

def chord2number(chord, tonic):
    """Rewrite a chord symbol so its note names are relative to ``tonic``.

    The root (and, for slash chords, the bass note) is replaced by its
    distance in semitones above ``tonic`` (0-11); the rest of the symbol is
    kept verbatim, e.g. ``chord2number('F#m7', 4) == '2m7'`` and
    ``chord2number('D/F#', 2) == '0/4'``.

    Parameters
    ----------
    chord : str
        Chord symbol starting with a note spelling found in ``note2number``.
    tonic : int or float
        Pitch class of the key's tonic. Floats with integral value are
        accepted (the conversion loop in this notebook can pass one after
        subtracting a capo value).

    Returns
    -------
    str
        The chord with root/bass replaced by decimal pitch-class numbers.

    Raises
    ------
    KeyError
        If ``chord`` does not start with a known note spelling.
    """
    def relative(note):
        # Modulo wraps any offset into 0-11, including a tonic outside that
        # range; int() strips the '.0' when tonic is a float.
        return str(int((note2number[note] - tonic) % 12))

    # Prefer a two-character root ('C#', 'Bb') over a one-character one ('C').
    if chord[:2] in note2number:
        root_length = 2
    elif chord[:1] in note2number:
        root_length = 1
    else:
        raise KeyError('unrecognized chord root in %r' % (chord,))

    root = chord[:root_length]
    suffix = chord[root_length:]

    # Slash chord: convert what follows the last '/' only when it is a note
    # name. A non-note tail such as the '9' of 'C6/9' is a chord extension,
    # not a bass note, and is left untouched instead of raising KeyError.
    quality, slash, bass = suffix.rpartition('/')
    if slash and bass in note2number:
        suffix = quality + slash + relative(bass)

    return relative(root) + suffix

In [53]:
import math
# Convert a preview of the 1980 data set into [tonality flag, chord string]
# pairs, where the flag is 0 for major / 1 for minor and the chord string holds
# every chord of the song rewritten relative to the (capo-adjusted) tonic.
MAX_ROWS = 21  # preview size; matches the earlier counter loop, which stopped after 21 rows

csv = pd.read_csv('./1980.csv')
converted_list = []
for row in csv.head(MAX_ROWS).itertuples(index=True):
    tonic, tonality = key2number(row.key)
    if pd.notna(row.capo):
        # Shift the tonic down by the capo value; modulo keeps it in 0-11
        # however large the capo is. Presumably the chords are notated
        # relative to the capo position -- confirm against the data source.
        tonic = (tonic - row.capo) % 12
    # row.chords is the text form of a list of lines, each a list of chords.
    chords = ast.literal_eval(row.chords)
    # join() puts exactly one space between chords and never a trailing one,
    # even when the last line of a song contains no chords.
    converted_chords = ' '.join(
        chord2number(chord, tonic) for line in chords for chord in line
    )
    converted_list.append([0 if tonality == 'maj' else 1, converted_chords])

test_df = pd.DataFrame(converted_list)
converted_list

[[0,
  '0 9m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 2m 7 0 9m 2m 7 0 9m 2m 7 0 9m 2m 7 0 0 9m'],
 [0,
  '0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 4 5 5m 9m 7 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 4 5 5m 9m 7 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 9m 4 5 0 4 5 5m 9m 7'],
 [0,
  '6maj7 57 10m7 9m7 17 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 9m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 9m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 9m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 9m7 8m7 17 6maj7 57 10m7 6maj7 57 4maj7 37 2maj7 17 6maj7 1113 6maj7 57 4maj7 37 2maj7 17 6maj7 1113 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 9m7 8m7 17 6maj7 57 10m7 6maj7 57 10m7 9m7 8m7 1