In [67]:
!pip install seaborn



In [68]:
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import re
# Apply seaborn's default theme globally, so it affects every figure drawn after this cell.
sns.set()

In [112]:
def extract_value(x, key):
    """Return ``x[key]``, or ``'<unk>'`` when it is missing or a placeholder.

    Parameters
    ----------
    x : object
        Container to index, e.g. a dict parsed from JSON. Non-subscriptable
        values such as ``None`` or ``NaN`` are tolerated.
    key : hashable
        Key (or index) to look up in ``x``.

    Returns
    -------
    object
        The stored value, or the string ``'<unk>'`` if the lookup fails or the
        stored value is one of the placeholders ``'n/a'`` / ``'NA'``.
    """
    try:
        value = x[key]
    except (KeyError, IndexError, TypeError):
        # Only lookup failures mean "unknown": missing key, index out of range,
        # or an `x` that cannot be subscripted. Anything else (notably
        # KeyboardInterrupt) must propagate instead of being silently swallowed.
        return '<unk>'

    if value in ['n/a', 'NA']:
        return '<unk>'
    return value

In [117]:
# Ingest MIDI metadata: for every annotation JSON file, join each annotation
# with the piece it refers to, then stack the per-file results into one frame.

# glob returns matches in arbitrary order; sort so the row numbers assigned at
# the end of this cell are the same on every run and every machine.
annotation_files = sorted(glob.glob("../data/piano-labelled/annotations/*.json"))
if not annotation_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(
        "No annotation files match ../data/piano-labelled/annotations/*.json"
    )

midi_metadatas = []
for file in tqdm(annotation_files):
    data = pd.read_json(file)

    # Keep only the rows that carry an annotation; the row label becomes `piece_id`.
    annotations = (
        data[['annotations']]
        .dropna(subset=['annotations'])
        .reset_index()
        .rename(columns={'index': 'piece_id'})
    )

    # Remove the trailing "_<number>" from the id so it can be matched against
    # the ids in `pieces`. Anchored with `$` so only the suffix is stripped.
    annotations['piece_id'] = annotations['piece_id'].apply(lambda x: re.sub(r'_[0-9]+$', '', x))

    # Fetch the piece NLP annotation description and the arousal annotation.
    annotations['piece_description'] = annotations['annotations'].apply(lambda x: extract_value(x, 'ex1_description'))
    annotations['piece_arousal'] = annotations['annotations'].apply(lambda x: extract_value(x, 'ex2_arousal'))
    # extract_value maps missing keys to '<unk>', so this only removes rows
    # whose stored value is itself null.
    annotations = annotations.dropna(subset=['piece_description', 'piece_arousal'])

    # Extract the piece name and the associated MIDI file.
    pieces = (
        data[['pieces']]
        .dropna(subset=['pieces'])
        .reset_index()
        .rename(columns={'index': 'piece_id'})
    )
    pieces['piece_name'] = pieces['pieces'].apply(lambda x: extract_value(x, 'name'))
    pieces['midi_file'] = pieces['pieces'].apply(lambda x: extract_value(x, 'midi'))
    pieces = pieces.dropna(subset=['piece_name', 'midi_file'])

    # Merge annotations with pieces (left join: every annotation is kept, even
    # without a matching piece), then drop the join key and the raw columns.
    merged_data = pd.merge(annotations, pieces, on='piece_id', how='left')
    merged_data = merged_data.drop(columns=['piece_id', 'annotations', 'pieces'])
    midi_metadatas.append(merged_data)

midi_metadata = pd.concat(midi_metadatas, ignore_index=True)
# NOTE: the per-file join key was dropped above, so `piece_id` in the final
# frame is the running row number of the combined frame, not the JSON piece key.
midi_metadata = midi_metadata.reset_index().rename(columns={'index': 'piece_id'})
midi_metadata


  0%|          | 0/2 [00:00<?, ?it/s]

../data/piano-labelled/annotations\vgmidi_raw_1.json
../data/piano-labelled/annotations\vgmidi_raw_2.json


Unnamed: 0,piece_id,piece_description,piece_arousal,piece_name,midi_file
0,0,very upbeat,Delighted,Lurking In The Darkness,Final Fantasy_PS1_Final Fantasy VII_Lurking In...
1,1,I could tell the valence of the example was in...,Valence started out moderately negative and pr...,Lurking In The Darkness,Final Fantasy_PS1_Final Fantasy VII_Lurking In...
2,2,For a second I thought this piece was going to...,This piece seemed to have a positive valence t...,Lurking In The Darkness,Final Fantasy_PS1_Final Fantasy VII_Lurking In...
3,3,Bouncy and fun,Kind of sparatic,Lurking In The Darkness,Final Fantasy_PS1_Final Fantasy VII_Lurking In...
4,4,nice,nice,Lurking In The Darkness,Final Fantasy_PS1_Final Fantasy VII_Lurking In...
...,...,...,...,...,...
6100,6100,It started off slowly but happy and then built...,Seemed to remain consistent almost like it was...,One Winged Angel,Final Fantasy_PS1_Final Fantasy VII_One Winged...
6101,6101,This starts off a certain way then changes in ...,This is nostalgic because I recognize this and...,One Winged Angel,Final Fantasy_PS1_Final Fantasy VII_One Winged...
6102,6102,The piece begins slow in tempo and then become...,"The beginning rhythm sounds suspenseful, makin...",One Winged Angel,Final Fantasy_PS1_Final Fantasy VII_One Winged...
6103,6103,started slow but picked up.,I feel it stayed the same,One Winged Angel,Final Fantasy_PS1_Final Fantasy VII_One Winged...


In [118]:
# Sanity check: list the rows whose description is missing (NaN/None).
# An empty result means every row has a description.
midi_metadata[midi_metadata['piece_description'].isna()]

Unnamed: 0,piece_id,piece_description,piece_arousal,piece_name,midi_file


In [119]:
# Look for rows of piece 'piece55' whose description is the literal string 'NS'.
# NOTE(review): `piece_id` is filled from the row index when midi_metadata is built,
# so comparing it to the string 'piece55' looks like it can never match — confirm
# which identifier this check is meant to use.
midi_metadata[(midi_metadata['piece_id'] == 'piece55') & (midi_metadata['piece_description'] == 'NS')]

Unnamed: 0,piece_id,piece_description,piece_arousal,piece_name,midi_file


In [120]:
# Persist the combined metadata as CSV; index=False leaves the DataFrame index
# out of the file (the `piece_id` column is still written).
midi_metadata.to_csv(
    path_or_buf='../data/piano-labelled/labelled_piano_midi_metadata.csv',
    index=False,
)