# In [None]:
import os
import h5py
import csv

# Define the function to convert an HDF5 file to a list of rows
def hdf5_to_rows(input_path):
    """Read the ``/metadata/songs`` table of an HDF5 file as a list of rows.

    Args:
        input_path: Path of the HDF5 file to read.

    Returns:
        One list per table row, holding that row's values in the order of the
        table's ``dtype.names``. ``bytes`` values are decoded as UTF-8; every
        other value is passed through unchanged.
    """
    # The context manager closes the file even when the dataset is missing or
    # a value fails to decode; a bare open()/close() pair would leak the
    # handle in those cases.
    with h5py.File(input_path, 'r') as h5:
        metadata_table = h5['/metadata/songs']
        # The field names are the same for every row, so look them up once.
        field_names = metadata_table.dtype.names

        rows = []
        for record in metadata_table:
            values = [record[name] for name in field_names]
            rows.append([
                value.decode('utf-8') if isinstance(value, bytes) else value
                for value in values
            ])

    return rows

# Define the function to recursively search for HDF5 files in a directory and its subdirectories
def find_hdf5_files(directory):
    """Recursively collect the paths of all ``.h5`` files under *directory*.

    Args:
        directory: Directory to search, together with all its subdirectories.

    Returns:
        A list of file paths, each built by joining onto *directory*. The
        order is deterministic: the entries of every directory are visited
        sorted by name, and the files found inside a subdirectory appear at
        that subdirectory's position.
    """
    file_paths = []

    # os.listdir() returns entries in arbitrary order; sorting them makes the
    # result (and anything derived from it) reproducible between runs.
    for item in sorted(os.listdir(directory)):
        item_path = os.path.join(directory, item)

        if os.path.isdir(item_path):
            # Descend into the subdirectory and keep whatever it contains.
            file_paths.extend(find_hdf5_files(item_path))
        elif os.path.isfile(item_path) and item_path.endswith('.h5'):
            file_paths.append(item_path)

    return file_paths

# Define the function to write the metadata for all songs to a CSV file
def write_metadata_to_csv(directory, output_path):
    """Write the song metadata of every HDF5 file under *directory* to a CSV.

    The output starts with a fixed header row, followed by one row per entry
    of each file's metadata table, in the order the files are found.

    Args:
        directory: Root directory that is searched recursively for ``.h5``
            files.
        output_path: Path of the CSV file to create or overwrite.
    """
    # NOTE(review): these column names are hard-coded, whereas hdf5_to_rows()
    # emits values in the order of the table's dtype.names. Confirm that the
    # header really lines up with the columns of '/metadata/songs'.
    header = [
        'artist_name', 'title', 'release', 'year', 'track_7digitalid',
        'shs_perf', 'shs_work', 'song_hotttnesss', 'artist_hotttnesss',
        'duration', 'end_of_fade_in', 'start_of_fade_out', 'loudness',
        'tempo', 'key', 'key_confidence', 'mode', 'mode_confidence',
        'time_signature', 'time_signature_confidence',
    ]

    # Read everything before touching the output file, so a failure while
    # reading does not leave a truncated CSV behind.
    rows = []
    for hdf5_file in find_hdf5_files(directory):
        rows.extend(hdf5_to_rows(hdf5_file))

    # newline='' lets the csv module control line endings itself (otherwise
    # extra blank lines can appear on platforms that translate '\n'), and an
    # explicit UTF-8 encoding keeps non-ASCII metadata writable regardless of
    # the platform's default encoding.
    with open(output_path, 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        writer.writerows(rows)

# Export the metadata of every song found under the MillionSongSubset
# directory into a single CSV file
write_metadata_to_csv(
    directory='/home/ubuntu/data/MillionSongSubset',
    output_path='/home/ubuntu/data/output.csv',
)
