In [None]:
import pandas as pd
import numpy as np
import h5py

# Define a function to recursively inspect the contents of groups and datasets
def inspect_groups_and_datasets(group, indent="", max_sample_size=10):
    """Recursively print the structure of an HDF5 group.

    Every member of ``group`` is printed on its own line. Sub-groups are
    descended into with one extra tab of indentation; datasets have their
    shape and dtype printed, plus their full contents when they hold fewer
    than ``max_sample_size`` elements.

    Args:
        group: An ``h5py.Group`` (or any mapping-like object exposing
            ``.items()``) whose members should be described.
        indent: Prefix prepended to every printed line at this level.
        max_sample_size: Datasets with strictly fewer elements than this
            are printed in full. Defaults to 10.
    """
    for name, item in group.items():
        if isinstance(item, h5py.Group):
            print(f"{indent}Group: {name}")
            inspect_groups_and_datasets(item, indent + "\t", max_sample_size)
        elif isinstance(item, h5py.Dataset):
            print(f"{indent}Dataset's column: {name}")
            print(f"{indent}\tShape: {item.shape}")
            print(f"{indent}\tDtype: {item.dtype}")
            # h5py reports size None for empty (null-dataspace) datasets;
            # comparing None with an int would raise TypeError.
            element_count = item.size
            if element_count is not None and element_count < max_sample_size:
                # Index with () instead of [:]: it reads the whole dataset
                # for any rank, whereas [:] is rejected by h5py for scalar
                # datasets (shape ()).
                print(f"{indent}\tSample Data: {item[()]}")
        else:
            print(f"{indent}Unknown type: {name}")

# Define a function to inspect specific datasets and groups in an HDF5 file
def inspect_specific_datasets(filepath, dataset_names):
    """Print the structure of selected top-level entries of an HDF5 file.

    The file is opened read-only. For each requested name that exists, a
    "Root:" header is printed followed by a description of that entry as
    produced by ``inspect_groups_and_datasets``; missing names are
    reported and skipped.

    Args:
        filepath: Path of the HDF5 file to open.
        dataset_names: Iterable of group or dataset names (paths inside
            the file) to inspect.
    """
    with h5py.File(filepath, 'r') as h5_file:
        print("Inspecting HDF5 file:", filepath)
        for dataset_name in dataset_names:
            if dataset_name not in h5_file:
                print(f"\nDataset '{dataset_name}' not found")
                continue

            node = h5_file[dataset_name]
            print("\nRoot:")
            if isinstance(node, h5py.Group):
                inspect_groups_and_datasets(node, indent="\t")
            else:
                # A dataset has no .items(), so handing it to the walker
                # directly would raise AttributeError. Wrapping it in a
                # one-entry mapping lets the walker describe it instead.
                inspect_groups_and_datasets({dataset_name: node}, indent="\t")

# Inspect the uploaded file: walk three named top-level entries of the
# track's HDF5 file and print what each one contains.
file_path = 'TRAYYBD128F9335F7A.h5'
datasets_to_inspect = [
    "analysis",
    "metadata",
    "musicbrainz",
]
inspect_specific_datasets(
    filepath=file_path,
    dataset_names=datasets_to_inspect,
)


# Create a DataFrame with the specified row and column types
# The single track is declared as plain scalars (column name -> value) and
# then wrapped into one-element lists, giving a one-row frame.
_track_values = {
    'analysis_sample_rate': 22050,
    'audio_md5': np.bytes_('947b8654581d1b3bcc8c008fdd7780c5'),
    'danceability': 0.0,
    'duration': 201.24689,
    'end_of_fade_in': 0.0,
    'energy': 0.0,
    'idx_bars_confidence': 0,
    'idx_bars_start': 0,
    'idx_beats_confidence': 0,
    'idx_beats_start': 0,
    # Add the remaining columns as needed
    'key': 2,
    'key_confidence': 0.117,
    'loudness': -7.174,
    'mode': 1,
    'mode_confidence': 0.299,
    'start_of_fade_out': 187.501,
    'tempo': 137.54,
    'time_signature': 5,
    'time_signature_confidence': 0.731,
    'track_id': np.bytes_('TRAYYBD128F9335F7A'),
}

# Column name -> single-element list, in declaration order.
data = {column: [value] for column, value in _track_values.items()}

df = pd.DataFrame(data)

# Display the DataFrame
df

Inspecting HDF5 file: TRAYYBD128F9335F7A.h5

Root:
	Dataset's column: bars_confidence
		Shape: (89,)
		Dtype: float64
	Dataset's column: bars_start
		Shape: (89,)
		Dtype: float64
	Dataset's column: beats_confidence
		Shape: (451,)
		Dtype: float64
	Dataset's column: beats_start
		Shape: (451,)
		Dtype: float64
	Dataset's column: sections_confidence
		Shape: (8,)
		Dtype: float64
		Sample Data: [1.    0.725 0.047 0.586 0.825 0.584 0.656 1.   ]
	Dataset's column: sections_start
		Shape: (8,)
		Dtype: float64
		Sample Data: [  0.        9.58939  53.70647  73.55706 110.35083 128.38405 153.95657
 190.18736]
	Dataset's column: segments_confidence
		Shape: (620,)
		Dtype: float64
	Dataset's column: segments_loudness_max
		Shape: (620,)
		Dtype: float64
	Dataset's column: segments_loudness_max_time
		Shape: (620,)
		Dtype: float64
	Dataset's column: segments_loudness_start
		Shape: (620,)
		Dtype: float64
	Dataset's column: segments_pitches
		Shape: (620, 12)
		Dtype: float64
	Dataset's colum

Unnamed: 0,analysis_sample_rate,audio_md5,danceability,duration,end_of_fade_in,energy,idx_bars_confidence,idx_bars_start,idx_beats_confidence,idx_beats_start,key,key_confidence,loudness,mode,mode_confidence,start_of_fade_out,tempo,time_signature,time_signature_confidence,track_id
0,22050,b'947b8654581d1b3bcc8c008fdd7780c5',0.0,201.24689,0.0,0.0,0,0,0,0,2,0.117,-7.174,1,0.299,187.501,137.54,5,0.731,b'TRAYYBD128F9335F7A'
