In [1]:
import numpy as np
import pandas as pd
import os
  
def _load_embeddings(npz_path):
    """Return the "embeddings" array stored in the .npz archive at `npz_path`.

    The archive is opened in a `with` block so that its file handle is closed
    as soon as the array has been read into memory.
    """
    with np.load(npz_path) as archive:
        return archive["embeddings"]


# load the metadata
metadata_df = pd.read_csv("metadata.csv")
title_embeddings = _load_embeddings("metadata_title_embeddings.npz")
keywords_embeddings = _load_embeddings("metadata_keywords_embeddings.npz")

# load the annotations
annotations_df = pd.read_csv("annotations.csv")
annotations_embeddings = _load_embeddings("annotations_text_embeddings.npz")

# load audio features
# Swap only the file extension: str.replace("mp3", "npz") would also rewrite
# an "mp3" that happens to occur elsewhere in the file name.
first_filename = metadata_df.loc[0, "filename"]
feature_filename = os.path.splitext(first_filename)[0] + ".npz"
# This archive is intentionally left open: its arrays are fetched by key on
# demand, and later cells may still index into `features`.
features = np.load(os.path.join("audio_features", feature_filename))
print(list(features.keys()))
print("Shape of ZCR feature (time, n_features)", features["zerocrossingrate"].shape)
print("Shape of MFCC features (time, n_features)", features["mfcc"].shape)

# load audio (optional, just if you want to compute your own features ...)
import librosa
waveform, sr = librosa.load(os.path.join("audio", first_filename), sr=16000)

['embeddings', 'melspectrogram', 'mfcc', 'mfcc_delta', 'mfcc_delta2', 'flatness', 'centroid', 'flux', 'energy', 'power', 'bandwidth', 'contrast', 'zerocrossingrate']
Shape of ZCR feature (time, n_features) (233, 1)
Shape of MFCC features (time, n_features) (233, 32)


In [2]:
# Show which columns the annotations table provides.
annotation_columns = annotations_df.columns
print(annotation_columns)

Index(['task_id', 'filename', 'annotator', 'text', 'onset', 'offset',
       'filename_unsafe'],
      dtype='object')


In [5]:
# Thresholds for the selection below.
MIN_ANNOTATIONS_PER_ANNOTATOR = 2
MIN_ANNOTATORS_PER_FILE = 2

# Number of annotations each annotator wrote for each file.
pair_keys = ['filename', 'annotator']
grouped = (
    annotations_df
    .groupby(pair_keys)
    .size()
    .reset_index(name='annot_count')
)

# Keep only the (file, annotator) pairs that reach the annotation threshold.
has_enough_annotations = grouped['annot_count'] >= MIN_ANNOTATIONS_PER_ANNOTATOR
filtered = grouped.loc[has_enough_annotations]

# For every remaining file, count the distinct annotators that are left.
annot_per_file = (
    filtered
    .groupby('filename')['annotator']
    .nunique()
    .reset_index(name='n_annotators')
)

# Files for which enough such annotators exist.
has_enough_annotators = annot_per_file['n_annotators'] >= MIN_ANNOTATORS_PER_FILE
need_files = annot_per_file.loc[has_enough_annotators]
filenames = need_files['filename'].tolist()
print(filenames[:2])

['102744.mp3', '110921.mp3']


In [6]:
# Tally the annotations attached to each audio file.
per_file_counts = annotations_df.groupby('filename').size()
annotations_file = per_file_counts.reset_index(name='n_annotations')

# Order the files from most to fewest annotations and show the first ten.
annots_sorted = annotations_file.sort_values(by='n_annotations', ascending=False)
top_ten = annots_sorted.head(10)
print(top_ten)

        filename  n_annotations
6578  623187.mp3             96
8926   94017.mp3             73
6250  591203.mp3             65
5264  518570.mp3             63
6537  620967.mp3             42
3541  406538.mp3             40
8630  777608.mp3             40
2795  352225.mp3             39
3526  406166.mp3             38
2126  272516.mp3             38
