In [4]:
import boto3
import os
import librosa
import numpy as np

In [16]:
def download_first_mp3_from_s3(bucket_name, prefix, download_dir="downloads"):
    s3 = boto3.client("s3")

    response = s3.list_objects_v2(Bucket=bucket_name, Prefix=prefix)
    contents = response.get("Contents", [])
    mp3_keys = [obj["Key"] for obj in contents if obj["Key"].endswith(".mp3")]

    if not mp3_keys:
        raise ValueError("No MP3 files found under the specified prefix.")

    first_key = mp3_keys[1]
    file_name = os.path.basename(first_key)

    os.makedirs(download_dir, exist_ok=True)
    download_path = os.path.join(download_dir, file_name)

    s3.download_file(bucket_name, first_key, download_path)
    print(f"Downloaded {first_key} to {download_path}")

    return download_path

In [17]:
downloaded_path = download_first_mp3_from_s3(
    bucket_name="youtrends-project", prefix="music-data/audio/"
)

Downloaded music-data/audio/-1xAi_Q6a8g.mp3 to downloads/-1xAi_Q6a8g.mp3


In [18]:
def extract_audio_features(file_path):
    y, sr = librosa.load(file_path, sr=None)

    features = {
        "tempo": librosa.feature.rhythm.tempo(y=y, sr=sr)[0],
        "zcr_mean": np.mean(librosa.feature.zero_crossing_rate(y=y)),
        "rmse_mean": np.mean(librosa.feature.rms(y=y)),
        "spectral_centroid_mean": np.mean(librosa.feature.spectral_centroid(y=y, sr=sr)),
        "spectral_bandwidth_mean": np.mean(librosa.feature.spectral_bandwidth(y=y, sr=sr)),
        "spectral_rolloff_mean": np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr)),
        "mfccs_mean": np.mean(librosa.feature.mfcc(y=y, sr=sr), axis=1).tolist(),
        "chroma_stft_mean": np.mean(librosa.feature.chroma_stft(y=y, sr=sr), axis=1).tolist(),
    }

    return features

In [19]:
features = extract_audio_features(downloaded_path)
features

{'tempo': np.float64(139.6748310810811),
 'zcr_mean': np.float64(0.048357983290752936),
 'rmse_mean': np.float32(0.088420145),
 'spectral_centroid_mean': np.float64(2053.3206260208153),
 'spectral_bandwidth_mean': np.float64(2580.4237527445835),
 'spectral_rolloff_mean': np.float64(3963.1447666393583),
 'mfccs_mean': [-243.8916015625,
  145.49668884277344,
  -18.005151748657227,
  32.52787780761719,
  5.8710503578186035,
  12.21762466430664,
  -3.9086289405822754,
  -5.55466365814209,
  3.7589497566223145,
  -2.9381661415100098,
  -5.607466220855713,
  1.3901548385620117,
  0.6910752058029175,
  -1.84996497631073,
  0.18747229874134064,
  1.150999903678894,
  4.210670471191406,
  -1.5306581258773804,
  0.3984512388706207,
  1.0817126035690308],
 'chroma_stft_mean': [0.3960314095020294,
  0.3623977303504944,
  0.3214225769042969,
  0.3531979024410248,
  0.30506858229637146,
  0.339432030916214,
  0.2972325384616852,
  0.35863080620765686,
  0.3870934844017029,
  0.30509820580482483,
  0

In [7]:
features

{'tempo': np.float64(84.72079918032787),
 'zcr_mean': np.float64(0.04984508874642069),
 'rmse_mean': np.float32(0.2839137),
 'spectral_centroid_mean': np.float64(2618.5035010027054),
 'spectral_bandwidth_mean': np.float64(3314.8723855080407),
 'spectral_rolloff_mean': np.float64(5718.998061322742),
 'mfccs_mean': [-116.3820571899414,
  137.0737762451172,
  -20.79558753967285,
  39.9806022644043,
  -6.7852702140808105,
  35.407413482666016,
  -17.11289405822754,
  12.039874076843262,
  -11.978132247924805,
  1.1444486379623413,
  -7.692205905914307,
  3.447439432144165,
  -3.252932548522949,
  -7.237246036529541,
  -4.366272449493408,
  -10.73857593536377,
  -0.09413838386535645,
  -6.213627815246582,
  -2.6523969173431396,
  -9.325313568115234],
 'chroma_stft_mean': [0.3173564374446869,
  0.3300778865814209,
  0.4707897901535034,
  0.5560809373855591,
  0.3803911805152893,
  0.32415711879730225,
  0.31585127115249634,
  0.42702096700668335,
  0.3409738838672638,
  0.30128994584083557,
