# Extract music features from bands in local files

## Libraries

In [1]:
import librosa
import numpy as np
import pandas as pd
import os

## Helper functions

In [2]:
# Extract music features with librosa and return a numpy array
def extract_features(file_path):
    """Summarise one audio file as a flat numpy feature vector.

    The file is loaded with ``librosa.load`` using its default settings, and
    four librosa descriptors are computed from the resulting signal.  The
    MFCC and chroma matrices are averaged along axis 1, the spectral centroid
    is averaged over all of its values, and the tempo estimate from
    ``librosa.beat.beat_track`` is appended unchanged.

    Parameters
    ----------
    file_path : path to the audio file to analyse.

    Returns
    -------
    numpy.ndarray
        The concatenation, in order, of: MFCC means, spectral-centroid mean,
        chroma means, tempo.
    """
    signal, sample_rate = librosa.load(file_path)

    # Frame-level descriptors computed from the decoded signal.
    mfcc_frames = librosa.feature.mfcc(y=signal, sr=sample_rate)
    centroid_frames = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    chroma_frames = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    # Only the tempo estimate is kept; the second returned value is unused.
    tempo, _unused = librosa.beat.beat_track(y=signal, sr=sample_rate)

    # Collapse each descriptor to summary statistics before concatenating.
    mfcc_means = mfcc_frames.mean(axis=1)
    centroid_mean = centroid_frames.mean()
    chroma_means = chroma_frames.mean(axis=1)

    summary_parts = (mfcc_means, centroid_mean, chroma_means, tempo)
    return np.hstack(summary_parts)

In [11]:
# Generate list of full file paths for music files in a given folder
def get_music_files(folder_path):
    """Return the full paths of the audio files directly inside a folder.

    Only the folder's immediate entries are inspected (no recursion).  An
    entry is kept when its extension, compared case-insensitively, is one of
    ``.mp3``, ``.wav``, ``.flac``, ``.m4a`` or ``.aac`` and it is a regular
    file, so a sub-directory that happens to be named like ``live.mp3`` is
    not returned as if it were audio.

    Parameters
    ----------
    folder_path : directory to scan.

    Returns
    -------
    list of str
        ``os.path.join(folder_path, name)`` for every matching file, sorted
        by file name so the result is the same on every run.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``folder_path`` cannot be listed
        (e.g. it does not exist).
    """
    music_extensions = {'.mp3', '.wav', '.flac', '.m4a', '.aac'}

    music_files = []
    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for name in sorted(os.listdir(folder_path)):
        if os.path.splitext(name)[1].lower() not in music_extensions:
            continue
        full_path = os.path.join(folder_path, name)
        # Skip directories (or other non-files) that merely look like audio.
        if os.path.isfile(full_path):
            music_files.append(full_path)
    return music_files

In [26]:
## DF fn
def create_music_dataframe(artist_album_dict):
    """Build a DataFrame with one row per audio file found on disk.

    Parameters
    ----------
    artist_album_dict : dict
        Nested mapping ``{artist: {album: folder_path}}``.  Each folder is
        scanned with ``get_music_files``.

    Returns
    -------
    pandas.DataFrame
        Columns ``artist``, ``album``, ``filepath`` and ``track_name`` (the
        base name of ``filepath``).  The columns are present even when no
        files are found, so downstream code can rely on the schema.
    """
    columns = ['artist', 'album', 'filepath', 'track_name']

    data = []
    for artist, albums in artist_album_dict.items():
        for album, folder_path in albums.items():
            for file_path in get_music_files(folder_path):
                data.append({
                    'artist': artist,
                    'album': album,
                    'filepath': file_path,
                    'track_name': os.path.basename(file_path),
                })

    # Passing columns explicitly keeps the schema stable for empty input;
    # without it pd.DataFrame([]) has no columns at all.
    return pd.DataFrame(data, columns=columns)

In [33]:
# Add librosa features to the df
def add_features_to_df(df):
    """Return a copy-like result of ``df`` with librosa feature columns added.

    For every row, ``extract_features`` is called on the row's ``filepath``
    value and each returned number is stored under the matching name from
    the list built below: ``mfcc_0`` .. ``mfcc_19``, ``spectral_centroid``,
    ``chroma_0`` .. ``chroma_11`` and ``tempo`` (34 names in total).

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``filepath`` column.

    Returns
    -------
    The result of ``df.apply(..., axis=1)`` with the feature values added to
    each row.
    """
    mfcc_columns = [f'mfcc_{k}' for k in range(20)]
    chroma_columns = [f'chroma_{k}' for k in range(12)]
    feature_names = [*mfcc_columns, 'spectral_centroid', *chroma_columns, 'tempo']

    def _attach_features(record):
        # Values are matched to names purely by position in the vector.
        values = extract_features(record['filepath'])
        for position, value in enumerate(values):
            record[feature_names[position]] = value
        return record

    return df.apply(_attach_features, axis=1)

## Files from Bands (Animal Collective, The Strokes, The Tallest Man on Earth, Pogo)

In [34]:
# Animal Collective (AC): one folder path per album.
# NOTE: these are absolute paths into one specific Windows iTunes library;
# they must be edited before running on another machine.
## Fall Be Kind
ac_fall_be_kind_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Fall Be Kind EP"

## Feels
ac_feels_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Feels"

## Meeting of the Waters
ac_waters_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Meeting Of The Waters - mp3"

## MPP (Merriweather Post Pavilion)
ac_mpp_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Merriweather Post Pavilion"

## Painting With (files live in a nested sub-folder)
ac_pw_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Painting With - MP3\Animal Collective - Painting With"

## Strawberry Jam
ac_sj_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Strawberry Jam"

## Sung Tongs
ac_st_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Animal Collective\Sung Tongs"

In [22]:
# The Tallest Man on Earth (TMOE): one folder path per album.
# NOTE: absolute paths into one specific Windows iTunes library.
## The Wild Hunt (files live in a nested "DOC040" sub-folder)
tmoe_wild_hunt_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Tallest Man on Earth\The Wild Hunt\DOC040"

## Shallow Grave
tmoe_sg_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Tallest Man on Earth\Shallow Grave"

## There's No Leaving Now
tmoe_nln_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Tallest Man on Earth\There's No Leaving Now"

## Dark Bird Is Home
tmoe_dbh_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Tallest Man on Earth\Dark Bird Is Home"

In [23]:
# The Strokes: one folder path per album.
# NOTE: absolute paths into one specific Windows iTunes library.
## Angles
strokes_angles_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Strokes\Angles"

## First Impressions of Earth (FIOE)
strokes_fioe_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Strokes\First Impressions of Earth"

## Is This It
strokes_iti_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Strokes\Is This It"

## Room On Fire
strokes_room_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\The Strokes\Room On Fire"

In [24]:
# Pogo: one folder path per album / collection.
# NOTE: absolute paths into one specific Windows iTunes library.
## 2016 misc (tracks stored in the "2016" folder)
pogo_2016_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Pogo\2016"

## Kindred Shadows
pogo_ks_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Pogo\Kindred Shadows"

## Star Charts
pogo_sc_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Pogo\Star Charts"

## Misc (tracks stored in the "Unknown Album" folder)
pogo_misc_fp = r"C:\Users\Owner\Music\iTunes\iTunes Media\Music\Pogo\Unknown Album"

### Now combine all the info into a df

In [27]:
# Create a dictionary with artist, album, and folder path information
# Nested mapping {artist: {album label: folder path}} consumed by
# create_music_dataframe.  Album labels become the values of the 'album'
# column, so they are display names and need not match the folder names.
music_data = {
    'Animal Collective': {
        'Fall Be Kind': ac_fall_be_kind_fp,
        'Feels': ac_feels_fp,
        "Meeting of the Waters": ac_waters_fp,
        'MPP': ac_mpp_fp,
        'Painting With': ac_pw_fp,
        'Strawberry Jam': ac_sj_fp,
        'Sung Tongs': ac_st_fp
    },

    'The Tallest Man on Earth': {
        'The Wild Hunt': tmoe_wild_hunt_fp,
        'Shallow Grave': tmoe_sg_fp,
        "There's No Leaving Now": tmoe_nln_fp,
        'Dark Bird Is Home': tmoe_dbh_fp
    },

    'The Strokes': {
        'Angles': strokes_angles_fp,
        # Must point at the FIOE folder; it previously reused the Angles path,
        # which duplicated Angles tracks under the wrong album label.
        'First Impressions of Earth': strokes_fioe_fp,
        'Is This It': strokes_iti_fp,
        'Room On Fire': strokes_room_fp
    },

    'Pogo': {
        '2016_misc': pogo_2016_fp,
        'Kindred Shadows': pogo_ks_fp,
        'Star Charts': pogo_sc_fp,
        'Misc': pogo_misc_fp
    }
}

In [31]:
# Create the DataFrame: one row per audio file found in the folders listed in
# music_data, with columns artist, album, filepath and track_name.
df_music = create_music_dataframe(music_data)

### Add features to the music df

In [35]:
# Add features to the DataFrame: extract_features is run on every row's
# filepath and the values are stored in the mfcc_*, spectral_centroid,
# chroma_* and tempo columns.  Every file is decoded and analysed here.
df_music = add_features_to_df(df_music)

  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
  y, sr = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.

In [38]:
# Save as pickle to preserve all data types.
# The path is relative to the current working directory.
# NOTE(review): presumably the "extracted data" folder must already exist — confirm.
df_music.to_pickle("extracted data/2025_01_07_local_music_librosa_features.pkl")

In [2]:
# df = pd.read_pickle('extracted data/2025_01_07_local_music_librosa_features.pkl')