In [70]:
import librosa as lb
import soundfile as sf
import numpy as np
import os

In [71]:
# Passed as `top_db` to librosa.effects.trim in trim_start_end_silence.
SILENCE_THRESHOLD_DB = 15
# Default `target_db` of normalize_amplitude; converted there to a linear
# amplitude via 10 ** (dB / 20).
NORMALIZING_LEVEL = -0.5

## Gather all Samples to display 

In [None]:
# Root folder that is walked recursively to collect the sample files.
sample_root_folder = r'C:\Users\emilo\Documents\Splice - bruder_emil\Samples\packs'

# 'audio_data' folder inside the current working directory; created if missing.
cwd = os.getcwd()   
data_folder = os.path.join(cwd, 'audio_data')
os.makedirs(data_folder, exist_ok=True)  # note: go one level below cwd here

def file_finder(root_folder, extensions=None):
    """Recursively collect the full paths of the files below ``root_folder``.

    Parameters
    ----------
    root_folder : str
        Directory that is walked with ``os.walk``.
    extensions : iterable of str, optional
        File-name suffixes to keep, e.g. ``('.wav', '.aiff')``. The comparison
        is case-insensitive. ``None`` (the default) keeps every file, which is
        the original behavior.

    Returns
    -------
    list of str
        One path per file, in the order ``os.walk`` yields them. The list is
        empty when ``root_folder`` does not exist or contains no files.
    """
    # Normalize the suffixes once; str.endswith accepts a tuple of candidates.
    wanted = None if extensions is None else tuple(ext.lower() for ext in extensions)

    all_samples = []
    for root, _dirs, files in os.walk(root_folder):
        for file in files:
            if wanted is not None and not file.lower().endswith(wanted):
                continue
            all_samples.append(os.path.join(root, file))
    return all_samples

In [73]:
# Collect every file path below the sample root and preview the first five.
data = file_finder(sample_root_folder)
data[:5]

['C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\2-Step Garage\\SM_White_Label_-_2-Step_Garage_-_Wav\\drum_hits\\sampler_formats\\ableton_drum_rack\\sm_white_label_-_2-step_garage_-_sampler_formats_Project\\Samples\\Imported\\2sg_hat_bigga.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\2-Step Garage\\SM_White_Label_-_2-Step_Garage_-_Wav\\drum_hits\\sampler_formats\\exs24\\sm_white_label_-_2-step_garage_-_sampler_formats\\Samples\\kicks\\2sg_kick_cdj.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\90s Deep House and Garage\\SM_White_Label_-_90s_Deep_House___Garage_-_Splice\\drum_hits\\hats\\dhg_hat_usg.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\90s Deep House and Garage\\SM_White_Label_-_90s_Deep_House___Garage_-_Splice\\fx_shots\\dhg_fx_wheel.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\90s House\\SM74_-_90s_House_-_Wav\\one_shots\\drum_hits\\sampler_

# Basic functionality: one-shots only

# Extract Features from samples 
## Allows for best visualization of the data based on sample-similarity

### 1. Step: Load and prepare the data

In [74]:
# Load audio files

def load_audio(file_path):
    """Load one audio file with librosa.

    The file is loaded with ``sr=None`` and ``mono=True``.

    Returns
    -------
    tuple
        ``(audio, sr)`` exactly as returned by ``librosa.load``.
    """
    samples, sample_rate = lb.load(file_path, mono=True, sr=None)
    return samples, sample_rate

In [75]:
# Important: only used so that samples are displayed "correctly" -> it is not
# supposed to change the actual sample.
# Removes the peak amplitude as a possible reason for similar samples not being
# placed in the same cluster.

def normalize_amplitude(input_as_array, target_db = NORMALIZING_LEVEL):
    """Scale a signal so that its peak absolute amplitude sits at ``target_db``.

    Parameters
    ----------
    input_as_array : numpy.ndarray
        Audio samples.
    target_db : float, optional
        Desired peak level in decibels; converted to a linear amplitude with
        ``10 ** (target_db / 20)``. Defaults to ``NORMALIZING_LEVEL``.

    Returns
    -------
    numpy.ndarray
        The rescaled signal. An empty or completely silent signal is returned
        as an unchanged copy, because it has no peak to scale against.
    """
    # Empty input: np.max would raise on a zero-size array.
    if np.size(input_as_array) == 0:
        return np.array(input_as_array)

    # Peak magnitude regardless of polarity (positive or negative excursion).
    max_amplitude = np.max(np.abs(input_as_array))

    # Silent input: dividing by a zero peak would fill the result with NaN/inf.
    if max_amplitude == 0:
        return np.array(input_as_array)

    target_amplitude = 10 ** (target_db / 20.0)
    normalized_file = input_as_array * (target_amplitude / max_amplitude)
    return normalized_file

In [76]:
def trim_start_end_silence(input_as_array):
    """Cut leading and trailing silence from a signal.

    Delegates to ``librosa.effects.trim`` with ``top_db=SILENCE_THRESHOLD_DB``,
    so the silence threshold is whatever that constant is set to.

    Returns
    -------
    numpy.ndarray
        The trimmed signal; the index interval that ``trim`` also returns is
        discarded.
    """
    # trim() returns (trimmed_signal, index_interval); only the signal is used.
    return lb.effects.trim(input_as_array, top_db=SILENCE_THRESHOLD_DB)[0]

In [77]:
def preprocess_sample(input_as_array):
    """Prepare a raw signal for feature extraction.

    The signal is first peak-normalized with ``normalize_amplitude`` and the
    result is then passed through ``trim_start_end_silence``.
    """
    return trim_start_end_silence(normalize_amplitude(input_as_array))

In [78]:
def make_mel_db(input_as_array, sample_rate):
    """Compute a mel spectrogram of the signal on a decibel scale.

    The power mel spectrogram from ``librosa.feature.melspectrogram`` is
    converted with ``librosa.power_to_db`` using ``ref=np.max``.
    """
    # 2D mel-scaled power spectrogram (mel scale follows human pitch perception).
    mel_power = lb.feature.melspectrogram(sr=sample_rate, y=input_as_array)
    # Power -> decibels.
    return lb.power_to_db(mel_power, ref=np.max)

### 2. Step: Extract Features

In [79]:
def get_features(audio_as_array, sr):
    """Extract the descriptive features of one (preprocessed) signal.

    Returns
    -------
    tuple
        ``(sr, duration, rms_mean, rms_std, spectral_flatness,
        spectral_bandwidth, spectral_centroid, zero_crossing_rate)``.
        ``rms_mean`` and ``rms_std`` are the mean and standard deviation of
        librosa's RMS output. The last four entries are the arrays returned by
        the corresponding ``librosa.feature`` functions, not yet reduced.
    """
    duration = lb.get_duration(y=audio_as_array, sr=sr)

    # RMS energy as computed by librosa; summarized below by mean and std.
    frame_rms = lb.feature.rms(y=audio_as_array)

    # How noise-like the sound is.
    flatness = lb.feature.spectral_flatness(y=audio_as_array)
    # Width of the occupied frequency range.
    bandwidth = lb.feature.spectral_bandwidth(y=audio_as_array, sr=sr)
    # Lower = bassier, higher = more treble.
    centroid = lb.feature.spectral_centroid(y=audio_as_array, sr=sr)
    # Noise vs. tonal content -> higher = more noise.
    zcr = lb.feature.zero_crossing_rate(y=audio_as_array)

    return (
        sr,
        duration,
        np.mean(frame_rms),
        np.std(frame_rms),
        flatness,
        bandwidth,
        centroid,
        zcr,
    )

# DataFrame 

In [80]:
import pandas as pd

In [81]:
# Scan the sample root again (repeats the scan from the earlier cell) and
# preview the first five paths.
data = file_finder(sample_root_folder)
data[:5]

['C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\2-Step Garage\\SM_White_Label_-_2-Step_Garage_-_Wav\\drum_hits\\sampler_formats\\ableton_drum_rack\\sm_white_label_-_2-step_garage_-_sampler_formats_Project\\Samples\\Imported\\2sg_hat_bigga.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\2-Step Garage\\SM_White_Label_-_2-Step_Garage_-_Wav\\drum_hits\\sampler_formats\\exs24\\sm_white_label_-_2-step_garage_-_sampler_formats\\Samples\\kicks\\2sg_kick_cdj.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\90s Deep House and Garage\\SM_White_Label_-_90s_Deep_House___Garage_-_Splice\\drum_hits\\hats\\dhg_hat_usg.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\90s Deep House and Garage\\SM_White_Label_-_90s_Deep_House___Garage_-_Splice\\fx_shots\\dhg_fx_wheel.wav',
 'C:\\Users\\emilo\\Documents\\Splice - bruder_emil\\Samples\\packs\\90s House\\SM74_-_90s_House_-_Wav\\one_shots\\drum_hits\\sampler_

In [82]:
# Build one feature record per sample file. Processing is best-effort: a file
# that cannot be loaded or analyzed is reported and skipped.
list_for_dataframe = []

for sample in data:
    try:
        file_name = os.path.basename(sample)
        audio, sr = load_audio(sample)
        if audio is None or sr is None:
            continue
        preprocessed_audio = preprocess_sample(audio)
        # NOTE(review): mel_db is computed but not stored in the record below.
        mel_db = make_mel_db(preprocessed_audio, sr)
        (
            sr,
            sample_length,
            rms_mean,
            rms_std,
            spectral_flatness,
            spectral_bandwidth,
            spectral_centroid,
            zero_crossing_rate,
        ) = get_features(preprocessed_audio, sr)

        features = {
            'file_name': file_name,
            'sample_length': sample_length,
            'rms_mean': rms_mean,
            'rms_std': rms_std,
            'spectral_flatness': spectral_flatness,
            'spectral_bandwidth': spectral_bandwidth,
            'spectral_centroid': spectral_centroid,
            'zero_crossing_rate': zero_crossing_rate
        }
        list_for_dataframe.append(features)
    except Exception as e:
        # Some exceptions carry an empty message, which left the reason blank;
        # the exception class name always says what went wrong.
        print(f"Error processing file {sample}: {type(e).__name__}: {e}")

# One row per successfully processed sample.
df = pd.DataFrame(list_for_dataframe)
print(df)

Error processing file C:\Users\emilo\Documents\Splice - bruder_emil\Samples\packs\Raw House Cuts\Sample_Magic_-_Raw_House_Cuts_-_WAV\loops\vocal_loops\RHC_120_vocal_loop_oldsoul_Cmin.wav.zpa: 
Error processing file C:\Users\emilo\Documents\Splice - bruder_emil\Samples\packs\Tony Romera - Trademark Series\Toolroom_-_Tony_Romera_-_Trademark_Series\One_Shots\Drums\Kicks\T_TSTR_kick_oneshot_ukay.wav.zpa: 
                                              file_name  sample_length  \
0                                     2sg_hat_bigga.wav       0.069660   
1                                      2sg_kick_cdj.wav       0.162540   
2                                       dhg_hat_usg.wav       0.104490   
3                                      dhg_fx_wheel.wav       0.882358   
4                                    ny_ophat_livin.wav       0.301859   
...                                                 ...            ...   
2669  DS_VUKG_124_vocal_hook_female_loop_time_verse_...      14.245442   
267

In [83]:
df.head()

Unnamed: 0,file_name,sample_length,rms_mean,rms_std,spectral_flatness,spectral_bandwidth,spectral_centroid,zero_crossing_rate
0,2sg_hat_bigga.wav,0.06966,0.143392,0.069376,"[[0.34824944, 0.34124035, 0.36512172, 0.373325...","[[6845.0664959826445, 6597.070993731029, 6347....","[[11628.03530060286, 12245.089774616446, 12558...","[[0.279296875, 0.4287109375, 0.56787109375, 0...."
1,2sg_kick_cdj.wav,0.16254,0.510856,0.205252,"[[0.005304109, 0.00257921, 0.0006072147, 7.448...","[[4157.725519690624, 4194.186048246147, 3954.7...","[[2675.0922491839697, 2197.6141309722775, 1644...","[[0.02392578125, 0.025390625, 0.0263671875, 0...."
2,dhg_hat_usg.wav,0.10449,0.111331,0.059912,"[[0.23216747, 0.21083269, 0.15930599, 0.139946...","[[4816.00669894319, 4797.675369703416, 4663.13...","[[7958.766245673152, 7976.873948666755, 7818.2...","[[0.1591796875, 0.23291015625, 0.3134765625, 0..."
3,dhg_fx_wheel.wav,0.882358,0.282015,0.093424,"[[0.0072932886, 0.002170617, 0.0005545649, 0.0...","[[3240.995614924431, 2394.04500933452, 2451.14...","[[2503.4080840549427, 2427.064342380947, 2891....","[[0.03564453125, 0.0625, 0.0703125, 0.06201171..."
4,ny_ophat_livin.wav,0.301859,0.148086,0.045668,"[[0.14152071, 0.1339682, 0.09250415, 0.0732399...","[[4562.069564568951, 4563.742425774321, 4383.1...","[[11089.38570534048, 10916.241368329742, 10549...","[[0.25634765625, 0.3798828125, 0.48583984375, ..."


### Reduce arrays to a mean and standard deviation

In [84]:
import numpy as np

# Reduce each array-valued feature column to its mean and standard deviation.
array_feature_columns = ['spectral_flatness', 'spectral_bandwidth', 'spectral_centroid', 'zero_crossing_rate']

for column in array_feature_columns:
    # A column that is already gone was reduced by an earlier run of this cell;
    # skipping it keeps the cell safe to re-run.
    if column not in df.columns:
        continue
    df[f'{column}_mean'] = df[column].apply(np.mean)
    df[f'{column}_std'] = df[column].apply(np.std)

# Remove the original arrays, which are no longer needed. errors='ignore'
# makes this a no-op for columns that were dropped before.
df.drop(columns=array_feature_columns, inplace=True, errors='ignore')

# Output as a sanity check.
df.head()


Unnamed: 0,file_name,sample_length,rms_mean,rms_std,spectral_flatness_mean,spectral_flatness_std,spectral_bandwidth_mean,spectral_bandwidth_std,spectral_centroid_mean,spectral_centroid_std,zero_crossing_rate_mean,zero_crossing_rate_std
0,2sg_hat_bigga.wav,0.06966,0.143392,0.069376,0.368317,0.016868,6403.459615,216.176266,11825.951415,634.322103,0.425642,0.126881
1,2sg_kick_cdj.wav,0.16254,0.510856,0.205252,0.000745,0.00148,2146.030276,1680.559099,864.004161,946.89885,0.007129,0.009136
2,dhg_hat_usg.wav,0.10449,0.111331,0.059912,0.134991,0.049456,4628.190863,148.929099,7484.009791,382.581299,0.245557,0.059547
3,dhg_fx_wheel.wav,0.882358,0.282015,0.093424,0.028588,0.049069,3470.510634,882.606921,4453.312289,1294.820164,0.127181,0.062058
4,ny_ophat_livin.wav,0.301859,0.148086,0.045668,0.076336,0.052573,4221.875746,404.433825,9316.975243,1346.671772,0.392614,0.069868


In [None]:
from sklearn.preprocessing import MinMaxScaler

# Numeric feature columns that are rescaled to the range [0, 1].
# The names must match the '<feature>_mean' / '<feature>_std' columns of df.
features_for_scaling = [
    'sample_length',
    'rms_mean',
    'rms_std',
    'spectral_flatness_mean',
    'spectral_flatness_std',
    'spectral_bandwidth_mean',
    'spectral_bandwidth_std',
    'spectral_centroid_mean',
    'spectral_centroid_std',
    'zero_crossing_rate_mean',
    'zero_crossing_rate_std',
]

# MinMaxScaler scales every column independently, so a single fit over all
# columns gives the same values as fitting column by column. It also leaves
# `scaler` fitted on all features instead of only the last one.
scaler = MinMaxScaler()
df[features_for_scaling] = scaler.fit_transform(df[features_for_scaling])
