In [51]:
import argparse
import pandas as pd
import numpy as np
import librosa
import os


In [37]:
from transformers import AutoFeatureExtractor

model_id = "anton-l/xtreme_s_xlsr_300m_minds14"
feature_extractor = AutoFeatureExtractor.from_pretrained(
    model_id, do_normalize=True, return_attention_mask=True
)

In [38]:
sampling_rate = feature_extractor.sampling_rate
sampling_rate

16000

In [1]:
def load_data(file_path):
    df = pd.read_csv(file_path)
    return df

In [69]:
csv_file_path = 'C:/Users/bruno/OneDrive - Imperial College London/Pers/Portfolio/Sound Classifier/nuwehack-data-AI_SR/data/labels_paths_train.csv'
df = load_data(csv_file_path)

# To see the DataFrame
print(df)

           Idx  Label                 Path
0    audio_876      2  train/audio_876.wav
1    audio_208      4  train/audio_208.wav
2    audio_408      3  train/audio_408.wav
3    audio_772      2  train/audio_772.wav
4    audio_356      2  train/audio_356.wav
..         ...    ...                  ...
695  audio_525      2  train/audio_525.wav
696  audio_291      2  train/audio_291.wav
697  audio_305      2  train/audio_305.wav
698  audio_524      2  train/audio_524.wav
699  audio_194      3  train/audio_194.wav

[700 rows x 3 columns]


In [70]:
def preprocess_data(df):
    data = []
    base_directory = "C:/Users/bruno/OneDrive - Imperial College London/Pers/Portfolio/Sound Classifier/nuwehack-data-AI_SR/data/"
    for index, row in df.iterrows():
        file_path = os.path.join(base_directory, row['Path'])
        audio_array, _ = librosa.load(file_path, sr=None)  # Preserve the original sampling rate
        
        # Structure the data
        item = {
            "file": file_path,
            "audio": {
                "path": file_path,
                "array": audio_array,
                "sampling_rate": sampling_rate,
            },
            "label": row['Label'],  # Using 'Label' as the genre/classification label
        }
        data.append(item)
    return data

In [71]:
data = preprocess_data(df)


In [72]:
print(data[0])

{'file': 'C:/Users/bruno/OneDrive - Imperial College London/Pers/Portfolio/Sound Classifier/nuwehack-data-AI_SR/data/train/audio_876.wav', 'audio': {'path': 'C:/Users/bruno/OneDrive - Imperial College London/Pers/Portfolio/Sound Classifier/nuwehack-data-AI_SR/data/train/audio_876.wav', 'array': array([-0.02017212, -0.00720215, -0.01403809, ..., -0.0194397 ,
       -0.01556396, -0.009552  ], dtype=float32), 'sampling_rate': 16000}, 'label': 2}


In [76]:

sample = data[0]["audio"]
print(f"Mean: {np.mean(sample['array']):.3}, Variance: {np.var(sample['array']):.3}")
inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
print(f"inputs keys: {list(inputs.keys())}")
print(
    f"Mean: {np.mean(inputs['input_values']):.3}, Variance: {np.var(inputs['input_values']):.3}"
)

Mean: -6.52e-09, Variance: 1.0


KeyError: 'sampling_rate'

In [74]:
for item in data:
    sample = item["audio"]
    
    # Process the audio sample through the feature extractor
    inputs = feature_extractor(sample["array"], sampling_rate=sample["sampling_rate"])
    
    # Normalize the 'input_values'
    normalized_input_values = (inputs['input_values'] - np.mean(inputs['input_values'])) / np.sqrt(np.var(inputs['input_values']))
    
    # Option 1: Replace the original 'input_values' with the normalized ones
    inputs['array'] = normalized_input_values
    
    # Option 2: Add the normalized 'input_values' as a new key to the 'audio' dictionary
    # sample['normalized_input_values'] = normalized_input_values
    
    # Update the item with the new inputs (choose depending on whether you used option 1 or 2)
    item["audio"] = inputs  # If you're updating the inputs directly
    # item["audio"] = sample  # If you're updating the sample with a new key for normalized values


In [80]:
data[0]

{'file': 'C:/Users/bruno/OneDrive - Imperial College London/Pers/Portfolio/Sound Classifier/nuwehack-data-AI_SR/data/train/audio_876.wav',
 'audio': {'input_values': [array([-0.95584095, -0.341237  , -0.6651694 , ..., -0.92113394,
        -0.73747575, -0.45258877], dtype=float32)], 'attention_mask': [array([1, 1, 1, ..., 1, 1, 1])], 'array': array([[-0.95594823, -0.3412753 , -0.6652441 , ..., -0.92123735,
         -0.73755854, -0.45263958]], dtype=float32)},
 'label': 2}

In [24]:
def save_data(df, output_file):
    # TODO: Save processed data to a CSV file
    # I don't need to do this for now
    return

In [25]:
output_csv_file = 'C:/Users/bruno/OneDrive - Imperial College London/Pers/Portfolio/Sound Classifier/nuwehack-data-AI_SR/data/transformed_data.csv'
save_data(data, output_csv_file)

In [27]:
def parse_arguments():
    parser = argparse.ArgumentParser(description='Data processing script for Automated Instrument Sound Recognition Hackathon')
    parser.add_argument(
        '--input_file',
        type=str,
        default='data/labels_paths_train.csv',
        help='Path to the raw data file to process'
    )
    parser.add_argument(
        '--output_file', 
        type=str, 
        default='data/processed_data/', 
        help='Folder path to save the processed data'
    )
    return parser.parse_args()

In [28]:
def main(input_file, output_file):
    df = load_data(input_file)
    df_clean = clean_data(df)
    df_processed = preprocess_data(df_clean)
    save_data(df_processed, output_file)

if __name__ == "__main__":
    args = parse_arguments()
    main(args.input_file, args.output_file)

usage: ipykernel_launcher.py [-h] [--input_file INPUT_FILE]
                             [--output_file OUTPUT_FILE]
ipykernel_launcher.py: error: unrecognized arguments: --f=c:\Users\bruno\AppData\Roaming\jupyter\runtime\kernel-v2-1476Y5X4owonEewQ.json


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
