In [None]:
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchaudio
import torchmetrics
import wandb
from datasets import load_metric, Dataset, load_dataset, Audio
from IPython.display import FileLink
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import AutoFeatureExtractor, ASTForAudioClassification, AutoModelForAudioClassification, TrainingArguments, Trainer

In [None]:
# Genre vocabulary: a genre's position in this list is its integer class code
genre_names = [
    "blues", "classical", "country", "disco", "hiphop",
    "jazz", "metal", "pop", "reggae", "rock",
]
genre_codes = list(range(len(genre_names)))

# Import these into your working script to make sure that we all have the same codes
id2label = dict(zip(genre_codes, genre_names))
label2id = dict(zip(genre_names, genre_codes))

In [None]:
# Register the three dataset directories as environment variables;
# the loading cells below read them back from the environment.
os.environ.update({
    'DATA_BASELINE_TRAIN': '/kaggle/input/data-train-val-test/data_train_val_test',
    'DATA_BASELINE_NOISY_TRAIN': '/kaggle/input/data-noisy-train-val-test/data_noisy_train_val_test',
    'DATA_BASELINE_GENERATED_TRAIN': '/kaggle/input/aml24mst/data_train_val_test',
})

In [None]:
# Load the baseline dataset, then take its 'test' entry out as `test_set`
# (the plotting cells below draw their examples from it).
df_baseline = load_dataset(os.environ.get('DATA_BASELINE_TRAIN'))
test_set = df_baseline.pop('test')

In [None]:
# Load the dataset stored under the DATA_BASELINE_NOISY_TRAIN directory
df_noise = load_dataset(os.environ.get('DATA_BASELINE_NOISY_TRAIN'))

In [None]:
# Load the dataset stored under the DATA_BASELINE_GENERATED_TRAIN directory
df_gen = load_dataset(os.environ.get('DATA_BASELINE_GENERATED_TRAIN'))

In [None]:
# Hub identifier of the pretrained checkpoint whose feature extractor is used below
# (presumably an AST model fine-tuned on AudioSet, judging by its name).
model_checkpoint = "MIT/ast-finetuned-audioset-10-10-0.4593"
# The extractor converts raw waveforms into the `input_values` plotted as spectrograms later on.
feature_extractor = AutoFeatureExtractor.from_pretrained(model_checkpoint)
# Sampling rate the extractor is configured for; passed along with every extractor call.
sampling_rate = feature_extractor.sampling_rate

## Plot normal audio files

In [None]:
# Collect one raw waveform per panel from the held-out `test_set`.
# NOTE(review): the panel titles pair these indices with `genre_names` purely by
# position, which presumes the test split is ordered by genre in blocks of 20
# clips — confirm against the dataset.
sample_indices = [0, 20, 40, 60, 80, 100, 120, 140, 160, 180]  # Adjust as needed
test_inputs = [test_set[k]['audio']['array'] for k in sample_indices]

# Grid layout: 5 columns, as many rows as needed to fit every waveform
num_plots = len(test_inputs)
num_cols = 5
num_rows = (num_plots + num_cols - 1) // num_cols  # ceiling division

fig = plt.figure(figsize=(20, 8))
for i, waveform in enumerate(test_inputs, 1):
    ax = fig.add_subplot(num_rows, num_cols, i)
    ax.plot(waveform)
    ax.set_title(genre_names[i - 1])
    ax.set_xlabel('Sample index')
    ax.set_ylabel('Amplitude')

fig.tight_layout()
fig.savefig('waveforms.png')
# Last expression of the cell: renders a download link to the saved figure
# (FileLink comes from IPython.display, imported at the top of the notebook).
FileLink(r'waveforms.png')

In [None]:
# Run the feature extractor on one test clip per panel and keep its `input_values`.
# NOTE(review): the panel titles pair these indices with `genre_names` purely by
# position, which presumes the test split is ordered by genre in blocks of 20
# clips — confirm against the dataset.
sample_indices = [0, 20, 40, 60, 80, 100, 120, 140, 160, 180]  # Adjust as needed
test_inputs = []
for k in sample_indices:
    waveform = test_set[k]['audio']['array']
    features = feature_extractor(waveform, sampling_rate=sampling_rate, return_tensors='pt')
    test_inputs.append(features['input_values'].numpy())

# Grid layout: 5 columns, as many rows as needed to fit every spectrogram
num_plots = len(test_inputs)
num_cols = 5
num_rows = (num_plots + num_cols - 1) // num_cols  # ceiling division

fig = plt.figure(figsize=(20, 8))
for i, spec in enumerate(test_inputs, 1):
    ax = fig.add_subplot(num_rows, num_cols, i)
    # NOTE(review): `spec` presumably carries a leading batch axis of size 1, so
    # `.T` would put frequency on the rows and time on the columns — confirm.
    ax.imshow(spec.T, aspect='auto', origin='lower', cmap='viridis')
    ax.set_title(genre_names[i - 1])
    ax.set_xlabel('Time')
    ax.set_ylabel('Frequency')

fig.tight_layout()
fig.savefig('spectograms.png')
# Last expression of the cell: renders a download link to the saved figure
# (FileLink comes from IPython.display, imported at the top of the notebook).
FileLink(r'spectograms.png')

## Plot normal and noisy audio file example

In [None]:
def file_name_search(file_name):
    '''
    Find the ``.wav`` file name in the path of the file.

    Forward slashes and backslashes are both treated as path separators, so
    the result is the same on Kaggle (POSIX paths) and on a local Windows
    machine.

    Parameters
    ----------
    file_name : str
        Path to a ``.wav`` file.

    Returns
    -------
    str
        The first path component ending in ``.wav``, without any directory part.

    Raises
    ------
    ValueError
        If ``file_name`` contains no ``.wav`` component.
    '''
    # One separator-agnostic pattern: a run of non-separator characters ending in ".wav".
    match = re.search(r'([^/\\]+\.wav)', file_name)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None
        raise ValueError(f"no .wav file name found in {file_name!r}")
    return match.group(1)

In [None]:
# File names of the two training clips (indices 120 and 180) plotted in the next cell
clean_path = file_name_search(df_noise['train'][120]['audio']['path'])
noise_path = file_name_search(df_noise['train'][180]['audio']['path'])
print(clean_path, noise_path, sep='\n')

In [None]:
# Two training clips and the feature-extractor output for each of them
clean = df_noise['train'][120]['audio']['array']
noise = df_noise['train'][180]['audio']['array']
clean_spec = feature_extractor(clean, sampling_rate=sampling_rate, return_tensors='pt')['input_values'].numpy()
noise_spec = feature_extractor(noise, sampling_rate=sampling_rate, return_tensors='pt')['input_values'].numpy()

# Panel order: top row = waveforms, bottom row = spectrograms
blues1 = [clean, noise, clean_spec, noise_spec]

# 2x2 grid: one column per clip
num_cols = 2
num_rows = 2

fig = plt.figure(figsize=(7, 6))
# `panel` rather than `input`, so the builtin input() is not shadowed for later cells
for i, panel in enumerate(blues1, 1):
    ax = fig.add_subplot(num_rows, num_cols, i)
    if i < 3:
        # Panels 1 and 2: raw waveforms
        ax.plot(panel)
        ax.set_xlabel('Sample index')
        ax.set_ylabel('Amplitude')
        ax.set_title('Original data' if i == 1 else 'Noise data')
    else:
        # Panels 3 and 4: extractor output shown as an image
        ax.imshow(panel.T, aspect='auto', origin='lower', cmap='viridis')
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')

fig.tight_layout()
fig.savefig('original_noise.png')
# Last expression of the cell: renders a download link to the saved figure
# (FileLink comes from IPython.display, imported at the top of the notebook).
FileLink(r'original_noise.png')

## Plot Generated Data

In [None]:
# One clip from the generated training set and the feature-extractor output for it
gen_example = df_gen['train'][201]['audio']['array']
gen_example_spec = feature_extractor(gen_example, sampling_rate=sampling_rate, return_tensors='pt')['input_values'].numpy()

# Single column: waveform on top, spectrogram underneath
num_cols = 1
num_rows = 2

fig = plt.figure(figsize=(6, 5))
# `panel` rather than `input`, so the builtin input() is not shadowed for later cells
for i, panel in enumerate([gen_example, gen_example_spec], 1):
    ax = fig.add_subplot(num_rows, num_cols, i)
    if i == 1:
        # Panel 1: raw waveform
        ax.plot(panel)
        ax.set_xlabel('Sample index')
        ax.set_ylabel('Amplitude')
        ax.set_title('Generated Data')
    else:
        # Panel 2: extractor output shown as an image
        ax.imshow(panel.T, aspect='auto', origin='lower', cmap='viridis')
        ax.set_xlabel('Time')
        ax.set_ylabel('Frequency')

fig.tight_layout()
fig.savefig('generated_example.png')
# Last expression of the cell: renders a download link to the saved figure
# (FileLink comes from IPython.display, imported at the top of the notebook).
FileLink(r'generated_example.png')