### WANDB

In [11]:
import os

import yaml
import wandb

# Load configuration from conf.yaml
# NOTE(review): yaml.full_load can build more than plain data types. If this
# file only holds plain mappings/lists/scalars, yaml.safe_load is the safer
# loader — confirm before switching.
with open("conf_yamls/cap_0_conf.yaml", "rb") as stream:
    conf = yaml.full_load(stream)

# Never keep an API key in the notebook: anyone who can read the file can use
# it. The key is read from the WANDB_API_KEY environment variable instead; when
# the variable is unset, key=None lets wandb fall back to its stored
# credentials (e.g. ~/.netrc) or prompt for a key.
# The key that was previously hardcoded here should be revoked.
wandb.login(key=os.environ.get("WANDB_API_KEY"))

# Extract WandB configuration
wandb_conf = conf.get("wandb_conf", {})
api = wandb.Api()
runs = api.runs(wandb_conf['project'])  # project name comes from the YAML config
# Get the latest run
# assumes api.runs yields runs oldest-first, so the last one is the newest — TODO confirm
latest_run = runs[len(runs)-1]

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33myuvi-kiit[0m ([33myuvi-kiit-university-of-potsdam[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/yuvidh/.netrc


In [12]:
# Display the ID of the run stored in `latest_run`.
latest_run.id

'tqhxk9vc'

In [None]:
import wandb

# Walk over every run of the configured project and print its position in the
# listing together with its name and ID.
api = wandb.Api()
runs = api.runs(wandb_conf['project'])
for position, run in enumerate(runs):
    print("num =", position, "run name = ", run.name, " id: ", run.id)

In [None]:
import shelve
import sys

# Path of the shelve database to inspect
db_path = "z_ckpts/nk56srrr/train_tid_2_items.db"  # Replace with your actual path

total_size = 0  # Running sum of the per-entry sizes, in bytes

with shelve.open(db_path) as db:
    for index, (fid, group_scores) in enumerate(db.items()):
        if index == 0:
            # Preview only the first entry so the output stays short
            print(fid, group_scores)
            print(len(group_scores)) # 19195
        # Previously total_size was never updated, so the report below always
        # said 0 bytes. sys.getsizeof is shallow (it does not follow references
        # into nested containers), so the sum is a lower-bound estimate.
        total_size += sys.getsizeof(fid) + sys.getsizeof(group_scores)

# Print the total memory usage
print(f"Total size of the entire database in RAM: {total_size} bytes")

### Library Imports 

In [None]:
%load_ext autoreload
%autoreload 2
import librosa
import numpy as np
import matplotlib.pyplot as plt
import librosa.display

### Audio as Waveform

- A waveform is a visual representation of the audio signal, where the x-axis represents time and the y-axis represents amplitude.

In [None]:
# Decode the clip into a sample array plus the sampling rate librosa used.
array, sampling_rate = librosa.load("data/Clotho/development/voice.wav")

# Draw amplitude over time on a figure widened to 12.
fig = plt.figure()
fig.set_figwidth(12)
librosa.display.waveshow(array, sr=sampling_rate)
plt.show()

### Audio as Frequency Spectrum

- A frequency spectrum plots the strength of the various frequency components that are present in this audio segment. The frequency values are on the x-axis, usually plotted on a logarithmic scale, while their amplitudes are on the y-axis.

In [None]:
# Keep only the first 500 samples so the plot stays readable
dft_input = array[:500]

# Taper the excerpt with a Hann window, then take the real-input DFT
window = np.hanning(len(dft_input))
windowed_input = dft_input * window
dft = np.fft.rfft(windowed_input)

# Magnitude of each bin, converted to decibels with the maximum as reference
amplitude = np.abs(dft)
amplitude_db = librosa.amplitude_to_db(amplitude, ref=np.max)

# Frequency (Hz) associated with each DFT bin
frequency = librosa.fft_frequencies(sr=sampling_rate, n_fft=len(dft_input))

fig = plt.figure()
fig.set_figwidth(12)
plt.plot(frequency, amplitude_db)
plt.xscale("log")
plt.xlabel("Frequency (Hz)")
plt.ylabel("Amplitude (dB)")

### Audio as Spectrogram

- A spectrum only shows a frozen snapshot of the frequencies at a given instant.
- The solution is to take multiple DFTs, each covering only a small slice of time, and stack the resulting spectra together into a spectrogram.
- A spectrogram plots the frequency content of an audio signal as it changes over time. It allows you to see time, frequency, and amplitude all on one graph. The algorithm that performs this computation is the STFT or Short Time Fourier Transform.
- The spectrogram is one of the most informative audio tools available to you.

In [None]:
import numpy as np

# Short-time Fourier transform of the signal, then magnitude in decibels with
# the maximum as reference.
D = librosa.stft(array)
S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)

plt.figure().set_figwidth(12)
# Pass the real sampling rate: without `sr`, specshow labels the time and
# frequency axes for its own default rate, which is only right when the audio
# happens to use that rate. This matches how the mel-spectrogram cell calls it.
librosa.display.specshow(S_db, sr=sampling_rate, x_axis="time", y_axis="hz")
plt.colorbar()
plt.show()

### Mel Spectrograms

- A mel spectrogram is a variation of the spectrogram that is commonly used in speech processing and machine learning tasks. It is similar to a spectrogram in that it shows the frequency content of an audio signal over time, but on a different frequency axis.
- In a standard spectrogram, the frequency axis is linear and is measured in hertz (Hz). However, the human auditory system is more sensitive to changes in lower frequencies than higher frequencies, and this sensitivity decreases logarithmically as frequency increases. The mel scale is a perceptual scale that approximates the non-linear frequency response of the human ear.

In [None]:
# Sampling rate and upper frequency bound are shared by the computation and
# the plot, so they are spelled out once.
mel_options = {"sr": sampling_rate, "fmax": 8000}

# Mel-scaled power spectrogram with 128 mel bands, converted to decibels with
# the maximum as reference.
S = librosa.feature.melspectrogram(y=array, n_mels=128, **mel_options)
S_dB = librosa.power_to_db(S, ref=np.max)

fig = plt.figure()
fig.set_figwidth(12)
librosa.display.specshow(S_dB, x_axis="time", y_axis="mel", **mel_options)
plt.colorbar()
plt.show()

## Working with the datasets

In [None]:
from datasets import load_dataset

In [None]:
# Load the "train" split of the "en-AU" configuration of PolyAI/minds14.
minds = load_dataset("PolyAI/minds14", name = "en-AU", split = "train")

In [None]:
# Display the loaded dataset object.
minds

In [None]:
# Keep the first record as a running example and display it.
example= minds[0]
example

In [None]:
# Converting the int classes to strings
# `int2str` of the "intent_class" feature is kept as `id2label` and applied to
# the example's integer class below.
id2label = minds.features["intent_class"].int2str
id2label(example["intent_class"])

In [None]:
# Drop the columns that are not needed further on.
# Only columns still present are requested, so the cell can be re-run safely:
# a second run would otherwise ask remove_columns for columns that are gone.
columns_to_remove = [
    column
    for column in ("lang_id", "english_transcription")
    if column in minds.column_names
]
if columns_to_remove:
    minds = minds.remove_columns(columns_to_remove)
minds

In [None]:
import librosa
import matplotlib.pyplot as plt
import librosa.display

# Take the samples and their sampling rate from the example record.
array, sampling_rate = example["audio"]["array"], example["audio"]["sampling_rate"]

# Draw the example's waveform on a figure widened to 12.
fig = plt.figure()
fig.set_figwidth(12)
librosa.display.waveshow(array, sr=sampling_rate)
plt.show()

## Preprocessing the Dataset

### Resampling the audio data 

In [None]:
from datasets import Audio
# Cast the "audio" column to an Audio feature with a 16 kHz sampling rate.
# NOTE(review): presumably the resampling itself happens when examples are
# accessed, not here — confirm against the datasets documentation.
minds = minds.cast_column("audio", Audio(sampling_rate=16_000))

In [None]:
# Display the first record of the dataset.
minds[0]

### Filtering the datasets

In [None]:
# Upper bound (exclusive) on the clip duration that is kept, in seconds.
MAX_DURATION_IN_SECONDS = 20.0


def is_audio_length_in_range(input_length):
    """Tell whether a clip is short enough to keep.

    Args:
        input_length: Duration of the clip, compared against
            ``MAX_DURATION_IN_SECONDS``.

    Returns:
        True when ``input_length`` is strictly below the limit, else False.
    """
    within_limit = input_length < MAX_DURATION_IN_SECONDS
    return within_limit

In [None]:
# Read each clip's duration from its audio file with librosa
new_column = [librosa.get_duration(path=audio_path) for audio_path in minds["path"]]

# Attach the durations as a temporary "duration" column, keep only the rows
# accepted by `is_audio_length_in_range`, then drop the helper column again
minds = (
    minds.add_column("duration", new_column)
    .filter(is_audio_length_in_range, input_columns=["duration"])
    .remove_columns(["duration"])
)
minds

### Preprocessing Audio Data

In [None]:
from transformers import WhisperFeatureExtractor

# Load the feature extractor published under the "openai/whisper-small" name.
feature_extractor = WhisperFeatureExtractor.from_pretrained("openai/whisper-small")

In [None]:
def prepare_dataset(example):
    """Run the module-level ``feature_extractor`` on one example's audio.

    Args:
        example: Mapping with an ``"audio"`` entry that itself provides
            ``"array"`` and ``"sampling_rate"``.

    Returns:
        Whatever ``feature_extractor`` returns for that audio, called with
        ``padding=True``.
    """
    clip = example["audio"]
    return feature_extractor(
        clip["array"],
        sampling_rate=clip["sampling_rate"],
        padding=True,
    )

In [None]:
# Apply `prepare_dataset` across the dataset, then display the result.
minds = minds.map(prepare_dataset)
minds

In [None]:
import random

def generate_sample_data(num_points, seed=None):
    """Generates sample data with random relevance.

    Args:
        num_points: Number of items to generate.
        seed: Optional seed. When given, a private ``random.Random(seed)`` is
            used, so the result is reproducible and the module-level generator
            is left untouched. When ``None`` (the default), the module-level
            ``random`` functions are used, exactly as before.

    Returns:
        A list of ``(object_id, score, is_relevant)`` tuples. ``object_id``
        runs sequentially from 1, ``score`` comes from ``random()`` and
        ``is_relevant`` is 0 or 1.
    """
    rng = random if seed is None else random.Random(seed)
    # Tuple elements are evaluated left to right, so the score is drawn before
    # the relevance flag, the same order the unseeded version always used.
    return [
        (object_id, rng.random(), rng.randint(0, 1))
        for object_id in range(1, num_points + 1)
    ]

def calculate_r1_r5(data):
    """Compute R1 and R5 for ``(object_id, score, is_relevant)`` tuples.

    The items are used in the order given; nothing is sorted here. R1 is the
    share of all relevant items found in the first position, R5 the share
    found in the first five positions.

    Returns:
        ``(r1, r5)``; ``(0, 0)`` when no item is relevant.
    """
    total_relevant = sum(entry[2] for entry in data)
    if total_relevant == 0:
        # No relevant items at all: avoid dividing by zero
        return 0, 0

    def relevant_in_top(k):
        return sum(entry[2] for entry in data[:k])

    return relevant_in_top(1) / total_relevant, relevant_in_top(5) / total_relevant

# Generate sample data
# Seed the generator first: without it every Restart & Run All prints
# different metrics, so the output could never be reproduced.
SEED = 0
random.seed(SEED)
num_points = 100
data = generate_sample_data(num_points)

# Sort data by score in descending order (important for R1/R5 calculation)
data.sort(key=lambda x: x[1], reverse=True)

# Chunk the data
chunk_size = 20
chunks = [data[i:i + chunk_size] for i in range(0, num_points, chunk_size)]

# Calculate metrics for each chunk
chunk_metrics = [calculate_r1_r5(chunk) for chunk in chunks]

# Calculate overall metrics for the entire dataset
overall_metrics = calculate_r1_r5(data)

# Aggregate chunk metrics (plain mean over the chunks)
avg_r1 = sum(r1 for r1, _ in chunk_metrics) / len(chunk_metrics)
avg_r5 = sum(r5 for _, r5 in chunk_metrics) / len(chunk_metrics)

# Print results
print("Chunk Metrics (R1, R5):")
for chunk_number, metrics in enumerate(chunk_metrics, start=1):
    print(f"Chunk {chunk_number}: {metrics}")

print("\nOverall Metrics (R1, R5):", overall_metrics)
print("\nAveraged Chunk Metrics (R1, R5):", (avg_r1, avg_r5))