# 2. Tensor creation

2.1 Preparation <br>
2.2 Whisper tensor creation <br>
2.3 Tdnn tensor creation <br>

## 2.1 Preparation

In [None]:
# Select which GPU to use

import os
# Both variables are written before any CUDA-using library is imported below.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",  # "0" -> usage of first GPU, "1" -> usage of second GPU
})

In [None]:
# Import necessary libraries

import os

import whisper

import numpy as np

try:
    import tensorflow  # required in Colab to avoid protobuf compatibility issues
except ImportError:
    pass

import torch
import pandas as pd
import torchaudio

from tqdm.notebook import tqdm

from speechbrain.pretrained import EncoderClassifier

In [None]:
# Check for GPU and set device

# Prefer CUDA when torch can see a GPU, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"  # Set Runtime to GPU in Google Colab
print(DEVICE)

In [None]:
# Load models

# Whisper checkpoint whose decoder activations are captured in section 2.2.
model = whisper.load_model(name="large")

# SpeechBrain language-ID classifier used in section 2.3; `savedir` is the
# local directory handed to SpeechBrain for the fetched model files.
language_id = EncoderClassifier.from_hparams(
    source="speechbrain/lang-id-voxlingua107-ecapa",
    savedir="tmp",
)

In [None]:
# Define a helper function to get the embeddings

def get_activation(name):
    """Build a forward hook that records a module's detached output.

    The returned callable has the ``(module, inputs, outputs)`` shape expected
    by ``register_forward_hook``. When it fires, it stores ``outputs.detach()``
    in the module-level ``activation`` dict under ``name``.

    Args:
        name: Key under which the captured output is stored in ``activation``.

    Returns:
        The hook function. The hook itself returns ``None``.
    """
    def _store_output(module, inputs, outputs):
        # `activation` is resolved when the hook fires, not when it is
        # created, so the caller must have a global dict bound to that name.
        detached = outputs.detach()
        activation[name] = detached
    return _store_output

## 2.2 Whisper tensor creation

In [None]:
# Iterate through the files, create whisper embeddings and save them

# Note: to create tensors for another part of the dataset, set SPLIT to
# 'train', 'dev' or 'test'. It is used for both the input and the output paths.
SPLIT = 'train'

# Upper bound on the number of extracted archives that are processed
# (presumably the 102 FLEURS languages — TODO confirm).
NUM_LANGUAGES = 102

# Index of the decoder block whose `mlp_ln` output is saved as the embedding.
WHISPER_BLOCK = 30

extracted_root = 'data/fleurs/downloads/extracted/'
output_root = os.path.join('data/tensors_whisper', SPLIT)

# List the extracted archives once so every iteration sees the same listing.
# Slicing (instead of indexing up to a fixed count) avoids an IndexError when
# fewer than NUM_LANGUAGES entries are present.
archive_names = os.listdir(extracted_root)[:NUM_LANGUAGES]

# Register the forward hook exactly once. Registering it again for every audio
# file would stack one more hook on the module per file, and none of them
# would ever be removed.
activation = {}
hook_handle = model.decoder.blocks[WHISPER_BLOCK].mlp_ln.register_forward_hook(
    get_activation('whisper')
)

try:
    for lang_number, archive_name in enumerate(archive_names, start=1):

        print(lang_number)
        archive_dir = os.path.join(extracted_root, archive_name)
        # The first entry inside the archive directory is used as the language code.
        ln = os.listdir(archive_dir)[0]
        audio_dir = os.path.join(archive_dir, ln, 'audio', SPLIT)
        new_dir = os.path.join(output_root, ln)

        # Also creates missing parent directories; no error if it already exists.
        os.makedirs(new_dir, exist_ok=True)

        for x, f in enumerate(tqdm(os.listdir(audio_dir), desc=ln)):

            file_path = os.path.join(audio_dir, f)

            # Fresh dict per file: if the hook did not fire, the lookup below
            # raises KeyError instead of silently saving the previous tensor.
            activation = {}

            audio = whisper.load_audio(file_path)
            audio = whisper.pad_or_trim(audio)

            # make log-Mel spectrogram with the number of mel bins this
            # checkpoint expects and move it to the same device as the model
            mel = whisper.log_mel_spectrogram(audio, n_mels=model.dims.n_mels).to(model.device)

            # Language detection is the only model call in this loop, so it is
            # what triggers the hook; its return values are not needed.
            model.detect_language(mel)

            name = 'wtensor_' + ln + str(x) + '.pt'
            torch.save(activation['whisper'], os.path.join(new_dir, name))
finally:
    # Always detach the hook so it does not linger on the model after this cell.
    hook_handle.remove()

## 2.3 Tdnn tensor creation

In [None]:
# Iterate through the files, create tdnn embeddings and save them

# Note: to create tensors for another part of the dataset, set SPLIT to
# 'train', 'dev' or 'test'. It is used for both the input and the output paths.
SPLIT = 'train'

# Upper bound on the number of extracted archives that are processed
# (presumably the 102 FLEURS languages — TODO confirm).
NUM_LANGUAGES = 102

# Key under which the hooked layer's output is stored in `activation`.
TDNN_LAYER = 'mods.classifier.DNN.block_0.norm.norm'

extracted_root = 'data/fleurs/downloads/extracted/'
output_root = os.path.join('data/tensors_tdnn', SPLIT)

# List the extracted archives once so every iteration sees the same listing.
# Slicing (instead of indexing up to a fixed count) avoids an IndexError when
# fewer than NUM_LANGUAGES entries are present.
archive_names = os.listdir(extracted_root)[:NUM_LANGUAGES]

# Register the forward hook exactly once. Registering it again for every audio
# file would stack one more hook on the module per file, and none of them
# would ever be removed.
activation = {}
hook_handle = language_id.mods.classifier.DNN.block_0.norm.norm.register_forward_hook(
    get_activation(TDNN_LAYER)
)

try:
    for lang_number, archive_name in enumerate(archive_names, start=1):

        print(lang_number)
        archive_dir = os.path.join(extracted_root, archive_name)
        # The first entry inside the archive directory is used as the language code.
        ln = os.listdir(archive_dir)[0]
        audio_dir = os.path.join(archive_dir, ln, 'audio', SPLIT)
        new_dir = os.path.join(output_root, ln)

        # Also creates missing parent directories; no error if it already exists.
        os.makedirs(new_dir, exist_ok=True)

        for x, f in enumerate(tqdm(os.listdir(audio_dir), desc=ln)):

            file_path = os.path.join(audio_dir, f)

            # Fresh dict per file: if the hook did not fire, the lookup below
            # raises KeyError instead of silently saving the previous tensor.
            activation = {}

            signal = language_id.load_audio(file_path)
            # Classification is the only model call in this loop, so it is
            # what triggers the hook; the prediction itself is not needed.
            language_id.classify_batch(signal)

            name = 'tensor_' + ln + str(x) + '.pt'
            torch.save(activation[TDNN_LAYER], os.path.join(new_dir, name))
finally:
    # Always detach the hook so it does not linger on the model after this cell.
    hook_handle.remove()