In [1]:
# Root directory under which this notebook builds all of its data, checkpoint,
# cache and output paths (every path.join below starts from it).
BASE_DIR = "/tmp/akshett.jindal"
# Number of samples per batch in the embedding-extraction loop; it also controls
# whether the processor pads its inputs (padding is enabled only when > 1).
BATCH_SIZE = 1

In [2]:
from os import path

# Google Drive link for the EmotionCLIP checkpoint. No visible cell downloads from
# it, so the file is presumably fetched manually -- TODO confirm.
MODEL_CHECKPOINTS_DL_LINK = "https://drive.google.com/file/d/1iWA7KfiR1JjRi-hD6R4LK5cug1FMcblD/view"
# Local path of the checkpoint file that is later passed to torch.load.
MODEL_CHECKPOINTS_PATH = path.join(BASE_DIR, "cached_models", "EmotionCLIP", "emotionclip_latest.pt")

In [3]:
from os import path
from glob import glob
import re

# Videos sit exactly one directory below task03 (the first "*"), e.g. task03/train/*.mp4.
video_files_glob = path.join(BASE_DIR, "shared_task_data", "task03", "*", "*.mp4")

# recursive=True only affects "**" patterns, which this glob does not contain; it is
# kept so the call stays identical to what was run to produce the recorded output.
mp4_files = glob(video_files_glob, recursive=True)

# Natural ordering: compare files by the tuple of every integer that appears in the
# path, so dia1utt2 sorts before dia1utt10. list.sort is stable, like sorted().
mp4_files.sort(key=lambda fname: tuple(map(int, re.findall(r"\d+", fname))))

# Preview the first few paths together with the total count.
mp4_files[:5], len(mp4_files)

(['/tmp/akshett.jindal/shared_task_data/task03/train/dia1utt1.mp4',
  '/tmp/akshett.jindal/shared_task_data/task03/train/dia1utt2.mp4',
  '/tmp/akshett.jindal/shared_task_data/task03/train/dia1utt3.mp4',
  '/tmp/akshett.jindal/shared_task_data/task03/train/dia1utt4.mp4',
  '/tmp/akshett.jindal/shared_task_data/task03/train/dia1utt5.mp4'],
 13619)

In [4]:
!git clone "https://github.com/Xeaver/EmotionCLIP"
!touch EmotionCLIP/__init__.py

fatal: destination path 'EmotionCLIP' already exists and is not an empty directory.


In [5]:
import torch

# Run on the GPU when torch can see one; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

device(type='cuda')

In [6]:
import torch

from EmotionCLIP.src.models.base import EmotionCLIP

# Instantiate the EmotionCLIP architecture from the ViT-B-32 config shipped in the
# cloned repository. No separate backbone checkpoint is given; the weights are
# filled in from the full checkpoint loaded below.
model = EmotionCLIP(
    video_len=8,
    backbone_config="EmotionCLIP/src/models/model_configs/ViT-B-32.json",
    backbone_checkpoint=None,
)

# Deserialize onto the CPU first; the model is moved to `device` afterwards.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
ckpt = torch.load(MODEL_CHECKPOINTS_PATH, map_location="cpu")

# strict=True makes the load fail on any missing or unexpected parameter name.
model.load_state_dict(ckpt["model"], strict=True)

# Switch to evaluation mode, then move the module to the selected device.
model.eval()
model = model.to(device)

In [None]:
# Directory-name-safe form of MODEL_ID: every "/" and " " is replaced with "_".
# NOTE(review): MODEL_ID is not defined in any cell visible in this notebook, so on
# a fresh kernel this line would raise NameError unless it is defined elsewhere -- confirm.
MODEL_NAME = MODEL_ID.replace("/", "_").replace(" ", "_")

In [None]:
from os import path

# Directory passed as `cache_dir` to the from_pretrained(...) calls below.
HUGGINGFACE_CACHE_DIR = path.join(BASE_DIR, ".huggingface_cache")
# Directory that receives the pickled embedding batches, one subfolder per MODEL_NAME.
# NOTE(review): the path component is "audio_embeddings" although the inputs listed
# earlier in this notebook are .mp4 files -- confirm this is the intended location.
OUTPUT_DIR = path.join(BASE_DIR, "shared_task", "audio_embeddings", MODEL_NAME)

In [None]:
import os

# Create the output directory (and any missing parents); exist_ok=True makes the
# cell safe to re-run when the directory is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
import torch

# Prefer CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

In [None]:
# Load the pretrained input processor for MODEL_ID, caching downloads under
# HUGGINGFACE_CACHE_DIR.
# NOTE(review): neither PROCESSOR_CLASS nor MODEL_ID is defined in the cells visible
# in this notebook -- confirm where they are supposed to come from.
processor = PROCESSOR_CLASS.from_pretrained(MODEL_ID, cache_dir=HUGGINGFACE_CACHE_DIR)

In [None]:
# Load the pretrained model for MODEL_ID and move it to the selected device.
# Note that this rebinds `model`, replacing the EmotionCLIP instance created in an
# earlier cell of this notebook.
# NOTE(review): MODEL_CLASS and MODEL_ID are not defined in the visible cells -- confirm.
model = MODEL_CLASS.from_pretrained(MODEL_ID, cache_dir=HUGGINGFACE_CACHE_DIR)
model = model.to(device)

In [None]:
from datasets import Dataset
import numpy
from os import path
import soundfile
from tqdm.auto import tqdm

def data_generator():
    """Yield one ``{"id": ..., "audio": ...}`` record per file in ``mp4_files``.

    ``id`` is the file's base name without its extension and ``audio`` is a
    1-D array: multi-channel audio is averaged across channels, mono audio is
    passed through unchanged.

    NOTE(review): the files listed in ``mp4_files`` are ``.mp4`` videos; whether
    ``soundfile`` can decode them is not established anywhere in this notebook
    -- confirm, or point this generator at extracted audio files instead.
    """
    for media_file in tqdm(mp4_files):
        with open(media_file, "rb") as f:
            # The sample rate returned by soundfile is discarded here.
            audio_data, _ = soundfile.read(f)

        # Strip whatever extension the file actually has. The previous
        # `.replace(".wav", "")` left ".mp4" in every id.
        audio_id = path.splitext(path.basename(media_file))[0]

        # soundfile returns a 1-D array for mono input, for which averaging over
        # axis=1 would raise; only down-mix when a channel axis exists.
        if audio_data.ndim > 1:
            audio_data = numpy.average(audio_data, axis=1)

        yield {"id": audio_id, "audio": audio_data}

# Build a `datasets.Dataset` from the records yielded by data_generator; the bare
# `dataset` expression displays its summary as the cell output.
dataset = Dataset.from_generator(data_generator)
dataset

In [None]:
def batch(iterable, n=1):
    """Yield consecutive slices of ``iterable`` holding at most ``n`` items each.

    ``iterable`` must support ``len()`` and slice indexing. The last slice is
    shorter when the length is not a multiple of ``n``.
    """
    total = len(iterable)
    for start in range(0, total, n):
        stop = min(start + n, total)
        yield iterable[start:stop]

In [None]:
import json
import numpy
from os import path
import pickle
import torch
from tqdm.auto import tqdm

# Per-batch records waiting to be written to disk.
BUFFER = []
# Flush roughly every 500 samples. Clamped to at least 1 so that a BATCH_SIZE above
# 500 still flushes periodically instead of buffering the whole dataset in memory.
BUFFER_MAX = max(1, 500 // BATCH_SIZE)


def _flush_buffer(records, last_batch_num):
    """Pickle ``records`` to ``OUTPUT_DIR/batch_<last_batch_num>.pkl``."""
    batch_of = path.join(OUTPUT_DIR, f"batch_{last_batch_num}.pkl")
    with open(batch_of, "wb") as f:
        pickle.dump(records, f)


model.eval()

# Ceiling division so a trailing partial batch is counted in the progress-bar total.
num_batches = (len(dataset) + BATCH_SIZE - 1) // BATCH_SIZE

with torch.inference_mode():
    for batch_num, d in tqdm(enumerate(batch(dataset, n=BATCH_SIZE)), total=num_batches):

        # NOTE(review): sampling_rate is hard-coded to 16000 while data_generator
        # discards the rate reported by soundfile, so nothing here verifies that
        # the audio really is 16 kHz -- confirm.
        inputs = processor(
            raw_speech=d["audio"],
            padding=BATCH_SIZE > 1,
            sampling_rate=16000,
            return_tensors="pt",
        )
        for k in inputs.keys():
            inputs[k] = inputs[k].to(device)

        outputs = model(**inputs)

        # Only these two outputs are stored, so only they are copied to host memory
        # and mean-pooled over axis 1.
        # NOTE(review): with BATCH_SIZE > 1 padding is enabled, and padded positions
        # would presumably be included in this mean -- confirm whether the
        # attention mask should be applied.
        last_hidden_states = numpy.mean(numpy.array(outputs["last_hidden_state"].cpu()), axis=1)
        extract_features = numpy.mean(numpy.array(outputs["extract_features"].cpu()), axis=1)
        if batch_num == 0:
            # One-off shape check for the first batch.
            print(last_hidden_states.shape, extract_features.shape)

        BUFFER.append({
            "ids": d["id"],
            "last_hidden_state": last_hidden_states,
            "extract_features": extract_features,
        })

        # Write the accumulated records out once the buffer is full; the file is
        # named after the last batch index it contains.
        if len(BUFFER) >= BUFFER_MAX:
            _flush_buffer(BUFFER, batch_num)
            BUFFER = []

# Write whatever is left after the final (possibly partial) buffer.
if BUFFER:
    _flush_buffer(BUFFER, batch_num)
    BUFFER = []