# This notebook creates a dictionary of the frame data for use in downstream tasks
### Keys are the yt8m_id of each video; each value holds that video's audio features, rgb features, and label.

##### Requires hunting_dict.json, which is created in the BERTopics section, to exist before this notebook is run

In [11]:
import tensorflow as tf
import pickle
import json
import numpy as np
from tqdm import tqdm
from path import Path

In [None]:
# Locations of the files this notebook reads:
#   base_path -> directory from which frame_data.pkl is loaded
#   repo_path -> root under which hunting_dict.json is loaded, at
#                human_wildlife_interactions/data/processed/hunting_dict.json
# NOTE(review): these are absolute, machine-specific paths; anyone running the
# notebook elsewhere has to edit this cell first.
base_path = Path("/nfs/turbo/seas-nhcarter/human_wildlife_interactions/classifier_video_data/")
repo_path = Path("/nfs/turbo/seas-nhcarter/human_wildlife_interactions/repo")

In [16]:
# bring in the frame data
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at a frame_data.pkl that comes from a trusted source.
with open(base_path / "frame_data.pkl", 'rb') as pkl_file:
    frame_data = pickle.load(pkl_file)

# bring in the cluster results
# The encoding is passed explicitly so the read does not depend on the
# machine's locale default (JSON text is expected to be UTF-8).
with open(repo_path / "human_wildlife_interactions/data/processed/hunting_dict.json",
          encoding="utf-8") as json_file:
    labels = json.load(json_file)

In [13]:
# Build feature_dict: for every video in frame_data that also has an entry in
# labels, store its decoded per-frame audio and rgb arrays plus its label.
#   feature_dict[yt8m_id] = {'audio_lst': [...], 'rgb_lst': [...], 'label': ...}
feature_dict = {}

for yt8m_id, features in tqdm(frame_data.items()):
    # Membership is tested directly against the dict (a hash lookup). The
    # previous version rebuilt set(labels.keys()) on every iteration, which
    # cost O(len(labels)) per video for the same answer.
    if yt8m_id not in labels:
        continue

    # Look the per-modality feature lists up once per video.
    frame_lists = features['sequence_example'].feature_lists.feature_list
    audio_features = frame_lists['audio'].feature
    rgb_features = frame_lists['rgb'].feature
    audio_lst = []
    rgb_lst = []

    # The frame count comes from the audio list and is reused to index the rgb
    # list. NOTE(review): this assumes both lists have the same number of
    # frames -- a shorter rgb list raises IndexError, a longer one is silently
    # truncated. TODO confirm against the data.
    for i in range(len(audio_features)):
        # Each frame stores its features as one raw byte string; decode it as
        # uint8 values and convert to a float32 numpy array.
        audio_bytes = audio_features[i].bytes_list.value[0]
        audio_np = tf.cast(tf.io.decode_raw(audio_bytes, tf.uint8), tf.float32).numpy()
        audio_lst.append(audio_np)

        rgb_bytes = rgb_features[i].bytes_list.value[0]
        rgb_np = tf.cast(tf.io.decode_raw(rgb_bytes, tf.uint8), tf.float32).numpy()
        rgb_lst.append(rgb_np)

    # add to feature dictionary
    feature_dict[yt8m_id] = {'audio_lst': audio_lst,
                             'rgb_lst': rgb_lst,
                             'label': labels[yt8m_id]
                             }

100%|██████████| 4243/4243 [04:01<00:00, 17.56it/s]


In [14]:
# Write the assembled feature dictionary to frame_features_dict.pkl under base_path.
# (The filename literal previously opened with " and closed with ', which is a
# SyntaxError, so this cell could not run as written.)
with open(base_path / "frame_features_dict.pkl", 'wb') as file:
    pickle.dump(feature_dict, file)