In [None]:
import tensorflow as tf
import cv2
import numpy as np

In [None]:
import math
import glob
import argparse
from functools import partial
from pathlib import Path

import torch
import pandas as pd
from tqdm import tqdm
import numpy as np
import joblib
import yaml

from train_pure_segment import DATA_DIR_STR, prepare_model as prepare_agnostic_model
from train_segment_w_context import prepare_model as prepare_aware_model
from dataloader import YoutubeTestDataset, DataLoader, collate_test_segments
from telegram_tokens import BOT_TOKEN, CHAT_ID
from telegram_sender import telegram_sender

# Batch size handed to the DataLoader built at the end of this cell.
BATCH_SIZE = 32


def inverse_label_mapping(vocab_path="./data/segment_vocabulary.csv"):
    """Map each vocabulary row position to the value of its ``Index`` column.

    Args:
        vocab_path: Path of the vocabulary CSV; it must contain an ``Index``
            column.

    Returns:
        dict: ``{row position in the CSV: value of "Index" on that row}``.
    """
    vocab = pd.read_csv(vocab_path)
    # The frame keeps its default index, so ``vocab.index`` is the row position.
    return dict(zip(vocab.index, vocab["Index"]))

# Rebinds the DATA_DIR_STR imported from train_pure_segment earlier in this cell.
DATA_DIR_STR = "data/Youtube8MData"
def collect_file_paths():
    """Return the paths of all ``*.tfrecord`` files under ``DATA_DIR_STR/test``."""
    pattern = DATA_DIR_STR + "/test/*.tfrecord"
    return glob.glob(pattern)


def prepare_model(
        model_path,
        config_path='y8m-2019-trained/segment/dbof-3_0_20191030-0608/config.yaml'):
    """Rebuild a trained segment model and load its saved weights on the CPU.

    Args:
        model_path: Location of the saved ``state_dict``; it is passed through
            ``str()`` before loading.
        config_path: YAML config used to rebuild the model. The default is the
            config of the ``dbof-3_0_20191030-0608`` run, which used to be
            hard-coded in the body; pass the matching config when loading a
            checkpoint from a different run.

    Returns:
        The model built from the config with the checkpoint weights loaded.
    """
    with open(config_path) as fin:
        config = yaml.safe_load(fin)
    # map_location remaps the stored tensors onto the CPU.
    state_dict = torch.load(str(model_path), map_location=torch.device('cpu'))
    # A "context_base" entry in the config selects the context-aware builder;
    # otherwise the context-agnostic one is used.
    if "context_base" in config:
        build_model = prepare_aware_model
    else:
        build_model = prepare_agnostic_model
    model = build_model(config)
    model.load_state_dict(state_dict)
    return model

# Checkpoints to run; each entry is handed to prepare_model().
model_paths = ['y8m-2019-trained/segment/dbof-3_0_20191030-0608/model.pth']

#@telegram_sender(token=BOT_TOKEN, chat_id=CHAT_ID, name="Inferencing")

# Dataset over the collected shards, then a single-worker loader on top of it.
test_ds = YoutubeTestDataset(collect_file_paths(), offset=0, device="cpu")
loader = DataLoader(
    test_ds,
    batch_size=BATCH_SIZE,
    num_workers=1,
    pin_memory=True,
    # Pre-bind return_vid=False on the collate function.
    collate_fn=partial(collate_test_segments, return_vid=False),
)
#ref_indices = joblib.load("data/cache/ref_indices.jl")

In [None]:
import glob
import joblib
import numpy as np
import pandas as pd

from tqdm import tqdm
from inference_memmap import collect_file_paths
from dataloader import YoutubeTestDataset, DataLoader, collate_test_segments

DATA_DIR_STR = "data/Youtube8MData"
# Shadows the collect_file_paths imported from inference_memmap just above.
def collect_file_paths():
    """Return the paths of all ``*.tfrecord`` files under ``DATA_DIR_STR/test``."""
    pattern = DATA_DIR_STR + "/test/*.tfrecord"
    return glob.glob(pattern)

test_ds = YoutubeTestDataset(
    collect_file_paths(),
    offset=3,
    device="cpu",
    vocab_path="data/segment_vocabulary.csv",
)

# Bookkeeping filled in by the single pass over the dataset below.
vid_mapping, video_lengths, vidid_arr = {}, [], []
global_indices = np.zeros(1704348, dtype="int8")
global_vids = np.zeros(1704348, dtype="int32")

for i, (video_features, segment_row, index, vid) in tqdm(enumerate(test_ds), total=433376):
    video_lengths.append(video_features.size(0))
    global_indices[i] = index
    # The first time a video id is seen it receives the next free integer id.
    global_vids[i] = vid_mapping.setdefault(vid, len(vid_mapping))
    vidid_arr.append(vid)

# Persist the per-item index array, the per-item integer video ids and the id mapping.
joblib.dump(global_indices, "data/cache/ref_indices.jl")
joblib.dump(global_vids, "data/cache/ref_vids.jl")
joblib.dump(vid_mapping, "data/cache/vid_mapping.jl")

In [None]:
# Spot-check one of the video ids collected by the dataset pass in the previous cell.
vidid_arr[80]

In [None]:
from dataloader import YoutubeVideoDataset, collate_videos
from torch.utils.data import DataLoader

# One test shard is enough for a quick look at what a batch contains.
tfrecord_path = 'data/Youtube8MData/test/test0100.tfrecord'

# Single pass (epochs=1) over that shard only.
dataset = YoutubeVideoDataset([tfrecord_path], epochs=1)

# One item per batch, loaded in the main process (num_workers=0).
loader = DataLoader(
    dataset,
    batch_size=1,
    num_workers=0,
    collate_fn=collate_videos,
)

# Report the sizes of the three tensors in the first batch, then stop.
for i, (data, masks, labels) in enumerate(loader):
    print(f"Video Data Shape: {data.size()}")
    print(f"Video Masks Shape: {masks.size()}")
    print(f"Labels Shape: {labels.size()}")
    break

In [None]:
    # NOTE(review): this cell is an orphaned loop body. Every line is indented
    # and it ends in `break`, but no enclosing `for` is present, so it cannot
    # run as written. `video_data`, `display`, `HTML` and `image_to_base64` are
    # also not defined or imported in any visible cell.
    # Assume video_data is a 4D array with dimensions (num_frames, height, width, channels)
    num_frames, height, width, _ = video_data.shape
    
    # Join the frames into one strip. Under the 4D assumption above, iterating
    # video_data yields one (height, width, channels) frame at a time, so
    # axis=1 joins them along the width: frames end up side by side.
    video_strip = np.concatenate(video_data, axis=1)
    
    # Swap the channel order from BGR to RGB when the strip has three channels
    if video_strip.shape[-1] == 3:
        video_strip = cv2.cvtColor(video_strip, cv2.COLOR_BGR2RGB)
    
    # Show the strip inline as a base64-embedded <img> tag
    display(HTML(f'<img src="data:image/png;base64,{image_to_base64(video_strip)}">'))
    
    # Break after processing the first video
    break

In [None]:
# Test shard to inspect directly with TensorFlow.
tfrecord_path = 'data/Youtube8MData/test/test0100.tfrecord'

# Dataset over the records stored in that file.
# NOTE: this rebinds `dataset`, which an earlier cell set to a YoutubeVideoDataset.
dataset = tf.data.TFRecordDataset(tfrecord_path)

In [None]:
# Parse the first record as a tf.train.Example and print its text form.
for raw_record in dataset.take(1):  # Take one record as an example
    example = tf.train.Example()
    # raw_record is a tensor; .numpy() yields the serialized bytes to parse.
    example.ParseFromString(raw_record.numpy())
    print(example)

In [None]:
import json 

# Decoded records, one plain dict per record.
decoded_examples = []

# Decode only the first record of the dataset.
for record in dataset.take(1):
    # Parse the serialized bytes into a tf.train.Example message.
    example = tf.train.Example()
    example.ParseFromString(record.numpy())

    # Flatten the message into built-in Python types so json can serialise it.
    example_dict = {}
    for key, feature in example.features.feature.items():
        if feature.HasField('bytes_list'):
            byte_values = feature.bytes_list.value
            # Only the first bytes value is kept (as before); an empty list
            # becomes None instead of raising IndexError.
            example_dict[key] = byte_values[0].decode('utf-8') if byte_values else None
        elif feature.HasField('int64_list'):
            # Repeated protobuf fields are container objects, not lists;
            # json.dump raises TypeError on them unless they are copied out.
            example_dict[key] = list(feature.int64_list.value)
        elif feature.HasField('float_list'):
            example_dict[key] = list(feature.float_list.value)

    decoded_examples.append(example_dict)

# Store the decoded examples in a JSON file
json_path = 'decoded_examples.json'
with open(json_path, 'w') as json_file:
    json.dump(decoded_examples, json_file, indent=2)

In [None]:
# Display the list of decoded record dicts built in the previous cell.
decoded_examples

In [None]:
# Training shard to parse (the earlier cells read a test shard).
tfrecord_path = 'data/Youtube8MData/training/train3749.tfrecord'

# Dataset over the records in that file; rebinds `dataset` once more.
dataset = tf.data.TFRecordDataset(tfrecord_path)

# Apply parse_tfrecord_fn to every record.
# NOTE(review): parse_tfrecord_fn is not defined or imported in any visible cell —
# confirm where it comes from and what structure it returns.
parsed_dataset = dataset.map(parse_tfrecord_fn)

In [None]:
# Display the repr of the mapped dataset created in the previous cell.
parsed_dataset

In [None]:
# Assuming parsed_dataset is your tf.data.Dataset
for parsed_example in parsed_dataset.take(1):  # Take only one example
    # Read the 'rgb' entry of element 0 of the parsed example.
    # NOTE(review): this indexes parsed_example[0]['rgb'], while the next cell
    # reads parsed_example['rgb'] directly — both cannot match the structure
    # parse_tfrecord_fn returns; confirm which one is right.
    first_rgb_feature = parsed_example[0]['rgb'].numpy()

    # Assuming 'rgb' is a byte string, print the first 10 bytes for illustration
    print(f"First value of 'rgb' feature: {first_rgb_feature[:10]}")

In [None]:
import os

# NOTE(review): `decode_rgb_feature`, `width` and `height` are not defined in
# any visible cell; they must exist before this cell can run.

# cv2.VideoWriter does not create missing directories, so make sure the
# output folder exists before the first writer is opened.
os.makedirs('reconstructed_videos', exist_ok=True)

# The codec is the same for every video, so build it once.
fourcc = cv2.VideoWriter_fourcc(*'XVID')

# Write one .avi per parsed example and print its labels and mean RGB features.
for parsed_example in parsed_dataset:
    # Extract features
    rgb_features = parsed_example['rgb'].values
    labels = parsed_example['labels'].values.numpy()
    video_id = parsed_example['id'].numpy().decode('utf-8')
    mean_rgb = parsed_example['mean_rgb'].numpy()

    # Decode RGB features
    decoded_frames = [decode_rgb_feature(rgb_feature) for rgb_feature in rgb_features]

    # Write the decoded frames with OpenCV at 1 frame per second.
    output_video_path = f'reconstructed_videos/{video_id}.avi'
    video_writer = cv2.VideoWriter(output_video_path, fourcc, 1, (width, height))
    try:
        for frame in decoded_frames:
            video_writer.write(frame)
    finally:
        # Release the writer even if writing a frame fails.
        video_writer.release()

    print(f"Video saved: {output_video_path}")
    print(f"Labels: {labels}")
    print(f"Mean RGB features: {mean_rgb}")

In [None]:
import math
import glob
import argparse
from functools import partial
from pathlib import Path

import torch
import pandas as pd
from tqdm import tqdm
import numpy as np
import joblib
import yaml

from train_pure_segment import DATA_DIR_STR, prepare_model as prepare_agnostic_model
from train_segment_w_context import prepare_model as prepare_aware_model
from dataloader import YoutubeTestDataset, DataLoader, collate_test_segments
from telegram_tokens import BOT_TOKEN, CHAT_ID
from telegram_sender import telegram_sender

# Batch size handed to the DataLoader built at the end of this cell.
BATCH_SIZE = 32

def inverse_label_mapping(vocab_path="./data/segment_vocabulary.csv"):
    """Map each vocabulary row position to the value of its ``Index`` column.

    Args:
        vocab_path: Path of the vocabulary CSV; it must contain an ``Index``
            column.

    Returns:
        dict: ``{row position in the CSV: value of "Index" on that row}``.
    """
    vocab = pd.read_csv(vocab_path)
    row_positions = vocab.index
    label_ids = vocab["Index"]
    return dict(zip(row_positions, label_ids))

# Rebinds the DATA_DIR_STR imported from train_pure_segment earlier in this cell.
DATA_DIR_STR = "data/Youtube8MData"
def collect_file_paths():
    """Return the paths of all ``*.tfrecord`` files under ``DATA_DIR_STR/dummytrain``."""
    pattern = DATA_DIR_STR + "/dummytrain/*.tfrecord"
    return glob.glob(pattern)

def prepare_model(
        model_path,
        config_path='y8m-2019-trained/segment/dbof-3_0_20191030-0608/config.yaml'):
    """Rebuild a trained segment model and load its saved weights on the CPU.

    Args:
        model_path: Location of the saved ``state_dict``; it is passed through
            ``str()`` before loading.
        config_path: YAML config used to rebuild the model. The default is the
            config of the ``dbof-3_0_20191030-0608`` run, which used to be
            hard-coded in the body; pass the matching config when loading a
            checkpoint from a different run.

    Returns:
        The model built from the config with the checkpoint weights loaded.
    """
    with open(config_path) as fin:
        config = yaml.safe_load(fin)
    # map_location remaps the stored tensors onto the CPU.
    state_dict = torch.load(str(model_path), map_location=torch.device('cpu'))
    # A "context_base" entry in the config selects the context-aware builder;
    # otherwise the context-agnostic one is used.
    if "context_base" in config:
        build_model = prepare_aware_model
    else:
        build_model = prepare_agnostic_model
    model = build_model(config)
    model.load_state_dict(state_dict)
    return model

# Checkpoints to run; each entry is handed to prepare_model().
model_paths = ['y8m-2019-trained/segment/dbof-3_0_20191030-0608/model.pth']

#@telegram_sender(token=BOT_TOKEN, chat_id=CHAT_ID, name="Inferencing")

# Dataset over the collected shards, then a single-worker loader on top of it.
test_ds = YoutubeTestDataset(collect_file_paths(), offset=0, device="cpu")
loader = DataLoader(
    test_ds,
    batch_size=BATCH_SIZE,
    num_workers=1,
    pin_memory=True,
    # Pre-bind return_vid=False on the collate function.
    collate_fn=partial(collate_test_segments, return_vid=False),
)
#ref_indices = joblib.load("data/cache/ref_indices.jl")

In [None]:
# Size of the prediction matrix: one row per segment served by `loader`,
# one column per class score produced by the model.
N_SEGMENTS = 43300
N_CLASSES = 1000

with torch.no_grad():
    models = []
    predictions = []
    for model_path in model_paths:
        # NOTE: the target does not depend on model_path, so once the first
        # model has created the file every later entry is skipped.
        target_path = "data/cache/predictions/trainpred.npy"
        if Path(target_path).exists():
            print("Skipping ", model_path)
            continue
        print(model_path)
        model = prepare_model(model_path)
        models.append(model.eval())
        # mode="w+" creates the file but not its parent directory.
        Path(target_path).parent.mkdir(parents=True, exist_ok=True)
        predictions.append(np.memmap(
            target_path,
            "int16", mode="w+", shape=(N_SEGMENTS, N_CLASSES)
        ))
    global_indices = np.zeros(N_SEGMENTS, dtype="int8")
    cnt = 0
    if not models:
        # Nothing to predict with: do not iterate the whole loader for no result.
        print("No eligible models found!")
    else:
        for video_features, video_masks, segment_features, indices in tqdm(
                loader, total=int(math.ceil(N_SEGMENTS / BATCH_SIZE))):
            # shape(frames / 5, n_classes)
            n_segments = segment_features.shape[0]
            for i, model in enumerate(models):
                # Sigmoid outputs are scaled to integers in the range 0 ~ 9999
                # so they fit the int16 memmap.
                probs = np.round(torch.sigmoid(
                    model(
                        video_features, video_masks, segment_features
                    )
                ).cpu().numpy() * 9999, 0).astype("int16")
                predictions[i][cnt:cnt+n_segments] = probs
            global_indices[cnt:cnt+n_segments] = indices
            cnt += n_segments
        # Push the written rows to the files on disk.
        for prediction in predictions:
            prediction.flush()
    #assert np.array_equal(ref_indices, global_indices)

In [None]:
# Display the list of prediction memmaps filled by the inference cell (empty if every model was skipped).
predictions

In [None]:
import glob
import joblib
import numpy as np
import pandas as pd

from tqdm import tqdm
from inference_memmap import collect_file_paths
from dataloader import YoutubeTestDataset, DataLoader, collate_test_segments

DATA_DIR_STR = "data/Youtube8MData"
# Shadows the collect_file_paths imported from inference_memmap just above.
def collect_file_paths():
    """Return the paths of all ``*.tfrecord`` files under ``DATA_DIR_STR/dummytrain``."""
    pattern = DATA_DIR_STR + "/dummytrain/*.tfrecord"
    return glob.glob(pattern)

test_ds = YoutubeTestDataset(
        collect_file_paths(), offset=3, device="cpu",
        vocab_path="data/segment_vocabulary.csv"
        )
vid_mapping = {}
video_lengths = []
# Pre-allocated upper bound; the arrays are trimmed to the number of items
# actually seen once the pass is over, so no unused zero rows are cached.
global_indices = np.zeros(1704348, dtype="int8")
global_vids = np.zeros(1704348, dtype="int32")

vidid_arr =[]
# No `total` is given to tqdm: the item count of this dataset is not known up front.
for i, (video_features, segment_row, index, vid) in tqdm(enumerate(test_ds)):
    video_lengths.append(video_features.size(0))
    global_indices[i] = index
    # The first time a video id is seen it receives the next free integer id.
    if vid not in vid_mapping:
        vid_mapping[vid] = len(vid_mapping)
    global_vids[i] = vid_mapping[vid]
    vidid_arr.append(vid)

# One entry was appended to vidid_arr per item, so its length is the filled size.
n_items = len(vidid_arr)
global_indices = global_indices[:n_items].copy()
global_vids = global_vids[:n_items].copy()

# NOTE(review): these are the same cache files the test-set cell writes, so
# running this cell overwrites them — confirm that is intended.
joblib.dump(global_indices, "data/cache/ref_indices.jl")
joblib.dump(global_vids, "data/cache/ref_vids.jl")

In [None]:
# Number of items the dataset pass in the previous cell yielded.
len(vidid_arr)

In [None]:
predictions_dict = {}
top_n = 5
vocab = pd.read_csv('data/segment_vocabulary.csv')

# Group the top-n (class name, score) pairs of every segment under its video id.
for i, vid_id in enumerate(vidid_arr):
    segment_scores = predictions[0][i]

    # argsort is ascending: keep the last top_n positions and reverse them so
    # the highest score comes first.
    top_indices = np.argsort(segment_scores)[-top_n:][::-1]

    # Look the names up by row position and rescale the stored integer scores.
    top_classes = vocab['Name'].iloc[top_indices].tolist()
    top_values = 0.0001*segment_scores[top_indices]

    predictions_dict.setdefault(vid_id, []).append(list(zip(top_classes, top_values)))

In [None]:
# Display the full video id -> per-segment top-n mapping built in the previous cell.
predictions_dict

In [None]:
import json

# Serialise the dictionary once; the same text is written to disk and printed below.
json_data = json.dumps(predictions_dict, indent=2)

# Save the JSON text to predictions.json in the working directory.
with open('predictions.json', 'w') as json_file:
    json_file.write(json_data)

# Also echo the complete JSON text to the cell output.
print(json_data)

In [3]:
import json

# Load the predictions written to predictions.json.
with open('predictions.json', 'r') as json_file:
    loaded_data = json.load(json_file)

# For every key, keep at most the first five entries of its list.
subset_data = {key: entries[:5] for key, entries in loaded_data.items()}

# Pretty-print the reduced mapping.
print(json.dumps(subset_data, indent=2))

{
  "lKbF": [
    [
      [
        "Racing",
        0.9965
      ],
      [
        "Sports car",
        0.9803000000000001
      ],
      [
        "Go-kart",
        0.9630000000000001
      ],
      [
        "Road racing",
        0.9591000000000001
      ],
      [
        "Motorsport",
        0.9265
      ]
    ],
    [
      [
        "Racing",
        0.9956
      ],
      [
        "Sports car",
        0.9809
      ],
      [
        "Road racing",
        0.9276000000000001
      ],
      [
        "Bugatti Automobiles",
        0.9229
      ],
      [
        "Porsche 911",
        0.9095000000000001
      ]
    ],
    [
      [
        "Racing",
        0.9963000000000001
      ],
      [
        "Sports car",
        0.9835
      ],
      [
        "Go-kart",
        0.9525
      ],
      [
        "Porsche 911",
        0.8812000000000001
      ],
      [
        "Motorsport",
        0.8745
      ]
    ],
    [
      [
        "Racing",
        0.9915
      ],
     