<a href="https://colab.research.google.com/github/hassssan051/portrait-video-synthesis/blob/audio-to-descriptor-pred/prediction/runLength_for_hierarchical_clustering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive under /content/drive so that files beneath
# /content/drive/MyDrive can be opened by the cells that follow.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import os
import zipfile
from tqdm import tqdm
from google.colab import drive
from sklearn.preprocessing import MinMaxScaler

import torch.nn.functional as F

In [None]:
from collections import defaultdict
import pickle


# Build `true_descriptors`: key = video name, value = the descriptors of that video's
# frames stacked (in frame-number order) into a single numpy array.

clusters_info = 'Sawaiz_2/pkl_for_lstm_encoded/17_53_50_800'
file_path = '/content/drive/MyDrive/'+clusters_info+'/live_portrait_descriptors_all_encoder.pkl'

# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open(file_path, 'rb') as file:
    data = pickle.load(file)

# video name -> list of (frame number, descriptor) pairs, in the order the keys are read
video_dict = defaultdict(list)

for key, value in data.items():
    parts = key.split('/')
    # NOTE(review): the dataset is chosen by checking for the letter 'M' anywhere in the
    # key - confirm that no Ravdess key can contain it.
    if 'M' in key:
        # MEAD: the video name is built from path components 0, 2, 3 and 4; the frame
        # number is whatever follows the last "_" in the file name (extension dropped).
        video_name = "__".join((parts[0], parts[2], parts[3], parts[4]))
        frame_number = int(parts[-1].split(".")[0].split("_")[-1])
    else:
        # Ravdess: component 1 is the video name (e.g. '02-01-01-01-02-02-16') and the
        # file name without its extension is the frame number (e.g. 1).
        video_name = parts[1]
        frame_number = int(parts[2].split('.')[0])
    video_dict[video_name].append((frame_number, value))

# Order each video's frames by frame number, drop the numbers and stack the descriptors.
final_video_dict = {}
for video_name, frames in video_dict.items():
    ordered_frames = sorted(frames, key=lambda pair: pair[0])
    final_video_dict[video_name] = np.vstack([descriptor for _, descriptor in ordered_frames])

true_descriptors = final_video_dict
videos_list = list(true_descriptors.keys())
print(len(videos_list))

9282


In [None]:
# Folder on Drive that holds the cluster pickles for the selected run.
cluster_dir = '/content/drive/MyDrive/'+clusters_info

# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.

# Cluster name -> descriptor of the cluster representative.
file_path = cluster_dir+'/cluster_rep_level4.pkl'
with open(file_path, 'rb') as rep_file:
    clusters_data = pickle.load(rep_file)

# Frame key -> name of the cluster that frame is assigned to (the labels for the LP).
file_path = cluster_dir+'/frame_to_cluster_level4.pkl'
with open(file_path, 'rb') as assignment_file:
    frames_data_raw = pickle.load(assignment_file)

In [None]:
# NOTE(review): `cluster_level` is only a label used when reporting results; the cluster
# data itself comes from whichever pickles were loaded into `clusters_data` and
# `frames_data_raw`. Confirm that the label matches the level of those files.
cluster_level = 1
frames_to_clusters_indices = {}  # video name -> [(frame number, cluster name), ...]
clusters_indices = {}            # cluster name -> row index into clusters_descriptors

# Group the per-frame cluster assignments by video.
for key, value in frames_data_raw.items():
    # Split the key to extract video name and frame number
    parts = key.split('/')
    if 'M' not in key:  # For Ravdess data
        video_name = parts[1]  # Extracts '02-01-01-01-02-02-16'
        frame_number = int(parts[2].split('.')[0])  # Extracts frame number as an integer (e.g., 1)
    else:  # for MEAD
        video_name = parts[0] + "__" + parts[2] + "__" + parts[3] + "__" + parts[4]
        frame_number = int(parts[-1].split(".")[0].split("_")[-1])

    # The stored value is used as the cluster name as-is. The previous try/except ran the
    # same statements in both branches, so it could not recover from any error and would
    # only have appended the frame twice before failing; it has been removed.
    cluster_name = value
    frames_to_clusters_indices.setdefault(video_name, []).append((frame_number, cluster_name))
    # Number clusters in first-seen order: 0, 1, 2, ...
    clusters_indices.setdefault(cluster_name, len(clusters_indices))

# Stack the cluster representatives in index order so that row i belongs to the cluster
# whose index is i.
# NOTE(review): assumes every clusters_data entry contributes exactly one row - confirm.
clusters_descriptors = np.vstack([clusters_data[name] for name in clusters_indices])

# frames_data: key is video name, value is the list of cluster ids of its frames,
# ordered by frame number.
frames_data = {}
for video_name, frames in frames_to_clusters_indices.items():
    sorted_frames = sorted(frames, key=lambda x: x[0])
    frames_data[video_name] = [clusters_indices[name] for _, name in sorted_frames]

# Key is a video name; value is an array holding, for every frame of the video, the
# representative of the cluster that frame is mapped to.
clusters_rep_as_ground_truth_for_a_video = {}
for video, frames in frames_data.items():
    # Integer-array indexing selects one representative row per frame.
    clusters_rep_as_ground_truth_for_a_video[video] = clusters_descriptors[frames]

In [None]:
def compute_avg_run_length(data):
    """Return the average length of runs of equal consecutive values.

    A run is a maximal stretch of consecutive equal elements within one sequence.
    Runs are pooled over every sequence in ``data`` and the result is
    (total number of elements) / (total number of runs).

    Args:
        data: Mapping whose values are sequences supporting ``len()`` and integer
            indexing (lists, tuples, numpy arrays, ...). Empty sequences and
            ``None`` values are skipped.

    Returns:
        The average run length as a float, or 0 when there are no runs at all.
    """
    total_run_length = 0  # Sum of all run lengths (= number of elements seen)
    total_runs = 0        # Total number of runs

    for values in data.values():
        # Test the length explicitly instead of truthiness: `not values` raises for
        # multi-element numpy arrays and is True for an array such as np.array([0]).
        if values is None or len(values) == 0:
            continue

        # A run ends wherever an element differs from its predecessor, so the number
        # of runs is the number of such boundaries plus one.
        boundaries = sum(
            1 for i in range(1, len(values)) if not (values[i] == values[i - 1])
        )
        total_run_length += len(values)
        total_runs += boundaries + 1

    # Compute average run length
    avg_run_length = total_run_length / total_runs if total_runs > 0 else 0
    return avg_run_length

# Average run length over the per-video cluster-id sequences in `frames_data`.
# NOTE(review): the level in the message comes from the manually set `cluster_level`,
# not from the data - confirm it matches the cluster files that were loaded.
avg_run_length = compute_avg_run_length(frames_data)
print(f"Average Run Length at level {cluster_level} is {avg_run_length}")

Average Run Length at level 1 is 14.525321942333061


In [None]:
# Results pasted from earlier runs, kept for reference; the strings are not used by any code.
# NOTE(review): the two blocks report different values for the same levels, and the
# notebook does not record which inputs produced each set - confirm before quoting them.
'''
Average Run Length at level 1 is 31.481089176310416
Average Run Length at level 2 is 14.525321942333061
Average Run Length at level 3 is 8.777810677842565
Average Run Length at level 4 is 5.699609556017866
'''




'''
Average Run Length at level 4 is 6.2873838255847465
Average Run Length at level 3 is 9.571988011657172
Average Run Length at level 2 is 15.513283954593028
Average Run Length at level 1 is 31.475089839921594
'''

In [None]:
# Statistics on the cluster of each video's FIRST frame.
frames_count = {}          # first-frame cluster id -> number of videos starting in it
first_frame_clusters = {}  # cluster ids seen as a first frame in any video
only_mead = {}             # ... among videos whose name contains 'M' (MEAD)
only_ravdess = {}          # ... among all other videos (Ravdess)
for key, val in frames_data.items():
    opening_cluster = val[0]
    first_frame_clusters[opening_cluster] = 1
    per_dataset = only_mead if 'M' in key else only_ravdess
    per_dataset[opening_cluster] = 1
    frames_count[opening_cluster] = frames_count.get(opening_cluster, 0) + 1

# Number of distinct first-frame clusters: overall, MEAD only, Ravdess only.
print(len(first_frame_clusters))
print(len(only_mead))
print(len(only_ravdess))
# Mean number of videos per first-frame cluster.
vals = sorted(frames_count.values(), reverse=True)
valsnp = np.array(vals)
print(np.mean(valsnp))


253
239
157
36.687747035573125


In [None]:
# For each cluster id, the number of videos whose first frame is assigned to it.
print(frames_count)

{4: 469, 6: 570, 1: 754, 0: 257, 13: 845, 5: 492, 7: 614, 2: 319, 10: 717, 3: 834, 11: 263, 17: 148, 12: 783, 18: 114, 20: 188, 15: 70, 21: 38, 19: 55, 9: 631, 14: 224, 16: 41, 8: 856}
