In [162]:
import pandas as pd
import os
import os.path as osp
from glob import glob
import sys

# Add the parent directory to the Python path
sys.path.insert(0, '..')

import torch

In [2]:
path_to_root_folder = '/videos/mpi_data/2Itzik/MPIIGroupInteraction'

labels_folder = 'multimediate23-main/bodily_behaviour/sample_lists'
train_data_path = osp.join(path_to_root_folder,labels_folder,'train_samples.csv')
val_data_path = osp.join(path_to_root_folder,labels_folder,'val_samples.csv')

train_files_folder = osp.join(path_to_root_folder, 'clips_train')
val_files_folder = osp.join(path_to_root_folder, 'clips_val')

In [3]:
df_train_raw = pd.read_csv(train_data_path)
df_val_raw = pd.read_csv(val_data_path)

In [4]:
df_train_raw.head()

Unnamed: 0,sample_id,rec_no,subject_pos,start_time,end_time,Settle,Legs crossed,Groom,Hand-mouth,Fold arms,Leg movement,Scratch,Gesture,Hand-face,Adjusting clothing,Fumble,Shrug,Stretching,Smearing hands
0,34663,24,3,299533.3333,301666.6667,1,1,0,0,0,0,0,0,0,0,0,0,0,0
1,15218,14,3,903266.6667,905400.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,643,7,3,205666.6667,207800.0,0,0,0,0,0,0,0,1,0,0,0,0,1,0
3,21749,17,3,807266.6667,809400.0,0,0,0,0,0,0,0,1,0,0,0,1,0,0
4,960,7,3,881933.3333,884066.6667,0,0,0,1,0,0,0,0,0,0,0,0,0,0


In [5]:
# Structure of the final dataset: filename, labels
def get_video_list(files_folder):
    all_files = glob(osp.join(files_folder, '*.mp4'))
    base_names = [osp.basename(f) for f in all_files]
    sample_id = [int(osp.splitext(f)[0].split('-')[0]) for f in base_names]
    view = [(osp.splitext(f)[0].split('-video')[-1]) for f in base_names]
    df = pd.DataFrame(zip(all_files, base_names, sample_id, view), columns=['filenames', 'base_name', 'sample_id', 'view'])
    return df
df_train_files = get_video_list(files_folder=train_files_folder)
df_val_files = get_video_list(files_folder=val_files_folder)
# file_sample_ids.sort()
# file_sample_ids
# df_train_files

In [6]:
df_train_files.loc[df_train_files['view'].isin(['1','2'])].head()

Unnamed: 0,filenames,base_name,sample_id,view
1,/videos/mpi_data/2Itzik/MPIIGroupInteraction/c...,30811-video1.mp4,30811,1
3,/videos/mpi_data/2Itzik/MPIIGroupInteraction/c...,37137-video1.mp4,37137,1
4,/videos/mpi_data/2Itzik/MPIIGroupInteraction/c...,20089-video1.mp4,20089,1
5,/videos/mpi_data/2Itzik/MPIIGroupInteraction/c...,34758-video1.mp4,34758,1
6,/videos/mpi_data/2Itzik/MPIIGroupInteraction/c...,27883-video2.mp4,27883,2


In [7]:
def get_left_pos(pos, max_n):
    # Increment pos and circle back to 1 if it becomes 5
    left_pos = (pos + 1) % (max_n+1)
    if left_pos == 0:
        left_pos = 1
    return left_pos

def get_right_pos(pos, max_n):
    # Decrement pos and circle back to 4 if it becomes 0
    right_pos = (pos - 1) % (max_n+1)
    if right_pos == 0:
        right_pos = max_n
    return right_pos

In [10]:
def get_corresponding_views(df):
    f1_view = []
    f2_view = []
    left_view = []
    right_view = []
    for v, r in df.iterrows():
        sample = r.sample_id
        rec_no = r.rec_no
        subject_pos = r.subject_pos
        start_time = r.start_time

        f1_view.append(f'{int(sample):05d}-video1.mp4')
        f2_view.append(f'{int(sample):05d}-video2.mp4')

        # find left sample id
        left_subject_pos = get_left_pos(subject_pos, 4)
        # print(f'{subject_pos=}')
        # print(f'{left_subject_pos=}')
        try:
            left_sample = int(df.loc[(df['subject_pos'] == left_subject_pos)&(df['rec_no']==rec_no)&(df['start_time']==start_time)]['sample_id'].values[0])
            
            left_file_name = f'{left_sample:05d}-video2.mp4'
        except:
            left_file_name = None
        left_view.append(left_file_name)

        # find right sample id
        right_subject_pos = get_right_pos(subject_pos, 4)
        try:
            right_sample = int(df.loc[(df.subject_pos == right_subject_pos)&(df.rec_no==rec_no)&(df.start_time==start_time)].sample_id.values[0])
            right_file_name = f'{right_sample:05d}-video1.mp4'
        except:
            right_file_name = None      
        right_view.append(right_file_name)

    df['front1'] = f1_view
    df['front2'] = f2_view
    df['right'] = right_view
    df['left'] = left_view

    return df

In [11]:
df_train_view = get_corresponding_views(df_train_raw)
df_val_view = get_corresponding_views(df_val_raw)


In [12]:
# df_train.loc[df_train['right'].isna()].head(10)

In [13]:
def find_missing_subjects(df):
    # Define a function to check for missing values
    def find_missing(group):
        missing_subjects = [pos for pos in range(1, 5) if pos not in group['subject_pos'].values]
        return missing_subjects if missing_subjects else None
    
    # Group by "rec_no" and apply the function to find missing subjects
    missing_subjects_per_rec = df.groupby('rec_no').apply(find_missing)
    
    # Create a new DataFrame with the information
    missing_subjects_df = pd.DataFrame(missing_subjects_per_rec, columns=['missing_subjects']).reset_index()
    
    return missing_subjects_df

result_df_val = find_missing_subjects(df_val_raw)
result_df_val['dataset'] = 'val'

result_df_train = find_missing_subjects(df_train_raw)
result_df_train['dataset'] = 'train'

result_df = pd.concat([result_df_val, result_df_train])
result_df.sort_values(by=['dataset','rec_no'], inplace=True)

# result_df

In [14]:
def melt_df(df):
    # Unpivot the columns front1, front2, right, and left
    unstacked_df = pd.melt(df, id_vars=df.columns.difference(['front1', 'front2', 'right', 'left']),
                        value_vars=['front1', 'front2', 'right', 'left'], var_name='view', value_name='filenames')

    # Filter out rows where filename is None
    unstacked_df = unstacked_df.dropna(subset=['filenames'])
    return unstacked_df

df_train_melted = melt_df(df_train_view)
df_val_melted = melt_df(df_val_view)


In [15]:
df_val_melted.head()

Unnamed: 0,Adjusting clothing,Fold arms,Fumble,Gesture,Groom,Hand-face,Hand-mouth,Leg movement,Legs crossed,Scratch,...,Shrug,Smearing hands,Stretching,end_time,rec_no,sample_id,start_time,subject_pos,view,filenames
0,0,0,0,0,0,0,0,0,0,0,...,0,0,1,990733.3,9,4298,988600.0,1,front1,04298-video1.mp4
1,0,0,0,1,0,0,0,0,0,0,...,0,0,0,60600.0,28,42222,58466.67,4,front1,42222-video1.mp4
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,280333.3,9,5609,278200.0,4,front1,05609-video1.mp4
3,0,0,0,0,0,1,0,1,1,0,...,0,0,0,1133667.0,27,41081,1131533.0,4,front1,41081-video1.mp4
4,0,0,1,0,0,0,0,0,1,0,...,0,1,0,822200.0,10,6959,820066.7,2,front1,06959-video1.mp4


In [16]:
feature_names_remap = {
    'Adjusting clothing': 'Adjusting_clothing',
    'Fold arms': 'Fold_arms',
    'Fumble': 'Fumble',
    'Gesture': 'Gesture',
    'Groom': 'Groom',
    'Hand-face': 'Hand_face',
    'Hand-mouth': 'Hand_mouth',
    'Leg movement': 'Leg_movement',
    'Legs crossed': 'Legs_crossed',
    'Scratch': 'Scratch',
    'Settle': 'Settle',
    'Shrug': 'Shrug',
    'Smearing hands': 'Smearing_hands',
    'Stretching': 'Stretching',
}

feature_names = feature_names_remap.values()


In [67]:
def get_activities(labels, actions):
    found_actions = []
    for label, action in zip(labels, actions):
        if action == 1:
            found_actions.append(label)
    if not found_actions:
        return ["idle"]
    else:
        return found_actions

In [17]:
def get_final_df(df_in):

    df = pd.DataFrame()
    df_in.rename(columns=feature_names_remap, inplace=True)
    labels_list = df_in[feature_names].apply(lambda x: [val for val in x if val != ''], axis=1)
    metadata_dict = df_in[['rec_no', 'subject_pos', 'start_time', 'end_time', 'filenames', 'view', 'sample_id']].apply(lambda x: x.dropna().to_dict(), axis=1)

    df['filenames'] = df_in['filenames']
    df['labels'] = labels_list
    df['view'] = df_in['view']
    df['metadata'] = metadata_dict
    return df

In [18]:
df_train = get_final_df(df_train_melted)
df_val = get_final_df(df_val_melted)

In [19]:
df_val.head()

Unnamed: 0,filenames,labels,view,metadata
0,04298-video1.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]",front1,"{'rec_no': 9, 'subject_pos': 1, 'start_time': ..."
1,42222-video1.mp4,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",front1,"{'rec_no': 28, 'subject_pos': 4, 'start_time':..."
2,05609-video1.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",front1,"{'rec_no': 9, 'subject_pos': 4, 'start_time': ..."
3,41081-video1.mp4,"[0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0]",front1,"{'rec_no': 27, 'subject_pos': 4, 'start_time':..."
4,06959-video1.mp4,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]",front1,"{'rec_no': 10, 'subject_pos': 2, 'start_time':..."


In [20]:
# ensure that all files exist
train_existing_filenames = set([a for a in df_train_files.base_name.values if 'video.' not in a]) # ignore cropped videos
val_existing_filenames = set([a for a in df_val_files.base_name.values if 'video.' not in a])

labeled_train_filenames = set(df_train['filenames'].values)
labeled_val_filenames = set(df_val['filenames'].values)

In [21]:
print(f'{len(train_existing_filenames)=}')
print(f'{len(labeled_train_filenames)=}')

print(f'{len(val_existing_filenames)=}')
print(f'{len(labeled_val_filenames)=}')

len(train_existing_filenames)=61346
len(labeled_train_filenames)=62442
len(val_existing_filenames)=22992
len(labeled_val_filenames)=22992


In [22]:
train_missing1 = train_existing_filenames-labeled_train_filenames
train_misssing_files = labeled_train_filenames-train_existing_filenames


In [23]:
missing_fname = list(train_misssing_files)[1].split('-')[0]
[a for a in train_existing_filenames if missing_fname in a]

['10283-video2.mp4']

In [24]:
# import zipfile

# def count_files_in_zip(zip_filename):
#     with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
#         file_list = zip_ref.namelist()
#         num_files = len(file_list)
#     return num_files, file_list

# train_zip_filename = osp.join(path_to_root_folder,'zipfiles', 'clips_train.zip')
# num_zip_train,zip_train_file_list = count_files_in_zip(train_zip_filename)
# print(f'{num_zip_train=}')

In [25]:
# fnames = [a.split('/')[-1] for a in zip_train_file_list]
# missing_fname = list(train_misssing_files)[1].split('-')[0]

# [a for a in fnames if missing_fname in a]

In [26]:
# remove missing files from the table
train_include = train_existing_filenames.intersection(labeled_train_filenames)
df_train_clean = df_train.loc[df_train['filenames'].isin(train_include)]

val_include = val_existing_filenames.intersection(labeled_val_filenames)
df_val_clean = df_val.loc[df_val['filenames'].isin(val_include)]
df_val_clean.head(100)


Unnamed: 0,filenames,labels,view,metadata
0,04298-video1.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]",front1,"{'rec_no': 9, 'subject_pos': 1, 'start_time': ..."
1,42222-video1.mp4,"[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]",front1,"{'rec_no': 28, 'subject_pos': 4, 'start_time':..."
2,05609-video1.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",front1,"{'rec_no': 9, 'subject_pos': 4, 'start_time': ..."
3,41081-video1.mp4,"[0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0]",front1,"{'rec_no': 27, 'subject_pos': 4, 'start_time':..."
4,06959-video1.mp4,"[0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0]",front1,"{'rec_no': 10, 'subject_pos': 2, 'start_time':..."
...,...,...,...,...
95,03314-video1.mp4,"[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",front1,"{'rec_no': 8, 'subject_pos': 4, 'start_time': ..."
96,04670-video1.mp4,"[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",front1,"{'rec_no': 9, 'subject_pos': 2, 'start_time': ..."
97,42002-video1.mp4,"[0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0]",front1,"{'rec_no': 28, 'subject_pos': 3, 'start_time':..."
98,04031-video1.mp4,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]",front1,"{'rec_no': 9, 'subject_pos': 1, 'start_time': ..."


In [27]:
df_val_clean.to_csv(osp.join('..','mpigroup','val.csv'))
df_train_clean.to_csv(osp.join('..','mpigroup','train.csv'))


In [28]:
df_val_clean.view.unique()

array(['front1', 'front2', 'right', 'left'], dtype=object)

In [170]:
from torch.utils.data import DataLoader
import dyadic_communication
from importlib import reload
import debug_model
import run_videomae_vis_v2

reload(dyadic_communication)
reload(debug_model)
reload(run_videomae_vis_v2)

from argparse import Namespace
import mpigroup.const as const


In [132]:
reload(const)
cropping_map = const.cropping_map
cropping_map

{'front1': 'tr', 'front2': 'tl', 'right': 'bl', 'left': 'br'}

In [133]:
data_path='/home/ubuntu/data_local/MPIIGroupInteraction/clips_val/'
args = debug_model.args
args.anno_path = osp.join('..','mpigroup','val.csv')
args.data_path = data_path
args.mode = 'validation'
args.test_mode = False
dataset = dyadic_communication.DyadicvideoClsDataset(
          anno_path=args.anno_path,
            data_path=args.data_path,
            mode=args.mode,
            clip_len=1,
            num_segment=args.num_frames,
            test_num_segment=args.test_num_segment,
            test_num_crop=args.test_num_crop,
            num_crop=1 if not args.test_mode else 3,
            keep_aspect_ratio=True,
            crop_size=args.input_size,
            short_side_size=args.short_side_size,
            new_height=224,
            new_width=224,
            view_crop_mapping=cropping_map,
            corner_crop_size=1000,
            args=args)



In [134]:

data = DataLoader(dataset=dataset, batch_size=10, shuffle=True)
iterdata = iter(data)

In [135]:
d = next(iterdata)

In [136]:
d[0].shape

torch.Size([10, 3, 16, 224, 224])

In [137]:
d[2:]

[('39215-video2',
  '04041-video2',
  '38408-video1',
  '07490-video2',
  '01791-video2',
  '42234-video2',
  '04891-video1',
  '03696-video2',
  '41446-video2',
  '07116-video1'),
 {'rec_no': tensor([26,  9, 26, 10,  8, 28,  9,  8, 28, 10]),
  'subject_pos': tensor([4, 4, 3, 3, 4, 4, 2, 3, 2, 3]),
  'start_time': tensor([ 657933.3333,  440333.3333,  105400.0000,  783800.0000,  316600.0000,
            84066.6667, 1084600.0000,  873400.0000,  741133.3333, 1155000.0000],
         dtype=torch.float64),
  'end_time': tensor([ 660066.6667,  442466.6667,  107533.3333,  785933.3333,  318733.3333,
            86200.0000, 1086733.3330,  875533.3333,  743266.6667, 1157133.3330],
         dtype=torch.float64),
  'filenames': ['39215-video2.mp4',
   '04041-video2.mp4',
   '38408-video1.mp4',
   '07490-video2.mp4',
   '01791-video2.mp4',
   '42234-video2.mp4',
   '04891-video1.mp4',
   '03696-video2.mp4',
   '41446-video2.mp4',
   '07116-video1.mp4'],
  'view': ['front2',
   'left',
   'front1',
 

In [138]:
# [a for a in dataset.view_list if 'front' in a]

In [139]:
# from run_videomae_vis_v2 import save_video

In [140]:
d[1]

tensor([[0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0],
        [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [141]:
actions = d[1][1]
labels = feature_names
ret = get_activities(labels, actions)
ret 

['Fumble']

In [142]:
reload(run_videomae_vis_v2)

vids = d[0]
labels = d[1]
views = d[3]['view']
fnames = d[3]['filenames']

for vid,label, view, fname in zip(vids,labels, views, fnames):
       
    ret = get_activities(feature_names, label)

    word_list = [f'View: {view}']+['Labels:']+[','.join(ret)]
    txt = '\n'.join(word_list)
    vid = run_videomae_vis_v2.unnormalize_frames(vid)
    run_videomae_vis_v2.save_video(vid, osp.join('..','mpigroup','testing_'+fname), txt=txt)
    


In [143]:
txt_list

['left', 'idle']

In [39]:
# video_path = osp.join(path_to_root_folder,'clips_val','05942-video1.mp4')
# video_path

In [145]:
model_dict = {
    'experiment':'MPIG_densepose_dual_2',
    'description':'MPIG_densepose_dual - videoMAE-K400 , same as K400 but then was finetuned on MPIGroupInteractions dataset (train set) for 100 epochs, with denspose as additional decoding target',
    'checkpoint_path':'/videos/pretrained/MPIIGroupInteraction/k400_finetune_videomae_pretrain_dual_2_patch16_224_frame_16x4_tube_mask_ratio_0.9_e100/checkpoint-99.pth',
    'model_name':'pretrain_videomae_base_patch16_224_densepose_dual',
}

In [149]:
image_batch = d[0]
save_folder = osp.join('..','mpigroup','videos')
model_path = model_dict['checkpoint_path']
model_name = model_dict['model_name']

args = Namespace(
        image_batch=image_batch,
        save_path=save_folder, # list
        model_path=model_path, 
        mask_type='tube',
        num_frames=16,
        sampling_rate=4,
        decoder_depth=4,
        input_size=224,
        device='cuda:0',
        imagenet_default_mean_and_std=True,
        mask_ratio=0,
        model=model_name,
        densepose=True,
        drop_path=0.0)



In [163]:
model = run_videomae_vis_v2.get_model(args=args) 

checkpoint = torch.load(args.model_path, map_location='cpu')
model.load_state_dict(checkpoint['model'])
model.eval()

# outputs = model(image_batch)   

Creating model: pretrain_videomae_base_patch16_224_densepose_dual


PretrainVisionTransformerMultiOutout(
  (encoder): PretrainVisionTransformerEncoder(
    (patch_embed): PatchEmbed(
      (proj): Conv3d(3, 768, kernel_size=(2, 16, 16), stride=(2, 16, 16))
    )
    (blocks): ModuleList(
      (0-11): 12 x Block(
        (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (attn): Attention(
          (qkv): Linear(in_features=768, out_features=2304, bias=False)
          (attn_drop): Dropout(p=0.0, inplace=False)
          (proj): Linear(in_features=768, out_features=768, bias=True)
          (proj_drop): Dropout(p=0.0, inplace=False)
        )
        (drop_path): Identity()
        (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
        (mlp): Mlp(
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (act): GELU(approximate='none')
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (drop): Dropout(p=0.0, inplace=False)
        )
      )
    )
    (norm): Laye

In [164]:
outputs = model(image_batch) 

In [173]:
reload(run_videomae_vis_v2)

rec_videos_patches = outputs[0]
rec_densepose_patches = outputs[1]
patch_size = model.encoder.patch_embed.patch_size
unnorm_videos = run_videomae_vis_v2.unnormalize_frames(img=image_batch)
_, rec_videos, _ = run_videomae_vis_v2.reconstruct_video_from_patches(
    ori_img=unnorm_videos, patch_size=patch_size, bool_masked_pos=None, outputs=rec_videos_patches, frame_id_list=None)
_, rec_densepose, _ = run_videomae_vis_v2.reconstruct_video_from_patches(
    ori_img=unnorm_videos, patch_size=patch_size, bool_masked_pos=None, 
    outputs=rec_densepose_patches, frame_id_list=None,normalize_with_orig=False)

In [174]:
# rec_densepose[0]

In [177]:
import os
vids = d[0]
labels = d[1]
views = d[3]['view']
fnames = d[3]['filenames']

for ori_vid,rec_vid,rec_dense,label, view, fname in zip(vids,rec_videos,rec_densepose,labels, views, fnames):
       
    ret = get_activities(feature_names, label)

    word_list = [f'View: {view}']+['Labels:']+[','.join(ret)]
    txt = '\n'.join(word_list)

    ori_vid = run_videomae_vis_v2.unnormalize_frames(ori_vid)
    rec_dense = run_videomae_vis_v2.unnormalize_frames(rec_dense)
    # rec_vid = run_videomae_vis_v2.unnormalize_frames(rec_vid)
    

    save_folder = osp.join('..','mpigroup','videos',fname.replace('.mp4',''))
    os.makedirs(save_folder, exist_ok=True)
    
    run_videomae_vis_v2.save_video(ori_vid, osp.join(save_folder,'ori_vid_'+fname), txt=txt)

    # run_videomae_vis_v2.save_video(ori_dense, osp.join(save_folder,'ori_dense_'+fname), txt=txt)

    run_videomae_vis_v2.save_video(rec_vid, osp.join(save_folder,'rec_vid_'+fname), txt=txt)

    run_videomae_vis_v2.save_video(rec_dense, osp.join(save_folder,'rec_dense_'+fname), txt=txt)
    

In [194]:
# labels analysis
df = df_train_melted
df[feature_names].sum()

Adjusting_clothing     1709
Fold_arms              9332
Fumble                18121
Gesture               21285
Groom                  1618
Hand_face              4142
Hand_mouth             3191
Leg_movement           9405
Legs_crossed          59380
Scratch                4236
Settle                 2436
Shrug                   632
Smearing_hands         1682
Stretching              213
dtype: int64

In [185]:
distribution = pd.DataFrame(df[feature_names].sum())
distribution

Unnamed: 0,0
Adjusting_clothing,1709
Fold_arms,9332
Fumble,18121
Gesture,21285
Groom,1618
Hand_face,4142
Hand_mouth,3191
Leg_movement,9405
Legs_crossed,59380
Scratch,4236


In [187]:
from sklearn.utils.class_weight import compute_class_weight
import numpy as np

In [191]:
import numpy as np
from sklearn.utils.class_weight import compute_class_weight
from sklearn.preprocessing import MultiLabelBinarizer


def generate_class_weights(class_series, multi_class=True, one_hot_encoded=False):
  """
  Method to generate class weights given a set of multi-class or multi-label labels, both one-hot-encoded or not.
  Some examples of different formats of class_series and their outputs are:
    - generate_class_weights(['mango', 'lemon', 'banana', 'mango'], multi_class=True, one_hot_encoded=False)
    {'banana': 1.3333333333333333, 'lemon': 1.3333333333333333, 'mango': 0.6666666666666666}
    - generate_class_weights([[1, 0, 0], [0, 1, 0], [0, 0, 1], [1, 0, 0]], multi_class=True, one_hot_encoded=True)
    {0: 0.6666666666666666, 1: 1.3333333333333333, 2: 1.3333333333333333}
    - generate_class_weights([['mango', 'lemon'], ['mango'], ['lemon', 'banana'], ['lemon']], multi_class=False, one_hot_encoded=False)
    {'banana': 1.3333333333333333, 'lemon': 0.4444444444444444, 'mango': 0.6666666666666666}
    - generate_class_weights([[0, 1, 1], [0, 0, 1], [1, 1, 0], [0, 1, 0]], multi_class=False, one_hot_encoded=True)
    {0: 1.3333333333333333, 1: 0.4444444444444444, 2: 0.6666666666666666}
  The output is a dictionary in the format { class_label: class_weight }. In case the input is one hot encoded, the class_label would be index
  of appareance of the label when the dataset was processed. 
  In multi_class this is np.unique(class_series) and in multi-label np.unique(np.concatenate(class_series)).
  Author: Angel Igareta (angel@igareta.com)
  """
  if multi_class:
    # If class is one hot encoded, transform to categorical labels to use compute_class_weight   
    if one_hot_encoded:
      class_series = np.argmax(class_series, axis=1)
  
    # Compute class weights with sklearn method
    class_labels = np.unique(class_series)
    class_weights = compute_class_weight(class_weight='balanced', classes=class_labels, y=class_series)
    return dict(zip(class_labels, class_weights))
  else:
    # It is neccessary that the multi-label values are one-hot encoded
    mlb = None
    if not one_hot_encoded:
      mlb = MultiLabelBinarizer()
      class_series = mlb.fit_transform(class_series)

    n_samples = len(class_series)
    n_classes = len(class_series[0])

    # Count each class frequency
    class_count = [0] * n_classes
    for classes in class_series:
        for index in range(n_classes):
            if classes[index] != 0:
                class_count[index] += 1
    
    # Compute class weights using balanced method
    class_weights = [n_samples / (n_classes * freq) if freq > 0 else 1 for freq in class_count]
    class_labels = range(len(class_weights)) if mlb is None else mlb.classes_
    return dict(zip(class_labels, class_weights))

In [203]:
def get_class_weights(df,feature_names, alpha=10, beta=2):
    class_weights = {}
    positive_weights = {}
    negative_weights = {}
    # N = len(df)
    n_features = len(feature_names)

    N = np.sum(df[feature_names].to_numpy())
    for label in feature_names:
        if label in df.columns:
            positive_weights[label] = np.log((N+n_features) /(alpha * (sum(df[label] == 1))+1)*beta)
            negative_weights[label] = np.log((N+n_features) /(alpha * (sum(df[label] == 0))+1)*beta)
        else:
            positive_weights[label] = 0
            negative_weights[label] = 0


            
    # class_weights['positive_weights'] = pd.DataFrame.from_dict(positive_weights)
    # class_weights['negative_weights'] = pd.DataFrame.from_dict(negative_weights)
    class_weights = pd.DataFrame(zip(positive_weights.keys(),positive_weights.values(), negative_weights.values()),columns=['class','positive_weights','negative_weights'])        
    class_weights['method'] = 'inv'
    return class_weights


In [208]:
df = df_train_melted

class_weights = get_class_weights(df,feature_names, 1, 1)
class_weights = class_weights.drop('method',axis=1)
class_weights


Unnamed: 0,class,positive_weights,negative_weights
0,Adjusting_clothing,4.386374,0.173476
1,Fold_arms,2.689311,0.241746
2,Fumble,2.025741,0.326718
3,Gesture,1.864818,0.359165
4,Groom,4.441059,0.172688
5,Hand_face,3.501447,0.194763
6,Hand_mouth,3.76222,0.186388
7,Leg_movement,2.681519,0.242423
8,Legs_crossed,0.838893,0.865135
9,Scratch,3.479012,0.195594


In [209]:
# class_series = df[feature_names].values

# weights_dict_nums = generate_class_weights(class_series, multi_class=False, one_hot_encoded=True)
# weights_dict_labels = pd.DataFrame({t:v for t, v in zip(feature_names, weights_dict_nums.values())})
# weights_dict_labels

In [211]:
save_folder = '/home/ubuntu/efs/videoMAE/scripts/MPIIGroupInteraction/videomae_vit_base_patch16_224_kinetic_400_densepose_dual/dataset'

os.makedirs(save_folder, exist_ok=True)

df_train_clean.to_csv(osp.join(save_folder, 'train.csv'))
# df_test.to_csv(osp.join(save_folder, 'test.csv'))
df_val_clean.to_csv(osp.join(save_folder, 'val.csv'))
class_weights.to_csv(osp.join(save_folder, 'weights.csv'))

In [212]:
import json
wrapped_data = {'data': class_weights.to_dict(orient='records')}

# Save wrapped data as JSON
with open(osp.join(save_folder, 'weights.json'), 'w') as json_file:
    json.dump(wrapped_data, json_file, indent=4)

In [213]:
len(class_weights)

14