Set `autoreload` to execute the change in `.py` files.


In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import _pickle
from pathlib import Path
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from __utils__ import image_processing
from __utils__ import label_processing
from __utils__ import labeling
from __utils__ import loso_preparing
from __utils__ import functions

In [None]:
dataset_dir = "D:/Databases/SAMM_longvideos"
# dataset_dir = "I:/HEH/Databases/SAMM_longvideos"
# dataset_dir = "/data/disk1/heh/databases/SAMM_longvideos"

images_loading = False
image_size = 128
load_cropped_images = False
# expression_type = "mae"  # macro-expression spotting
expression_type = "me"  # micro-expression spotting
debug_preds = True
labeling_function = "pseudo_labeling"
# labeling_function = "original_labeling"
model_names = {
    0: "SOFTNet",
    1: "SOFTNetCBAM",
    2: "ViT-B",
    3: "SL-ViT-B",
    4: "Swin-T",
    5: "Swin-S",
    6: "L-Swin-T",
    7: "S-Swin-T",
    8: "SL-Swin-T",
    9: "SL-Swin-S",
}
model_name = model_names[8]
batch_size = 48
epochs = 25
save_preds = False
preds_stem = (
    f"{expression_type}_"
    + model_name.lower().replace("-", "_")
    + f"_batch_size_{batch_size}"
    + f"_epochs_{epochs}"
    + f"_{labeling_function}"
    + f"_{image_size}"
)
preds_path = Path(dataset_dir, "preds", preds_stem).with_suffix(".pkl")
print(f"preds_path: {preds_path}")

## Load Images


When debug the image processing, the videos_images is from cropped_rawpic, whereas the other variables are from rawpic.


In [4]:
videos_images, subjects, subjects_videos_code = image_processing.load_images(
    dataset_dir,
    images_loading=images_loading,
    image_size=image_size,
    load_cropped_images=load_cropped_images,
)

subject_video: 006_1
subject_video: 006_2
subject_video: 006_3
subject_video: 006_4
subject_video: 006_5
subject_video: 006_6
subject_video: 006_7
subject_video: 007_3
subject_video: 007_4
subject_video: 007_5
subject_video: 007_6
subject_video: 007_7
subject_video: 008_1
subject_video: 008_5
subject_video: 008_6
subject_video: 008_7
subject_video: 009_2
subject_video: 009_3
subject_video: 009_4
subject_video: 009_6
subject_video: 009_7
subject_video: 010_1
subject_video: 010_2
subject_video: 010_3
subject_video: 010_4
subject_video: 010_5
subject_video: 010_6
subject_video: 010_7
subject_video: 011_1
subject_video: 011_2
subject_video: 011_3
subject_video: 011_4
subject_video: 011_5
subject_video: 011_6
subject_video: 011_7
subject_video: 012_3
subject_video: 012_4
subject_video: 012_5
subject_video: 012_6
subject_video: 012_7
subject_video: 013_1
subject_video: 013_2
subject_video: 013_3
subject_video: 013_6
subject_video: 013_7
subject_video: 014_1
subject_video: 014_2
subject_video

In [5]:
print("subjects:", subjects)
print("subjects_videos_code:", subjects_videos_code)

subjects: ['006', '007', '008', '009', '010', '011', '012', '013', '014', '015', '016', '017', '018', '019', '020', '021', '022', '023', '024', '025', '026', '028', '030', '031', '032', '033', '034', '035', '036', '037']
subjects_videos_code: [['1', '2', '3', '4', '5', '6', '7'], ['3', '4', '5', '6', '7'], ['1', '5', '6', '7'], ['2', '3', '4', '6', '7'], ['1', '2', '3', '4', '5', '6', '7'], ['1', '2', '3', '4', '5', '6', '7'], ['3', '4', '5', '6', '7'], ['1', '2', '3', '6', '7'], ['1', '2', '3', '4', '5', '6', '7'], ['1', '3', '5', '6', '7'], ['1', '2', '4', '5', '6', '7'], ['1', '2', '3', '4', '5', '6'], ['1', '2', '3', '4', '5', '6', '7'], ['1', '2', '3', '4', '5', '7'], ['1', '2', '3', '4', '5', '6', '7'], ['3', '7'], ['2', '3', '4', '5', '6'], ['1', '4'], ['2', '3', '5'], ['3', '4', '5', '6'], ['1', '2', '3', '5', '6', '7'], ['4'], ['1', '2', '5'], ['3'], ['2', '3', '4', '5', '6'], ['1', '2', '3', '4', '5', '6', '7'], ['3', '6', '7'], ['1', '2', '3', '4', '5', '6', '7'], ['2', '4',

## Load Excel 


In [7]:
Excel_data = label_processing.load_excel(dataset_dir)
Excel_data.head(5)

Unnamed: 0,subject,Filename,video_code,onset,apex,offset,Duration,expression_type,Action Units,Notes,subject_video_code,subject_code
0,6,006_1_1,1,566,648,743,178,Macro,4(B/C)+7B,,006_1,6
1,6,006_1_2,1,3562,3588,3632,71,Micro - 1/2,4+7,,006_1,6
2,6,006_1_3,1,1912,1948,1988,77,Micro - 1/2,4,While blinking,006_1,6
3,6,006_1_4,1,324,368,403,80,Micro - 1/2,4+7,,006_1,6
4,6,006_1_5,1,3343,3388,3424,82,Micro - 1/2,4+7,,006_1,6


## Load Ground Truth Labels


In [8]:
(
    clean_videos_images,
    clean_subjects_videos_code,
    clean_subjects,
    clean_subjects_videos_ground_truth_labels,
) = label_processing.load_ground_truth_labels(
    dataset_dir,
    expression_type,
    videos_images,
    subjects_videos_code,
    subjects,
    Excel_data,
)

required_videos_index:  [0, 1, 2, 4, 7, 8, 9, 10, 11, 16, 17, 22, 24, 28, 29, 30, 31, 32, 33, 34, 35, 39, 40, 44, 45, 46, 47, 49, 50, 51, 54, 62, 65, 68, 69, 71, 73, 75, 78, 79, 80, 82, 85, 88, 90, 91, 92, 93, 94, 96, 98, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 114, 115, 117, 118, 120, 121, 122, 128, 130, 131, 134, 135, 136, 137, 141, 143, 144]
len(clean_videos_images) = 79


In [9]:
print("len(clean_subjects): ", len(clean_subjects))
print("clean_subjects: ", clean_subjects)
print("len(clean_subjects_videos_code): ", len(clean_subjects_videos_code))
print("clean_subjects_videos_codes: ", clean_subjects_videos_code)
print(
    "len(clean_subjects_videos_ground_truth_labels): ",
    len(clean_subjects_videos_ground_truth_labels),
)
print(
    "clean_subjects_videos_ground_truth_labels: ",
    clean_subjects_videos_ground_truth_labels,
)
total_len = 0
ambiguous_onset = 0
for index, clean_subject_videos_code in enumerate(clean_subjects_videos_code):
    ground_truth_len = 0
    for i in clean_subjects_videos_ground_truth_labels[index]:
        for j in i:
            ground_truth_len += 1
            if j[0] == 0:
                ambiguous_onset += 1
    print(
        f"{index} {clean_subjects[index]}: {clean_subject_videos_code}, ground truth len: {ground_truth_len}"
    )
    total_len += ground_truth_len
print("total len: ", total_len)
print("ambiguous onest count: ", ambiguous_onset)

len(clean_subjects):  29
clean_subjects:  ['006' '007' '009' '010' '011' '012' '013' '014' '015' '016' '017' '018'
 '019' '020' '021' '022' '023' '024' '025' '026' '028' '030' '031' '032'
 '033' '034' '035' '036' '037']
len(clean_subjects_videos_code):  29
clean_subjects_videos_codes:  [['1', '2', '3', '5'], ['3', '4', '5', '6', '7'], ['2', '3'], ['2', '4'], ['1', '2', '3', '4', '5', '6', '7'], ['3', '7'], ['1', '7'], ['1', '2', '3', '5', '6', '7'], ['5'], ['7'], ['3', '6'], ['1', '3', '5', '7'], ['3', '4', '5'], ['1', '4', '7'], ['7'], ['2', '3', '4', '5'], ['1'], ['2'], ['4', '5', '6'], ['1', '2', '3', '5', '6', '7'], ['4'], ['1', '5'], ['3'], ['3', '4', '6'], ['1', '2'], ['3', '7'], ['1', '4', '5', '6', '7'], ['7'], ['3', '4']]
len(clean_subjects_videos_ground_truth_labels):  29
clean_subjects_videos_ground_truth_labels:  [[[[3561, 3631], [1911, 1987], [323, 402], [3342, 3423], [5159, 5258]], [[79, 173]], [[138, 201], [1286, 1355]], [[1135, 1213], [1829, 1925], [4957, 5050]]], [[[24

## Calculate `k`


In [10]:
k = label_processing.calculate_k(clean_subjects_videos_ground_truth_labels)

k (Half of average length of expression) =  37


## Labeling


In [11]:
if debug_preds is False:
    if labeling_function == "pseudo_labeling":
        labels = labeling.get_pseudo_labels(
            clean_videos_images, clean_subjects_videos_ground_truth_labels, k
        )
    elif labeling_function == "original_labeling":
        labels = labeling.get_original_labels(
            clean_videos_images, clean_subjects_videos_ground_truth_labels, k
        )

Total frames: 432201


## Prepare for LOSO


In [18]:
y, groups = loso_preparing.prepare_for_loso(
    labels,
    clean_subjects,
    clean_videos_images,
    clean_subjects_videos_ground_truth_labels,
    k,
)

Frame Index for each subject:-

subject s15 ( group = 0): 0 -> 18470
subject s15 has 7 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  7

subject s16 ( group = 1): 18470 -> 37392
subject s16 has 7 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  14

subject s19 ( group = 2): 37392 -> 43977
subject s19 has 3 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  17

subject s20 ( group = 3): 43977 -> 46233
subject s20 has 1 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  18

subject s21 ( group = 4): 46233 -> 51966
subject s21 has 2 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  20

subject s22 ( group = 5): 51966 -> 62840
subject s22 has 5 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  25

subject s23 ( group = 6): 62840 -> 71496
subject s23 has 3 clean video(s)
sum clean_subject_videos_ground_truth_labels_len:  28

subject s24 ( group = 7): 71496 -> 87084
subject s24 has 5 clean video

## Training


In [None]:
if debug_preds is False:
    from __utils__.training_dev import train

    preds = train(
        dataset_dir,
        clean_subjects,
        y=y,
        expression_type=expression_type,
        model_name=model_name,
        train_or_not=True,
        epochs=epochs,
        batch_size=batch_size,
    )
else:
    with open(preds_path, "rb") as pkl_file:
        preds = _pickle.load(pkl_file)
        pkl_file.close()

In [None]:
if save_preds is True:
    with open(preds_path, "wb") as pkl_file:
        _pickle.dump(preds, pkl_file)
        pkl_file.close()

## Spotting and Evaluation


In [None]:
metric_fn, result_dict = functions.spot_and_evaluate(
    preds,
    clean_subjects_videos_ground_truth_labels,
    clean_videos_images,
    clean_subjects,
    clean_subjects_videos_code,
    k,
    p=0.60,
    show_plot_or_not=False,
)

## Final Evaluation


In [None]:
functions.final_evaluate(metric_fn, result_dict)

| Parameters | Value | Value |
| --- | --- | --- |
| model | 3D-CNN | SOFTNet |
| epochs | | |
| batch_size | | 48 |
| learning_rate | | 0.0005 |
| True Positive | | 38 |
| False Positive | | 303 |
| False Negative | | 121 |
| Precision | | 0.1114 |
| Recall | | 0.3188 | 0.2309 |
| F1-Score | 0.0466 | 0.1520 |


| Parameters | Value | Value |
| --- | --- | --- |
| model | ViT (generator) | Vit (dev) |
| epochs | 20 | 20 |
| batch_size | 48 | 96 |
| learning_rate | 0.0005 | 0.0005 |
| True Positive | | |
| False Positive | | |
| False Negative | | |
| Precision | | |
| Recall | | |
| F1-Score | | |


| Parameters | Value |
| --- | --- | 
| model | SL-ViT |
| epochs | 20 ( m) |
| batch_size | 96 |
| learning_rate | 0.0005 |
| True Positive |  |
| False Positive |  |
| False Negative |  |
| Precision |  |
| Recall |  |
| F1-Score |  |


| Parameters | Value |
| --- | --- |
| model | SL-Swin-T |
| epochs | 25 ( m) |
| batch_size | 32 |
| learning_rate | 0.0005 |
| True Positive | |
| False Positive | |
| False Negative | |
| Precision | |
| Recall | |
| F1-Score | |


## Ablation Study


In [74]:
ablation_dict = functions.ablation_study_p_dev(
    preds,
    clean_subjects_videos_ground_truth_labels,
    clean_videos_images,
    clean_subjects,
    clean_subjects_videos_code,
    k,
)

 p | TP | FP | FN | Precision | Recall | F1-Score
0.01 | 169 | 2906 | 129 | 0.0550 | 0.5671 | 0.1002 |
0.02 | 164 | 2642 | 134 | 0.0584 | 0.5503 | 0.1057 |
0.03 | 162 | 2417 | 136 | 0.0628 | 0.5436 | 0.1126 |
0.04 | 160 | 2200 | 138 | 0.0678 | 0.5369 | 0.1204 |
0.05 | 160 | 2028 | 138 | 0.0731 | 0.5369 | 0.1287 |
0.06 | 157 | 1876 | 141 | 0.0772 | 0.5268 | 0.1347 |
0.07 | 156 | 1744 | 142 | 0.0821 | 0.5235 | 0.1419 |
0.08 | 154 | 1630 | 144 | 0.0863 | 0.5168 | 0.1479 |
0.09 | 154 | 1523 | 144 | 0.0918 | 0.5168 | 0.1559 |
0.10 | 151 | 1432 | 147 | 0.0954 | 0.5067 | 0.1606 |
0.11 | 151 | 1353 | 147 | 0.1004 | 0.5067 | 0.1676 |
0.12 | 148 | 1288 | 150 | 0.1031 | 0.4966 | 0.1707 |
0.13 | 148 | 1218 | 150 | 0.1083 | 0.4966 | 0.1779 |
0.14 | 148 | 1166 | 150 | 0.1126 | 0.4966 | 0.1836 |
0.15 | 145 | 1113 | 153 | 0.1153 | 0.4866 | 0.1864 |
0.16 | 140 | 1062 | 158 | 0.1165 | 0.4698 | 0.1867 |
0.17 | 140 | 1021 | 158 | 0.1206 | 0.4698 | 0.1919 |
0.18 | 139 | 976 | 159 | 0.1247 | 0.4664 | 0.1967