## Video Selection

Choose DAD videoclips based on Valence and Arousal scores

Created: Nov 18, 2025 <br>
Last modified: Nov 24, 2025

In [1]:
# Imports
import pandas as pd
import itertools

In [74]:
# Load data and round scores for sorting
df = pd.read_csv(r'C:\Users\aruizdargence\Downloads\LIRIS-ACCEDE-annotations\LIRIS-ACCEDE-annotations\annotations/ACCEDEranking.txt', sep='\t')

df["valenceValue_rounded"] = df["valenceValue"].round(2)
df["arousalValue_rounded"] = df["arousalValue"].round(2)

df

Unnamed: 0,id,name,valenceRank,arousalRank,valenceValue,arousalValue,valenceVariance,arousalVariance,valenceValue_rounded,arousalValue_rounded
0,0,ACCEDE00000.mp4,3755,787,2.764340,1.366938,0.118453,0.148906,2.76,1.37
1,1,ACCEDE00001.mp4,6711,6428,3.260093,2.792376,0.104211,0.154526,3.26,2.79
2,2,ACCEDE00002.mp4,5043,3591,3.047528,1.840160,0.109841,0.149310,3.05,1.84
3,3,ACCEDE00003.mp4,7584,5615,3.338461,2.470821,0.100953,0.164621,3.34,2.47
4,4,ACCEDE00004.mp4,5014,4463,3.042496,2.082151,0.110000,0.163321,3.04,2.08
...,...,...,...,...,...,...,...,...,...,...
9795,9795,ACCEDE09795.mp4,8315,3236,3.406446,1.754529,0.101927,0.141201,3.41,1.75
9796,9796,ACCEDE09796.mp4,9784,1775,3.591728,1.480602,0.163026,0.116369,3.59,1.48
9797,9797,ACCEDE09797.mp4,9729,1560,3.583121,1.450951,0.158730,0.118500,3.58,1.45
9798,9798,ACCEDE09798.mp4,9605,100,3.564251,1.321943,0.149619,0.203450,3.56,1.32


In [75]:
# create dictionary with 15 videos for each affect
affects = {
    "highV_highA" : [False, False],
    "highV_lowA" : [False, True],
    "lowV_highA" : [True, False],
    "lowV_lowA" : [True, True]
}

rows = []
minv = df.sort_values(by="valenceValue_rounded", ascending=True)[:800]
maxv = df.sort_values(by="valenceValue_rounded", ascending=False)[:800]

for label, (v_order, a_order) in affects.items():
    if v_order:
        subset = (minv.sort_values(by="arousalValue_rounded", ascending=a_order).head(200)[["name", "valenceValue_rounded", "arousalValue_rounded"]])
        # currently selecting 105 for pair selection but total video list should be 140 (420 videos total) to account for trimming

    else:
        subset = (maxv.sort_values(by="arousalValue_rounded", ascending=a_order).head(200)[["name", "valenceValue_rounded", "arousalValue_rounded"]])
    
    subset["path"] = "d:/optilab/synchrony/data/video"+ subset["name"]
    subset["name"] = subset["name"].str.replace(".mp4", "")
    subset ["affect"] = label

    rows.append(subset)

video_df = pd.concat(rows, ignore_index=True)
video_df

Unnamed: 0,name,valenceValue_rounded,arousalValue_rounded,path,affect
0,ACCEDE06618,3.59,4.49,d:/optilab/synchrony/data/videoACCEDE06618.mp4,highV_highA
1,ACCEDE00847,3.49,4.40,d:/optilab/synchrony/data/videoACCEDE00847.mp4,highV_highA
2,ACCEDE09013,3.52,4.40,d:/optilab/synchrony/data/videoACCEDE09013.mp4,highV_highA
3,ACCEDE07643,3.56,4.40,d:/optilab/synchrony/data/videoACCEDE07643.mp4,highV_highA
4,ACCEDE01212,3.59,4.36,d:/optilab/synchrony/data/videoACCEDE01212.mp4,highV_highA
...,...,...,...,...,...
795,ACCEDE05106,1.43,2.15,d:/optilab/synchrony/data/videoACCEDE05106.mp4,lowV_lowA
796,ACCEDE01926,1.55,2.15,d:/optilab/synchrony/data/videoACCEDE01926.mp4,lowV_lowA
797,ACCEDE04486,1.65,2.16,d:/optilab/synchrony/data/videoACCEDE04486.mp4,lowV_lowA
798,ACCEDE04703,1.56,2.18,d:/optilab/synchrony/data/videoACCEDE04703.mp4,lowV_lowA


In [76]:
# low_v videos for review
lowV_df = video_df[(video_df['affect'] == "lowV_highA") | (video_df['affect'] == "lowV_lowA")]

lowV_df.to_csv(r"C:\Users\aruizdargence\OneDrive - University of California, San Diego Health\Documents\optilab\BrainSync\Data/lowV_paths.csv")

In [78]:
# we will show 700 videos per session (420 pairs), 
# assuming some will be discarded during video selecting 
video_df = video_df[:700]

In [None]:
import random
import pandas as pd

conditions = (
    ["same_name_same_affect"] * 140 +
    ["same_affect_diff_name"] * 140 +
    ["diff_name_diff_affect"] * 140
)
random.shuffle(conditions)

pairs = []
used_v2_names = set()      # track videos already used as v2

video_df = video_df.reset_index(drop=True)

for row, cond in zip(video_df.itertuples(), conditions):

    v1_name = row.path
    v1_affect = row.affect

    # --- SAME NAME, SAME AFFECT --------------------------------------------
    if cond == "same_name_same_affect":
        v2_name = v1_name
        v2_affect = v1_affect

    # --- SAME AFFECT, DIFFERENT NAME ---------------------------------------
    elif cond == "same_affect_diff_name":
        subset = video_df[video_df["affect"] == v1_affect]

        # remove itself
        subset = subset[subset["path"] != v1_name]

        # remove names already used as v2
        subset = subset[~subset["path"].isin(used_v2_names)]

        # sample
        v2_row = subset.sample(1).iloc[0]
        v2_name = v2_row.path
        v2_affect = v2_row.affect

    # --- DIFFERENT AFFECT, DIFFERENT NAME ----------------------------------
    elif cond == "diff_name_diff_affect":
        subset = video_df[video_df["affect"] != v1_affect]

        # remove names already used as v2
        subset = subset[~subset["path"].isin(used_v2_names)]

        v2_row = subset.sample(1).iloc[0]
        v2_name = v2_row.path
        v2_affect = v2_row.affect

    else:
        raise ValueError("Unknown condition")

    # mark v2 as used
    used_v2_names.add(v2_name)

    pairs.append({
        "v1_name": v1_name,
        "v1_affect": v1_affect,
        "v2_name": v2_name,
        "v2_affect": v2_affect,
        "condition": cond
    })

pairs_df = pd.DataFrame(pairs)
pairs_df.shape


(420, 5)

In [127]:
pairs_df

Unnamed: 0,v1_name,v1_affect,v2_name,v2_affect,condition
0,d:/optilab/synchrony/data/videoACCEDE06618.mp4,highV_highA,d:/optilab/synchrony/data/videoACCEDE03981.mp4,highV_highA,same_affect_diff_name
1,d:/optilab/synchrony/data/videoACCEDE00847.mp4,highV_highA,d:/optilab/synchrony/data/videoACCEDE00880.mp4,highV_highA,same_affect_diff_name
2,d:/optilab/synchrony/data/videoACCEDE09013.mp4,highV_highA,d:/optilab/synchrony/data/videoACCEDE07673.mp4,lowV_highA,diff_name_diff_affect
3,d:/optilab/synchrony/data/videoACCEDE07643.mp4,highV_highA,d:/optilab/synchrony/data/videoACCEDE07643.mp4,highV_highA,same_name_same_affect
4,d:/optilab/synchrony/data/videoACCEDE01212.mp4,highV_highA,d:/optilab/synchrony/data/videoACCEDE07080.mp4,highV_highA,same_affect_diff_name
...,...,...,...,...,...
415,d:/optilab/synchrony/data/videoACCEDE02508.mp4,lowV_highA,d:/optilab/synchrony/data/videoACCEDE07756.mp4,lowV_highA,same_affect_diff_name
416,d:/optilab/synchrony/data/videoACCEDE07165.mp4,lowV_highA,d:/optilab/synchrony/data/videoACCEDE07165.mp4,lowV_highA,same_name_same_affect
417,d:/optilab/synchrony/data/videoACCEDE02554.mp4,lowV_highA,d:/optilab/synchrony/data/videoACCEDE02554.mp4,lowV_highA,same_name_same_affect
418,d:/optilab/synchrony/data/videoACCEDE02450.mp4,lowV_highA,d:/optilab/synchrony/data/videoACCEDE01179.mp4,highV_lowA,diff_name_diff_affect


In [128]:
# save into csv
pairs_df.to_csv(r"C:\Users\aruizdargence\OneDrive - University of California, San Diego Health\Documents\optilab\BrainSync\Data/paired_videos.csv")