In [1]:
#Imports 
import pandas as pd
import numpy as np

In [2]:
#Paths
#NOTE(review): absolute, machine-specific location; this cell only runs where that path exists
file_official = "/Users/christinasupino/Desktop/DIVES/divingdata.csv"

#Official results table; later cells read its LastName, DiveName, Judge1-Judge7 and Difficulty columns
df_official = pd.read_csv(file_official)

In [4]:
#Load the PCC keypoint table created in the "ClipAndFrameCreation" notebook
df_pcc = pd.read_csv("/Users/christinasupino/Desktop/DIVES/DIVE_KEYPOINTS.csv")

#Extract LastName and DiveName from video filename
#The parsing treats the text before the first "_" as the diver and the second
#"_"-separated piece as the dive code (with ".mp4" and "DIVE" removed).
video_parts = df_pcc["Video"].str.split("_")
df_pcc["LastName"] = video_parts.str[0].str.upper()
df_pcc["DiveName"] = (
    video_parts.str[1]
    #regex=False removes the literal text. Read as a regex, "." would match any
    #character, and the default of `regex` is not the same in every pandas version.
    .str.replace(".mp4", "", regex=False)
    .str.replace("DIVE", "", regex=False)
    .str.upper()
)

#Filter to top 5 divers and top 5 dives
top_divers = ["Fung", "Brown", "Monroy Manriquez", "Hubert", "Palkhivala"]
top_dives = ["301B", "201B", "103B", "403B", "105B"]

#Standardize official data (trim whitespace; dive codes are upper-cased to match top_dives)
df_official["LastName"] = df_official["LastName"].str.strip()
df_official["DiveName"] = df_official["DiveName"].str.strip().str.upper()

#.copy() so that columns added later do not write into a slice of df_official
df_official_top = df_official[
    df_official["LastName"].isin(top_divers)
    & df_official["DiveName"].isin(top_dives)
].copy()


#Map PCC names to match official data
#Keys are the upper-cased first filename token; values are the spellings used in top_divers
name_map = {
    "FUNG": "Fung",
    "BROWN": "Brown",
    "MONROY": "Monroy Manriquez",
    "HUBERT": "Hubert",
    "PALKHIVALA": "Palkhivala"
}
#Series.map yields NaN for any parsed name that is not a key of name_map;
#the set comparison below would then print False
df_pcc["LastName"] = df_pcc["LastName"].map(name_map)


#Check that everything matches
print(set(df_pcc["LastName"]) == set(df_official_top["LastName"]))
print(set(df_pcc["DiveName"]) == set(df_official_top["DiveName"]))


True
True


In [8]:
#Load dive keypoints and phases
#Both paths are relative, so they resolve against the kernel's current working directory
df_keypoints = pd.read_csv("DIVE_KEYPOINTS.csv")
df_phases = pd.read_csv("DIVE_PHASES.csv")
        

#Smoothness metric: captures continuity of movement by measuring jerk
#Lower jerk = smoother motion
def calculate_smoothness(coords):
    """Score how smooth a sequence of coordinates is.

    Jerk is approximated by the third discrete difference along axis 0, so
    rows are treated as consecutive samples. The mean absolute jerk is mapped
    through ``1 / (1 + mean_jerk)``: zero jerk gives 1 and larger jerk moves
    the score toward 0.

    Args:
        coords: array-like whose first axis indexes successive samples.

    Returns:
        The smoothness score. Inputs with fewer than four rows have no third
        difference, so the mean jerk is taken as 0 and the score is 1.
    """
    #n=3 applies the first difference three times (velocity -> acceleration -> jerk)
    third_diff = np.diff(coords, n=3, axis=0)
    if len(third_diff) == 0:
        return 1 / (1 + 0)
    return 1 / (1 + np.mean(np.abs(third_diff)))

#Phase weights
#Relative contribution of each phase to a dive's combined score (the three values sum to 1)
weights = {'takeoff': 0.3, 'flight': 0.4, 'entry': 0.3}

#Store all raw metric values for normalization
#raw_scores receives one dict per video; every all_* list is flat and filled in loop order
raw_scores = []
all_std, all_smooth, all_entry_angle, all_symmetry, all_stability = [], [], [], [], []

#Loop through each video and extract movement metrics separately for each phase
for _, row in df_phases.iterrows():
    video = row['Video']
    #Filename parsing: text before the first "_" is the diver, the second "_" piece is the dive code
    last_name = video.split("_")[0].upper()
    dive_name = video.split("_")[1].replace(".mp4","").replace("DIVE","").upper()
    #Use the spelling from name_map when there is one; otherwise keep the parsed name
    last_name = name_map.get(last_name, last_name)

    #Keypoint rows belonging to this video only
    df_vid = df_keypoints[df_keypoints['Video'] == video]
    #Substring test: any column whose name contains "_x", "_y" or "_z" is treated as a coordinate
    keypoint_cols = [c for c in df_vid.columns if "_x" in c or "_y" in c or "_z" in c]

    phase_scores = {}

    #Extract the frame ranges corresponding to each phase
    #NOTE(review): the filter below is inclusive at both ends, so TakeoffFrame falls in both
    #takeoff and flight, and EntryFrame falls in both flight and entry
    phase_frames = {
        'takeoff': (row['StartFrame'], row['TakeoffFrame']),
        'flight': (row['TakeoffFrame'], row['EntryFrame']),
        'entry': (row['EntryFrame'], row['LastFrame'])
    }

    #Loop through phases to calculate each metric
    for phase, (start_f, end_f) in phase_frames.items():
        df_phase = df_vid[(df_vid['Frame'] >= start_f) & (df_vid['Frame'] <= end_f)]
        coords = df_phase[keypoint_cols].to_numpy()

        #Standard deviation & smoothness
        #np.std is called without an axis, so it is one value over every coordinate of every frame.
        #Phases with fewer than two frames fall back to 0 for both metrics.
        std_score = np.std(coords) if len(coords) > 1 else 0
        smooth_score = calculate_smoothness(coords) if len(coords) > 1 else 0

        #Entry angle: measures alignment at water entry
        #Only computed for the entry phase and only with at least three frames.
        #Columns 11/12 and 27/28 are presumably pose-landmark ids for shoulders and ankles
        #(the sh_/an_ names suggest so) - TODO confirm against the keypoint extractor.
        if phase == "entry" and len(df_phase) > 2:
            #Midpoint of each pair per frame, then averaged over the phase
            sh_x = df_phase[['11_x','12_x']].mean(axis=1).mean()
            sh_y = df_phase[['11_y','12_y']].mean(axis=1).mean()
            an_x = df_phase[['27_x','28_x']].mean(axis=1).mean()
            an_y = df_phase[['27_y','28_y']].mean(axis=1).mean()
            dx = an_x - sh_x
            dy = an_y - sh_y
            #Direction of the vector from the 11/12 midpoint to the 27/28 midpoint, in degrees
            angle = np.degrees(np.arctan2(dy, dx))
            #Absolute deviation from 90 degrees, i.e. 0 when that vector lies along the y axis
            entry_angle_score = abs(90 - abs(angle)) #Closer to 0 = better
        else:
            entry_angle_score = None

        #Symmetry: compares left vs right arm movement
        #Computed as the mean absolute x/y difference between columns 11_* and 12_*
        #NOTE(review): these are the same columns used as "sh" above - confirm they represent arms
        if len(df_phase) > 2:
            left_arm = df_phase[['11_x','11_y']].to_numpy()
            right_arm = df_phase[['12_x','12_y']].to_numpy()
            symmetry_score = np.mean(np.abs(left_arm - right_arm))
        else:
            symmetry_score = None

        # Stability: measures positional consistency during the flight phase
        # Per-column standard deviation over the phase's frames, averaged across columns
        if phase == "flight" and len(df_phase) > 2:
            stability_score = np.mean(np.std(df_phase[keypoint_cols], axis=0))
        else:
            stability_score = None

        #Store all metrics for specific phase
        phase_scores[phase] = {
            'std': std_score,
            'smooth': smooth_score,
            'entry_angle': entry_angle_score,
            'symmetry': symmetry_score,
            'stability': stability_score
        }

        #Collect for normalization
        #std_score and smooth_score are never None (they fall back to 0), so all_std and
        #all_smooth gain exactly one value per phase per video, in iteration order.
        #NOTE(review): in the visible code only all_std and all_smooth are read afterwards;
        #all_entry_angle, all_symmetry and all_stability are filled but not used.
        if std_score is not None: all_std.append(std_score)
        if smooth_score is not None: all_smooth.append(smooth_score)
        if entry_angle_score is not None: all_entry_angle.append(entry_angle_score)
        if symmetry_score is not None: all_symmetry.append(symmetry_score)
        if stability_score is not None: all_stability.append(stability_score)

    #Store all metrics
    raw_scores.append({
        'LastName': last_name,
        'DiveName': dive_name,
        'PhaseScores': phase_scores
    })

#Normalization helper: rescales values to 0–1 so each metric contributes equally
def normalize(arr):
    """Min-max rescale values to approximately the range [0, 1].

    The minimum maps to 0. The maximum maps to just under 1 because a small
    epsilon (1e-8) is added to the denominator to avoid division by zero;
    when every value is equal, all outputs are 0.

    NaN entries are ignored when finding the minimum and maximum and stay NaN
    in the output, so one missing value no longer turns every result into NaN.

    Args:
        arr: array-like of numeric values.

    Returns:
        A new float ndarray with the same shape as the input. An empty input
        gives an empty array and an all-NaN input is returned unchanged.
    """
    values = np.array(arr, dtype=float)
    #No finite value to anchor the scale: nothing to rescale
    if values.size == 0 or np.isnan(values).all():
        return values
    low = np.nanmin(values)
    span = np.nanmax(values) - low
    return (values - low) / (span + 1e-8)

std_norm = normalize(all_std)
smooth_norm = normalize(all_smooth)

#Combine phase-level metrics into one PCC score per dive: mirroring judging criteria
#std_norm and smooth_norm are flat arrays with one value per (video, phase), in the same
#order raw_scores was filled, so they are consumed positionally with one running index.
phase_order = ['takeoff', 'flight', 'entry']
expected_count = len(raw_scores) * len(phase_order)
assert len(std_norm) == expected_count and len(smooth_norm) == expected_count, (
    "normalized metrics do not line up with raw_scores (expected one value per video and phase)"
)

i_metric = 0
pcc_results = []

for entry in raw_scores:
    last_name = entry['LastName']
    dive_name = entry['DiveName']

    pcc_total = 0

    for phase in phase_order:
        #A lower coordinate spread (std) and a higher smoothness both raise the score
        std_component = 1 - std_norm[i_metric]
        smooth_component = smooth_norm[i_metric]

        # Combine the two
        phase_score = 0.5 * std_component + 0.5 * smooth_component
        pcc_total += weights[phase] * phase_score

        i_metric += 1

    #Multiply by difficulty
    #NOTE(review): Difficulty also multiplies the official points that PCC is later correlated
    #with, so part of any PCC/official correlation comes from this shared factor.
    difficulty_matches = df_official_top.loc[
        (df_official_top['LastName'] == last_name) &
        (df_official_top['DiveName'] == dive_name),
        'Difficulty'
    ]
    if difficulty_matches.empty:
        #Fail with the offending key instead of a bare IndexError from .values[0]
        raise ValueError(
            f"No official Difficulty found for diver {last_name!r}, dive {dive_name!r}"
        )
    #If several official rows match, the first one is used
    difficulty = difficulty_matches.values[0]

    pcc_weighted = pcc_total * difficulty
    pcc_results.append({'LastName': last_name, 'DiveName': dive_name, 'PCC': pcc_weighted})

#Final rescaling 1-10
df_pcc_scores = pd.DataFrame(pcc_results)
vals = df_pcc_scores['PCC'].values
#Min-max map onto [1, 10]; if every score were equal the denominator would be 0
df_pcc_scores['PCC'] = 1 + 9 * (vals - vals.min()) / (vals.max() - vals.min())

print(df_pcc_scores)

            LastName DiveName        PCC
0              Brown     103B   3.139802
1              Brown     105B   9.233253
2              Brown     201B   1.502711
3              Brown     301B   2.176396
4              Brown     403B   8.559236
5               Fung     103B   2.077172
6               Fung     105B   7.917267
7               Fung     201B   2.023013
8               Fung     301B   2.484125
9               Fung     403B   7.982304
10            Hubert     103B   2.103395
11            Hubert     105B   9.562879
12            Hubert     301B   2.903119
13            Hubert     403B   8.000479
14  Monroy Manriquez     103B   2.235449
15  Monroy Manriquez     105B   8.528987
16  Monroy Manriquez     201B   1.276812
17  Monroy Manriquez     301B   2.851265
18  Monroy Manriquez     403B   8.306766
19        Palkhivala     103B   3.131635
20        Palkhivala     105B  10.000000
21        Palkhivala     201B   1.000000
22        Palkhivala     301B   2.877607
23        Palkhi

In [9]:
#Calculate OfficialPoints per dive by FINA process
#Column names Judge1 .. Judge7
judge_cols = ["Judge%d" % n for n in range(1, 8)]

def calculate_official_points(row):
    """Compute the awarded points for one row of official results.

    The judge scores in ``judge_cols`` are sorted, the two lowest and two
    highest are dropped, and the sum of what remains is multiplied by the
    row's ``Difficulty``.

    Args:
        row: mapping-like row (e.g. a pandas Series) holding every column in
            ``judge_cols`` plus ``Difficulty``.

    Returns:
        Sum of the retained judge scores times the difficulty.
    """
    ranked = np.sort(np.array(row[judge_cols]))
    counted = ranked[2:-2]  #Discard 2 lowest and 2 highest
    return counted.sum() * row["Difficulty"]
    
#Score every official row with calculate_official_points
df_official_top["OfficialPoints"] = df_official_top.apply(calculate_official_points, axis=1)


#Rescale Official Points 1-10
official_values = df_official_top["OfficialPoints"].values
official_min = official_values.min()
official_max = official_values.max()

#Min-max map: the lowest total becomes 1 and the highest becomes 10
points_above_min = df_official_top["OfficialPoints"] - official_min
df_official_top["ScaledPoints"] = 1 + 9 * points_above_min / (official_max - official_min)

In [10]:
#Merge PCC with official data
#Default (inner) join: only LastName/DiveName pairs present in both tables are kept
df_merged_scaled = df_official_top.merge(df_pcc_scores, on=['LastName', 'DiveName'])
df_merged_scaled.to_csv("df_merged_scaled.csv", index=False)

#Calculate overall correlation
overall_corr = df_merged_scaled['PCC'].corr(df_merged_scaled['ScaledPoints'])

#Calculate individual diver correlations
diver_corrs = {}
for diver in top_divers:
    #Rows for this diver only; the correlation is taken across that diver's dives
    diver_rows = df_merged_scaled[df_merged_scaled['LastName'] == diver]
    diver_corrs[diver] = diver_rows['PCC'].corr(diver_rows['ScaledPoints'])

#Print results
print("Overall correlation (top 5 divers & dives):", overall_corr)
print("Diver correlation:")
for diver, corr in diver_corrs.items():
    print(f"{diver}: {corr:.3f}")

Overall correlation (top 5 divers & dives): 0.8990423763132261
Diver correlation:
Fung: 0.969
Brown: 0.948
Monroy Manriquez: 0.982
Hubert: 0.723
Palkhivala: 0.899
