In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

In [17]:
# Paths (absolute, machine-specific locations of the input data)
# annotated_frames_dir: holds one "<video_name>.csv" annotation file per video
#   (see load_annotated_frames).
# extracted_features_dir: holds one "<video_name>_features.csv" file per video
#   (see load_extracted_features).
annotated_frames_dir = "/scratch2/bsow/Documents/ACSR/data/training_videos/annotated_frames"
extracted_features_dir = "/scratch2/bsow/Documents/ACSR/output/extracted_features"

In [18]:
def load_annotated_frames(video_name):
    """Read the annotation CSV of one video.

    The file is looked up as ``<annotated_frames_dir>/<video_name>.csv``.
    A ``frame`` column, when present, is returned under the name
    ``frame_number`` so that it matches the key used by the feature tables.
    """
    csv_path = os.path.join(annotated_frames_dir, f"{video_name}.csv")
    frames = pd.read_csv(csv_path)

    # DataFrame.rename ignores labels that are absent, so no explicit
    # membership test is required before renaming.
    return frames.rename(columns={'frame': 'frame_number'})

def load_extracted_features(video_name):
    """Read the extracted-features CSV of one video.

    The file is looked up as
    ``<extracted_features_dir>/<video_name>_features.csv`` and returned as a
    DataFrame without further processing.
    """
    csv_name = f"{video_name}_features.csv"
    csv_path = os.path.join(extracted_features_dir, csv_name)
    features = pd.read_csv(csv_path)
    return features

def filter_features_for_annotated_frames(annotated_frames, features):
    """
    Attach the shape/position labels to the feature rows.

    Inner-joins ``features`` with the ``frame_number``, ``shape`` and
    ``position`` columns of ``annotated_frames`` on ``frame_number``; feature
    rows without a matching annotation (and vice versa) are not returned.
    """
    labels = annotated_frames[["frame_number", "shape", "position"]]
    return features.merge(labels, on="frame_number", how="inner")

In [19]:
# List of video names: "sent_01" ... "sent_20"
video_names = [f"sent_{i:02d}" for i in range(1, 21)]

# Load and combine data for the first 19 videos (sent_01..sent_19) for training/validation
train_val_data = []
for video_name in video_names[:19]:
    annotated_frames = load_annotated_frames(video_name)
    features = load_extracted_features(video_name)
    filtered_features = filter_features_for_annotated_frames(annotated_frames, features)
    train_val_data.append(filtered_features)

train_val_df = pd.concat(train_val_data, ignore_index=True)
# Drop every row that has a NaN in any column (features or labels)
train_val_df.dropna(inplace=True)

# Load data for the last video (sent_20) for testing
test_data = []
for video_name in video_names[19:]:
    annotated_frames = load_annotated_frames(video_name)
    features = load_extracted_features(video_name)
    filtered_features = filter_features_for_annotated_frames(annotated_frames, features)
    test_data.append(filtered_features)

test_df = pd.concat(test_data, ignore_index=True)
# Drop every row that has a NaN in any column (features or labels)
test_df.dropna(inplace=True)

In [20]:
# Feature columns: everything except the first two columns (fn_video and
# frame_number, per the feature files) and the last two columns, which are the
# 'shape' and 'position' labels appended by the merge.
feature_columns = train_val_df.columns[2:-2]

# Split into features and labels
X = train_val_df[feature_columns]  # All features
y_shape = train_val_df["shape"]  # Shape labels
y_position = train_val_df["position"]  # Position labels

# Split into training and validation sets in a single call: the same row
# partition is applied to X and to both label vectors, so they cannot drift
# apart (previously two separate calls were kept aligned only by sharing
# random_state).
X_train, X_val, y_shape_train, y_shape_val, y_position_train, y_position_val = train_test_split(
    X, y_shape, y_position, test_size=0.2, random_state=42
)

In [21]:
# Fit one random forest per target on the same training features.
# fit() returns the estimator itself, so construction and training are chained.
shape_model = RandomForestClassifier(random_state=42).fit(X_train, y_shape_train)
position_model = RandomForestClassifier(random_state=42).fit(X_train, y_position_train)

In [22]:
# Predict on the validation split with both models
y_shape_pred = shape_model.predict(X_val)
y_position_pred = position_model.predict(X_val)

# Report per-class metrics and overall accuracy, shape first, then position
for _title, _y_true, _y_pred in (
    ("Shape Model Evaluation:", y_shape_val, y_shape_pred),
    ("Position Model Evaluation:", y_position_val, y_position_pred),
):
    print(_title)
    print(classification_report(_y_true, _y_pred))
    print(f"Accuracy: {accuracy_score(_y_true, _y_pred):.2f}")

Shape Model Evaluation:
              precision    recall  f1-score   support

           1       0.86      0.86      0.86         7
           2       0.67      1.00      0.80         6
           3       0.87      0.91      0.89        22
           4       0.86      0.55      0.67        11
           5       0.79      1.00      0.88        11
           6       0.75      0.60      0.67         5
           7       0.00      0.00      0.00         1
           8       0.60      0.50      0.55         6

    accuracy                           0.80        69
   macro avg       0.67      0.68      0.66        69
weighted avg       0.79      0.80      0.78        69

Accuracy: 0.80
Position Model Evaluation:
              precision    recall  f1-score   support

         1.0       0.86      1.00      0.92        30
         2.0       1.00      1.00      1.00         5
         3.0       0.94      0.80      0.86        20
         4.0       1.00      0.80      0.89         5
         5.0

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# Held-out data: same feature columns as training, plus the two label vectors
X_test = test_df[feature_columns]
y_shape_test = test_df["shape"]
y_position_test = test_df["position"]

# Predict on the held-out video with both models
y_shape_test_pred = shape_model.predict(X_test)
y_position_test_pred = position_model.predict(X_test)

# Report per-class metrics and overall accuracy, shape first, then position
for _title, _y_true, _y_pred in (
    ("Shape Model Test Results:", y_shape_test, y_shape_test_pred),
    ("Position Model Test Results:", y_position_test, y_position_test_pred),
):
    print(_title)
    print(classification_report(_y_true, _y_pred))
    print(f"Accuracy: {accuracy_score(_y_true, _y_pred):.2f}")

Shape Model Test Results:
              precision    recall  f1-score   support

           1       0.50      1.00      0.67         1
           2       1.00      1.00      1.00         1
           3       0.83      1.00      0.91         5
           4       0.33      0.33      0.33         3
           5       1.00      0.50      0.67         4
           6       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         0

    accuracy                           0.67        15
   macro avg       0.52      0.55      0.51        15
weighted avg       0.71      0.67      0.66        15

Accuracy: 0.67
Position Model Test Results:
              precision    recall  f1-score   support

           1       0.62      0.83      0.71         6
           3       0.75      0.50      0.60         6
           4       1.00      0.50      0.67         2
           5       0.50      1.00      0.67         1

    accuracy                           0.67        15
   macr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [10]:
import pickle

# Persist each classifier as a (model, feature_columns) tuple so the feature
# names travel with the model.
for _model, _path in (
    (shape_model, r"/scratch2/bsow/Documents/ACSR/output/saved_models/model_rf_shape.pkl"),
    (position_model, r"/scratch2/bsow/Documents/ACSR/output/saved_models/model_rf_position.pkl"),
):
    with open(_path, "wb") as f:
        pickle.dump((_model, feature_columns), f)

In [45]:
import pickle

def inspect_model_file(filename):
    """Unpickle ``filename`` and print a description of what it contains.

    Prints the object and its type; when the object is a list or tuple, also
    prints the number of items and each item with its type. Returns None.
    """
    with open(filename, 'rb') as handle:
        payload = pickle.load(handle)

    print(f"Contents of {filename}:")
    print(payload)
    print(f"Type of contents: {type(payload)}")

    # Only sequences get the item-by-item breakdown
    if not isinstance(payload, (list, tuple)):
        return

    print(f"Number of items: {len(payload)}")
    for position, entry in enumerate(payload):
        print(f"Item {position}: {entry} (Type: {type(entry)})")

# Example usage
# NOTE(review): the output recorded for this cell shows a bare
# RandomForestClassifier, not the (model, feature_columns) tuple written by the
# save cell and expected by load_model -- presumably the file on disk was
# produced by an earlier version of the save cell; re-run the save cell and
# confirm before relying on tuple unpacking.
inspect_model_file("/scratch2/bsow/Documents/ACSR/output/saved_models/model_rf_position.pkl")

Contents of /scratch2/bsow/Documents/ACSR/output/saved_models/model_rf_position.pkl:
RandomForestClassifier(random_state=42)
Type of contents: <class 'sklearn.ensemble._forest.RandomForestClassifier'>


In [58]:
# Input locations for the second experiment (absolute, machine-specific Windows paths).
# features_dir: scanned for "*_features.csv" files.
# annotations_dir: expected to hold the matching "*_annotations.csv" files.
features_dir = r"C:\Users\bouba\OneDrive\Documents\ACSR\ACSR\output\extracted_features_mp4"
annotations_dir = r"C:\Users\bouba\OneDrive\Documents\ACSR\ACSR\output\annotations"

In [66]:
# Peek at one feature file.
df = pd.read_csv(r"C:\Users\bouba\OneDrive\Documents\ACSR\ACSR\output\extracted_features_mp4\csf001_features.csv")
# Drop (in place) the rows in which every one of the last 25 columns is NaN.
df.dropna(how='all', inplace=True, subset=df.columns[-25:])
df.head()

Unnamed: 0,fn_video,frame_number,distance_face130_r_hand8,tan_angle_face130_r_hand8,distance_face152_r_hand8,tan_angle_face152_r_hand8,distance_face94_r_hand8,tan_angle_face94_r_hand8,distance_face130_r_hand9,tan_angle_face130_r_hand9,...,acceleration_x_r_hand8,acceleration_y_r_hand8,velocity_x_r_hand9,velocity_y_r_hand9,acceleration_x_r_hand9,acceleration_y_r_hand9,velocity_x_r_hand12,velocity_y_r_hand12,acceleration_x_r_hand12,acceleration_y_r_hand12
21,csf001.mp4,22,4.544161,0.215778,2.99359,0.198311,3.952022,0.148803,5.012093,0.068935,...,,,,,,,,,,
22,csf001.mp4,23,4.432777,0.22631,2.886811,0.213178,3.815771,0.157548,5.075631,0.046591,...,,,-0.016066,0.009251,,,0.009077,-0.019512,,
23,csf001.mp4,24,4.395037,0.227604,2.859063,0.215205,3.769736,0.158046,5.071182,0.037993,...,-0.00309,0.012441,-0.006354,-0.00157,0.009712,-0.010821,0.000696,-0.000249,-0.008381,0.019263
24,csf001.mp4,25,2.349447,0.122057,0.855116,-0.115871,1.760024,-0.052596,3.51449,-0.05268,...,-0.099307,-0.273851,-0.055388,-0.227089,-0.049035,-0.225518,-0.099749,-0.339577,-0.100445,-0.339328
25,csf001.mp4,26,2.048286,0.07755,0.628814,-0.449189,1.485955,-0.150858,3.263714,-0.084589,...,0.082108,0.241587,-0.013242,-0.036473,0.042146,0.190616,-0.021206,-0.033463,0.078543,0.306113


In [226]:
def _clean_label_column(labels):
    """Strip whitespace from annotation labels, keeping missing values missing.

    Both genuinely missing entries and the '_' placeholder come back as NaN.
    (A plain ``astype(str)`` would turn missing entries into the literal string
    'nan', which a later ``dropna`` does not remove.)
    """
    cleaned = labels.astype(str).str.strip()
    cleaned[labels.isna() | (cleaned == '_')] = np.nan
    return cleaned


def process_files(features_path, annotations_path):
    """Label every frame of a feature file from an interval-style annotation file.

    Parameters
    ----------
    features_path : str
        CSV of per-frame features. Must contain a ``frame_number`` column; all
        columns after the first two are treated as feature columns when looking
        for the first/last non-empty frame.
    annotations_path : str
        CSV with a header row and three columns, read as ``frame``, ``shape``
        and ``position``. Each row gives the frame at which a (shape, position)
        label starts; the label holds until the next row's frame. A ``_`` label
        means "no label".

    Returns
    -------
    pandas.DataFrame
        The feature table with two extra object columns, ``shape`` and
        ``position``. Frames before the first annotation, and frames whose
        annotation is missing or ``_``, carry NaN.

    Raises
    ------
    IndexError
        If the annotation file contains no data rows.

    Notes
    -----
    The first and last annotation frames are overwritten with the first and
    last frame that has at least one non-NaN feature, and progress messages are
    printed for each assigned range.
    """
    # Load the feature file
    features_df = pd.read_csv(features_path)

    # Load the annotation file; the file's own header row is replaced by `names`
    annotations_df = pd.read_csv(annotations_path, header=0, names=['frame', 'shape', 'position'])

    # Frame numbers that cannot be parsed become NaN
    annotations_df['frame'] = pd.to_numeric(annotations_df['frame'], errors='coerce')

    # Normalise the labels: stripped strings, NaN for missing or '_'
    for label_column in ('shape', 'position'):
        annotations_df[label_column] = _clean_label_column(annotations_df[label_column])

    # First and last frame that has at least one non-NaN feature value
    non_empty_features_df = features_df.dropna(how='all', subset=features_df.columns[2:])
    first_non_empty_frame = non_empty_features_df['frame_number'].min()
    last_non_empty_frame = non_empty_features_df['frame_number'].max()

    # Snap the first and last annotation frames onto those bounds
    frame_col = annotations_df.columns.get_loc('frame')
    if annotations_df.iloc[0]['frame'] != first_non_empty_frame:
        print(f"Adjusting first annotation frame from {annotations_df.iloc[0]['frame']} to {first_non_empty_frame}")
        annotations_df.iloc[0, frame_col] = first_non_empty_frame
    if annotations_df.iloc[-1]['frame'] != last_non_empty_frame:
        print(f"Adjusting last annotation frame from {annotations_df.iloc[-1]['frame']} to {last_non_empty_frame}")
        annotations_df.iloc[-1, frame_col] = last_non_empty_frame

    # Label columns start out empty; object dtype avoids dtype warnings when
    # string labels are written into them.
    features_df['shape'] = None
    features_df['shape'] = features_df['shape'].astype(object)
    features_df['position'] = None
    features_df['position'] = features_df['position'].astype(object)

    frame_numbers = features_df['frame_number']

    # Frames before the first annotation frame have no label
    first_annotation_frame = annotations_df.iloc[0]['frame']
    before_first = frame_numbers < first_annotation_frame
    features_df.loc[before_first, 'shape'] = np.nan
    features_df.loc[before_first, 'position'] = np.nan

    # Each annotation row labels the half-open range [its frame, next row's frame)
    for i in range(len(annotations_df) - 1):
        start_frame = annotations_df.iloc[i]['frame']
        end_frame = annotations_df.iloc[i + 1]['frame']
        shape = annotations_df.iloc[i]['shape']
        position = annotations_df.iloc[i]['position']

        # Print the values being assigned for debugging
        print(f"Assigning shape={shape}, position={position} for frames {start_frame} to {end_frame - 1}")

        in_range = (frame_numbers >= start_frame) & (frame_numbers < end_frame)
        features_df.loc[in_range, 'shape'] = shape
        features_df.loc[in_range, 'position'] = position

    # The last annotation row labels everything from its frame to the end
    last_annotation_frame = annotations_df.iloc[-1]['frame']
    shape = annotations_df.iloc[-1]['shape']
    position = annotations_df.iloc[-1]['position']

    # Print the values being assigned for debugging
    print(f"Assigning shape={shape}, position={position} for frames {last_annotation_frame} to end")

    from_last = frame_numbers >= last_annotation_frame
    features_df.loc[from_last, 'shape'] = shape
    features_df.loc[from_last, 'position'] = position

    return features_df

In [239]:
# Initialize an empty list to store processed DataFrames
processed_dfs = []

# Iterate through all feature files.
# os.listdir returns names in arbitrary order, so sort them: the hold-out video
# below is chosen by position and must be the same file on every run/machine.
for feature_file in sorted(os.listdir(features_dir)):
    if not feature_file.endswith('.csv'):
        continue

    # Construct the full path to the feature file
    features_path = os.path.join(features_dir, feature_file)

    # Construct the corresponding annotation file path
    annotation_file = feature_file.replace('_features.csv', '_annotations.csv')
    annotations_path = os.path.join(annotations_dir, annotation_file)

    # Only files that have a matching annotation file can be labelled
    if os.path.exists(annotations_path):
        processed_dfs.append(process_files(features_path, annotations_path))
    else:
        print(f"Annotation file not found for {feature_file}")

# At least one file is needed for training and one for testing
if len(processed_dfs) < 2:
    raise ValueError(
        f"Need at least 2 annotated feature files to build train and test sets, found {len(processed_dfs)}"
    )

# All but the last processed file form the training set; the last one is held out
training_df = pd.concat(processed_dfs[:-1], ignore_index=True)
testing_df = processed_dfs[-1]

#training_df = pd.concat(processed_dfs[:3] + processed_dfs[4:], ignore_index=True)
#testing_df = processed_dfs[3]

Adjusting first annotation frame from 25 to 22
Adjusting last annotation frame from 133 to 131
Assigning shape=5, position=1 for frames 22 to 33
Assigning shape=6, position=1 for frames 34 to 40
Assigning shape=6, position=2 for frames 41 to 46
Assigning shape=5, position=3 for frames 47 to 54
Assigning shape=0, position=0 for frames 55 to 56
Assigning shape=2, position=1 for frames 57 to 65
Assigning shape=2, position=4 for frames 66 to 74
Assigning shape=3, position=4 for frames 75 to 85
Assigning shape=3, position=3 for frames 86 to 123
Assigning shape=3, position=0 for frames 124 to 126
Assigning shape=0, position=0 for frames 127 to 130
Assigning shape=nan, position=nan for frames 131 to end
Adjusting first annotation frame from 29 to 27
Adjusting last annotation frame from 373 to 372
Assigning shape=5, position=0 for frames 27 to 35
Assigning shape=5, position=3 for frames 36 to 45
Assigning shape=6, position=1 for frames 46 to 53
Assigning shape=0, position=0 for frames 54 to 61

In [240]:
from sklearn.model_selection import train_test_split

# Remove rows with missing values or NaNs (in place; this also removes frames
# whose shape/position label is NaN)
training_df.dropna(inplace=True)
testing_df.dropna(inplace=True)

# Prepare features and targets
X_train = training_df.drop(columns=['fn_video', 'frame_number', 'shape', 'position'])
y_shape = training_df['shape']
y_position = training_df['position']

X_test = testing_df.drop(columns=['fn_video', 'frame_number', 'shape', 'position'])
y_shape_test = testing_df['shape']
y_position_test = testing_df['position']

# Perform train-validation split (only once)
# NOTE(review): this shuffles individual frames, so frames of the same video can
# end up in both the training and the validation split. That may make the
# validation scores optimistic compared with the held-out video -- consider a
# per-video (grouped) split; confirm against the reports below.
# Note that X_train is rebound here from "all training rows" to the 80% split.
X_train, X_val, y_shape_train, y_shape_val, y_position_train, y_position_val = train_test_split(
    X_train, y_shape, y_position, test_size=0.2, random_state=42
)

print("X_train shape:", X_train.shape)
print("X_val shape:", X_val.shape)
print("y_shape_train shape:", y_shape_train.shape)
print("y_shape_val shape:", y_shape_val.shape)
print("y_position_train shape:", y_position_train.shape)
print("y_position_val shape:", y_position_val.shape)

X_train shape: (1432, 28)
X_val shape: (359, 28)
y_shape_train shape: (1432,)
y_shape_val shape: (359,)
y_position_train shape: (1432,)
y_position_val shape: (359,)


In [241]:
from sklearn.ensemble import RandomForestClassifier
# import LogisticRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Fit one random forest per target on the training split.
# fit() returns the estimator itself, so construction and training are chained.
shape_model = RandomForestClassifier(random_state=42).fit(X_train, y_shape_train)
position_model = RandomForestClassifier(random_state=42).fit(X_train, y_position_train)

# Predict on the validation split with both models
y_shape_pred = shape_model.predict(X_val)
y_position_pred = position_model.predict(X_val)

# Per-class validation metrics, shape first, then position
print("Shape Model Classification Report:")
print(classification_report(y_shape_val, y_shape_pred))

print("Position Model Classification Report:")
print(classification_report(y_position_val, y_position_pred))

Shape Model Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.88      0.86        43
           1       0.98      1.00      0.99        46
           2       0.96      0.96      0.96        27
           3       0.98      0.95      0.96        85
           4       0.96      0.96      0.96        26
           5       0.98      1.00      0.99        46
           6       0.98      0.93      0.96        59
           7       0.94      1.00      0.97        15
           8       1.00      1.00      1.00        12

    accuracy                           0.96       359
   macro avg       0.96      0.97      0.96       359
weighted avg       0.96      0.96      0.96       359

Position Model Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.91      0.91        81
           1       0.91      0.92      0.91        73
           2       0.89      1.00      0.94        25
     

In [242]:
from sklearn.metrics import classification_report

# Predict on the test set
# (y_shape_pred / y_position_pred are reused here and no longer hold the
# validation predictions)
y_shape_pred = shape_model.predict(X_test)
y_position_pred = position_model.predict(X_test)

# Print classification reports. zero_division=0 reports 0.0 for metrics that are
# undefined (a class with no predicted or no true samples) without emitting the
# warning that the default setting ("warn") produces.
print("Shape Model Test Classification Report:")
print(classification_report(y_shape_test, y_shape_pred, zero_division=0))

print("Position Model Test Classification Report:")
print(classification_report(y_position_test, y_position_pred, zero_division=0))

Shape Model Test Classification Report:
              precision    recall  f1-score   support

           0       0.31      0.53      0.39        43
           1       0.06      0.55      0.11        11
           2       0.88      0.59      0.71        39
           3       0.82      0.46      0.59        80
           4       0.00      0.00      0.00         0
           5       0.84      0.94      0.89        33
           6       0.67      0.35      0.46        40
           8       0.00      0.00      0.00        61

    accuracy                           0.44       307
   macro avg       0.45      0.43      0.39       307
weighted avg       0.55      0.44      0.46       307

Position Model Test Classification Report:
              precision    recall  f1-score   support

           0       0.28      0.65      0.39        71
           1       0.81      0.44      0.57        78
           3       0.34      1.00      0.51        11
           4       0.85      0.54      0.66      

In [None]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jun 24 11:37:05 2022

"""
import csv
import logging
import os
import pickle
import sys

import cv2  # Import opencv
import matplotlib.pyplot as plt
import mediapipe as mp  # Import mediapipe
import numpy as np
import pandas as pd
import textgrids
from scipy.signal import argrelextrema, savgol_filter
from sklearn.preprocessing import minmax_scale
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm


def load_model(filename):
    """Load a pickled model together with its feature names.

    Two file layouts are accepted:

    * a ``(model, feature_names)`` tuple or list -- returned as is;
    * a bare model object (older files contain only the estimator) -- the
      feature names are then taken from the model's ``feature_names_in_``
      attribute when it has one, otherwise ``None`` is returned for them.

    Parameters
    ----------
    filename : str
        Path to the pickle file.

    Returns
    -------
    tuple
        ``(model, feature_names)``.

    Raises
    ------
    ValueError
        If the file holds a tuple/list that does not have exactly two items.
    """
    # SECURITY: pickle.load can execute arbitrary code; only load trusted files.
    with open(filename, 'rb') as f:
        payload = pickle.load(f)

    if isinstance(payload, (tuple, list)):
        model, feature_names = payload
    else:
        model = payload
        feature_names = getattr(payload, 'feature_names_in_', None)
    return model, feature_names


def load_video(path2file):
    """Open ``path2file`` with OpenCV and request a 640x480 frame size.

    Returns the ``cv2.VideoCapture`` object; the caller is responsible for
    releasing it.
    """
    capture = cv2.VideoCapture(path2file)
    requested_size = (
        (cv2.CAP_PROP_FRAME_WIDTH, 640),   # property id 3: width
        (cv2.CAP_PROP_FRAME_HEIGHT, 480),  # property id 4: height
    )
    for prop_id, value in requested_size:
        capture.set(prop_id, value)
    return capture


def extract_class_from_fn(fn):
    '''
    Parse the class number out of a filename, e.g. 4 from 'position_04.mp4'.

    The number is the text between the first '_' and the first '.' of ``fn``.
    Returns None when ``fn`` is None.
    '''
    if fn is None:
        return None

    start = fn.find('_') + 1
    stop = fn.find('.')
    return int(fn[start:stop])


def get_distance(df_name, landmark1, landmark2, norm_factor=None):
    '''
    Euclidean (x, y, z) distance between two landmarks.

    Parameters
    ----------
    df_name : mapping of column name -> values (e.g. a DataFrame)
        Must provide the columns ``x_<landmark>``, ``y_<landmark>`` and
        ``z_<landmark>`` for both landmarks.
    landmark1 : STR
        name of first landmark (e.g., hand20)
    landmark2 : STR
        name of second landmark (e.g., face234)
    norm_factor : number, optional
        When given, the distance is divided by it.

    Returns
    -------
    series for dataframe
    The distance between landmark1 and landmark2

    '''
    # Accumulate the squared per-axis differences in x, y, z order
    squared_sum = sum(
        (df_name[f'{axis}_{landmark1}'] - df_name[f'{axis}_{landmark2}']) ** 2
        for axis in ('x', 'y', 'z')
    )
    dist = np.sqrt(squared_sum)

    if norm_factor is None:
        return dist

    # NORMALIZE
    dist /= norm_factor
    return dist

def get_delta_dim(df_name, landmark1, landmark2, dim, norm_factor=None):
    """Signed difference landmark1 - landmark2 along one coordinate.

    Reads the columns ``<dim>_<landmark1>`` and ``<dim>_<landmark2>`` from
    ``df_name`` and returns their difference, divided by ``norm_factor`` when
    one is given.
    """
    first = df_name[f'{dim}_{landmark1}']
    second = df_name[f'{dim}_{landmark2}']
    diff = first - second

    if norm_factor is None:
        return diff

    # NORMALIZE
    diff /= norm_factor
    return diff


def get_frames_around_event(fn_video, frame_number, n_neighbor_frames):
    """Read the frames in a window centred on ``frame_number``.

    The window spans ``frame_number - n_neighbor_frames`` to
    ``frame_number + n_neighbor_frames`` inclusive. Each position is sought
    individually in the video ``fn_video``; positions that cannot be read are
    skipped, so fewer frames than the window size may be returned.
    """
    first = frame_number - n_neighbor_frames
    last = frame_number + n_neighbor_frames

    capture = cv2.VideoCapture(fn_video)
    collected = []
    for position in range(first, last + 1):
        capture.set(cv2.CAP_PROP_POS_FRAMES, position)
        ok, image = capture.read()
        if not ok:
            continue
        collected.append(image)
    capture.release()

    return collected


def create_video_from_frames(fn_video, extracted_frames):
    """Write ``extracted_frames`` to ``fn_video`` as an mp4v video at 30 fps.

    The frame size is taken from the first frame. Returns the (already
    released) ``cv2.VideoWriter``, or None when there are no frames, in which
    case nothing is written.
    """
    if not extracted_frames:
        return None

    frame_height, frame_width, _ = extracted_frames[0].shape
    codec = cv2.VideoWriter_fourcc(*"mp4v")  # Codec for MP4 video
    writer = cv2.VideoWriter(fn_video, codec, 30.0, (frame_width, frame_height))
    for image in extracted_frames:
        writer.write(image)
    writer.release()

    return writer


def extract_coordinates(cap, fn_video, show_video=False, verbose=True):
    """Run MediaPipe Holistic on every frame and tabulate face / right-hand landmarks.

    Parameters
    ----------
    cap : cv2.VideoCapture
        Opened capture to read frames from. It is released before returning.
    fn_video : str
        Value stored in the ``fn_video`` column of every row.
    show_video : bool, optional
        When True, the frame with the drawn face, right-hand and pose landmarks
        is displayed in a window and pressing "q" stops the extraction.
    verbose : bool, optional
        When True, print the frame count, frame rate and video length.

    Returns
    -------
    pandas.DataFrame
        One row per frame read, with the columns ``fn_video``, ``frame_number``
        (starting at 1), then ``x_/y_/z_/v_face<i>`` for i in 0..467 and
        ``x_/y_/z_/v_r_hand<i>`` for i in 0..20. Landmarks that were not
        detected in a frame are left empty.

    Raises
    ------
    AssertionError
        If more than one frame reported by the capture is missing from the
        result.
    """
    if verbose:
        print(f"Extracting coordinates for: {fn_video}")
    mp_drawing = mp.solutions.drawing_utils  # Drawing helpers
    mp_holistic = mp.solutions.holistic  # Mediapipe Solutions

    num_coords_face = 468
    num_coords_hand = 21
    values_per_landmark = 4  # x, y, z, visibility

    # generate columns names
    columns = ["fn_video", "frame_number"]
    for prefix, count in (("face", num_coords_face), ("r_hand", num_coords_hand)):
        for val in range(count):
            columns += [
                "x_{}{}".format(prefix, val),
                "y_{}{}".format(prefix, val),
                "z_{}{}".format(prefix, val),
                "v_{}{}".format(prefix, val),
            ]

    def _flatten(landmark_list, n_landmarks):
        # One value per column of this landmark group. When nothing was
        # detected, the placeholder must span the whole group: a shorter one
        # would shift the values of the following group into the wrong columns.
        if landmark_list is None:
            return [None] * (n_landmarks * values_per_landmark)
        return [
            value
            for landmark in landmark_list.landmark
            for value in (landmark.x, landmark.y, landmark.z, landmark.visibility)
        ]

    n_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    if verbose:
        print(f"Number of frames in video: {n_frames}")
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        print(f"Frames per second: {fps}")
        # Some sources report 0 fps; the length is then unknown
        if fps > 0:
            video_length = n_frames / fps
            print(f"Video length: {video_length} seconds")
    pbar = tqdm(total=n_frames)

    # Rows are collected here and turned into a DataFrame once at the end;
    # concatenating a DataFrame per frame is quadratic in the number of frames.
    records = []

    # Initiate holistic model
    i_frame = 0
    with mp_holistic.Holistic(
        min_detection_confidence=0.5, min_tracking_confidence=0.5
    ) as holistic:

        while cap.isOpened():
            ret, frame = cap.read()
            i_frame += 1

            if not ret:
                break
            # Recolor Feed
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            results = holistic.process(image)

            if show_video:
                # Recolor image back to BGR for rendering
                image.flags.writeable = True
                image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Draw face landmarks
                mp_drawing.draw_landmarks(
                    image,
                    results.face_landmarks,
                    mp_holistic.FACEMESH_TESSELATION,
                    mp_drawing.DrawingSpec(
                        color=(80, 110, 10), thickness=1, circle_radius=1
                    ),
                    mp_drawing.DrawingSpec(
                        color=(80, 256, 121), thickness=1, circle_radius=1
                    ),
                )

                # Right hand landmarks
                mp_drawing.draw_landmarks(
                    image,
                    results.right_hand_landmarks,
                    mp_holistic.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(
                        color=(80, 22, 10), thickness=2, circle_radius=4
                    ),
                    mp_drawing.DrawingSpec(
                        color=(80, 44, 121), thickness=2, circle_radius=2
                    ),
                )
                # Pose landmarks
                mp_drawing.draw_landmarks(
                    image,
                    results.pose_landmarks,
                    mp_holistic.POSE_CONNECTIONS,
                    mp_drawing.DrawingSpec(
                        color=(245, 117, 66), thickness=2, circle_radius=4
                    ),
                    mp_drawing.DrawingSpec(
                        color=(245, 66, 230), thickness=2, circle_radius=2
                    ),
                )
                cv2.imshow("cued_estimated", image)

            # Export coordinates
            face_row = _flatten(results.face_landmarks, num_coords_face)
            r_hand_row = _flatten(results.right_hand_landmarks, num_coords_hand)

            # Create the row that will be written in the file
            row = [fn_video, i_frame] + face_row + r_hand_row
            records.append(dict(zip(columns, row)))

            # Key presses only exist while a window is shown
            if show_video and cv2.waitKey(10) & 0xFF == ord("q"):
                print("WARNING!" * 5)
                print('break due to cv2.waitKey(10) & 0xFF == ord("q"')
                break
            pbar.update(1)

    pbar.close()
    cap.release()
    cv2.destroyAllWindows()

    df_coords = pd.DataFrame(records, columns=columns)

    # At most one frame reported by the capture may be missing from the result
    assert n_frames - df_coords.shape[0] <= 1

    return df_coords


def get_index_pairs(property_type):
    """Return the landmark index pairs whose distances are used as features.

    * ``'shape'``: fixed list of (hand, hand) landmark index pairs.
    * ``'position'``: every (hand, face) combination of three hand landmarks
      and three face landmarks, hand index varying slowest.
    * anything else: an empty list.
    """
    if property_type == 'shape':
        return [
            (2, 4), (5, 8), (9, 12), (13, 16), (17, 20),
            (4, 5), (4, 8), (8, 12), (7, 11), (6, 10),
            (4, 12), (4, 16), (4, 20),  # Thumb to other fingertips
            (5, 9), (9, 13),  # Finger bases
        ]

    if property_type == 'position':
        hand_indices = (8, 9, 12)  # index and middle fingers
        face_indices = (130, 152, 94)  # right eye, chin, nose
        return [(hand, face) for hand in hand_indices for face in face_indices]

    return []

def get_angle(p1, p2, p3):
    """
    Compute the angle, in degrees, at vertex p2 between the rays p2->p1 and p2->p3.

    Parameters
    ----------
    p1, p2, p3 : numpy arrays of equal length
        Point coordinates.

    Returns
    -------
    float
        Angle in degrees, in [0, 180]. NaN when p1 or p3 coincides with p2
        (the direction is then undefined).
    """
    v1 = p1 - p2
    v2 = p3 - p2
    cosine_angle = np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
    # Floating-point rounding can push the ratio marginally outside [-1, 1]
    # for (anti)parallel vectors, where arccos would return NaN.
    cosine_angle = np.clip(cosine_angle, -1.0, 1.0)
    angle = np.arccos(cosine_angle)
    return np.degrees(angle)


def extract_features(df_coords):
    """Build the per-frame feature table from landmark coordinates.

    Columns are added in a fixed order, which is kept stable because
    consumers of the table may depend on it:

      1. ``fn_video`` and ``frame_number`` copied from ``df_coords``.
      2. ``tan_angle_face{f}_r_hand{h}``: ratio dx/dy of the normalized
         face/hand offsets, with NaN and +/-inf replaced by 0.
      3. ``distance_face{f}_r_hand{h}``: normalized face-hand distances.
      4. ``distance_r_hand{a}_r_hand{b}``: normalized hand-hand distances.
      5. ``angle_r_hand8_r_hand12_r_hand16``: angle in degrees at landmark 12,
         computed from the x/y coordinates.
      6. ``offset_x_...`` / ``offset_y_...``: normalized face/hand offsets.
      7. ``velocity_*`` / ``acceleration_*``: first and second row-to-row
         differences of the raw (not normalized) x/y hand coordinates.

    Distances and offsets are normalized by the mean of the distance between
    landmarks ``face234`` and ``face454`` over all rows. ``get_distance`` and
    ``get_delta_dim`` are defined elsewhere in this file.

    Args:
        df_coords (pd.DataFrame): One row per frame, with ``fn_video``,
            ``frame_number`` and landmark coordinate columns such as
            ``x_r_hand8`` / ``y_r_hand8``.

    Returns:
        pd.DataFrame: Feature table with one row per row of ``df_coords``.
        Missing values are NOT filled here.
    """
    # Create the df of relevant features
    df_features = pd.DataFrame()
    df_features["fn_video"] = df_coords["fn_video"].copy()
    df_features["frame_number"] = df_coords["frame_number"]

    # Face width to normalize the distance
    face_width = get_distance(df_coords, "face234", "face454").mean()
    norm_factor = face_width
    print(f"Face width computed for normalization: {face_width}")

    # HAND-FACE DISTANCES AS FEATURES FOR POSITION DECODING
    position_index_pairs = get_index_pairs("position")
    for hand_index, face_index in position_index_pairs:
        dx = get_delta_dim(
            df_coords,
            f"face{face_index}",
            f"r_hand{hand_index}",
            "x",
            norm_factor=norm_factor,
        )

        dy = get_delta_dim(
            df_coords,
            f"face{face_index}",
            f"r_hand{hand_index}",
            "y",
            norm_factor=norm_factor,
        )

        # Handle division by zero or NaN
        with np.errstate(divide='ignore', invalid='ignore'):
            tan_angle = np.divide(dx, dy)
            tan_angle = np.nan_to_num(tan_angle, nan=0.0, posinf=0.0, neginf=0.0)  # Replace NaN and infinities with 0
        feature_name = f"tan_angle_face{face_index}_r_hand{hand_index}"
        df_features[feature_name] = tan_angle

    for hand_index, face_index in position_index_pairs:
        feature_name = f"distance_face{face_index}_r_hand{hand_index}"
        df_features[feature_name] = get_distance(
            df_coords,
            f"face{face_index}",
            f"r_hand{hand_index}",
            norm_factor=norm_factor,
        )

    # HAND-HAND DISTANCES AS FEATURES FOR SHAPE DECODING
    shape_index_pairs = get_index_pairs("shape")
    for hand_index1, hand_index2 in shape_index_pairs:
        feature_name = f"distance_r_hand{hand_index1}_r_hand{hand_index2}"
        df_features[feature_name] = get_distance(
            df_coords,
            f"r_hand{hand_index1}",
            f"r_hand{hand_index2}",
            norm_factor=norm_factor,
        )

    # HAND-HAND ANGLES AS FEATURES FOR SHAPE DECODING
    angle_pairs = [
        #(4, 8, 12),  # Thumb, index, middle
        (8, 12, 16)  # Index, middle, ring
    ]
    for p1, p2, p3 in angle_pairs:
        feature_name = f"angle_r_hand{p1}_r_hand{p2}_r_hand{p3}"
        cols_p1 = [f"x_r_hand{p1}", f"y_r_hand{p1}"]
        cols_p2 = [f"x_r_hand{p2}", f"y_r_hand{p2}"]
        cols_p3 = [f"x_r_hand{p3}", f"y_r_hand{p3}"]
        # Pass positional float arrays rather than label-indexed Series: the
        # three points carry different column labels, so Series arithmetic
        # inside get_angle would align on labels and always yield NaN.
        df_features[feature_name] = df_coords.apply(
            lambda row: get_angle(
                row[cols_p1].to_numpy(dtype=float),
                row[cols_p2].to_numpy(dtype=float),
                row[cols_p3].to_numpy(dtype=float),
            ),
            axis=1
        )

    # HAND-FACE ORIENTATION FEATURES FOR POSITION DECODING
    for hand_index in [8, 9, 12]:  # Index and middle fingers
        for face_index in [130, 152, 94]:  # Right eye, chin, nose
            # Horizontal offset
            feature_name = f"offset_x_face{face_index}_r_hand{hand_index}"
            df_features[feature_name] = get_delta_dim(
                df_coords,
                f"face{face_index}",
                f"r_hand{hand_index}",
                "x",
                norm_factor=norm_factor,
            )

            # Vertical offset
            feature_name = f"offset_y_face{face_index}_r_hand{hand_index}"
            df_features[feature_name] = get_delta_dim(
                df_coords,
                f"face{face_index}",
                f"r_hand{hand_index}",
                "y",
                norm_factor=norm_factor,
            )

    # TEMPORAL FEATURES
    for hand_index in [8, 9, 12]:  # Index and middle fingers
        # Velocity (change in position)
        feature_name = f"velocity_x_r_hand{hand_index}"
        df_features[feature_name] = df_coords[f"x_r_hand{hand_index}"].diff()
        feature_name = f"velocity_y_r_hand{hand_index}"
        df_features[feature_name] = df_coords[f"y_r_hand{hand_index}"].diff()

        # Acceleration (change in velocity)
        feature_name = f"acceleration_x_r_hand{hand_index}"
        df_features[feature_name] = df_features[f"velocity_x_r_hand{hand_index}"].diff()
        feature_name = f"acceleration_y_r_hand{hand_index}"
        df_features[feature_name] = df_features[f"velocity_y_r_hand{hand_index}"].diff()

    ## Replace NaN and infinite values in the entire DataFrame
    #df_features.replace([np.inf, -np.inf], np.nan, inplace=True)
    #df_features.fillna(0, inplace=True)
#
    ## Normalize features
    #scaler = StandardScaler()
    #df_features_normalized = scaler.fit_transform(df_features.drop(columns=["fn_video", "frame_number"]))
    #df_features_normalized = pd.DataFrame(df_features_normalized, columns=df_features.columns[2:])
    #df_features_normalized["fn_video"] = df_features["fn_video"]
    #df_features_normalized["frame_number"] = df_features["frame_number"]

    return df_features




def setup_logging(loglevel):
    """Configure the root logger to write timestamped records to stdout.

    Args:
        loglevel (int): Minimum loglevel for emitting messages
    """
    logging.basicConfig(
        level=loglevel,
        stream=sys.stdout,
        format="[%(asctime)s] %(levelname)s:%(name)s:%(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


def compute_predictions(model, df_features):
    '''
    Predict a class and class probabilities for each row, one row at a time.

    Rows containing a missing value (None or NaN) are not passed to the
    model; both outputs hold None at that position.

    model - fitted model exposing predict() and predict_proba()
    df_features - dataframe with n_samples X n_features

    Returns (predicted_probs, predicted_class): an object array of per-row
    probability vectors (or None) and an array of per-row classes (or None).
    '''
    def _has_missing(sample):
        # None entries, the np.nan object, or any value not equal to itself.
        return (None in sample) or (np.nan in sample) or any([value != value for value in sample])

    predicted_class = []
    predicted_probs = []
    for sample in df_features.to_numpy():
        if _has_missing(sample):
            predicted_class.append(None)
            predicted_probs.append(None)
            continue
        predicted_class.append(model.predict([sample])[0])
        predicted_probs.append(model.predict_proba([sample])[0])

    probs_array = np.asarray(predicted_probs, dtype=object)
    class_array = np.asarray(predicted_class)
    return probs_array, class_array


def compute_velocity(df, landmark, fn=None):
    """Compute smoothed speed and acceleration magnitude of one landmark.

    Per-axis derivatives are taken with ``np.gradient`` with respect to
    ``frame_number`` (so units are coordinate units per frame). The speed is
    the Euclidean norm of the first derivatives and the acceleration is the
    norm of the second derivatives. Both are smoothed with a Savitzky-Golay
    filter (window 9, polynomial order 3).

    Args:
        df (pd.DataFrame): Must contain ``frame_number`` and the columns
            ``x_<landmark>``, ``y_<landmark>``, ``z_<landmark>``.
        landmark (str): Landmark suffix, e.g. ``"r_hand8"``.
        fn (str, optional): If given, a plot of both curves is saved to
            ``fn + '.png'``.

    Returns:
        tuple[np.ndarray, np.ndarray]: ``(v_smoothed, a_smoothed)``.
    """
    frame_number = df['frame_number']
    x = df['x_' + landmark].values
    y = df['y_' + landmark].values
    z = df['z_' + landmark].values

    # First derivatives (velocity components)
    dx = np.gradient(x, frame_number)
    dy = np.gradient(y, frame_number)
    dz = np.gradient(z, frame_number)

    # Second derivatives (acceleration components)
    dx2 = np.gradient(dx, frame_number)
    dy2 = np.gradient(dy, frame_number)
    dz2 = np.gradient(dz, frame_number)

    v = np.sqrt(dx**2 + dy**2 + dz**2)
    a = np.sqrt(dx2**2 + dy2**2 + dz2**2)

    v_smoothed = savgol_filter(v, 9, 3) # window
    a_smoothed = savgol_filter(a, 9, 3) # window

    if fn is not None:
        fig, ax = plt.subplots()
        try:
            ax.plot(v_smoothed, lw=3, color='k')
            ax.plot(a_smoothed, lw=3, color='b')
            ax.set_xlabel('Frame', fontsize=16)
            ax.set_ylabel('Velocity', fontsize=16)
            ax.set_ylim([-0.01, 0.01])
            fig.savefig(fn + '.png')
        finally:
            # Release the figure; otherwise every call keeps one more figure
            # alive in pyplot's registry.
            plt.close(fig)
    return  v_smoothed, a_smoothed


def get_phone_onsets(fn_textgrid):
    """Read the non-empty intervals of the 'phones' tier of a TextGrid file.

    Args:
        fn_textgrid (str): Path to the TextGrid file.

    Returns:
        tuple[list, list]: ``(times, labels)`` where ``times`` holds each
        kept interval's ``xmin`` and ``labels`` its transcoded text.
        Intervals whose transcoded text is empty are skipped.
    """
    grid = textgrids.TextGrid(fn_textgrid)

    times = []
    labels = []
    for interval in grid['phones']:
        label = interval.text.transcode()
        if label == '':
            continue
        times.append(interval.xmin)
        labels.append(label)

    return times, labels


def get_stimulus_string(fn_video):
    """Return the first line of the stimulus text file that matches a video.

    The stimulus file is ``ACSR/stimuli/words/mfa_in/<base>.txt`` (relative to
    the current working directory), where ``<base>`` is the video file name
    with its last four characters (e.g. ``.mp4``) removed.

    Args:
        fn_video (str): Path or file name of the video.

    Returns:
        str: First line of the stimulus file without newline characters at
        either end.

    Raises:
        OSError: If the stimulus file cannot be opened.
        IndexError: If the stimulus file is empty.
    """
    fn_base = os.path.basename(fn_video)[:-4]
    fn_stimulus = os.path.join('ACSR/stimuli/words/mfa_in', fn_base + '.txt')
    # Context manager so the handle is closed deterministically.
    with open(fn_stimulus, 'r') as f:
        lines = f.readlines()
    return lines[0].strip('\n')


def dict_phone_transcription():
    """Return a new dict mapping Megalex phone labels (keys) to MFA phone labels (values)."""
    # Megalex (key) to MFA (value) phone labels.
    # Each key appears once, with the value that is in effect when the dict
    # is returned.
    return {
        'R': 'ʁ',
        'N': 'ɲ',
        '§': 'ɔ̃',
        'Z': 'ʒ',
        '5': 'ɛ̃',
        'E': 'ɛ',
        '9': 'œ',
        '8': 'ɥ',
        'S': 'ʃ',
        'O': 'ɔ',
        '2': 'ø',
        'g': 'ɡ',
        '@': 'ɑ̃',
    }

def find_syllable_onsets(lpc_syllables, times_phones, labels_phones):
    """Find the onset time of each syllable from the onset of its first phone.

    For every syllable, its first character is translated with
    ``dict_phone_transcription()`` when a mapping exists, then the first
    not-yet-used phone with that label is looked up; its time becomes the
    syllable onset and the phone is marked as used. A syllable with no
    matching phone contributes no entry, so the result may be shorter than
    ``lpc_syllables``.

    Args:
        lpc_syllables (list[str]): Syllables; only the first character of
            each is used.
        times_phones (sequence): Phone onset times, parallel to
            ``labels_phones``. Not modified.
        labels_phones (sequence): Phone labels. Not modified.

    Returns:
        list: Onset times of the matched syllables, in syllable order.
    """
    # Work on private copies of BOTH sequences so the caller's data is never
    # consumed while matched phones are removed.
    remaining_phones = list(labels_phones)
    remaining_times = list(times_phones)
    d_phone_transcription = dict_phone_transcription()

    times = []
    for syllable in lpc_syllables:
        first_phone = syllable[0]
        first_phone = d_phone_transcription.get(first_phone, first_phone)
        for i, phone in enumerate(remaining_phones):
            if first_phone == phone:
                times.append(remaining_times[i])
                # Remove the matched phone so it cannot be matched again.
                del remaining_phones[i]
                del remaining_times[i]
                break
    return times


def get_syllable_onset_frames_from_lpc_file(fn_video):
    """Read the LPC syllable parsing of the stimulus that matches a video.

    Despite its name, this returns the syllables themselves, not frame
    numbers. The parsing is read from ``ACSR/stimuli/words/txt/<base>.lpc``
    (relative to the current working directory), where ``<base>`` is the
    video file name with its last four characters (e.g. ``.mp4``) removed.

    Args:
        fn_video (str): Path or file name of the video.

    Returns:
        list[str]: Whitespace-separated tokens of the file's first line.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If the file is empty.
    """
    fn_base = os.path.basename(fn_video)[:-4]

    # Get LPC parsing of stimulus, into separate SYLLABLES
    # (MFA is for ALL phones and we need to know which phones are at the beginning of each syllable)
    fn_lpc_parsing = os.path.join('ACSR/stimuli/words/txt', fn_base + '.lpc')
    # Context manager so the handle is closed deterministically.
    with open(fn_lpc_parsing, 'r') as f:
        first_line = f.readlines()[0]
    lpc_syllables = first_line.strip('\n').split()

    return lpc_syllables


def get_syllable_onset_frames_from_mfa(fn_video, lpc_syllables):
    """Convert syllable onsets from the TextGrid alignment into frame numbers.

    The TextGrid is read from ``../stimuli/words/mfa_out/<base>.TextGrid``
    (relative to the current working directory), where ``<base>`` is the
    video file name with its last four characters removed. Phone onsets come
    from ``get_phone_onsets`` and are reduced to syllable onsets by
    ``find_syllable_onsets``.

    Args:
        fn_video (str): Path to the video; used for the frame rate and to
            derive the TextGrid file name.
        lpc_syllables (list[str]): LPC syllables of the stimulus.

    Returns:
        list[int]: Onset frame of each matched syllable, ``int(seconds * fps)``.

    Raises:
        AssertionError: If the video reports a non-positive frame rate.
    """
    # Load video and get number of frames per second (fps)
    cap = load_video(fn_video)
    try:
        # NOTE(review): the frame rate is truncated to an integer, so e.g.
        # 29.97 fps becomes 29 -- confirm this is intended.
        fps = int(cap.get(cv2.CAP_PROP_FPS)) # frames per second
    finally:
        # Only the frame rate is needed; release the capture right away.
        cap.release()
    assert fps > 0, 'Frames per seconds is not a positive number'

    # Load corresponing TextGrid file
    fn_base = os.path.basename(fn_video)[:-4]
    fn_textgrid = fn_base + '.TextGrid'
    fn_textgrid = os.path.join('../stimuli/words/mfa_out', fn_textgrid)

    # The LPC parsing into SYLLABLES is supplied by the caller
    # (MFA is for ALL phones and we need to know which phones are at the beginning of each syllable)

    # PHONE onests in seconds from MFA
    onset_secs_phones_mfa, labels_phones_textgrid = get_phone_onsets(fn_textgrid)
    print(onset_secs_phones_mfa, labels_phones_textgrid)
    # SYLLABLE ONSET from MFA based on the onset of their FIRST PHONE
    onset_secs_syllables_mfa = find_syllable_onsets(lpc_syllables, # in seconds
                                                    onset_secs_phones_mfa,
                                                    labels_phones_textgrid)
    onset_frames_syllables_mfa = [int(t*fps) for t in onset_secs_syllables_mfa] # in frames

    return onset_frames_syllables_mfa



def find_onsets_based_on_extrema(time_series,
                                 n_syllables=None,
                                 onset_frames_syllables_mfa=None,
                                 thresh=None): # condition: time_series > thresh
    """Pick syllable-onset frames among the local maxima of a time series.

    Args:
        time_series (np.ndarray): 1-D signal whose strict local maxima are the
            onset candidates.
        n_syllables (int, optional): Used only when no MFA onsets are given:
            the ``n_syllables`` highest peaks are picked. If it is also None,
            every candidate is picked.
        onset_frames_syllables_mfa (sequence, optional): MFA onset frames
            used to constrain the choice. If there are exactly as many
            candidates as MFA onsets, the candidates are returned as they
            are; otherwise each MFA onset, in order, takes the nearest
            candidate lying after the previously picked one.
        thresh (float, optional): Keep only candidates whose value is
            strictly greater than ``thresh``.

    Returns:
        tuple: ``(onset_frames_picked, onset_frames_extrema)``. In the MFA
        case the picked onsets are padded with None when the candidates run
        out, so their number always equals the number of MFA onsets. The
        second element holds all (thresholded) candidate frames.
    """
    if onset_frames_syllables_mfa is not None:
        onset_frames_syllables_mfa = np.asarray(onset_frames_syllables_mfa)

    # find extrema
    onset_frames_extrema = argrelextrema(time_series, np.greater)[0]
    # Threshold
    if thresh is not None:
        above_thresh = np.asarray(time_series)[onset_frames_extrema] > thresh
        onset_frames_extrema = onset_frames_extrema[above_thresh]

    if onset_frames_syllables_mfa is not None: # use MFA onsets to constrain the solution
        n_expected = len(onset_frames_syllables_mfa)
        if n_expected == len(onset_frames_extrema):
            onset_frames_picked = onset_frames_extrema.copy()
        else:
            remaining = onset_frames_extrema.copy()
            onset_frames_picked = []
            for onset_frame_syl_mfa in onset_frames_syllables_mfa:
                # No candidates left (possibly none to begin with): stop
                # instead of calling argmin on an empty array.
                if len(remaining) == 0:
                    break
                # Find extremum that is nearest to current MFA onset
                delta = np.abs(remaining - onset_frame_syl_mfa)
                nearest = remaining[np.argmin(delta)]
                onset_frames_picked.append(nearest)
                # Remove past indexes, in order to make sure the next onset frame is in the future
                remaining = remaining[remaining > nearest]
            # Fill None values if not enough identified extrema
            onset_frames_picked.extend([None] * (n_expected - len(onset_frames_picked)))
    elif n_syllables is None:
        # No constraint at all: every candidate is an onset.
        onset_frames_picked = list(onset_frames_extrema)
    else:
        # Rank candidates by the HEIGHT of the peak (not by frame index) and
        # never ask for more peaks than exist.
        n_pick = max(0, min(int(n_syllables), len(onset_frames_extrema)))
        peak_heights = np.asarray(time_series)[onset_frames_extrema]
        order = np.argsort(peak_heights, kind='stable')
        highest = order[len(order) - n_pick:]
        # Report the selected peaks in chronological order.
        onset_frames_picked = list(np.sort(onset_frames_extrema[highest]))

    return onset_frames_picked, onset_frames_extrema

def scale_velocity(velocity):
    """Clip outliers with the 1.5 * IQR rule, then rescale with ``minmax_scale``.

    Values below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` are clipped to
    those bounds before scaling.

    Args:
        velocity (array-like): Velocity values.

    Returns:
        The clipped values after ``minmax_scale``.
    """
    lower_quartile = np.percentile(velocity, 25)
    upper_quartile = np.percentile(velocity, 75)
    whisker = (upper_quartile - lower_quartile) * 1.5
    clipped = np.clip(velocity, lower_quartile - whisker, upper_quartile + whisker)
    return minmax_scale(clipped)


def get_joint_measure(df_predictions_pos,
                      df_predictions_shape,
                      velocity_scaled,
                      weight_velocity=1):
    """Combine classifier confidence and hand slowness into one smoothed score.

    Per row, the score is
    ``(weight_velocity * (1 - velocity_scaled) + p_pos * p_shape) / (1 + weight_velocity)``
    where ``p_pos`` / ``p_shape`` are the row-wise maxima over the columns
    selected by the regex ``p_class*`` in each prediction table. The score is
    then smoothed with a Savitzky-Golay filter (window 15, order 3); smoothed
    values that are NaN are replaced by the unsmoothed ones.

    Args:
        df_predictions_pos (pd.DataFrame): Position predictions.
        df_predictions_shape (pd.DataFrame): Shape predictions.
        velocity_scaled (np.ndarray): One scaled velocity per row.
        weight_velocity (float): Relative weight of the velocity term.

    Returns:
        np.ndarray: The smoothed joint measure, one value per row.
    """
    def _max_class_prob(df_predictions):
        # Row-wise maximum over the class-probability columns.
        return df_predictions.copy().filter(regex=("p_class*")).to_numpy().max(axis=1)

    # MAX PROBABILITIES (POSITION AND SHAPE)
    probs_product = _max_class_prob(df_predictions_pos) * _max_class_prob(df_predictions_shape)

    # JOINT
    slowness = 1 - velocity_scaled
    joint_measure = (weight_velocity * slowness + probs_product) / (1 + weight_velocity)
    joint_measure_smoothed = savgol_filter(joint_measure, 15, 3)  # window, smooth

    # replace nans caused by smoothing with original values
    nan_mask = np.isnan(joint_measure_smoothed)
    joint_measure_smoothed[nan_mask] = joint_measure[nan_mask]

    return joint_measure_smoothed


def write_onsets_to_file(str_stimulus, lpc_syllables, onset_frames_picked, fn_txt):
    """Write one 'SYLLABLE ONSET' line per syllable to a text file.

    The file starts with the stimulus string and the header
    ``event,stimulus,frame_number``, followed by one line per syllable.

    The number of onsets is first forced to equal the number of syllables:
    extra onsets are dropped from the end, and missing ones are filled with
    dummy values derived from the last entry of the list.

    Args:
        str_stimulus (str): Stimulus text written as the first line.
        lpc_syllables (list[str]): Syllables of the stimulus.
        onset_frames_picked (sequence): Onset frame per syllable. Not
            modified.
        fn_txt (str): Output file path (overwritten).

    Returns:
        None
    """
    n_syllables = len(lpc_syllables)
    onset_frames_picked = list(onset_frames_picked)

    # HACK TO EQUALIZE THE NUMBER OF EXPECTED ONSETS (NUM SYLLABLES) AND THE ONE FOUND
    if n_syllables < len(onset_frames_picked): # REMOVE EXTRA ONSETS
        # Truncate to the actual number of syllables (not a fixed count).
        onset_frames_picked = onset_frames_picked[:n_syllables]
    for i_sy in range(n_syllables - len(onset_frames_picked)): # ADD DUMMY ONSETS
        # Each dummy is built from the CURRENT last entry, which may itself
        # be a dummy added in the previous iteration.
        last_onset = onset_frames_picked[-1]
        onset_frames_picked.append(last_onset + i_sy + 1)

    assert len(lpc_syllables) == len(onset_frames_picked)

    with open(fn_txt, 'w') as f:
        f.write(f'{str_stimulus}\n')
        f.write('event,stimulus,frame_number\n')
        for (syllable, onset) in zip(lpc_syllables, onset_frames_picked):
            f.write(f'SYLLABLE ONSET, {syllable}, {onset}\n')
    return None

# FROM HAGAR

def get_LPC_p(word):
    """Return the phonological form of ``word`` split into LPC syllables.

    Both lookup tables are read from CSV files at hard-coded paths on every
    call. Lexicon rows whose ``ortho`` contains '-' or a space, or whose
    ``phon`` contains '°' (schwa), are dropped, and only the first row per
    ``ortho`` is kept.

    If the LPC syllable structure returned by ``get_LPC_cv`` equals the
    word's spoken structure (``cv-cv``), the lexicon's own syllabification
    (``syll``) is returned. Otherwise ``phon`` is cut into consecutive pieces
    whose lengths match the LPC syllables, joined with '-'.

    Args:
        word (str): Orthographic form; must exist in the filtered lexicon.

    Returns:
        str: '-'-separated syllables of the word.
    """
    lexicon = pd.read_csv("/home/yair/projects/ACSR/data/hagar/Lexique380.utf8.csv")
    keep_rows = (lexicon.ortho.str.contains('-| ') == False) & (lexicon.phon.str.contains('°') == False)  # suppress schwa
    lexicon = lexicon[keep_rows]
    lexicon = lexicon.drop_duplicates(subset='ortho', keep="first")
    lexicon = lexicon[['ortho','phon', 'p_cvcv','nbhomogr','cv-cv','syll']]
    by_word = lexicon.set_index('ortho').to_dict()

    cv_dic = by_word['cv-cv']
    p_cv_dic = by_word['syll']
    phon_dic = by_word['phon']

    configurations = pd.read_csv("/home/yair/projects/ACSR/data/hagar/lpc_syl_configurations.csv")
    # Number of LPC syllables per configuration (stored alongside the table).
    configurations['lpc_n'] = configurations['LPC_config'].apply(lambda config: len(config.split('-')))
    g_cv_dic = configurations.set_index('spoken_config').to_dict()['LPC_config']

    lpc_cv = get_LPC_cv(word, cv_dic, g_cv_dic)

    phon = phon_dic[word]
    if lpc_cv == cv_dic[word]:
        return p_cv_dic[word]

    # Cut the phonological form into pieces as long as each LPC syllable.
    pieces = []
    for syl in lpc_cv.split('-'):
        pieces.append(phon[:len(syl)])
        phon = phon[len(syl):]
    return '-'.join(pieces)


def get_LPC_cv(word, cv_dic, g_cv_dic):
    """Translate a word's spoken syllable structure into its LPC structure.

    Args:
        word (str): Word to look up.
        cv_dic (dict): Maps a word to its '-'-separated spoken syllable
            configurations.
        g_cv_dic (dict): Maps one spoken syllable configuration to its LPC
            configuration.

    Returns:
        str: The LPC configurations of all syllables joined with '-', or
        ``word`` itself when it is not in ``cv_dic``.
    """
    if word not in cv_dic:
        return word

    spoken_syllables = cv_dic[word].split('-')
    return '-'.join(g_cv_dic[syl] for syl in spoken_syllables)

def get_word_code(syll):
    """Encode a syllable as a string of digits using the two lookup tables.

    * A one-symbol syllable found in ``configuration`` is encoded as its
      configuration digit followed by '0'.
    * Any other one-symbol syllable is encoded as '4' followed by its
      ``position`` digit.
    * Longer syllables are encoded symbol by symbol, using ``configuration``
      when the symbol is listed there and ``position`` otherwise.

    Args:
        syll (str): Syllable in the phone alphabet used by the tables.

    Returns:
        str | None: The digit code, or None when a symbol is in neither table
        or ``syll`` is not a usable sequence (e.g. None).
    """
    position = {'a': '0', 'o': '0', '9': '0', '5': '1', '2': '1', 'i': '2', '§': '2', '@': '2', 'E': '3', 'u': '3', 'O': '3', '1': '4', 'y': '4', 'e': '4'}
    configuration = {'p': '0', 'd': '0', 'Z': '0', 'k': '1', 'v': '1', 'z': '1', 's': '2', 'R': '2', 'b': '3', 'n': '3', '8': '3', 't': '4', 'm': '4', 'f': '4', 'l': '5', 'S': '5', 'N': '5', 'w': '5', 'g': '6', 'j': '7', 'G': '7'}
    # Only the failures that mean "this syllable cannot be encoded" are turned
    # into None; anything else (e.g. KeyboardInterrupt) propagates.
    try:
        if len(syll) == 1:
            if syll in configuration:
                return configuration[syll] + '0'
            return '4' + position[syll]

        code_word = ''
        for i in range(len(syll)):
            symbol = syll[i]
            if symbol in configuration:
                code_word += configuration[symbol]
            else:
                code_word += position[symbol]
        return code_word
    except (KeyError, TypeError):
        return None


def shape_position_code(word):
    """Return the digit code of every LPC syllable of ``word``, joined by '-'.

    The word is split into syllables with ``get_LPC_p`` and each syllable is
    encoded with ``get_word_code``.

    Args:
        word (str): Word to encode.

    Returns:
        str | None: '-'-separated syllable codes, or None when at least one
        syllable cannot be encoded (``get_word_code`` returned None).
    """
    syllable_codes = []
    for syll in get_LPC_p(word).split("-"):
        code = get_word_code(syll)
        if code is None:
            # Propagate the "cannot encode" signal instead of failing on
            # None + str concatenation.
            return None
        syllable_codes.append(code)
    return "-".join(syllable_codes)


In [1]:
import torch
# Print whether CUDA is available to PyTorch in this environment.
print(torch.cuda.is_available())

True
