# We will be creating the folds for one-point tracking, six-point tracking, and all-points tracking!

In [2]:
# first track all points

K = 5

import mediapipe as mp 
from PIL import Image as im 
import mediapipe as mp

def hand_locations(frame, min_detection_confidence = 0.5, min_tracking_confidence = 0.5): 
    hands = mp.solutions.hands.Hands(min_detection_confidence=min_detection_confidence, min_tracking_confidence=min_tracking_confidence) # MAKE SURE THIS IS ALL GOOD 
    results = hands.process(frame.astype('uint8'))
    X_locations = [0] * 42 # use 0 as default if the class is not there 
    Y_locations = [0] * 42 
    Z_locations = [0] * 42
    x = y = z = 0 
    if results.multi_hand_landmarks:
        for hand, hand_landmark in enumerate(results.multi_hand_landmarks):
            for i in range(0, 21):
                landmark = hand_landmark.landmark[i]
                X_locations[x] = landmark.x
                Y_locations[y] = landmark.y
                Z_locations[z] = landmark.z
                x += 1; y += 1; z += 1; 
    locations = np.concatenate([X_locations, Y_locations, Z_locations])
    hands.close()
    return locations 

# time to actually do calibration. 

SECONDS_TO_DETECT = 2 # in seconds

import os, cv2
from tqdm import tqdm 
import numpy as np

ARMFLAPPING_VIDEOS = []
CONTROL_VIDEOS = []
ARMFLAPPING_FPS = [] # store the FPS of all armflapping videos 
CONTROL_FPS = [] # store the FPS of all control videos 
for video_name in tqdm(os.listdir('behavior_data/shorter_armflapping'), desc = "armflapping_videos"): 
    try: 
        cap = cv2.VideoCapture('behavior_data/armflapping' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break  
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image) 
        
        ARMFLAPPING_VIDEOS.append(FRAMES)
        ARMFLAPPING_FPS.append(frame_rate)
    except Exception as e: 
        print(f"failed on {video_name}")

for video_name in tqdm(os.listdir('behavior_data/control'), desc = "control_videos"): 
    try: 
        cap = cv2.VideoCapture('behavior_data/shorter_control/' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break 
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image)
        
        CONTROL_VIDEOS.append(FRAMES)
        CONTROL_FPS.append(frame_rate)
    except Exception as e: 
        print(f"failed on {video_name}")
    
# get the locations of all of the videos 

ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS = [], []
for FRAMES in tqdm(ARMFLAPPING_VIDEOS) :
    locs = []
    for frame in FRAMES: 
        locs.append(hand_locations(frame))
    ARMFLAPPING_LOCATIONS.append(locs)

for FRAMES in tqdm(CONTROL_VIDEOS):  
    locs = []
    for frame in FRAMES: 
        locs.append(hand_locations(frame))
    CONTROL_LOCATIONS.append(locs)

N = min([len(locs) for locs in [ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS]])
ARMFLAPPING_LOCATIONS = ARMFLAPPING_LOCATIONS[:N]
CONTROL_LOCATIONS = CONTROL_LOCATIONS[:N]
ARMFLAPPING_LOCATIONS = np.array(ARMFLAPPING_LOCATIONS)
CONTROL_LOCATIONS = np.array(CONTROL_LOCATIONS)

# we can create a padding function in order to pad 
def pad(locations, maxlen = 90, padding = "post", truncating = "post"): 
    new_locations = locations.tolist() 
    empty_row = np.zeros((1, 126))
    for i, video in tqdm(enumerate(new_locations)): 
        if len(video) < maxlen:  
            for new_row in range(maxlen - len(video)): 
                if padding == "post": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([new_locations[i], empty_rowP])
                if padding == "pre": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([empty_row, new_locations[i]])

        if len(video) > maxlen: 
            if truncating == "post": 
                new_locations[i] = new_locations[i][:maxlen]
            elif truncating == "pre": 
                new_locations[i] = new_locations[i][len(video) - maxlen : ]
    return np.array(new_locations)

padded_armflapping_locations = ARMFLAPPING_LOCATIONS
padded_control_locations = CONTROL_LOCATIONS
padded_armflapping_locations = pad(padded_armflapping_locations, maxlen = 90)
padded_control_locations = pad(padded_control_locations, maxlen = 90)
print(padded_control_locations.shape, padded_armflapping_locations.shape)
assert padded_armflapping_locations.shape == padded_control_locations.shape 

from sklearn.model_selection import train_test_split

def generate_data(ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS): 
    ARMFLAPPING_LABELS = np.ones(ARMFLAPPING_LOCATIONS.shape[0])
    CONTROL_LABELS = np.zeros(CONTROL_LOCATIONS.shape[0])
    
    # concatenate 
    data = np.concatenate([ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS])
    labels = np.concatenate([ARMFLAPPING_LABELS, CONTROL_LABELS])
    
    return data, labels 

X, y = generate_data(padded_armflapping_locations, padded_control_locations)

import pickle, numpy as np 

# shuffle
N = np.random.permutation(X.shape[0])
X, y = X[N], y[N]

splits = [] # stores k (X_i, y_i) splits
X_splits, y_splits = np.array_split(X, K), np.array_split(y, K)
for X_split, y_split in zip(X_splits, y_splits): 
    splits.append((X_split, y_split))

for i, split in enumerate(splits): 
    with open(f"all_points_folds/split{i+1}", 'wb') as f: 
        pickle.dump(split, f)

armflapping_videos:  15%|█▍        | 16/108 [00:00<00:02, 40.15it/s]failed on .DS_Store
armflapping_videos: 100%|██████████| 108/108 [00:04<00:00, 24.01it/s]
control_videos:  19%|█▉        | 12/62 [00:02<00:13,  3.75it/s]failed on .DS_Store
control_videos: 100%|██████████| 62/62 [00:06<00:00, 10.01it/s]
100%|██████████| 97/97 [03:30<00:00,  2.17s/it]
100%|██████████| 50/50 [01:43<00:00,  2.07s/it]
  ARMFLAPPING_LOCATIONS = np.array(ARMFLAPPING_LOCATIONS)
  CONTROL_LOCATIONS = np.array(CONTROL_LOCATIONS)
50it [00:00, 13007.21it/s]
50it [00:00, 7687.79it/s](50, 90, 126) (50, 90, 126)



In [3]:
# next, track six points

K = 5

import mediapipe as mp 
from PIL import Image as im 
import numpy as np

np.random.seed(32) # set a random seed 

def hand_locations(frame, min_detection_confidence = 0.5, min_tracking_confidence = 0.5): 
    """Only give 6 landmarks"""

    hands = mp.solutions.hands.Hands(min_detection_confidence=min_detection_confidence, min_tracking_confidence=min_tracking_confidence) # MAKE SURE THIS IS ALL GOOD 
    results = hands.process(frame.astype('uint8'))
    X_locations = [0] * 12
    Y_locations = [0] * 12
    Z_locations = [0] * 12

    if results.multi_hand_landmarks:
        x = y = z = 0 
        for hand, hand_landmark in enumerate(results.multi_hand_landmarks):
            for i in range(0, 21):
                if i not in [0, 4, 8, 12, 16, 20]: continue 
                landmark = hand_landmark.landmark[i]
                X_locations[x] = landmark.x
                Y_locations[y] = landmark.y 
                Z_locations[z] = landmark.z
                x += 1; y += 1; z +=1; 
            
    hands.close()
    return np.concatenate([X_locations, Y_locations, Z_locations]) 

# time to actually do calibration. 

SECONDS_TO_DETECT = 2 # in seconds

import os, cv2
from tqdm import tqdm 
import numpy as np

ARMFLAPPING_VIDEOS = []
CONTROL_VIDEOS = []
ARMFLAPPING_FPS = [] # store the FPS of all armflapping videos 
CONTROL_FPS = [] # store the FPS of all control videos 

# we want to store the names of everything 
ARMFLAPPING_FILE_NAMES = []
CONTROL_FILE_NAMES = []
for video_name in tqdm(os.listdir('behavior_data/armflapping'), desc = "armflapping_videos"): 
    try: 
        os.mkdir("behavior_data/armflapping/" + video_name[1:])
        cap = cv2.VideoCapture('behavior_data/armflapping/' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        i = 0
        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break  
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image) 

            cv2.imwrite("behavior_data/armflapping/" + video_name[1:] + "/" + str(i + 1) + ".jpg", image)
            i += 1 

        ARMFLAPPING_VIDEOS.append(FRAMES)
        ARMFLAPPING_FPS.append(frame_rate)
        ARMFLAPPING_FILE_NAMES.append(video_name)

    except Exception as e: 
        print(e)
        print(f"failed on {video_name}")

for video_name in tqdm(os.listdir('behavior_data/control'), desc = "control_videos"): 
    try: 
        os.mkdir("behavior_data/control/" + video_name[1:])
        cap = cv2.VideoCapture('behavior_data/control/' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 
        i = 0
        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break 
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image)

            cv2.imwrite("behavior_data/control/" + video_name[1:] + "/" + str(i + 1) + ".jpg", image)
            
            i += 1
        CONTROL_VIDEOS.append(FRAMES)
        CONTROL_FPS.append(frame_rate)
        CONTROL_FILE_NAMES.append(video_name)
    

    except Exception as e: 
        print(e)
        print(f"failed on {video_name}")

ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS = [], []

for FRAMES, file_name in tqdm(zip(CONTROL_VIDEOS, CONTROL_FILE_NAMES)):  
    locs = []
    for i, frame in enumerate(FRAMES): 
        locs.append(hand_locations(np.array(frame)))

        #FRAME = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        #cv2.imwrite("behavior_data/control/" + file_name[1:] + "/" + str(i + 1) + ".jpg", frame)
    CONTROL_LOCATIONS.append(locs)


# get the locations of all of the videos 

for FRAMES, file_name in tqdm(zip(ARMFLAPPING_VIDEOS,ARMFLAPPING_FILE_NAMES)) :
    locs = []
    for i, frame in enumerate(FRAMES): 
        locs.append(hand_locations(np.array(frame)))

        #FRAME = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        #cv2.imwrite("behavior_data/armflapping/" + file_name[1:] + "/" + str(i + 1) + ".jpg", frame)
    ARMFLAPPING_LOCATIONS.append(locs)

N = min([len(locs) for locs in [ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS]])
ARMFLAPPING_LOCATIONS = ARMFLAPPING_LOCATIONS[:N]
CONTROL_LOCATIONS = CONTROL_LOCATIONS[:N]
ARMFLAPPING_LOCATIONS = np.array(ARMFLAPPING_LOCATIONS)
CONTROL_LOCATIONS = np.array(CONTROL_LOCATIONS)

ARMFLAPPING_FILE_NAMES = np.array(ARMFLAPPING_FILE_NAMES[:N])
CONTROL_FILE_NAMES = np.array(CONTROL_FILE_NAMES[:N])

# we can create a padding function in order to pad 
def pad(locations, maxlen = 90, padding = "post", truncating = "post"): 
    new_locations = locations.tolist() 
    empty_row = np.zeros((1, 36))
    for i, video in tqdm(enumerate(new_locations)): 
        if len(video) < maxlen:  
            for new_row in range(maxlen - len(video)): 
                if padding == "post": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([new_locations[i], empty_row])
                if padding == "pre": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([empty_row, new_locations[i]])

        if len(video) > maxlen: 
            if truncating == "post": 
                new_locations[i] = new_locations[i][:maxlen]
            elif truncating == "pre": 
                new_locations[i] = new_locations[i][len(video) - maxlen : ]
    return np.array(new_locations)

padded_armflapping_locations = ARMFLAPPING_LOCATIONS
padded_control_locations = CONTROL_LOCATIONS
padded_armflapping_locations = pad(padded_armflapping_locations, maxlen = 90)
padded_control_locations = pad(padded_control_locations, maxlen = 90)
print(padded_control_locations.shape, padded_armflapping_locations.shape)
assert padded_armflapping_locations.shape == padded_control_locations.shape 

from sklearn.model_selection import train_test_split

def generate_data(ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS): 
    ARMFLAPPING_LABELS = np.ones(ARMFLAPPING_LOCATIONS.shape[0])
    CONTROL_LABELS = np.zeros(CONTROL_LOCATIONS.shape[0])
    
    # concatenate 
    data = np.concatenate([ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS])
    labels = np.concatenate([ARMFLAPPING_LABELS, CONTROL_LABELS])
    
    return data, labels 

X, y = generate_data(padded_armflapping_locations, padded_control_locations)


# get all file names 
FILE_NAMES = np.concatenate([ARMFLAPPING_FILE_NAMES, CONTROL_FILE_NAMES])

import pickle, numpy as np 

# shuffle
N = np.random.permutation(X.shape[0])
X, y = X[N], y[N]
FILE_NAMES = FILE_NAMES[N]

# shuffle again with a random seed of 65

np.random.seed(65)
N = np.random.permutation(X.shape[0])
X, y = X[N], y[N]
FILE_NAMES = FILE_NAMES[N]

splits = [] # stores k (X_i, y_i) splits
X_splits, y_splits = np.array_split(X, K), np.array_split(y, K)
FILE_NAME_SPLITS = np.array_split(FILE_NAMES, K)
for X_split, y_split in zip(X_splits, y_splits): 
    splits.append((X_split, y_split))
    print("file names: " , FILE_NAME_SPLITS)

for i, split in enumerate(splits): 
    with open(f"six_point_folds/split{i+1}", 'wb') as f: 
        pickle.dump(split, f)

armflapping_videos:  13%|█▎        | 14/108 [00:03<00:14,  6.39it/s]OpenCV: Couldn't read video stream from file "behavior_data/armflapping/.DS_Store"
[ERROR:0] global /private/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/pip-req-build-xxsyexfp/opencv/modules/videoio/src/cap.cpp (162) open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.5.3) /private/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/pip-req-build-xxsyexfp/opencv/modules/videoio/src/cap_images.cpp:253: error: (-5:Bad argument) CAP_IMAGES: can't find starting number (in the name of file): behavior_data/armflapping/.DS_Store in function 'icvExtractPattern'




float division by zero
failed on .DS_Store


armflapping_videos: 100%|██████████| 108/108 [00:34<00:00,  3.09it/s]
control_videos:  19%|█▉        | 12/62 [00:05<00:25,  1.98it/s]OpenCV: Couldn't read video stream from file "behavior_data/control/.DS_Store"
[ERROR:0] global /private/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/pip-req-build-xxsyexfp/opencv/modules/videoio/src/cap.cpp (162) open VIDEOIO(CV_IMAGES): raised OpenCV exception:

OpenCV(4.5.3) /private/var/folders/24/8k48jl6d249_n_qfxwsl6xvm0000gn/T/pip-req-build-xxsyexfp/opencv/modules/videoio/src/cap_images.cpp:253: error: (-5:Bad argument) CAP_IMAGES: can't find starting number (in the name of file): behavior_data/control/.DS_Store in function 'icvExtractPattern'




float division by zero
failed on .DS_Store


control_videos: 100%|██████████| 62/62 [00:23<00:00,  2.69it/s]
0it [00:00, ?it/s]INFO: Created TensorFlow Lite XNNPACK delegate for CPU.
41it [01:23,  2.16s/it]

In [1]:
# finally, only track one point

K = 5

import mediapipe as mp 
from PIL import Image as im 
import mediapipe as mp


def hand_locations(frame, min_detection_confidence = 0.5, min_tracking_confidence = 0.5): 
    """Only give the 0th landmark"""

    hands = mp.solutions.hands.Hands(min_detection_confidence=min_detection_confidence, min_tracking_confidence=min_tracking_confidence)  
    results = hands.process(frame.astype('uint8'))
    X_locations = [0] * 2
    Y_locations = [0] * 2
    Z_locations = [0] * 2
    if results.multi_hand_landmarks:
        for hand, hand_landmark in enumerate(results.multi_hand_landmarks):
            for i in range(0, 21):
                landmark = hand_landmark.landmark[i]
                X_locations[hand] = landmark.x
                Y_locations[hand] = landmark.y
                Z_locations[hand] = landmark.z
                break # take only the first landmark
            
    hands.close()
    return np.concatenate([X_locations, Y_locations, Z_locations]) 

# time to actually do calibration. 

SECONDS_TO_DETECT = 2 # in seconds

import os, cv2
from tqdm import tqdm 
import numpy as np

ARMFLAPPING_VIDEOS = []
CONTROL_VIDEOS = []
ARMFLAPPING_FPS = [] # store the FPS of all armflapping videos 
CONTROL_FPS = [] # store the FPS of all control videos 
for video_name in tqdm(os.listdir('behavior_data/shorter_armflapping'), desc = "armflapping_videos"): 
    try: 
        cap = cv2.VideoCapture('behavior_data/armflapping' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break  
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image) 
        
        ARMFLAPPING_VIDEOS.append(FRAMES)
        ARMFLAPPING_FPS.append(frame_rate)
    except Exception as e: 
        print(f"failed on {video_name}")

for video_name in tqdm(os.listdir('behavior_data/control'), desc = "control_videos"): 
    try: 
        cap = cv2.VideoCapture('behavior_data/shorter_control/' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break 
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image)
        
        CONTROL_VIDEOS.append(FRAMES)
        CONTROL_FPS.append(frame_rate)
    except Exception as e: 
        print(f"failed on {video_name}")
    
# get the locations of all of the videos 

ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS = [], []
for FRAMES in tqdm(ARMFLAPPING_VIDEOS) :
    locs = []
    for frame in FRAMES: 
        locs.append(hand_locations(frame))
    ARMFLAPPING_LOCATIONS.append(locs)

for FRAMES in tqdm(CONTROL_VIDEOS):  
    locs = []
    for frame in FRAMES: 
        locs.append(hand_locations(frame))
    CONTROL_LOCATIONS.append(locs)

N = min([len(locs) for locs in [ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS]])
ARMFLAPPING_LOCATIONS = ARMFLAPPING_LOCATIONS[:N]
CONTROL_LOCATIONS = CONTROL_LOCATIONS[:N]
ARMFLAPPING_LOCATIONS = np.array(ARMFLAPPING_LOCATIONS)
CONTROL_LOCATIONS = np.array(CONTROL_LOCATIONS)

# we can create a padding function in order to pad 
def pad(locations, maxlen = 90, padding = "post", truncating = "post"): 
    new_locations = locations.tolist() 
    empty_row = np.zeros((1, 6))
    for i, video in tqdm(enumerate(new_locations)): 
        if len(video) < maxlen:  
            for new_row in range(maxlen - len(video)): 
                if padding == "post": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([new_locations[i], empty_row])
                if padding == "pre": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([empty_row, new_locations[i]])

        if len(video) > maxlen: 
            if truncating == "post": 
                new_locations[i] = new_locations[i][:maxlen]
            elif truncating == "pre": 
                new_locations[i] = new_locations[i][len(video) - maxlen : ]
    return np.array(new_locations)

padded_armflapping_locations = ARMFLAPPING_LOCATIONS
padded_control_locations = CONTROL_LOCATIONS
padded_armflapping_locations = pad(padded_armflapping_locations, maxlen = 90)
padded_control_locations = pad(padded_control_locations, maxlen = 90)
print(padded_control_locations.shape, padded_armflapping_locations.shape)
assert padded_armflapping_locations.shape == padded_control_locations.shape 

from sklearn.model_selection import train_test_split

def generate_data(ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS): 
    ARMFLAPPING_LABELS = np.ones(ARMFLAPPING_LOCATIONS.shape[0])
    CONTROL_LABELS = np.zeros(CONTROL_LOCATIONS.shape[0])
    
    # concatenate 
    data = np.concatenate([ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS])
    labels = np.concatenate([ARMFLAPPING_LABELS, CONTROL_LABELS])
    
    return data, labels 

X, y = generate_data(padded_armflapping_locations, padded_control_locations)

FileNotFoundError: [Errno 2] No such file or directory: 'behavior_data/shorter_armflapping'

In [29]:
# first track all points

K = 5

import mediapipe as mp 
from PIL import Image as im 
import mediapipe as mp

def hand_locations(frame, min_detection_confidence = 0.5, min_tracking_confidence = 0.5): 
    hands = mp.solutions.hands.Hands(min_detection_confidence=min_detection_confidence, min_tracking_confidence=min_tracking_confidence) # MAKE SURE THIS IS ALL GOOD 
    results = hands.process(frame.astype('uint8'))
    X_locations = [0] * 42 # use 0 as default if the class is not there 
    Y_locations = [0] * 42 
    Z_locations = [0] * 42
    x = y = z = 0 
    if results.multi_hand_landmarks:
        for hand, hand_landmark in enumerate(results.multi_hand_landmarks):
            for i in range(0, 21):
                landmark = hand_landmark.landmark[i]
                X_locations[x] = landmark.x
                Y_locations[y] = landmark.y
                Z_locations[z] = landmark.z
                x += 1; y += 1; z += 1; 
    locations = np.concatenate([X_locations, Y_locations, Z_locations])
    hands.close()
    return locations 

# time to actually do calibration. 

SECONDS_TO_DETECT = 2 # in seconds

import os, cv2
from tqdm import tqdm 
import numpy as np

ARMFLAPPING_VIDEOS = []
CONTROL_VIDEOS = []
ARMFLAPPING_FPS = [] # store the FPS of all armflapping videos 
CONTROL_FPS = [] # store the FPS of all control videos 
for video_name in tqdm(os.listdir('behavior_data/shorter_armflapping'), desc = "armflapping_videos"): 
    try: 
        cap = cv2.VideoCapture('behavior_data/armflapping' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break  
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image) 
        
        ARMFLAPPING_VIDEOS.append(FRAMES)
        ARMFLAPPING_FPS.append(frame_rate)
    except Exception as e: 
        print(f"failed on {video_name}")

for video_name in tqdm(os.listdir('behavior_data/control'), desc = "control_videos"): 
    try: 
        cap = cv2.VideoCapture('behavior_data/shorter_control/' + video_name)  
        frame_rate = cap.get(cv2.CAP_PROP_FPS)
        if cap.get(cv2.CAP_PROP_FRAME_COUNT) / frame_rate < SECONDS_TO_DETECT: continue # too short! 

        FRAMES = [] # frames for this video 

        while cap.isOpened(): 
            _, image = cap.read() 
            if not _ : 
                break 
            
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert to RGB. 
            FRAMES.append(image)
        
        CONTROL_VIDEOS.append(FRAMES)
        CONTROL_FPS.append(frame_rate)
    except Exception as e: 
        print(f"failed on {video_name}")
    
# get the locations of all of the videos 

ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS = [], []
for FRAMES in tqdm(ARMFLAPPING_VIDEOS) :
    locs = []
    for frame in FRAMES: 
        locs.append(hand_locations(frame))
    ARMFLAPPING_LOCATIONS.append(locs)

for FRAMES in tqdm(CONTROL_VIDEOS):  
    locs = []
    for frame in FRAMES: 
        locs.append(hand_locations(frame))
    CONTROL_LOCATIONS.append(locs)

N = min([len(locs) for locs in [ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS]])
ARMFLAPPING_LOCATIONS = ARMFLAPPING_LOCATIONS[:N]
CONTROL_LOCATIONS = CONTROL_LOCATIONS[:N]
ARMFLAPPING_LOCATIONS = np.array(ARMFLAPPING_LOCATIONS)
CONTROL_LOCATIONS = np.array(CONTROL_LOCATIONS)

# we can create a padding function in order to pad 
def pad(locations, maxlen = 90, padding = "post", truncating = "post"): 
    new_locations = locations.tolist() 
    empty_row = np.zeros((1, 126))
    for i, video in tqdm(enumerate(new_locations)): 
        if len(video) < maxlen:  
            for new_row in range(maxlen - len(video)): 
                if padding == "post": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([new_locations[i], empty_row])
                if padding == "pre": 
                    new_locations[i] = np.array(new_locations[i])
                    new_locations[i] = np.concatenate([empty_row, new_locations[i]])

        if len(video) > maxlen: 
            if truncating == "post": 
                new_locations[i] = new_locations[i][:maxlen]
            elif truncating == "pre": 
                new_locations[i] = new_locations[i][len(video) - maxlen : ]
    return np.array(new_locations)

padded_armflapping_locations = ARMFLAPPING_LOCATIONS
padded_control_locations = CONTROL_LOCATIONS
padded_armflapping_locations = pad(padded_armflapping_locations, maxlen = 90)
padded_control_locations = pad(padded_control_locations, maxlen = 90)
print(padded_control_locations.shape, padded_armflapping_locations.shape)
assert padded_armflapping_locations.shape == padded_control_locations.shape 

from sklearn.model_selection import train_test_split

def generate_data(ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS): 
    ARMFLAPPING_LABELS = np.ones(ARMFLAPPING_LOCATIONS.shape[0])
    CONTROL_LABELS = np.zeros(CONTROL_LOCATIONS.shape[0])
    
    # concatenate 
    data = np.concatenate([ARMFLAPPING_LOCATIONS, CONTROL_LOCATIONS])
    labels = np.concatenate([ARMFLAPPING_LABELS, CONTROL_LABELS])
    
    return data, labels 

X, y = generate_data(padded_armflapping_locations, padded_control_locations)

import pickle, numpy as np 

# shuffle 
N = np.random.permutation(X.shape[0])
X, y = X[N], y[N]

"""
import pandas as pd 
new_X = []
for i in range(X.shape[0]): 
    matrix_video = X[i]
    new_data = np.zeros((X.shape[1], 3))
    temp_df = pd.DataFrame(matrix_video)
    temp_df[temp_df == 0] = None 
    new_data[:, 0] = np.array(temp_df[:, :42].mean(axis = 1).fillna(0))
    new_data[:, 0] = np.array(temp_df[:, 42:84].mean(axis = 1).fillna(0))
    new_data[:, 0] = np.array(temp_df[:, 84:126].mean(axis = 1).fillna(0))
    new_X.append(new_data)
X = np.array(new_X)

assert X.shape == (100, 90, 3)

splits = [] # stores k (X_i, y_i) splits
X_splits, y_splits = np.array_split(X, K), np.array_split(y, K)
for X_split, y_split in zip(X_splits, y_splits): 
    splits.append((X_split, y_split))

for i, split in enumerate(splits): 
    with open(f"mean_point_folds/split{i+1}", 'wb') as f: 
        pickle.dump(split, f)
"""

armflapping_videos:  15%|█▍        | 16/108 [00:01<00:05, 16.62it/s]

failed on .DS_Store


armflapping_videos: 100%|██████████| 108/108 [00:07<00:00, 13.88it/s]
control_videos:  19%|█▉        | 12/62 [00:01<00:06,  7.96it/s]

failed on .DS_Store


control_videos: 100%|██████████| 62/62 [00:04<00:00, 14.39it/s]
100%|██████████| 97/97 [03:31<00:00,  2.18s/it]
100%|██████████| 50/50 [01:47<00:00,  2.15s/it]
  ARMFLAPPING_LOCATIONS = np.array(ARMFLAPPING_LOCATIONS)
  CONTROL_LOCATIONS = np.array(CONTROL_LOCATIONS)
50it [00:00, 10563.93it/s]
50it [00:00, 6572.91it/s]

(50, 90, 126) (50, 90, 126)





'\nimport pandas as pd \nnew_X = []\nfor i in range(X.shape[0]): \n    matrix_video = X[i]\n    new_data = np.zeros((X.shape[1], 3))\n    temp_df = pd.DataFrame(matrix_video)\n    temp_df[temp_df == 0] = None \n    new_data[:, 0] = np.array(temp_df[:, :42].mean(axis = 1).fillna(0))\n    new_data[:, 0] = np.array(temp_df[:, 42:84].mean(axis = 1).fillna(0))\n    new_data[:, 0] = np.array(temp_df[:, 84:126].mean(axis = 1).fillna(0))\n    new_X.append(new_data)\nX = np.array(new_X)\n\nassert X.shape == (100, 90, 3)\n\nsplits = [] # stores k (X_i, y_i) splits\nX_splits, y_splits = np.array_split(X, K), np.array_split(y, K)\nfor X_split, y_split in zip(X_splits, y_splits): \n    splits.append((X_split, y_split))\n\nfor i, split in enumerate(splits): \n    with open(f"mean_point_folds/split{i+1}", \'wb\') as f: \n        pickle.dump(split, f)\n'

In [30]:
def mean_locs(frame): 
    """take in a single frame, and return the mean location"""
    assert len(frame) == 126, len(frame) # make sure this is a vector
    X_locs_1, X_locs_2, Y_locs_1, Y_locs_2, Z_locs_1, Z_locs_2 = frame[:21], frame[21:42], frame[42:63], frame[63:84], frame[84:105], frame[105:]
    return [locs[np.nonzero(locs)].mean() for locs in [X_locs_1, X_locs_2, Y_locs_1, Y_locs_2, Z_locs_1, Z_locs_2] ]

new_X = [] 
for video in range(X.shape[0]): 
    matrix_video = X[video]
    new_X.append(np.apply_along_axis(mean_locs, 1, matrix_video))
    
X = np.array(new_X, copy = True) 
assert len(X.shape) == 3

splits = [] # stores k (X_i, y_i) splits
X_splits, y_splits = np.array_split(X, K), np.array_split(y, K)
for X_split, y_split in zip(X_splits, y_splits): 
    splits.append((X_split, y_split))

for i, split in enumerate(splits): 
    with open(f"mean_point_folds/split{i+1}", 'wb') as f: 
        pickle.dump(split, f)

  return [locs[np.nonzero(locs)].mean() for locs in [X_locs_1, X_locs_2, Y_locs_1, Y_locs_2, Z_locs_1, Z_locs_2] ]
  ret = ret.dtype.type(ret / rcount)


In [63]:
with open("temp_X.pkl", 'wb') as f: 
    pickle.dump(X, f)

with open("temp_X.pkl", 'rb') as f: 
    X = pickle.load(f)

In [27]:
X = np.nan_to_num(X)
splits = [] # stores k (X_i, y_i) splits
X_splits, y_splits = np.array_split(X, K), np.array_split(y, K)
for X_split, y_split in zip(X_splits, y_splits): 
    splits.append((X_split, y_split))

for i, split in enumerate(splits): 
    with open(f"mean_point_folds/split{i+1}", 'wb') as f: 
        pickle.dump(split, f)

In [4]:
for file in os.listdir("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/control"):
         if os.path.isfile("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/control/" + file):
             continue
         else:
             #shutil.rmtree("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/armflapping/" + file)
             for vid_name in os.listdir("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/control/" + file):
                 image = cv2.imread("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/control/" + file + "/" + vid_name)
                 image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                 os.remove("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/control/" + file + "/" + vid_name)
                 cv2.imwrite("/Users/anish/Documents/Machine Learning Env/ActivRecognition-Autism-Diagnosis/behavior_data/control/" + file + "/" + vid_name, image)

In [3]:
import cv2, os