In [None]:
import os
import os.path
from os import path
import shutil
from tqdm import tqdm
import pandas as pd
import sys
import time
import logging
import numpy as np
import matplotlib.pyplot as plt
import cv2
import re
from pathlib import Path
from unidip import UniDip
import unidip.dip as dip
from sklearn.mixture import GaussianMixture
from frame_count import frame_count

In [None]:
def sorted_alphanumeric(data):
    """Return the strings in ``data`` sorted in natural (numeric-aware) order.

    Each string is split into alternating text and digit runs; digit runs are
    compared as integers and text runs case-insensitively, so ``'2.csv'``
    sorts before ``'10.csv'``.
    """
    def _natural_key(name):
        # The capturing group keeps the digit runs in the split result.
        chunks = re.split('([0-9]+)', name)
        return [int(chunk) if chunk.isdigit() else chunk.lower() for chunk in chunks]

    return sorted(data, key=_natural_key)

In [None]:
def find_min_round(array):
    """Return the rounded smallest non-zero value among the selected rows.

    Parameters
    ----------
    array : pandas.Series
        One coordinate column of a skeleton file; it must contain the index
        labels 0, 1, 2, 5, 8, 11, 15, 16 and 17.

    Returns
    -------
    int
        The smallest non-zero selected value, rounded. ``0`` is returned when
        every selected value is zero, so that a caller slicing an image with
        the min/max pair gets an empty crop instead of an exception.
    """
    # Remove the arms from the equation because they make the bounding boxes too large.
    selected = np.array(array.loc[[0, 1, 2, 5, 8, 11, 15, 16, 17]])
    non_zero = selected[np.nonzero(selected)]
    if non_zero.size == 0:
        # np.min raises ValueError on an empty array; nothing usable is present.
        return 0
    return round(np.min(non_zero))

def find_max_round(array):
    """Return the rounded largest value among rows 0, 1, 2, 5, 8, 11, 15, 16 and 17 of ``array``."""
    selected_rows = [0, 1, 2, 5, 8, 11, 15, 16, 17]
    selected = np.array(array.loc[selected_rows])
    largest = np.max(selected)
    return round(largest)

In [None]:
def bounding_box(frame,coords):
    """Crop ``frame`` to the box spanned by the skeleton coordinates.

    ``coords`` must provide ``'y'`` and ``'x'`` columns; rows are sliced with
    the rounded min/max of ``'y'`` and columns with the rounded min/max of
    ``'x'`` (as computed by ``find_min_round`` / ``find_max_round``).
    """
    ys = coords['y']
    top = find_min_round(ys)
    bottom = find_max_round(ys)
    xs = coords['x']
    left = find_min_round(xs)
    right = find_max_round(xs)
    return frame[top:bottom, left:right]

## Iterate through all of skeleton 1 (tracked) in all the videos


In [None]:
# Root folder of the skeleton data; later cells treat every entry as
# <video folder>/<skeleton subfolder>/<skeleton files>.
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
path_skeletons = Path('/Users/andreibirladeanu/Documents/Data/meal_tracked_exp/')
# Non-hidden entries of path_skeletons in natural (numeric-aware) order.
folders = [x for x in sorted_alphanumeric(os.listdir(path_skeletons)) if x[0]!="."]
# Folder holding the videos; later cells open '<folder>.mp4' from here.
path_videos = Path('/Users/andreibirladeanu/Documents/Data/meal_videos/')

In [None]:
### Discard every skeleton subfolder that holds less than 5% of the skeleton files of its video.
### NOTE(review): the original note said 10%, but the threshold in the code is 5/100 — confirm which is intended.
### WARNING: this permanently deletes folders from disk.

for folder in folders:
    if folder[0] == ".":
        continue
    folder_dir = os.path.join(path_skeletons, folder)

    # Count the files of each non-hidden skeleton subfolder once.
    file_counts = {}
    for skeleton in os.listdir(folder_dir):
        if skeleton[0] == ".":
            continue
        file_counts[skeleton] = len(os.listdir(os.path.join(folder_dir, skeleton)))
    sample_size = sum(file_counts.values())

    # Remove the subfolders whose share of the files is below the threshold.
    for skeleton, n_files in file_counts.items():
        if n_files < (5/100*sample_size):
            shutil.rmtree(os.path.join(folder_dir, skeleton))
        

In [None]:
# Name of the skeleton subfolder that the cells below process inside each video folder.
sk = 'skel3'
# Path of that subfolder for every non-hidden entry of `folders`, in the same order.
skeleton_folders = [os.path.join(path_skeletons, folder, sk) for folder in folders if folder[0]!="."]

In [None]:
# Show the video folder names that the following cells iterate over.
folders

In [None]:
# Show the skeleton folder paths next to the folder names they were built from.
skeleton_folders, folders

In [None]:
# Print one skeleton folder path per line.
for x in skeleton_folders:
    print(x)

## 2. For each folder, get the bounding boxes from the skeleton files and save them in a NumPy array

In [None]:

# For every video: crop the frame around each skeleton file, take the mean
# intensity of the crop and test whether those means are unimodal (dip test).
# If they are not, fit a 2-component Gaussian mixture on the means and move the
# skeleton files into '<sk>_tr1' / '<sk>_tr2' according to their component.
#
# WARNING: destructive. After a split the original skeleton folder is deleted,
# including the files that were skipped (unreadable frame or empty crop).
for skeleton, folder in tqdm(zip(skeleton_folders, folders), total=len(skeleton_folders)):
    if not os.path.isdir(skeleton):
        continue

    # Skeleton files in natural order; hidden files such as .DS_Store are not data.
    skel = [name for name in sorted_alphanumeric(os.listdir(skeleton)) if not name.startswith('.')]
    # Lower the p-value threshold when the sample is very large to avoid false positives.
    p = 0.05 if len(skel) < 10000 else 0.01
    print(folder)

    cap = cv2.VideoCapture(os.path.join(path_videos, folder + '.mp4'))
    if not cap.isOpened():
        print('could not open video for', folder)
        cap.release()
        continue

    # Keep file names and crop means in lock-step so that every mixture label
    # is applied to the file it was computed from, even when frames are skipped.
    kept_files = []
    crop_means = []
    try:
        for file in skel:
            # File names are '<frame number>.csv'; presumably 1-based, hence the -1 — TODO confirm.
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(file.split('.')[0]) - 1)
            ret, image = cap.read()
            if not ret:
                continue  # frame could not be decoded
            coords = pd.read_csv(os.path.join(skeleton, file))
            bbox = bounding_box(image, coords)
            if bbox.shape[0] == 0 or bbox.shape[1] == 0:
                continue  # degenerate box, nothing to measure
            # Store only the scalar mean: crops differ in shape, so they cannot be
            # stacked into one array, and keeping them all would waste memory.
            crop_means.append(np.mean(np.mean(bbox, dtype=np.float32, axis=2)))
            kept_files.append(file)
    finally:
        cap.release()

    if len(kept_files) < 2:
        continue  # a 2-component mixture needs at least two samples

    for_hist = np.array(crop_means).reshape(-1, 1)  # column vector, aligned with kept_files
    for_test = np.sort(np.array(crop_means))        # np.msort was removed in NumPy 2.0
    #plt.hist(for_hist)
    dip_pvalue = dip.diptst(for_test)[1]
    # NOTE(review): unidip appears to return None as p-value in some cases — confirm; treated as "no split".
    if dip_pvalue is not None and dip_pvalue < p:
        gm = GaussianMixture(n_components=2, random_state=0).fit(for_hist)
        labels = gm.predict(for_hist)

        # One target folder per mixture component.
        path_1 = os.path.join(path_skeletons, folder, sk + '_tr1')
        path_2 = os.path.join(path_skeletons, folder, sk + '_tr2')
        os.makedirs(path_1, exist_ok=True)
        os.makedirs(path_2, exist_ok=True)
        for file, label in zip(kept_files, labels):
            target = path_1 if label == 0 else path_2
            Path(os.path.join(skeleton, file)).rename(os.path.join(target, file))
        shutil.rmtree(skeleton)  # delete the original folder after the files have been split

print('success')
    
    
    
        


In [None]:
# NOTE(review): prints a literal 'a'; looks like a leftover scratch cell — confirm it can be removed.
print('a')

### Evaluation

In [None]:
# Visual check of one split: write the crop of every skeleton file in
# `evaluation_path` as a PNG and collect the crops in `histogram`, which the
# next cell turns into per-crop mean intensities.
evaluation_path = Path('/Users/andreibirladeanu/Documents/Data/experimenting/tracked/1053_meal/skel1_tr1')
files = [x for x in sorted_alphanumeric(os.listdir(evaluation_path)) if x[0]!="."]
cap = cv2.VideoCapture(os.path.join(path_videos,'1053_meal.mp4'))
new_viz_path =  Path('/Users/andreibirladeanu/Documents/Data/experimenting/visualize/1053_meal/skel1_tr1')
# Create missing parents and tolerate re-runs.
new_viz_path.mkdir(parents=True, exist_ok=True)
histogram = []

for file in files:
    # File names are '<frame number>.csv'; presumably 1-based, hence the -1 — TODO confirm.
    cap.set(cv2.CAP_PROP_POS_FRAMES, int(file.split('.')[0]) - 1)
    ret, image = cap.read()
    if not ret:
        continue  # frame could not be decoded
    coords = pd.read_csv(os.path.join(evaluation_path, file))
    bbox = bounding_box(image, coords)
    if bbox.shape[0] == 0 or bbox.shape[1] == 0:
        continue  # cv2.imwrite cannot write an empty image
    cv2.imwrite(os.path.join(new_viz_path, file.split('.')[0]+'.png'), bbox)
    # Copy so that only the crop, not the whole frame it is a view of, stays in memory.
    histogram.append(bbox.copy())
    

In [None]:
# Mean intensity of every crop in `histogram`: average over axis 2 first, then over the remaining pixels.
crop_means = [np.mean(np.mean(crop, axis=2)) for crop in histogram]
# One single-element row per crop.
for_hist = np.array([[value] for value in crop_means])
# The same values as a flat vector.
for_dip = np.array(crop_means)

In [None]:
# Histogram of the per-crop mean intensities held in `for_hist`.
plt.hist(for_hist)


In [None]:
# Visual check of one split: write the crop of every skeleton file in
# `evaluation_path` as a PNG and collect the crops in `histogram` for inspection.
evaluation_path = Path('/Users/andreibirladeanu/Documents/Data/experimenting/tracked/1117_meal/skel3_tr1')
files = [x for x in sorted_alphanumeric(os.listdir(evaluation_path)) if x[0]!="."]
# `cap` is left open on purpose: a later cell seeks and reads from it.
cap = cv2.VideoCapture(os.path.join(path_videos,'1117_meal.mp4'))
new_viz_path =  Path('/Users/andreibirladeanu/Documents/Data/experimenting/visualize/1117_meal/skel3_tr1')
new_viz_path.mkdir(parents=True, exist_ok=True)
histogram = []

for file in files:
    # File names are '<frame number>.csv'; presumably 1-based, hence the -1 — TODO confirm.
    cap.set(cv2.CAP_PROP_POS_FRAMES, int(file.split('.')[0]) - 1)
    ret, image = cap.read()
    if not ret:
        continue  # frame could not be decoded
    coords = pd.read_csv(os.path.join(evaluation_path, file))
    bbox = bounding_box(image, coords)
    if bbox.shape[0] == 0 or bbox.shape[1] == 0:
        continue  # cv2.imwrite cannot write an empty image
    # Reuse the crop instead of slicing the frame a second time.
    cv2.imwrite(os.path.join(new_viz_path, file.split('.')[0]+'.png'), bbox)
    # Copy so that only the crop, not the whole frame it is a view of, stays in memory.
    histogram.append(bbox.copy())

In [None]:
# Inspect the most recently collected entry of `histogram`; raises IndexError if the list is empty.
histogram[-1]

In [None]:
# Load one skeleton file so that its bounding-box limits can be inspected in the next cells.
filename = Path('/Users/andreibirladeanu/Documents/Data/experimenting/tracked/1122_meal/skel2/1206.csv')
data = pd.read_csv(filename)

In [None]:
# Lower and upper row limits of the bounding box for the skeleton file loaded into `data`.
find_min_round(data['y']), find_max_round(data['y'])

In [None]:
# Lower and upper column limits of the bounding box for the skeleton file loaded into `data`.
find_min_round(data['x']), find_max_round(data['x'])

In [None]:
# Call the frame_count helper (imported from the local frame_count module) on the 1097_meal video.
# NOTE(review): what manual=True changes is not visible in this notebook — see the frame_count module.
frame_count(os.path.join(path_videos,'1097_meal.mp4'), manual=True)

In [None]:
# Seek the currently open capture to frame index 21350 (property id 1 is OpenCV's CAP_PROP_POS_FRAMES) and read it.
# NOTE(review): `cap` is whichever video was opened last in an earlier cell, not necessarily
# the 1097_meal video whose frames were counted in the previous cell — confirm.
cap.set(1,21350)
ret,image= cap.read()

In [None]:
# OpenCV decodes frames as BGR while matplotlib expects RGB; convert so the colours are displayed correctly.
plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB));

## let's converge: cluster the folders into 2 

In [None]:
# Folder of one video; each non-hidden entry is taken to be a skeleton subfolder.
path_to_tracked = Path('/Users/andreibirladeanu/Documents/Data/experimenting/tracked/1117_meal')
# Names of those entries in natural (numeric-aware) order.
skeletons = [x for x in sorted_alphanumeric(os.listdir(path_to_tracked)) if x[0]!="."]

In [None]:
# Mean non-zero torso size per skeleton folder, in the same order as `skeletons`.
# NOTE(review): `torso_size` is not defined in the visible part of this notebook — it must be
# defined or imported before this cell runs.
all_torsos = []
for skeleton in skeletons:
    skeleton_dir = os.path.join(path_to_tracked, skeleton)
    torso_sizes = []
    # Iterate over the files inside the folder; iterating over `skeleton` itself
    # would walk the characters of the folder name.
    for coords in sorted_alphanumeric(os.listdir(skeleton_dir)):
        if coords.startswith('.'):
            continue  # hidden files such as .DS_Store
        data = pd.read_csv(os.path.join(skeleton_dir, coords))
        torso = torso_size(data)
        if torso != 0:
            torso_sizes.append(torso)
    # Keep one entry per folder; NaN marks a folder without any non-zero torso size.
    all_torsos.append(np.mean(torso_sizes) if torso_sizes else np.nan)
    

In [None]:
# 1-nearest-neighbour lookup (brute force, Euclidean distance) between two 2-D points.
# NOTE(review): NearestNeighbors is not imported in the visible import cell
# (it lives in sklearn.neighbors), and `new_skeleton1` and `i` are not defined in the
# visible cells; `skeletons` is a list of folder names there, so `skeletons[i]['skeleton1']`
# would fail. This looks like a leftover from another notebook — confirm before running.
neigh = NearestNeighbors(n_neighbors=1, algorithm = 'brute', metric='euclidean')

# Fit on a single reference point built from the last entry of new_skeleton1.
neigh.fit([[new_skeleton1[-1][0][1],  new_skeleton1[-1][1][1]]])
# Distance from the query point to that reference point, and its index.
dist, nn = neigh.kneighbors([[skeletons[i]['skeleton1'][0][1],  skeletons[i]['skeleton1'][1][1]]], 
                                    return_distance = True)