# Marking the start and end of the exercise

In [1]:
import os
import sys
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf

from tensorflow import keras

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


## 1. Configurations

In [2]:
mark_before =  2
mark_after =  2
save_kinect_as_well = True

output_path = '../../datasets/marked_uncut/'

cut_dataset_path = "https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/kinect_good_preprocessed/"
uncut_dataset_path = "https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/kinect_good_preprocessed_not_cut/"
posetnet_dataset_path = "https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/posenet-uncut/"

## 2. Marking logic

In [3]:
def mark_start_and_stop( file_name ):
    print('Marking "{}"'.format(file_name), end='')
    
    try:
        # read diffrent version of the given file name
        cut_data = pd.read_csv(cut_dataset_path+'{}_kinect.csv'.format( file_name ))
        uncut_data = pd.read_csv(uncut_dataset_path+'{}_kinect.csv'.format( file_name ))
        posenet_data = pd.read_csv(posetnet_dataset_path+'{}.csv'.format( file_name ))

        # find the start and end frame number from cutted dataset
        start = cut_data.iloc[0]['FrameNo']
        end = cut_data.iloc[len(cut_data)-1]['FrameNo']

        # find the start and end frame number from uncutted dataset
        start_uncut = uncut_data.iloc[0]['FrameNo']
        end_uncut = uncut_data.iloc[len(uncut_data)-1]['FrameNo']

        # in some cases even uncuted version won't start from zero
        # to be sure we will go for the maximum/maximum :D
        start = np.maximum(start, start_uncut)
        end = np.minimum(end, end_uncut)

        print(' | start:{} - end:{}'.format(start,end), end='')

        # add a new column for the start/end classes
        posenet_data['status'] = None
        uncut_data['status'] = None

        # mark the coresponding frame and it's surrounds in posenet dataset
        posenet_data.loc[
            ((posenet_data['FrameNo'] >= start-mark_before) & 
            (posenet_data['FrameNo'] <= start+mark_after)),
            'status'
        ] = 'Start'

        posenet_data.loc[
            ((posenet_data['FrameNo'] >= end-mark_before) & 
            (posenet_data['FrameNo'] <= end+mark_after)),
            'status'
        ] = 'End'


        # mark the coresponding frame and it's surrounds in uncut kinect dataset
        uncut_data.loc[
            ((uncut_data['FrameNo'] >= start-mark_before) & 
            (uncut_data['FrameNo'] <= start+mark_after)),
            'status'
        ] = 'Start'

        uncut_data.loc[
            ((uncut_data['FrameNo'] >= end-mark_before) & 
            (uncut_data['FrameNo'] <= end+mark_after)),
            'status'
        ] = 'End'

        print(' Done!')
        
        return posenet_data, uncut_data
    
    except IOError as e:
        print(e)
        return None,None
    

## 3. Mark the datasets and save them

### 3.1 The A series (A1-A159)

In [4]:
for a in range(1,160):
    # marke start and end frames
    posenet,uncut_kinect = mark_start_and_stop( 'A{}'.format(a) )
    
    # jump to next file if current one is unavailable
    if(posenet is None):
        continue
    
    # save posenet dataset to a csv file
    posenet.to_csv(output_path+'A{}.csv'.format(a))
    
    if save_kinect_as_well:
        # save uncut kinect dataset to a csv file
        uncut_kinect.to_csv(output_path+'A{}_kinect.csv'.format(a))

Marking "A1" | start:68.0 - end:179.0 Done!
Marking "A2" | start:70.0 - end:223.0 Done!
Marking "A3" | start:55.0 - end:182.0 Done!
Marking "A4" | start:56.0 - end:219.0 Done!
Marking "A5" | start:55.0 - end:168.0 Done!
Marking "A6" | start:30.0 - end:172.0 Done!
Marking "A7" | start:46.0 - end:187.0 Done!
Marking "A8" | start:51.0 - end:194.0 Done!
Marking "A9" | start:41.0 - end:174.0 Done!
Marking "A10" | start:42.0 - end:202.0 Done!
Marking "A11" | start:24.0 - end:241.0 Done!
Marking "A12" | start:13.0 - end:179.0 Done!
Marking "A13" | start:14.0 - end:178.0 Done!
Marking "A14" | start:0.0 - end:162.0 Done!
Marking "A15" | start:12.0 - end:134.0 Done!
Marking "A16" | start:30.0 - end:150.0 Done!
Marking "A17" | start:21.0 - end:142.0 Done!
Marking "A18" | start:54.0 - end:167.0 Done!
Marking "A19" | start:29.0 - end:148.0 Done!
Marking "A20" | start:24.0 - end:127.0 Done!
Marking "A21" | start:24.0 - end:142.0 Done!
Marking "A22" | start:38.0 - end:135.0 Done!
Marking "A23" | star

### 3.2 The B series (B1-B22)

In [5]:
for b in range(1,23):
    # marke start and end frames
    posenet,uncut_kinect = mark_start_and_stop( 'B{}'.format(b) )
    
    # jump to next file if current one is unavailable
    if(posenet is None):
        continue
        
    # save posenet dataset to a csv file
    posenet.to_csv(output_path+'B{}.csv'.format(b))
    
    if save_kinect_as_well:
        # save uncut kinect dataset to a csv file
        uncut_kinect.to_csv(output_path+'B{}_kinect.csv'.format(b))

Marking "B1" | start:69.0 - end:121.0 Done!
Marking "B2" | start:29.0 - end:117.0 Done!
Marking "B3" | start:23.0 - end:109.0 Done!
Marking "B4" | start:0.0 - end:98.0 Done!
Marking "B5" | start:41.0 - end:139.0 Done!
Marking "B6" | start:157.0 - end:262.0 Done!
Marking "B7" | start:123.0 - end:218.0 Done!
Marking "B8" | start:108.0 - end:195.0 Done!
Marking "B9" | start:96.0 - end:183.0 Done!
Marking "B10" | start:108.0 - end:193.0 Done!
Marking "B11" | start:70.0 - end:190.0 Done!
Marking "B12" | start:106.0 - end:189.0 Done!
Marking "B13" | start:153.0 - end:239.0 Done!
Marking "B14" | start:113.0 - end:204.0 Done!
Marking "B15" | start:111.0 - end:203.0 Done!
Marking "B16" | start:124.0 - end:207.0 Done!
Marking "B17" | start:110.0 - end:207.0 Done!
Marking "B18" | start:108.0 - end:196.0 Done!
Marking "B19" | start:101.0 - end:195.0 Done!
Marking "B20" | start:98.0 - end:196.0 Done!
Marking "B21" | start:98.0 - end:200.0 Done!
Marking "B22" | start:90.0 - end:173.0 Done!


## 4. Measure Class Imbalance

In [6]:
dataset = None

for file in os.listdir(output_path):
    if file.endswith('.csv') and 'kinect' in file:
        data = pd.read_csv(output_path + file, index_col=0)
    
    if dataset is None:
        dataset = data
    else:
        dataset = pd.concat((dataset, data),ignore_index=True)

In [7]:
dataset['status'].unique()

array(['Start', nan, 'End'], dtype=object)

In [8]:
train_gc = dataset.groupby(['status']).size()
print(
    'Total: {}\n\nStart: {} ({:.2f}% of total)\nEnd: {} ({:.2f}% of total)\n'
    .format(
      len(dataset), 
      train_gc['Start'],
      train_gc['Start'] / len(dataset),
      train_gc['End'],
      train_gc['End'] / len(dataset)
    )
  )

Total: 74485

Start: 1707 (0.02% of total)
End: 1669 (0.02% of total)

