# Marking the start and end of the exercise

In [1]:
import sys
import pandas as pd
import numpy as np
import os

## 1. Configurations

In [2]:
mark_before =  2
mark_after =  2
save_kinect_as_well = True

output_path = '../../datasets/new_posenet_marked_start_end/'

cut_dataset_path = "https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/kinect_good_preprocessed/"
uncut_dataset_path = "https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/kinect_fixed_not_cut_dup_fNo/"
posetnet_dataset_path = "https://raw.githubusercontent.com/digitacs/4dv652-ml/main/datasets/posenet-uncut/"

## 2. Marking logic

In [3]:
def mark_start_and_stop( file_name ):
    print('Marking "{}"'.format(file_name), end='')
    
    try:
        # read diffrent version of the given file name
        cut_data = pd.read_csv(cut_dataset_path+'{}_kinect.csv'.format( file_name ))
        uncut_data = pd.read_csv(uncut_dataset_path+'{}_kinect.csv'.format( file_name ))
        posenet_data = pd.read_csv(posetnet_dataset_path+'{}.csv'.format( file_name ))
        
        uncut_data['FrameNo'] = uncut_data['FrameNo'].astype(int)

        # find the start and end frame number from cutted dataset
        start = cut_data.iloc[0]['FrameNo']
        end = cut_data.iloc[len(cut_data)-1]['FrameNo']

        # find the start and end frame number from uncutted dataset
        start_uncut = uncut_data.iloc[0]['FrameNo']
        end_uncut = uncut_data.iloc[len(uncut_data)-1]['FrameNo']
        

        # in some cases even uncuted version won't start from zero
        # to be sure we will go for the maximum/maximum :D
        start = int(max(start, start_uncut))
        end = int(min(end, end_uncut))
        
        # Find the coresponding frame number in posenet dataset
        start_frame = max(uncut_data['Unnamed: 0'][uncut_data['FrameNo'] == start])
        stop_frame = min(uncut_data['Unnamed: 0'][uncut_data['FrameNo'] == end])

        print(' | start:{} - end:{}'.format(start_frame,stop_frame), end='')
                

        # add a new columns for the start/end classes
        posenet_data['start'] = None
        posenet_data['end'] = None

        # mark the coresponding frame       
        posenet_data.loc[:start_frame,'start'] = '1'
        posenet_data.loc[stop_frame:,'end'] = '1'

        print(' Done!')
        
        return posenet_data
    
    except IOError as e:
        print(e)
        return None
    

## 3. Mark the datasets and save them

### 3.1 The A series (A1-A159)

In [4]:
for a in range(1,160):
    # marke start and end frames
    posenet = mark_start_and_stop( 'A{}'.format(a) )
    
    # jump to next file if current one is unavailable
    if(posenet is None):
        continue
    
    # save posenet dataset to a csv file
    posenet.to_csv(output_path+'A{}.csv'.format(a),index=False)
    

Marking "A1" | start:137 - end:358 Done!
Marking "A2" | start:212 - end:711 Done!
Marking "A3" | start:223 - end:701 Done!
Marking "A4" | start:227 - end:829 Done!
Marking "A5" | start:223 - end:653 Done!
Marking "A6" | start:123 - end:667 Done!
Marking "A7" | start:187 - end:715 Done!
Marking "A8" | start:207 - end:741 Done!
Marking "A9" | start:167 - end:672 Done!
Marking "A10" | start:171 - end:788 Done!
Marking "A11" | start:74 - end:786 Done!
Marking "A12" | start:55 - end:696 Done!
Marking "A13" | start:59 - end:691 Done!
Marking "A14" | start:3 - end:639 Done!
Marking "A15" | start:51 - end:536 Done!
Marking "A16" | start:123 - end:572 Done!
Marking "A17" | start:87 - end:546 Done!
Marking "A18" | start:219 - end:652 Done!
Marking "A19" | start:119 - end:568 Done!
Marking "A20" | start:99 - end:497 Done!
Marking "A21" | start:74 - end:426 Done!
Marking "A22" | start:155 - end:514 Done!
Marking "A23" | start:139 - end:462 Done!
Marking "A24" | start:183 - end:575 Done!
Marking "A

### 3.2 The B series (B1-B22)

In [5]:
for b in range(1,23):
    # marke start and end frames
    posenet = mark_start_and_stop( 'B{}'.format(b) )
    
    # jump to next file if current one is unavailable
    if(posenet is None):
        continue
        
    # save posenet dataset to a csv file
    posenet.to_csv(output_path+'B{}.csv'.format(b),index=False)
    

Marking "B1" | start:209 - end:363 Done!
Marking "B2" | start:119 - end:456 Done!
Marking "B3" | start:95 - end:425 Done!
Marking "B4" | start:3 - end:383 Done!
Marking "B5" | start:167 - end:546 Done!
Marking "B6" | start:631 - end:998 Done!
Marking "B7" | start:495 - end:849 Done!
Marking "B8" | start:435 - end:756 Done!
Marking "B9" | start:387 - end:714 Done!
Marking "B10" | start:435 - end:752 Done!
Marking "B11" | start:212 - end:598 Done!
Marking "B12" | start:427 - end:725 Done!
Marking "B13" | start:599 - end:898 Done!
Marking "B14" | start:455 - end:788 Done!
Marking "B15" | start:447 - end:776 Done!
Marking "B16" | start:499 - end:804 Done!
Marking "B17" | start:443 - end:787 Done!
Marking "B18" | start:435 - end:758 Done!
Marking "B19" | start:407 - end:757 Done!
Marking "B20" | start:395 - end:761 Done!
Marking "B21" | start:395 - end:767 Done!
Marking "B22" | start:363 - end:676 Done!


## 4. Measure Class Imbalance

In [8]:
dataset = None

for file in os.listdir(output_path):
    if file.endswith('.csv'):
        data = pd.read_csv(output_path + file, index_col=0)
    
    if dataset is None:
        dataset = data
    else:
        dataset = pd.concat((dataset, data),ignore_index=True)

In [9]:
dataset['start'].unique()

array([ 1., nan])

In [10]:
dataset['end'].unique()

array([nan,  1.])

In [11]:
train_gc = dataset.groupby(['start']).size()
print(
    'Total: {}\n\nStart: {} ({:.2f}% of total)\n'
    .format(
      len(dataset), 
      train_gc[1],
      train_gc[1] / len(dataset),
    )
  )

Total: 142872

Start: 29333 (0.21% of total)



In [12]:
train_gc = dataset.groupby(['end']).size()
print(
    'Total: {}\n\nEnd: {} ({:.2f}% of total)\n'
    .format(
      len(dataset), 
      train_gc[1],
      train_gc[1] / len(dataset),
    )
  )

Total: 142872

End: 23455 (0.16% of total)

