# Brief Data Analysis

- Explore the class distribution for each binary problem
- Describe the characteristics of each behavior within the video

In [1]:
import pandas as pd
import os

In [2]:
# Directory containing the label CSV files (presumably one file per video -- confirm).
labels_location = '../data/processed/ImageDatasetRGB/labels'

# os.listdir() returns entries in arbitrary order and also lists non-CSV entries
# (e.g. hidden checkpoint folders), so keep only the CSV files and sort them to
# make `dfs` deterministic across runs and machines.
label_files = sorted(
    name for name in os.listdir(labels_location)
    if name.lower().endswith('.csv')
)
if not label_files:
    # Fail here with a clear message instead of a later IndexError on dfs[0].
    raise FileNotFoundError('No label CSV files found in {}'.format(labels_location))

# One DataFrame per label file, in sorted filename order.
dfs = [pd.read_csv(os.path.join(labels_location, name)) for name in label_files]

In [3]:
# Class balance per behavior: every column of the first label file is treated
# as one binary problem, and counts are pooled over all loaded DataFrames.
behaviors = dfs[0].columns

for behavior in behaviors:
    # Number of rows over all files (denominator of the positive rate).
    total = sum(len(df) for df in dfs)
    # Sum of the label column over all files (number of positives for 0/1 labels).
    pos = sum(df[behavior].sum() for df in dfs)

    print('{} samples:'.format(behavior.capitalize()))
    print('\tPositive samples: {} ({}%)'.format(pos, round(100*pos/total, 4)))
    print('\tTotal samples: {}'.format(total))

Grooming samples:
	Positive samples: 12077 (10.7638%)
	Total samples: 112200
Rearing_mig samples:
	Positive samples: 11881 (10.5891%)
	Total samples: 112200
Rearing_paret samples:
	Positive samples: 14759 (13.1542%)
	Total samples: 112200


In [4]:
# A run of positive rows is only counted as a bout when it lasts strictly
# longer than this many consecutive rows.
MIN_BOUT_LENGTH = 5


def bout_durations(labels, min_length=MIN_BOUT_LENGTH):
    """Return the lengths of runs of consecutive rows whose label equals 1.

    Parameters
    ----------
    labels : iterable
        Label values in row order (e.g. one column of a label DataFrame).
    min_length : int
        Only runs strictly longer than this are kept.

    Returns
    -------
    list of int
        Length of each qualifying run, in order of appearance.
    """
    durations = []
    run_length = 0
    for value in labels:
        if value == 1:
            run_length += 1
            continue
        # The run (if any) just ended: keep it when it is long enough.
        if run_length > min_length:
            durations.append(run_length)
        run_length = 0
    # A run that is still open at the last row has no following non-positive
    # row to close it, so flush it here; otherwise it would be silently dropped.
    if run_length > min_length:
        durations.append(run_length)
    return durations


groomings = []
mid_rearings = []
wall_rearings = []
for df in dfs:
    # Runs are measured per DataFrame, so a bout never spans two label files.
    groomings.extend(bout_durations(df['grooming']))
    mid_rearings.extend(bout_durations(df['rearing_mig']))
    wall_rearings.extend(bout_durations(df['rearing_paret']))

In [5]:
import numpy as np

# Turn the collected per-bout duration lists into NumPy arrays (rebinding the
# same names) so their means can be computed.
groomings, mid_rearings, wall_rearings = map(
    np.array, (groomings, mid_rearings, wall_rearings)
)

# Report the mean bout length of each behavior.
print('Groomings Duration: {} frames'.format(np.mean(groomings)))
print('Mid Rearings Duration: {} frames'.format(np.mean(mid_rearings)))
print('Wall Rearings Duration: {} frames'.format(np.mean(wall_rearings)))

Groomings Duration: 106.95535714285714 frames
Mid Rearings Duration: 24.455301455301456 frames
Wall Rearings Duration: 21.288824383164005 frames
