In [1]:
import cv2
from pathlib import Path
import os
import pandas as pd
import numpy as np

pd.set_option('display.max_rows',None)


In [2]:
df = pd.DataFrame(columns=['a', 'b'])
for i, j in zip(range(10), range(10)):
    x = [i, j]
    df.loc[len(df.index)] = x
print(df)

   a  b
0  0  0
1  1  1
2  2  2
3  3  3
4  4  4
5  5  5
6  6  6
7  7  7
8  8  8
9  9  9


In [3]:
df.loc[0]

a    0
b    0
Name: 0, dtype: int64

In [4]:
df.iloc[0]

a    0
b    0
Name: 0, dtype: int64

In [6]:
df['a']

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
Name: a, dtype: int64

In [8]:
df.loc[df['a'] == 2]

Unnamed: 0,a,b
2,2,2


In [9]:
df.where('a' == 2)

ValueError: Array conditional must be same shape as self

In [8]:
p_img_file = Path('../src/data/explorationroom_largest.val')
p_pose_file = Path('../src/data/explorationroom_largest-pose.val')

df = pd.DataFrame({'file_position':pd.Series(dtype='int'), 'monkey':pd.Series(dtype='str'), 'recording':pd.Series(dtype='str'), 'camera':pd.Series(dtype='str'), 'frame':pd.Series(dtype='int'), 'pose':pd.Series(dtype='int')})

with p_img_file.open('r') as img_f:
    with p_pose_file.open('r') as pose_f:
        for i, (img_path, pose) in enumerate(zip([x.rstrip() for x in img_f.readlines()], [x.rstrip() for x in pose_f.readlines()])):
            # print(i, img, pose)
            p_img = Path(img_path)
            img_n = p_img.stem
            # following does not work! border case: recordings including "M1-2" in name. M1-2 is appended to recording name by "_", in that case it's part of the name...
            # vals = img_n.split(sep='_')
            vals = [i, img_n[0:3], img_n[4:img_n.find('cam')-1], *img_n.split(sep='_')[-2:], pose]
            # contains: [name, recording, camera, frame]
            df.loc[len(df.index)] = vals
            
# print(df)
df.camera.unique()
df['camera'].unique()

array(['cam19415032', 'cam19415037', 'cam21013369', 'cam19415034',
       'cam19415039'], dtype=object)

In [34]:
df.sort_values(by=['monkey', 'recording', 'camera', 'pose'], inplace=True)
df

Unnamed: 0,monkey,recording,camera,frame,pose
127,hum,20092021_M1-1,cam19415032,9730,0
244,hum,20092021_M1-1,cam19415032,42175,0
897,hum,20092021_M1-1,cam19415032,8225,0
1036,hum,20092021_M1-1,cam19415032,64820,0
1622,hum,20092021_M1-1,cam19415032,19425,0
...,...,...,...,...,...
1859,luk,27082021_PRR-1,cam21013369,38535,3
292,luk,27082021_PRR-1,cam21013369,45395,4
698,luk,27082021_PRR-1,cam21013369,385,4
946,luk,27082021_PRR-1,cam21013369,45850,4


In [36]:
df.groupby(by=['monkey', 'recording', 'camera', 'pose']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,frame
monkey,recording,camera,pose,Unnamed: 4_level_1
hum,20092021_M1-1,cam19415032,0,8
hum,20092021_M1-1,cam19415032,1,8
hum,20092021_M1-1,cam19415032,2,4
hum,20092021_M1-1,cam19415032,3,6
hum,20092021_M1-1,cam19415032,4,4
...,...,...,...,...
luk,27082021_PRR-1,cam21013369,0,4
luk,27082021_PRR-1,cam21013369,1,16
luk,27082021_PRR-1,cam21013369,2,1
luk,27082021_PRR-1,cam21013369,3,5


In [45]:
print(df.groupby(by=['monkey', 'camera', 'pose'])['frame'].count())

monkey  camera       pose
hum     cam19415032  0        32
                     1        63
                     2        15
                     3        33
                     4        37
        cam19415034  0        32
                     1        55
                     2         9
                     3        31
                     4        53
        cam19415037  0        22
                     1        66
                     2        11
                     3        33
                     4        48
        cam19415039  0        22
                     1        50
                     2         8
                     3        43
                     4        57
        cam21013369  0        35
                     1        67
                     2        11
                     3        35
                     4        32
ken     cam19415032  0        56
                     1        76
                     2         5
                     3         9
                 

In [15]:
df.groupby(by=['monkey', 'recording']).size()

monkey  recording             
hum     20092021_M1-1             150
        21092021_M1-1             150
        21092021_M1-2             150
        22092021_PMd-1_1          150
        22092021_PMd-1_2          150
        23092021_SMA-2            150
ken     01112021-1                150
        01112021-2                150
        02112021-1                150
        02112021-2                150
        03112021-2                143
        03112021-3                150
luk     24082021_M1-2             120
        24082021_PMd-2            120
        24082021_PRR-1            120
        25082021_M1-2             120
        25082021_M1-2_standing    120
        26082021_M1-2_1           120
        26082021_M1-2_2           120
        27082021_M1-1             120
        27082021_PMd-2            120
        27082021_PRR-1            120
dtype: int64

In [16]:
df.groupby(by=['monkey', 'recording', 'camera']).size()

monkey  recording               camera     
hum     20092021_M1-1           cam19415032    30
                                cam19415034    30
                                cam19415037    30
                                cam19415039    30
                                cam21013369    30
        21092021_M1-1           cam19415032    30
                                cam19415034    30
                                cam19415037    30
                                cam19415039    30
                                cam21013369    30
        21092021_M1-2           cam19415032    30
                                cam19415034    30
                                cam19415037    30
                                cam19415039    30
                                cam21013369    30
        22092021_PMd-1_1        cam19415032    30
                                cam19415034    30
                                cam19415037    30
                                cam19415039    30
      

In [9]:
df.groupby(by=['monkey', 'camera']).size()

monkey  camera     
hum     cam19415032    180
        cam19415034    180
        cam19415037    180
        cam19415039    180
        cam21013369    180
ken     cam19415032    179
        cam19415034    179
        cam19415037    179
        cam19415039    178
        cam21013369    178
luk     cam19415032    300
        cam19415034    300
        cam19415039    300
        cam21013369    300
dtype: int64

In [46]:
df.groupby(by=['monkey', 'camera', 'pose']).size()

monkey  camera       pose
hum     cam19415032  0        32
                     1        63
                     2        15
                     3        33
                     4        37
        cam19415034  0        32
                     1        55
                     2         9
                     3        31
                     4        53
        cam19415037  0        22
                     1        66
                     2        11
                     3        33
                     4        48
        cam19415039  0        22
                     1        50
                     2         8
                     3        43
                     4        57
        cam21013369  0        35
                     1        67
                     2        11
                     3        35
                     4        32
ken     cam19415032  0        56
                     1        76
                     2         5
                     3         9
                 

In [54]:
sampled_val = df.groupby(by=['monkey', 'camera', 'pose']).sample(n=5)
sampled_val_np = sampled_val['file_position'].values

In [59]:
p_img_file = Path('../src/data/explorationroom_largest.val')
p_pose_file = Path('../src/data/explorationroom_largest-pose.val')

p_img_file_sampled = Path('../src/data/explorationroom_largest_sampled.val')
p_pose_file_sampled = Path('../src/data/explorationroom_largest_sampled-pose.val')

img_f = p_img_file.open('r')
pose_f = p_pose_file.open('r')

imgs = img_f.readlines()
poses = pose_f.readlines()


p_img_file_sampled.touch()
p_pose_file_sampled.touch()
img_f_out = p_img_file_sampled.open('w')
pose_f_out = p_pose_file_sampled.open('w')

for x in sampled_val_np:
    img_f_out.write(imgs[x])
    pose_f_out.write(poses[x])
