In [1]:
import os
import random

import numpy as np
import pandas as pd

In [2]:
#################
# configuration #
#################

# number of individuals per gender
SAMPLE_SIZE = 2500

# number of steps from original distribution to corrected distribution
STEP_SIZE = 5

# number of repetitions per configuration
REPEAT = 100

# seed for the random number generators, so that Restart & Run All
# reproduces the same samples; the stdlib generator drives random.random()
# and NumPy's global generator is what DataFrame.sample() draws from when
# no random_state is passed
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# valid annotation values (1..400 inclusive)
valid_height = set(range(1, 401))
valid_frames = set(range(1, 401))

# bin size used to discretize height and frames
bin_size = 5

In [3]:
#############################
# csv loading and filtering #
#############################

# read the raw annotation table
df = pd.read_csv('OUMVLP_annotations.csv')

# snap height and frames down to the lower edge of their bin
df['height'] = df['height'].floordiv(bin_size).mul(bin_size)
df['frames'] = df['frames'].floordiv(bin_size).mul(bin_size)

# keep only rows whose binned annotations are valid and whose 'test' flag
# is True, then renumber the rows from zero
keep = (
    df['height'].isin(valid_height)
    & df['frames'].isin(valid_frames)
    & (df['test'] == True)
)
df = df[keep].reset_index(drop=True)

# split by gender; the columns listed below are not needed afterwards
unused_columns = ['gender', 'age', 'test']
df_male = df[df['gender'] == 'M'].drop(columns=unused_columns)
df_female = df[df['gender'] == 'F'].drop(columns=unused_columns)

# report how many subjects remain per gender (males, females)
print(len(df_male), len(df_female))

2397 2502


In [None]:
#############################
# create population samples #
#############################

# directory that receives one text file per (p_height, p_frame, p_dress, repetition)
SAMPLE_DIR = '0_samples'


def _restrict_to_match(pool, reference, match_height, match_frames):
    """Return the rows of `pool` that agree with `reference` on the requested columns.

    pool          -- DataFrame of candidate partners
    reference     -- row (Series) of the subject that was drawn first
    match_height  -- if True, keep only rows with the same binned 'height'
    match_frames  -- if True, keep only rows with the same binned 'frames'

    The result may be empty when no candidate satisfies the constraints.
    """
    if match_height:
        pool = pool.loc[pool['height'] == reference['height']]
    if match_frames:
        pool = pool.loc[pool['frames'] == reference['frames']]
    return pool


def _draw_pair(males, females, females_no_dress, p_height, p_frame, p_dress):
    """Try to draw one male/female pair.

    With probability p_height (p_frame) the second subject must share the
    first subject's height (frames) bin; with probability p_dress the female
    is taken from `females_no_dress` instead of `females`. A fair coin
    decides which gender is drawn first.

    Returns (male ID, female ID), or None when no partner satisfies the
    constraints for the subject that was drawn first.
    """
    # same draw order as before: height, frames, dress, then the coin flip
    match_height = random.random() < p_height
    match_frames = random.random() < p_frame
    require_no_dress = random.random() < p_dress
    female_pool = females_no_dress if require_no_dress else females

    if random.random() < 0.5:
        # male first, then a female compatible with him
        male = males.sample().iloc[0]
        candidates = _restrict_to_match(female_pool, male, match_height, match_frames)
        if candidates.empty:
            return None
        female = candidates.sample().iloc[0]
    else:
        # female first, then a male compatible with her
        female = female_pool.sample().iloc[0]
        candidates = _restrict_to_match(males, female, match_height, match_frames)
        if candidates.empty:
            return None
        male = candidates.sample().iloc[0]

    return male['ID'], female['ID']


# the dress filter does not depend on the loop variables, so compute it once
females_no_dress = df_female.loc[df_female['dress'] == False]
if females_no_dress.empty:
    # without this check the sampling below would either fail inside
    # DataFrame.sample() or retry forever once p_dress > 0
    raise ValueError("df_female contains no row with dress == False; "
                     "cannot sample with p_dress > 0")

# make sure the output directory exists before any expensive sampling is done
os.makedirs(SAMPLE_DIR, exist_ok=True)

for _p_height in range(0, STEP_SIZE+1):
    p_height = _p_height/STEP_SIZE
    for _p_frame in range(0, STEP_SIZE+1):
        p_frame = _p_frame/STEP_SIZE
        for _p_dress in range(0, STEP_SIZE+1):
            p_dress = _p_dress/STEP_SIZE

            for r in range(REPEAT):
                selectedM = []
                selectedF = []
                # rejected draws (no compatible partner) are simply retried,
                # so every file ends up with exactly SAMPLE_SIZE pairs
                while len(selectedM) < SAMPLE_SIZE:
                    pair = _draw_pair(df_male, df_female, females_no_dress,
                                      p_height, p_frame, p_dress)
                    if pair is None:
                        continue
                    idM, idF = pair
                    selectedM.append(idM)
                    selectedF.append(idF)

                # file name encodes the three probabilities in percent and the
                # repetition index; line 1 holds the sorted male IDs, line 2
                # the sorted female IDs, both space separated
                with open('{}/{:03d}_{:03d}_{:03d}_{:03d}.txt'.format(SAMPLE_DIR, _p_height*100//STEP_SIZE, _p_frame*100//STEP_SIZE, _p_dress*100//STEP_SIZE, r), 'w') as fp:
                    print(*sorted(selectedM), file=fp)
                    print(*sorted(selectedF), file=fp)
