In [1]:
import pandas as pd
import numpy as np

import os
from copy import deepcopy

In [2]:
# Root of the project checkout; the experiment split folders are written under it.
WORKSPACE_DIR = '/home/dangkhoadl/DL-Audio'
# Metadata table (one CSV) stored in the workspace's data folder.
METADATA = WORKSPACE_DIR + '/data/metadata.csv'

In [3]:
# Load the metadata table and prefix every wav path with 'DATA/'.
data_df = pd.read_csv(METADATA).assign(
    wav_f_path=lambda frame: 'DATA/' + frame['wav_f_path']
)

# Exp 1

In [4]:
# Prefilter
# Keep only rows with Dummy Class in 0-3, Hair Condition 0 and `Duration (s)` == 5.
keep_rows = (
    data_df['Dummy Class'].isin([0,1,2,3])
    & data_df['Hair Condition'].isin([0])
    & data_df['Duration (s)'].isin([5])
)
# Boolean indexing yields a fresh frame, so `data_df` itself is left untouched.
df = data_df[keep_rows].reset_index(drop=True)
# `label` mirrors the dummy class for this experiment.
df['label'] = df['Dummy Class']

In [5]:
# Hair cond 1
# Leave-one-round-out splits: every round in `all_rounds` is used once as the
# test set while the remaining rounds form the training set. One folder per
# split is written under {WORKSPACE_DIR}/data/exp-1/ with train.csv and test.csv.
# NOTE(review): the prefilter cell keeps `Hair Condition == 0`, yet the folder
# prefix below says "Haircondition1" - presumably 1-based naming; confirm.
all_rounds = [2,3,6,7,8,9]
N = len(all_rounds)

for idx, test_round in enumerate(all_rounds):
    # Walk cyclically backwards from the test round to collect the other
    # N-1 rounds; this same order is embedded in the folder name.
    train_round_s = [all_rounds[(idx - i) % N] for i in range(1, N)]
    round_id = 'Haircondition1-' + str(test_round) + '-' + \
        '_'.join(str(r) for r in train_round_s)

    df_train = df[df['Round Number'].isin(train_round_s)].reset_index(drop=True)
    df_test = df[df['Round Number'].isin([test_round])].reset_index(drop=True)

    # Create the output folder under WORKSPACE_DIR unconditionally;
    # exist_ok=True keeps the cell re-runnable whatever the current working
    # directory is (no FileExistsError on the second run).
    out_dir = f"{WORKSPACE_DIR}/data/exp-1/{round_id}"
    os.makedirs(out_dir, exist_ok=True)
    df_train.to_csv(f"{out_dir}/train.csv", index=False)
    df_test.to_csv(f"{out_dir}/test.csv", index=False)

    print(f'{round_id}')
    print(f'\ttrain: {len(df_train)}')
    print(f'\ttest: {len(df_test)}')

Haircondition1-2-9_8_7_6_3
	train: 1574
	test: 302
Haircondition1-3-2_9_8_7_6
	train: 1518
	test: 358
Haircondition1-6-3_2_9_8_7
	train: 1581
	test: 295
Haircondition1-7-6_3_2_9_8
	train: 1545
	test: 331
Haircondition1-8-7_6_3_2_9
	train: 1561
	test: 315
Haircondition1-9-8_7_6_3_2
	train: 1601
	test: 275


# Exp 2

In [6]:
# Prefilter
# Keep only rows with Dummy Class in 0-3, Hair Condition 0 or 1 and
# `Duration (s)` == 5.
df = deepcopy(data_df)
df = df[df['Dummy Class'].isin([0,1,2,3])]
df = df[df['Hair Condition'].isin([0, 1])]
# Explicit copy: adding a column to a boolean-filtered frame otherwise
# triggers pandas' SettingWithCopyWarning. The original index is preserved.
df = df[df['Duration (s)'].isin([5])].copy()
# `label` mirrors the hair condition for this experiment.
df['label'] = df['Hair Condition']

In [7]:
# The two round lists are index-aligned: position k of each list is held out
# together as the test rounds of split k.
test_round_1 = [2,3,6,7,8,9]
test_round_2 = [12,13,16,17,18,19]
all_rounds = test_round_1 + test_round_2

N = len(test_round_1)
for idx in range(N):
    held_out = [test_round_1[idx], test_round_2[idx]]
    round_id = 'testrounds-' + '-'.join(str(r) for r in held_out)

    round_col = df['Round Number']
    is_held_out = round_col.isin(held_out)

    # Test set: held-out rounds only, restricted to dummy classes 2 and 3.
    df_test = df[is_held_out & df['Dummy Class'].isin([2,3])] \
        .reset_index(drop=True)

    # Train set: every other listed round, restricted to dummy classes 0 and 1.
    df_train = df[
        round_col.isin(all_rounds) & ~is_held_out & df['Dummy Class'].isin([0,1])
    ].reset_index(drop=True)

    # Create the output folder under WORKSPACE_DIR unconditionally;
    # exist_ok=True keeps the cell re-runnable whatever the current working
    # directory is (no FileExistsError on the second run).
    out_dir = f"{WORKSPACE_DIR}/data/exp-2/{round_id}"
    os.makedirs(out_dir, exist_ok=True)
    df_train.to_csv(f"{out_dir}/train.csv", index=False)
    df_test.to_csv(f"{out_dir}/test.csv", index=False)

    print(f'{round_id}')
    print(f'\ttrain: {len(df_train)}')
    print(f'\ttest: {len(df_test)}')
    print(f"\t{sorted(df_train['Round Number'].unique())}")

testrounds-2-12
	train: 1683
	test: 293
	[3, 6, 7, 8, 9, 13, 16, 17, 18, 19]
testrounds-3-13
	train: 1648
	test: 322
	[2, 6, 7, 8, 9, 12, 16, 17, 18, 19]
testrounds-6-16
	train: 1632
	test: 289
	[2, 3, 7, 8, 9, 12, 13, 17, 18, 19]
testrounds-7-17
	train: 1666
	test: 354
	[2, 3, 6, 8, 9, 12, 13, 16, 18, 19]
testrounds-8-18
	train: 1627
	test: 362
	[2, 3, 6, 7, 9, 12, 13, 16, 17, 19]
testrounds-9-19
	train: 1634
	test: 333
	[2, 3, 6, 7, 8, 12, 13, 16, 17, 18]


In [8]:
# List the distinct round numbers that appear with Hair Condition == 2
# (presumably used to choose the rounds for Exp 3 - confirm).
hair2_mask = data_df['Hair Condition'].isin([2])
sorted(data_df['Round Number'][hair2_mask].unique())

[22, 23, 24, 25, 26, 27, 28, 29, 30, 31]

# Exp 3

In [9]:
# Prefilter
# Keep only rows with Dummy Class in 0-3, Hair Condition 0, 1 or 2 and
# `Duration (s)` == 5.
df = deepcopy(data_df)
df = df[df['Dummy Class'].isin([0,1,2,3])]
df = df[df['Hair Condition'].isin([0, 1, 2])]
# Explicit copy: adding a column to a boolean-filtered frame otherwise
# triggers pandas' SettingWithCopyWarning. The original index is preserved.
df = df[df['Duration (s)'].isin([5])].copy()
# `label` mirrors the hair condition for this experiment.
df['label'] = df['Hair Condition']

In [10]:
# Three index-aligned round lists: position k of each list is held out
# together by the split loop that follows.
test_round_1 = [2, 3, 6, 7, 8, 9]
test_round_2 = [12, 13, 16, 17, 18, 19]
test_round_3 = [22, 23, 26, 27, 28, 29]
# Flat list of every round that takes part in this experiment.
all_rounds = [*test_round_1, *test_round_2, *test_round_3]

In [11]:
# Build one train/test split per position in the index-aligned round lists and
# write it to {WORKSPACE_DIR}/data/exp-3/<round_id>/{train,test}.csv.
N = len(test_round_1)
for idx in range(N):
    held_out = [test_round_1[idx], test_round_2[idx], test_round_3[idx]]
    round_id = 'testrounds-' + '-'.join(str(r) for r in held_out)

    round_col = df['Round Number']
    is_held_out = round_col.isin(held_out)

    # Test set: held-out rounds only, restricted to dummy classes 2 and 3.
    df_test = df[is_held_out & df['Dummy Class'].isin([2,3])] \
        .reset_index(drop=True)

    # Train set: every other listed round, restricted to dummy classes 0 and 1.
    df_train = df[
        round_col.isin(all_rounds) & ~is_held_out & df['Dummy Class'].isin([0,1])
    ].reset_index(drop=True)

    # Create the output folder under WORKSPACE_DIR unconditionally;
    # exist_ok=True keeps the cell re-runnable whatever the current working
    # directory is (no FileExistsError on the second run).
    out_dir = f"{WORKSPACE_DIR}/data/exp-3/{round_id}"
    os.makedirs(out_dir, exist_ok=True)
    df_train.to_csv(f"{out_dir}/train.csv", index=False)
    df_test.to_csv(f"{out_dir}/test.csv", index=False)

    print(f'{round_id}')
    print(f'\ttrain: {len(df_train)}')
    print(f'\ttest: {len(df_test)}')
    print(f"\t{sorted(df_train['Round Number'].unique())}")

testrounds-2-12-22
	train: 2851
	test: 510
	[3, 6, 7, 8, 9, 13, 16, 17, 18, 19, 23, 26, 27, 28, 29]
testrounds-3-13-23
	train: 2837
	test: 532
	[2, 6, 7, 8, 9, 12, 16, 17, 18, 19, 22, 26, 27, 28, 29]
testrounds-6-16-26
	train: 2788
	test: 543
	[2, 3, 7, 8, 9, 12, 13, 17, 18, 19, 22, 23, 27, 28, 29]
testrounds-7-17-27
	train: 2811
	test: 621
	[2, 3, 6, 8, 9, 12, 13, 16, 18, 19, 22, 23, 26, 28, 29]
testrounds-8-18-28
	train: 2812
	test: 587
	[2, 3, 6, 7, 9, 12, 13, 16, 17, 19, 22, 23, 26, 27, 29]
testrounds-9-19-29
	train: 2806
	test: 593
	[2, 3, 6, 7, 8, 12, 13, 16, 17, 18, 22, 23, 26, 27, 28]
