# Create XACLE metadata for EVAR

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path

XACLE_DATA = Path('../../datasets/XACLE_dataset')

## Training & Validation

In [3]:
def readone(filepath, split, folder_name=None):
    """Load one scored XACLE metadata CSV and reshape it into the metadata columns.

    Args:
        filepath: Path (or file-like object) of a CSV that provides the
            `wav_file_name`, `text` and `average_score` columns.
        split: Value stored in the `split` column of every returned row.
        folder_name: Folder prefix prepended to each wav file name.
            Defaults to `split` when None.

    Returns:
        A new DataFrame with the columns `file_name`, `caption`, `score`,
        `label` and `split`, in that order.
    """
    folder_name = split if folder_name is None else folder_name
    raw = pd.read_csv(filepath)
    score = raw['average_score']
    # Build the result as a fresh frame instead of adding a column to a
    # column-subset of `raw`; assigning into such a subset triggers
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    return pd.DataFrame({
        'file_name': f'{folder_name}/' + raw['wav_file_name'],
        'caption': raw['text'],
        'score': score,
        # Vectorized rounding; halves round to even like np.round
        # (7.5 -> 8.0, 8.5 -> 8.0).
        'label': score.round(),
        'split': split,
    })

# Build the train/valid/test metadata. Both the 'valid' and the 'test' split
# are read from the validation CSV and point at the 'validation' folder.
df = pd.concat([
    readone(XACLE_DATA/'meta_data'/csv_name, split, folder)
    for csv_name, split, folder in (
        ('train_average.csv', 'train', None),  # None -> folder defaults to the split name
        ('validation_average.csv', 'valid', 'validation'),
        ('validation_average.csv', 'test', 'validation'),
    )
])
df.to_csv('evar/evar/metadata/xacle.csv', index=False)
df

Unnamed: 0,file_name,caption,score,label,split
0,train/00000.wav,A water vehicle travels through the water with...,8.00,8.0,train
1,train/00001.wav,A motorcycle drives by,7.50,8.0,train
2,train/00002.wav,Some liquid flows followed by something sink,7.50,8.0,train
3,train/00003.wav,Rain falling with distant thunder roaring,9.25,9.0,train
4,train/00004.wav,The propellers of a helicopter scream as someo...,2.00,2.0,train
...,...,...,...,...,...
2995,validation/10495.wav,Distant murmuring and distant engine revving,4.00,4.0,test
2996,validation/10496.wav,A power tool is turned on and off several times,8.50,8.0,test
2997,validation/10497.wav,Pigeons make sounds and flapping noises,3.75,4.0,test
2998,validation/10498.wav,Several varying hisses,9.00,9.0,test


## Test

Duplicating the test data as dummy `train` and `valid` splits; `score` and `label` are set to 0 for every row.

In [4]:
def readone(filepath, split, folder_name=None):
    """Load one unscored XACLE metadata CSV and reshape it into the metadata columns.

    NOTE: this redefines (shadows) the `readone` from the
    "Training & Validation" section. This variant does not read an
    `average_score` column; it fills `score` with 0.0 and `label` with 0.

    Args:
        filepath: Path (or file-like object) of a CSV that provides the
            `wav_file_name` and `text` columns.
        split: Value stored in the `split` column of every returned row.
        folder_name: Folder prefix prepended to each wav file name.
            Defaults to `split` when None.

    Returns:
        A new DataFrame with the columns `file_name`, `caption`, `score`,
        `label` and `split`, in that order.
    """
    folder_name = split if folder_name is None else folder_name
    raw = pd.read_csv(filepath)
    # Build the result as a fresh frame instead of adding a column to a
    # column-subset of `raw`; assigning into such a subset triggers
    # SettingWithCopyWarning on pandas versions without copy-on-write.
    return pd.DataFrame({
        'file_name': f'{folder_name}/' + raw['wav_file_name'],
        'caption': raw['text'],
        'score': 0.0,
        'label': 0,
        'split': split,
    })

# The same test CSV is loaded once per split; 'train' and 'valid' are dummy
# copies of the test rows.
# NOTE(review): folder_name defaults to the split, so the dummy rows get
# 'train/...' and 'valid/...' file names — confirm those paths resolve.
df = pd.concat([
    readone(XACLE_DATA/'meta_data/test.csv', split_name)
    for split_name in ('train', 'valid', 'test')
])
df.to_csv('evar/evar/metadata/xacle_test.csv', index=False)
df

Unnamed: 0,file_name,caption,score,label,split
0,train/10500.wav,The sound of water slowly trickling into a poo...,0.0,0,train
1,train/10501.wav,A male speaking urgently through a loud and vi...,0.0,0,train
2,train/10502.wav,So we just dropped into the start section,0.0,0,train
3,train/10503.wav,"A sound of heavy breathing is heard, it may be...",0.0,0,train
4,train/10504.wav,"A station announcement in the background, foll...",0.0,0,train
...,...,...,...,...,...
2995,test/13495.wav,A horse moves away and then comes forward whil...,0.0,0,test
2996,test/13496.wav,A woman sneezes quietly three times in succession,0.0,0,test
2997,test/13497.wav,People talk quietly until a sneeze at the end,0.0,0,test
2998,test/13498.wav,Muffled sound of audience cheering and then ap...,0.0,0,test
