In [5]:
""" Import script for IDMT-Traffic dataset
Ref:
    J. Abeßer, S. Gourishetti, A. Kátai, T. Clauß, P. Sharma, J. Liebetrau: IDMT-Traffic: An Open Benchmark
    Dataset for Acoustic Traffic Monitoring Research, EUSIPCO, 2021
"""

import os
import glob
import pandas as pd

__author__ = 'Jakob Abeßer (jakob.abesser@idmt.fraunhofer.de)'


def import_idmt_traffic_dataset(fn_txt: str = "idmt_traffic_all") -> pd.DataFrame:
    """ Import IDMT-Traffic dataset

    All metadata is decoded from the WAV file names listed in the text file
    (one file name per line, fields separated by underscores).

    Args:
        fn_txt (str): Text file with all WAV files
    Returns:
        df_dataset (pd.DataFrame): File-wise metadata (all values except 'is_background' are strings)
            Columns:
                'file': WAV filename without the '.wav' extension,
                'is_background': True if recording contains background noise (no vehicle), False else
                'date_time': Recording time (YYYY-MM-DD-HH-mm)
                'location': Recording location
                'speed_kmh': Speed limit at recording site (km/h), UNK if unknown,
                'sample_pos': Sample position (centered) within the original audio recording,
                'daytime': (M)orning or (A)fternoon, 'None' for background files,
                'weather': (D)ry or (W)et road condition, 'None' for background files,
                'vehicle': (B)us, (C)ar, (M)otorcycle, or (T)ruck, 'None' for background files,
                'source_direction': Source direction of passing vehicle: from (L)eft or from (R)ight,
                    'None' for background files,
                'microphone': (SE)= (high-quality) sE8 microphones, (ME) = (low-quality) MEMS microphones (ICS-43434),
                'channel': Original stereo pair channel (12) or (34)
    Raises:
        ValueError: If a listed file name does not have the expected number of underscore-separated fields
    """
    column_names = ['file', 'is_background', 'date_time', 'location', 'speed_kmh', 'sample_pos', 'daytime',
                    'weather', 'vehicle', 'source_direction', 'microphone', 'channel']
    wav_ext = '.wav'

    # load file list; dtype=str prevents pandas from inferring a non-string type for the entries
    df_files = pd.read_csv(fn_txt, names=('file',), dtype=str)
    fn_file_list = df_files['file'].to_list()

    # load metadata from file names
    rows = []

    for fn_wav in fn_file_list:
        # strip only the trailing extension, never a '.wav' occurring inside the name
        fn = fn_wav[:-len(wav_ext)] if fn_wav.endswith(wav_ext) else fn_wav
        parts = fn.split('_')
        is_background = '-BG' in fn

        try:
            if is_background:
                # background noise files carry no vehicle / weather / daytime fields
                date_time, location, speed_kmh, sample_pos, mic, channel = parts
                vehicle, source_direction, weather, daytime = 'None', 'None', 'None', 'None'
            else:
                # files with vehicle passings
                date_time, location, speed_kmh, sample_pos, daytime, weather, vehicle_direction, mic, channel = parts
                # two-character code: vehicle type followed by source direction
                vehicle, source_direction = vehicle_direction
        except ValueError as err:
            # report which entry is malformed instead of a bare unpacking error
            raise ValueError("Unexpected IDMT-Traffic file name format: '{}'".format(fn_wav)) from err

        channel = channel.replace('-BG', '')
        # 'unknownKmh' must be mapped before the generic 'Kmh' suffix is removed
        speed_kmh = speed_kmh.replace('unknownKmh', 'UNK')
        speed_kmh = speed_kmh.replace('Kmh', '')

        rows.append({'file': fn,
                     'is_background': is_background,
                     'date_time': date_time,
                     'location': location,
                     'speed_kmh': speed_kmh,
                     'sample_pos': sample_pos,
                     'daytime': daytime,
                     'weather': weather,
                     'vehicle': vehicle,
                     'source_direction': source_direction,
                     'microphone': mic,
                     'channel': channel})

    df_dataset = pd.DataFrame(rows, columns=column_names)

    return df_dataset


if __name__ == '__main__':

    # example use: file lists whose metadata tables are printed below
    fn_txt_list = [
        "idmt_traffic_all.txt",    # complete IDMT-Traffic dataset
        "eusipco_2021_train.txt",  # training set of EUSIPCO 2021 paper
        "eusipco_2021_test.txt",   # test set of EUSIPCO 2021 paper
    ]

    # import and show the metadata of each file list in turn
    for fn_txt in fn_txt_list:
        print(f'Metadata for {fn_txt}:')
        print(import_idmt_traffic_dataset(fn_txt))

# Export the metadata table of the complete dataset to an XLSX file (row index included)
df_all_files = import_idmt_traffic_dataset("idmt_traffic_all.txt")
df_all_files.to_excel('df_dataset.xlsx', index=True)

Metadata for idmt_traffic_all.txt:
                                                    file  is_background  \
0      2019-10-22-08-40_Fraunhofer-IDMT_30Kmh_1007744...           True   
1      2019-10-22-08-40_Fraunhofer-IDMT_30Kmh_1007744...           True   
2      2019-10-22-08-40_Fraunhofer-IDMT_30Kmh_1017344...           True   
3      2019-10-22-08-40_Fraunhofer-IDMT_30Kmh_1017344...           True   
4      2019-10-22-08-40_Fraunhofer-IDMT_30Kmh_1026944...           True   
...                                                  ...            ...   
17501  2020-08-29-16-07_Hohenwarte_unknownKmh_728153_...          False   
17502  2020-08-29-16-07_Hohenwarte_unknownKmh_728740_...          False   
17503  2020-08-29-16-07_Hohenwarte_unknownKmh_728740_...          False   
17504  2020-08-29-16-07_Hohenwarte_unknownKmh_730760_...          False   
17505  2020-08-29-16-07_Hohenwarte_unknownKmh_730760_...          False   

              date_time         location speed_kmh sample_pos da