In [3]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
def merge_csv_files(directory_path):
    """
    Merges all CSV files in a directory into a single pandas DataFrame.

    Files are read in sorted filename order so that the row order of the
    result is reproducible (``os.listdir`` returns names in arbitrary order).

    Args:
    directory_path (str): The directory path containing the CSV files.
        If the path ends with "dataverse_file" (a trailing path separator is
        ignored) the files are parsed as comma-delimited; otherwise they are
        parsed as semicolon-delimited.

    Returns:
    merged_df (pandas.DataFrame): The merged pandas DataFrame of all CSV files
        in the directory, with a fresh 0..n-1 index.

    Raises:
    ValueError: If the directory contains no file whose name ends in '.csv'.
    """
    # Normalise before testing the suffix so ".../dataverse_file/" is also
    # recognised as the comma-delimited folder.
    normalized_path = os.path.normpath(directory_path)
    delimiter = "," if normalized_path.endswith("dataverse_file") else ";"

    # Sort the names: the listing order is filesystem-dependent, and the row
    # order of the merged frame follows the read order.
    csv_names = sorted(
        name for name in os.listdir(directory_path) if name.endswith('.csv')
    )

    # pd.concat([]) fails with an unhelpful "No objects to concatenate";
    # raise the same exception type with a message that names the directory.
    if not csv_names:
        raise ValueError(f"No CSV files found in directory: {directory_path!r}")

    dfs = [
        pd.read_csv(os.path.join(directory_path, name), delimiter=delimiter)
        for name in csv_names
    ]

    # concatenate all dataframes
    merged_df = pd.concat(dfs, ignore_index=True)

    return merged_df

In [4]:
# Noise data: merge the CSV files of each export folder into one DataFrame per export.

# Root folder on the mounted Drive that holds the export_* sub-folders.
folder_path = '/content/drive/MyDrive/MDA_KUL_personal'

# These folder names do not end in "dataverse_file", so merge_csv_files
# reads them as semicolon-delimited.
file40 = merge_csv_files(folder_path + '/export_40')
file41 = merge_csv_files(folder_path + '/export_41')
file42 = merge_csv_files(folder_path + '/export_42')

# Noise percentile

In [6]:
# (rows, columns) of the merged export_40 frame.
file40.shape

(50320, 27)

In [8]:
# Preview the first 10 rows of the merged export_40 frame.
file40.head(10)

Unnamed: 0,#object_id,description,result_timestamp,laf005_per_hour,laf005_per_hour_unit,laf01_per_hour,laf01_per_hour_unit,laf05_per_hour,laf05_per_hour_unit,laf10_per_hour,...,laf90_per_hour,laf90_per_hour_unit,laf95_per_hour,laf95_per_hour_unit,laf98_per_hour,laf98_per_hour_unit,laf99_per_hour,laf99_per_hour_unit,laf995_per_hour,laf995_per_hour_unit
0,255444,MP 07: Naamsestraat 81,17/02/2022 17:00:00.000,72.5,dB(A),70.4,dB(A),65.1,dB(A),63.0,...,45.5,dB(A),43.6,dB(A),42.0,dB(A),41.2,dB(A),40.6,dB(A)
1,255444,MP 07: Naamsestraat 81,17/02/2022 18:00:00.000,73.3,dB(A),71.9,dB(A),66.0,dB(A),63.5,...,46.0,dB(A),44.6,dB(A),43.1,dB(A),42.2,dB(A),41.4,dB(A)
2,255444,MP 07: Naamsestraat 81,17/02/2022 19:00:00.000,74.9,dB(A),73.2,dB(A),67.8,dB(A),65.0,...,46.2,dB(A),44.3,dB(A),42.4,dB(A),41.5,dB(A),40.9,dB(A)
3,255444,MP 07: Naamsestraat 81,17/02/2022 20:00:00.000,73.8,dB(A),72.9,dB(A),69.5,dB(A),66.8,...,46.7,dB(A),45.5,dB(A),44.3,dB(A),43.5,dB(A),42.9,dB(A)
4,255444,MP 07: Naamsestraat 81,17/02/2022 21:00:00.000,72.4,dB(A),71.3,dB(A),67.1,dB(A),64.7,...,46.6,dB(A),44.6,dB(A),43.0,dB(A),42.1,dB(A),41.5,dB(A)
5,255444,MP 07: Naamsestraat 81,17/02/2022 22:00:00.000,73.7,dB(A),71.7,dB(A),66.0,dB(A),62.8,...,43.6,dB(A),42.3,dB(A),41.1,dB(A),40.4,dB(A),39.8,dB(A)
6,255444,MP 07: Naamsestraat 81,17/02/2022 23:00:00.000,71.1,dB(A),69.6,dB(A),65.4,dB(A),62.4,...,44.2,dB(A),43.0,dB(A),41.5,dB(A),40.3,dB(A),39.0,dB(A)
7,255444,MP 07: Naamsestraat 81,18/02/2022 00:00:00.000,79.5,dB(A),75.7,dB(A),67.6,dB(A),64.1,...,41.1,dB(A),38.8,dB(A),36.7,dB(A),35.7,dB(A),35.1,dB(A)
8,255444,MP 07: Naamsestraat 81,18/02/2022 01:00:00.000,70.9,dB(A),68.8,dB(A),62.1,dB(A),58.3,...,36.7,dB(A),35.4,dB(A),34.5,dB(A),34.0,dB(A),33.8,dB(A)
9,255444,MP 07: Naamsestraat 81,18/02/2022 02:00:00.000,68.3,dB(A),65.9,dB(A),58.7,dB(A),54.8,...,35.2,dB(A),34.2,dB(A),33.6,dB(A),33.3,dB(A),33.1,dB(A)


In [13]:
## Rename '#object_id' to 'object_id' so the column is reachable via attribute access
## (a leading '#' is not valid in a Python attribute name).

# Rebind the result instead of using inplace=True, so the frame is not
# mutated in place across cells.
file40 = file40.rename(columns={'#object_id': 'object_id'})

In [14]:
# List the column labels of file40.
file40.columns

Index(['object_id', 'description', 'result_timestamp', 'laf005_per_hour',
       'laf005_per_hour_unit', 'laf01_per_hour', 'laf01_per_hour_unit',
       'laf05_per_hour', 'laf05_per_hour_unit', 'laf10_per_hour',
       'laf10_per_hour_unit', 'laf25_per_hour', 'laf25_per_hour_unit',
       'laf50_per_hour', 'laf50_per_hour_unit', 'laf75_per_hour',
       'laf75_per_hour_unit', 'laf90_per_hour', 'laf90_per_hour_unit',
       'laf95_per_hour', 'laf95_per_hour_unit', 'laf98_per_hour',
       'laf98_per_hour_unit', 'laf99_per_hour', 'laf99_per_hour_unit',
       'laf995_per_hour', 'laf995_per_hour_unit'],
      dtype='object')

In [16]:
# Distinct object ids present in file40.
np.unique(file40.object_id)

array([255439, 255440, 255441, 255442, 255443, 255444, 280324, 303910],
      dtype=object)

In [17]:
# Distinct values of the description column in file40.
np.unique(file40.description)

array(['MP 01: Naamsestraat 35  Maxim', 'MP 02: Naamsestraat 57 Xior',
       'MP 03: Naamsestraat 62 Taste', 'MP 04: His & Hears',
       'MP 05: Calvariekapel KU Leuven',
       'MP 06: Parkstraat 2 La Filosovia', 'MP 07: Naamsestraat 81',
       'MP08bis - Vrijthof'], dtype=object)

In [18]:
# Distinct units recorded for the laf005_per_hour measurement.
np.unique(file40.laf005_per_hour_unit)

array(['dB(A)'], dtype=object)

In [19]:
# Substring that identifies the "*_unit" columns.
char = "_unit"

# Boolean mask aligned with file40.columns: True where the column name
# contains the substring above.
unit_col = []
for column in file40.columns:
    unit_col.append(char in column)

In [23]:
# Distinct values across all columns selected by the unit_col mask.
np.unique(file40.loc[:,unit_col])
## => every unit column contains only dB(A)

array(['dB(A)'], dtype=object)

# Noise event

In [24]:
# Preview the first 10 rows of the merged export_41 frame.
file41.head(10)

Unnamed: 0,#object_id,description,result_timestamp,noise_event_laeq_model_id,noise_event_laeq_model_id_unit,noise_event_laeq_primary_detected_certainty,noise_event_laeq_primary_detected_certainty_unit,noise_event_laeq_primary_detected_class,noise_event_laeq_primary_detected_class_unit
0,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 08:27:21.737,,,,%,,
1,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 13:58:21.356,,,,%,,
2,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 16:43:15.393,,,,%,,
3,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 19:22:48.428,,,,%,,
4,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 20:32:20.440,,,,%,,
5,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 22:17:57.452,,,,%,,
6,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 22:49:34.456,,,,%,,
7,255439,MP 01: Naamsestraat 35 Maxim,28/02/2022 23:40:57.461,,,,%,,
8,255439,MP 01: Naamsestraat 35 Maxim,01/03/2022 00:07:59.463,,,,%,,
9,255439,MP 01: Naamsestraat 35 Maxim,01/03/2022 01:24:17.470,,,,%,,


In [26]:
# (rows, columns) of the merged export_41 frame.
file41.shape

(81056, 9)

In [28]:
# Fraction of missing values per column (mean of the boolean NaN mask).
file41.isna().mean()

#object_id                                          0.000000
description                                         0.000000
result_timestamp                                    0.000000
noise_event_laeq_model_id                           0.047153
noise_event_laeq_model_id_unit                      1.000000
noise_event_laeq_primary_detected_certainty         0.047153
noise_event_laeq_primary_detected_certainty_unit    0.000000
noise_event_laeq_primary_detected_class             0.047153
noise_event_laeq_primary_detected_class_unit        1.000000
dtype: float64

In [30]:
# Show only the rows whose noise_event_laeq_model_id is not missing.
file41[file41['noise_event_laeq_model_id'].notna()]

Unnamed: 0,#object_id,description,result_timestamp,noise_event_laeq_model_id,noise_event_laeq_model_id_unit,noise_event_laeq_primary_detected_certainty,noise_event_laeq_primary_detected_certainty_unit,noise_event_laeq_primary_detected_class,noise_event_laeq_primary_detected_class_unit
228,255439,MP 01: Naamsestraat 35 Maxim,07/03/2022 16:46:49.515,13.0,,86.0,%,Transport road - Passenger car,
229,255439,MP 01: Naamsestraat 35 Maxim,07/03/2022 19:04:05.530,13.0,,83.0,%,Transport road - Siren,
230,255439,MP 01: Naamsestraat 35 Maxim,07/03/2022 19:48:06.536,13.0,,87.0,%,Transport road - Passenger car,
231,255439,MP 01: Naamsestraat 35 Maxim,07/03/2022 20:04:21.538,13.0,,89.0,%,Human voice - Shouting,
232,255439,MP 01: Naamsestraat 35 Maxim,07/03/2022 22:22:34.553,13.0,,99.0,%,Transport road - Passenger car,
...,...,...,...,...,...,...,...,...,...
81051,255442,MP 05: Calvariekapel KU Leuven,31/12/2022 23:46:06.401,16.0,,99.0,%,Human voice - Shouting,
81052,255442,MP 05: Calvariekapel KU Leuven,31/12/2022 23:46:13.402,16.0,,0.0,%,Unsupported,
81053,255442,MP 05: Calvariekapel KU Leuven,31/12/2022 23:46:23.402,16.0,,0.0,%,Unsupported,
81054,255442,MP 05: Calvariekapel KU Leuven,31/12/2022 23:47:05.402,16.0,,0.0,%,Unsupported,


In [31]:
# Smallest detected-certainty value; missing entries are skipped.
file41['noise_event_laeq_primary_detected_certainty'].min()

0.0

In [32]:
# Distinct detected-certainty values present in file41.
np.unique(file41.noise_event_laeq_primary_detected_certainty)

array([  0.,  12.,  15.,  18.,  19.,  21.,  22.,  23.,  24.,  25.,  26.,
        27.,  28.,  29.,  30.,  31.,  32.,  33.,  34.,  35.,  36.,  37.,
        38.,  39.,  40.,  41.,  42.,  43.,  44.,  45.,  46.,  47.,  48.,
        49.,  50.,  51.,  52.,  53.,  54.,  55.,  56.,  57.,  58.,  59.,
        60.,  61.,  62.,  63.,  64.,  65.,  66.,  67.,  68.,  69.,  70.,
        71.,  72.,  73.,  74.,  75.,  76.,  77.,  78.,  79.,  80.,  81.,
        82.,  83.,  84.,  85.,  86.,  87.,  88.,  89.,  90.,  91.,  92.,
        93.,  94.,  95.,  96.,  97.,  98.,  99., 100.,  nan])

In [34]:
# Distinct model ids together with the number of rows carrying each one.
np.unique(file41.noise_event_laeq_model_id, return_counts=True)

(array([ 3.,  8.,  9., 12., 13., 16., nan]),
 array([  516,   493,  1648,  6442,  3849, 64286,  3822]))