In [None]:
# Mount Google Drive at /content/drive; the data paths used by later cells
# live under this mount point.
from google.colab import drive
drive.mount('/content/drive')



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import datetime

In [None]:
def merge_csv_files(directory_path):
    """
    Merges all CSV files in a directory into a single pandas DataFrame.

    Files are read in sorted filename order so that the row order of the
    result is reproducible (os.listdir returns entries in arbitrary order).

    Args:
    directory_path (str): The directory path containing the CSV files.

    Returns:
    merged_df (pandas.DataFrame): The merged pandas DataFrame of all CSV files
    in the directory, with a fresh 0..n-1 index.

    Raises:
    ValueError: If the directory contains no file ending in '.csv'.
    """
    # Directories named "dataverse_file" hold comma-separated files; every
    # other directory is read as semicolon-separated. normpath strips a
    # trailing separator so "…/dataverse_file/" is recognised as well.
    if os.path.normpath(directory_path).endswith("dataverse_file"):
        delimiter = ","
    else:
        delimiter = ";"

    csv_names = sorted(
        name for name in os.listdir(directory_path) if name.endswith('.csv')
    )
    if not csv_names:
        # pd.concat([]) would raise a ValueError too, but without saying
        # which directory was empty.
        raise ValueError(f"No CSV files found in {directory_path!r}")

    dfs = [
        pd.read_csv(os.path.join(directory_path, name), delimiter=delimiter)
        for name in csv_names
    ]

    # concatenate all dataframes
    merged_df = pd.concat(dfs, ignore_index=True)

    return merged_df

In [None]:
# Noise data: load the three export folders (40, 41, 42) found under the
# shared project folder, one merged DataFrame per export.

folder_path = '/content/drive/MyDrive/MDA_TIEN'

file40, file41, file42 = [
    merge_csv_files(folder_path + export_dir)
    for export_dir in ('/export_40', '/export_41', '/export_42')
]


## File 41 - Noise events

In [None]:
file41.head(10)

Unnamed: 0,#object_id,description,result_timestamp,noise_event_laeq_model_id,noise_event_laeq_model_id_unit,noise_event_laeq_primary_detected_certainty,noise_event_laeq_primary_detected_certainty_unit,noise_event_laeq_primary_detected_class,noise_event_laeq_primary_detected_class_unit
0,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 03:52:12.714,,,,%,,
1,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 05:07:17.712,,,,%,,
2,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 05:07:34.711,,,,%,,
3,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 08:28:22.694,,,,%,,
4,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 08:38:59.695,,,,%,,
5,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 10:25:58.687,,,,%,,
6,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 11:17:29.681,,,,%,,
7,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 11:51:50.679,,,,%,,
8,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 12:27:57.676,,,,%,,
9,255440,MP 02: Naamsestraat 57 Xior,01/03/2022 14:45:54.669,,,,%,,


In [None]:
file41.shape

(81056, 9)

In [None]:
# check missing values
file41.isna().mean()

#object_id                                          0.000000
description                                         0.000000
result_timestamp                                    0.000000
noise_event_laeq_model_id                           0.047153
noise_event_laeq_model_id_unit                      1.000000
noise_event_laeq_primary_detected_certainty         0.047153
noise_event_laeq_primary_detected_certainty_unit    0.000000
noise_event_laeq_primary_detected_class             0.047153
noise_event_laeq_primary_detected_class_unit        1.000000
dtype: float64

In [None]:
file41.loc[~file41["noise_event_laeq_model_id"].isnull()].head(10)

Unnamed: 0,#object_id,description,result_timestamp,noise_event_laeq_model_id,noise_event_laeq_model_id_unit,noise_event_laeq_primary_detected_certainty,noise_event_laeq_primary_detected_certainty_unit,noise_event_laeq_primary_detected_class,noise_event_laeq_primary_detected_class_unit
94,255440,MP 02: Naamsestraat 57 Xior,07/03/2022 22:22:41.166,13.0,,75.0,%,Transport road - Passenger car,
95,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 01:25:51.161,13.0,,75.0,%,Transport road - Siren,
96,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 07:20:38.135,13.0,,98.0,%,Transport road - Passenger car,
97,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 08:37:44.126,13.0,,81.0,%,Transport road - Passenger car,
98,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 08:48:07.124,13.0,,87.0,%,Transport road - Passenger car,
99,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 09:07:24.122,13.0,,66.0,%,Transport road - Siren,
100,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 10:07:33.120,13.0,,92.0,%,Transport road - Passenger car,
101,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 10:22:33.120,13.0,,87.0,%,Transport road - Passenger car,
102,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 11:59:55.114,13.0,,48.0,%,Transport road - Passenger car,
103,255440,MP 02: Naamsestraat 57 Xior,08/03/2022 14:24:40.110,13.0,,47.0,%,Transport road - Siren,


In [None]:
# Drop unnecessary columns: the model id and the three *_unit columns.
file41 = file41.drop(
    columns=[
        "noise_event_laeq_model_id_unit",
        "noise_event_laeq_model_id",
        "noise_event_laeq_primary_detected_certainty_unit",
        "noise_event_laeq_primary_detected_class_unit",
    ]
)

In [None]:
# Rename columns through an explicit old -> new mapping instead of assigning a
# positional list, so a change in column order (or an extra column) cannot
# silently attach the wrong label to the data.
file41 = file41.rename(columns={
    '#object_id': 'object_id',
    'description': 'location',
    'noise_event_laeq_primary_detected_certainty': "noise_event_certainty",
    'noise_event_laeq_primary_detected_class': "noise_event",
})
file41.tail(5)

Unnamed: 0,object_id,location,result_timestamp,noise_event_certainty,noise_event
81051,280324,MP08bis - Vrijthof,29/12/2022 09:08:11.171,99.0,Human voice - Shouting
81052,280324,MP08bis - Vrijthof,30/12/2022 13:54:27.224,99.0,Nature elements - Wind
81053,280324,MP08bis - Vrijthof,30/12/2022 13:56:57.225,0.0,Unsupported
81054,280324,MP08bis - Vrijthof,30/12/2022 15:09:33.233,100.0,Nature elements - Wind
81055,280324,MP08bis - Vrijthof,31/12/2022 12:01:26.480,100.0,Nature elements - Wind


In [None]:
## remove the noise_event rows that are labelled "Unsupported"
# .copy() turns the filtered selection into an independent DataFrame, so the
# column assignments made on file41 in later cells do not raise
# SettingWithCopyWarning.
file41 = file41.loc[file41.noise_event != "Unsupported"].copy()

In [None]:
file41.shape

(60265, 5)

In [None]:
file41.groupby(["object_id", "noise_event"]).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,location,result_timestamp,noise_event_certainty
object_id,noise_event,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
255439,Human voice - Shouting,2470,2470,2470
255439,Human voice - Singing,239,239,239
255439,Music non-amplified,2,2,2
255439,Nature elements - Wind,17,17,17
255439,Transport road - Passenger car,15402,15402,15402
255439,Transport road - Siren,118,118,118
255440,Human voice - Shouting,424,424,424
255440,Human voice - Singing,43,43,43
255440,Music non-amplified,4,4,4
255440,Nature elements - Wind,9,9,9


In [None]:
# Timestamps in the export are day-first (dd/mm/yyyy HH:MM:SS.fff). An explicit
# format stops pandas from reading ambiguous values such as "07/03/2022" as
# month-first.
pd.to_datetime(file41['result_timestamp'], format='%d/%m/%Y %H:%M:%S.%f').dt.date

94       2022-07-03
95       2022-08-03
96       2022-08-03
97       2022-08-03
98       2022-08-03
            ...    
81047    2022-12-28
81051    2022-12-29
81052    2022-12-30
81054    2022-12-30
81055    2022-12-31
Name: result_timestamp, Length: 55657, dtype: object

In [None]:
# extract calendar features from the timestamp
# The export writes timestamps day-first (e.g. "29/12/2022 09:08:11.171").
# Without an explicit format pandas parses ambiguous values such as
# "07/03/2022" month-first, which swaps day and month for the first 12 days
# of every month.
file41["result_timestamp"] = pd.to_datetime(file41['result_timestamp'],
                                            format='%d/%m/%Y %H:%M:%S.%f')
file41['time'] = file41['result_timestamp'].dt.time
file41['date'] = file41['result_timestamp'].dt.date
file41['hour'] = file41['result_timestamp'].dt.hour
file41["month"] = file41["result_timestamp"].dt.month
file41['weekday'] = file41['result_timestamp'].dt.strftime('%a')  # abbreviated name, e.g. 'Mon'
file41.head(10)

Unnamed: 0,object_id,location,result_timestamp,noise_event_certainty,noise_event,time,date,hour,month,weekday
0,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 03:52:12.714,,,03:52:12.714000,2022-01-03,3,1,Mon
1,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 05:07:17.712,,,05:07:17.712000,2022-01-03,5,1,Mon
2,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 05:07:34.711,,,05:07:34.711000,2022-01-03,5,1,Mon
3,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 08:28:22.694,,,08:28:22.694000,2022-01-03,8,1,Mon
4,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 08:38:59.695,,,08:38:59.695000,2022-01-03,8,1,Mon
5,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 10:25:58.687,,,10:25:58.687000,2022-01-03,10,1,Mon
6,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 11:17:29.681,,,11:17:29.681000,2022-01-03,11,1,Mon
7,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 11:51:50.679,,,11:51:50.679000,2022-01-03,11,1,Mon
8,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 12:27:57.676,,,12:27:57.676000,2022-01-03,12,1,Mon
9,255440,MP 02: Naamsestraat 57 Xior,2022-01-03 14:45:54.669,,,14:45:54.669000,2022-01-03,14,1,Mon


In [None]:
np.unique(file41.object_id)

array([255439, 255440, 255441, 255442, 255443, 255444, 280324],
      dtype=object)

In [None]:
np.unique(file41.date)

array([datetime.date(2022, 1, 2), datetime.date(2022, 1, 3),
       datetime.date(2022, 1, 4), datetime.date(2022, 1, 5),
       datetime.date(2022, 1, 6), datetime.date(2022, 1, 7),
       datetime.date(2022, 1, 8), datetime.date(2022, 1, 9),
       datetime.date(2022, 1, 10), datetime.date(2022, 1, 11),
       datetime.date(2022, 1, 12), datetime.date(2022, 1, 13),
       datetime.date(2022, 1, 14), datetime.date(2022, 1, 15),
       datetime.date(2022, 1, 16), datetime.date(2022, 1, 17),
       datetime.date(2022, 1, 18), datetime.date(2022, 1, 19),
       datetime.date(2022, 1, 20), datetime.date(2022, 1, 21),
       datetime.date(2022, 1, 22), datetime.date(2022, 1, 23),
       datetime.date(2022, 1, 24), datetime.date(2022, 1, 25),
       datetime.date(2022, 1, 26), datetime.date(2022, 1, 27),
       datetime.date(2022, 1, 28), datetime.date(2022, 1, 29),
       datetime.date(2022, 1, 30), datetime.date(2022, 1, 31),
       datetime.date(2022, 2, 2), datetime.date(2022, 2, 3),
  

## File 40  - Noise percentile

In [None]:
file40.head(10)

Unnamed: 0,#object_id,description,result_timestamp,laf005_per_hour,laf005_per_hour_unit,laf01_per_hour,laf01_per_hour_unit,laf05_per_hour,laf05_per_hour_unit,laf10_per_hour,...,laf90_per_hour,laf90_per_hour_unit,laf95_per_hour,laf95_per_hour_unit,laf98_per_hour,laf98_per_hour_unit,laf99_per_hour,laf99_per_hour_unit,laf995_per_hour,laf995_per_hour_unit
0,303910,MP 04: His & Hears,16/10/2022 17:00:00.000,77.6,dB(A),75.7,dB(A),70.6,dB(A),65.2,...,31.3,dB(A),31.0,dB(A),30.8,dB(A),30.7,dB(A),30.6,dB(A)
1,303910,MP 04: His & Hears,16/10/2022 18:00:00.000,49.5,dB(A),47.3,dB(A),42.1,dB(A),39.1,...,30.8,dB(A),30.6,dB(A),30.5,dB(A),30.4,dB(A),30.4,dB(A)
2,303910,MP 04: His & Hears,16/10/2022 19:00:00.000,59.5,dB(A),57.1,dB(A),49.9,dB(A),44.8,...,31.4,dB(A),31.1,dB(A),30.8,dB(A),30.7,dB(A),30.6,dB(A)
3,303910,MP 04: His & Hears,16/10/2022 20:00:00.000,45.4,dB(A),43.1,dB(A),39.6,dB(A),38.3,...,30.8,dB(A),30.7,dB(A),30.6,dB(A),30.5,dB(A),30.5,dB(A)
4,303910,MP 04: His & Hears,16/10/2022 21:00:00.000,42.6,dB(A),40.4,dB(A),36.2,dB(A),34.3,...,30.7,dB(A),30.6,dB(A),30.5,dB(A),30.5,dB(A),30.4,dB(A)
5,303910,MP 04: His & Hears,16/10/2022 22:00:00.000,41.7,dB(A),39.5,dB(A),36.0,dB(A),34.1,...,30.7,dB(A),30.6,dB(A),30.5,dB(A),30.5,dB(A),30.4,dB(A)
6,303910,MP 04: His & Hears,16/10/2022 23:00:00.000,37.6,dB(A),35.7,dB(A),33.1,dB(A),31.7,...,30.6,dB(A),30.5,dB(A),30.4,dB(A),30.4,dB(A),30.4,dB(A)
7,303910,MP 04: His & Hears,17/10/2022 00:00:00.000,35.7,dB(A),35.3,dB(A),32.6,dB(A),31.5,...,30.6,dB(A),30.6,dB(A),30.5,dB(A),30.5,dB(A),30.4,dB(A)
8,303910,MP 04: His & Hears,17/10/2022 01:00:00.000,35.8,dB(A),35.3,dB(A),32.6,dB(A),31.6,...,30.7,dB(A),30.6,dB(A),30.6,dB(A),30.5,dB(A),30.5,dB(A)
9,303910,MP 04: His & Hears,17/10/2022 02:00:00.000,35.7,dB(A),35.2,dB(A),32.4,dB(A),31.5,...,30.7,dB(A),30.6,dB(A),30.5,dB(A),30.5,dB(A),30.4,dB(A)


In [None]:
file40.isna().mean()

#object_id              0.0
description             0.0
result_timestamp        0.0
laf005_per_hour         0.0
laf005_per_hour_unit    0.0
laf01_per_hour          0.0
laf01_per_hour_unit     0.0
laf05_per_hour          0.0
laf05_per_hour_unit     0.0
laf10_per_hour          0.0
laf10_per_hour_unit     0.0
laf25_per_hour          0.0
laf25_per_hour_unit     0.0
laf50_per_hour          0.0
laf50_per_hour_unit     0.0
laf75_per_hour          0.0
laf75_per_hour_unit     0.0
laf90_per_hour          0.0
laf90_per_hour_unit     0.0
laf95_per_hour          0.0
laf95_per_hour_unit     0.0
laf98_per_hour          0.0
laf98_per_hour_unit     0.0
laf99_per_hour          0.0
laf99_per_hour_unit     0.0
laf995_per_hour         0.0
laf995_per_hour_unit    0.0
dtype: float64

In [None]:
### The *_unit columns are removed because they hold the same value throughout.
# collect every column whose name ends in 'unit'
cols_to_drop = list(file40.filter(regex='unit$').columns)

# drop them, then rename the id / description columns
file40 = (
    file40
    .drop(columns=cols_to_drop)
    .rename(columns={"#object_id": "object_id", 'description': 'location'})
)

In [None]:
file40.head(10)

Unnamed: 0,object_id,location,result_timestamp,laf005_per_hour,laf01_per_hour,laf05_per_hour,laf10_per_hour,laf25_per_hour,laf50_per_hour,laf75_per_hour,laf90_per_hour,laf95_per_hour,laf98_per_hour,laf99_per_hour,laf995_per_hour
0,303910,MP 04: His & Hears,16/10/2022 17:00:00.000,77.6,75.7,70.6,65.2,43.7,35.6,32.1,31.3,31.0,30.8,30.7,30.6
1,303910,MP 04: His & Hears,16/10/2022 18:00:00.000,49.5,47.3,42.1,39.1,34.8,32.4,31.2,30.8,30.6,30.5,30.4,30.4
2,303910,MP 04: His & Hears,16/10/2022 19:00:00.000,59.5,57.1,49.9,44.8,38.4,33.8,32.2,31.4,31.1,30.8,30.7,30.6
3,303910,MP 04: His & Hears,16/10/2022 20:00:00.000,45.4,43.1,39.6,38.3,35.6,32.1,31.1,30.8,30.7,30.6,30.5,30.5
4,303910,MP 04: His & Hears,16/10/2022 21:00:00.000,42.6,40.4,36.2,34.3,31.9,31.2,30.9,30.7,30.6,30.5,30.5,30.4
5,303910,MP 04: His & Hears,16/10/2022 22:00:00.000,41.7,39.5,36.0,34.1,31.8,31.2,30.9,30.7,30.6,30.5,30.5,30.4
6,303910,MP 04: His & Hears,16/10/2022 23:00:00.000,37.6,35.7,33.1,31.7,31.2,31.0,30.8,30.6,30.5,30.4,30.4,30.4
7,303910,MP 04: His & Hears,17/10/2022 00:00:00.000,35.7,35.3,32.6,31.5,31.2,31.0,30.8,30.6,30.6,30.5,30.5,30.4
8,303910,MP 04: His & Hears,17/10/2022 01:00:00.000,35.8,35.3,32.6,31.6,31.3,31.0,30.8,30.7,30.6,30.6,30.5,30.5
9,303910,MP 04: His & Hears,17/10/2022 02:00:00.000,35.7,35.2,32.4,31.5,31.2,31.0,30.8,30.7,30.6,30.5,30.5,30.4


In [None]:
# Convert the 'result_timestamp' column to a datetime data type.
# The source values are day-first ("16/10/2022 17:00:00.000"); the explicit
# format prevents ambiguous dates (day <= 12) from being read month-first.
file40['result_timestamp'] = pd.to_datetime(file40['result_timestamp'],
                                            format='%d/%m/%Y %H:%M:%S.%f')
file40['date'] = file40['result_timestamp'].dt.date
file40['hour'] = file40['result_timestamp'].dt.hour
file40["month"] = file40["result_timestamp"].dt.month
file40['weekday'] = file40['result_timestamp'].dt.strftime('%a')  # abbreviated name, e.g. 'Sun'
file40.head(4)

Unnamed: 0,object_id,location,result_timestamp,laf005_per_hour,laf01_per_hour,laf05_per_hour,laf10_per_hour,laf25_per_hour,laf50_per_hour,laf75_per_hour,laf90_per_hour,laf95_per_hour,laf98_per_hour,laf99_per_hour,laf995_per_hour,date,hour,month,weekday
0,303910,MP 04: His & Hears,2022-10-16 17:00:00,77.6,75.7,70.6,65.2,43.7,35.6,32.1,31.3,31.0,30.8,30.7,30.6,2022-10-16,17,10,Sun
1,303910,MP 04: His & Hears,2022-10-16 18:00:00,49.5,47.3,42.1,39.1,34.8,32.4,31.2,30.8,30.6,30.5,30.4,30.4,2022-10-16,18,10,Sun
2,303910,MP 04: His & Hears,2022-10-16 19:00:00,59.5,57.1,49.9,44.8,38.4,33.8,32.2,31.4,31.1,30.8,30.7,30.6,2022-10-16,19,10,Sun
3,303910,MP 04: His & Hears,2022-10-16 20:00:00,45.4,43.1,39.6,38.3,35.6,32.1,31.1,30.8,30.7,30.6,30.5,30.5,2022-10-16,20,10,Sun


In [None]:
np.unique(file40.date)

array([datetime.date(2022, 1, 3), datetime.date(2022, 1, 4),
       datetime.date(2022, 1, 5), datetime.date(2022, 1, 6),
       datetime.date(2022, 1, 7), datetime.date(2022, 1, 8),
       datetime.date(2022, 1, 9), datetime.date(2022, 1, 10),
       datetime.date(2022, 1, 11), datetime.date(2022, 1, 12),
       datetime.date(2022, 2, 3), datetime.date(2022, 2, 4),
       datetime.date(2022, 2, 5), datetime.date(2022, 2, 6),
       datetime.date(2022, 2, 7), datetime.date(2022, 2, 8),
       datetime.date(2022, 2, 9), datetime.date(2022, 2, 10),
       datetime.date(2022, 2, 11), datetime.date(2022, 2, 12),
       datetime.date(2022, 2, 13), datetime.date(2022, 2, 14),
       datetime.date(2022, 2, 15), datetime.date(2022, 2, 16),
       datetime.date(2022, 2, 17), datetime.date(2022, 2, 18),
       datetime.date(2022, 2, 19), datetime.date(2022, 2, 20),
       datetime.date(2022, 2, 21), datetime.date(2022, 2, 22),
       datetime.date(2022, 2, 23), datetime.date(2022, 2, 24),
      

In [None]:
np.unique(file40.object_id)

array([255439, 255440, 255441, 255442, 255443, 255444, 280324, 303910],
      dtype=object)

In [None]:
file40.shape

(50320, 19)

## File 42 - Noise level data

In [None]:
file42.head(10)

Unnamed: 0,#object_id,description,result_timestamp,lamax,lamax_unit,laeq,laeq_unit,lceq,lceq_unit,lcpeak,lcpeak_unit
0,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:00.349,87.6,dB(A),82.7,dB(A),83.61,dB(C),97.17,dB(C)
1,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:01.349,84.5,dB(A),83.1,dB(A),84.42,dB(C),96.41,dB(C)
2,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:02.349,84.8,dB(A),82.7,dB(A),84.19,dB(C),96.24,dB(C)
3,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:03.349,81.9,dB(A),79.3,dB(A),81.08,dB(C),94.03,dB(C)
4,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:04.349,78.3,dB(A),76.0,dB(A),77.12,dB(C),89.81,dB(C)
5,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:05.349,80.5,dB(A),77.8,dB(A),78.91,dB(C),91.08,dB(C)
6,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:06.350,78.4,dB(A),73.9,dB(A),75.87,dB(C),89.28,dB(C)
7,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:07.350,70.1,dB(A),67.3,dB(A),70.17,dB(C),88.17,dB(C)
8,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:08.350,65.1,dB(A),61.5,dB(A),64.78,dB(C),78.95,dB(C)
9,255441,MP 03: Naamsestraat 62 Taste,01/01/2022 00:00:09.350,73.7,dB(A),71.2,dB(A),73.62,dB(C),91.9,dB(C)


In [None]:
file42.isna().mean()

#object_id          0.000000e+00
description         0.000000e+00
result_timestamp    0.000000e+00
lamax               2.812916e-07
lamax_unit          2.812916e-07
laeq                2.812916e-07
laeq_unit           2.812916e-07
lceq                1.125166e-06
lceq_unit           1.125166e-06
lcpeak              1.125166e-06
lcpeak_unit         1.125166e-06
dtype: float64

In [None]:
file42.shape

(10665090, 11)

In [None]:
# Give the id and description columns the same names used for the other exports.
file42.rename(
    columns={"#object_id": "object_id", 'description': 'location'},
    inplace=True,
)

In [None]:
## keep only observations where both lamax and lceq are present
file42 = file42.loc[file42.lamax.notna() & file42.lceq.notna()]

In [None]:
file42.isna().mean()

object_id           0.0
location            0.0
result_timestamp    0.0
lamax               0.0
lamax_unit          0.0
laeq                0.0
laeq_unit           0.0
lceq                0.0
lceq_unit           0.0
lcpeak              0.0
lcpeak_unit         0.0
dtype: float64

In [None]:
file42.shape

(10665075, 11)

In [None]:
# Convert the 'result_timestamp' column to a datetime data type.
# The source values are day-first ("01/01/2022 00:00:00.349"). Without an
# explicit format, ambiguous dates (day <= 12) are parsed month-first and the
# resulting dates are wrong.
file42['result_timestamp'] = pd.to_datetime(file42['result_timestamp'],
                                            format='%d/%m/%Y %H:%M:%S.%f')
file42['date'] = file42['result_timestamp'].dt.date
file42['hour'] = file42['result_timestamp'].dt.hour
file42["month"] = file42["result_timestamp"].dt.month
file42['weekday'] = file42['result_timestamp'].dt.strftime('%a')  # abbreviated name, e.g. 'Sat'
file42.head(4)

Unnamed: 0,object_id,location,result_timestamp,lamax,lamax_unit,laeq,laeq_unit,lceq,lceq_unit,lcpeak,lcpeak_unit,date,hour,month,weekday
0,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:00.349,87.6,dB(A),82.7,dB(A),83.61,dB(C),97.17,dB(C),2022-01-01,0,1,Sat
1,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:01.349,84.5,dB(A),83.1,dB(A),84.42,dB(C),96.41,dB(C),2022-01-01,0,1,Sat
2,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:02.349,84.8,dB(A),82.7,dB(A),84.19,dB(C),96.24,dB(C),2022-01-01,0,1,Sat
3,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:03.349,81.9,dB(A),79.3,dB(A),81.08,dB(C),94.03,dB(C),2022-01-01,0,1,Sat


In [None]:
np.unique(file42.hour)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [None]:
np.unique(file42.laeq_unit)

array(['dB(A)'], dtype=object)

In [None]:
np.unique(file42.lamax_unit)

array(['dB(A)'], dtype=object)

In [None]:
np.unique(file42.lceq_unit)

array(['dB(C)'], dtype=object)

In [None]:
np.unique(file42.lcpeak_unit)

array(['dB(C)'], dtype=object)

In [None]:
np.max(file40.date)

datetime.date(2022, 12, 31)

In [None]:
np.min(file40.date)

datetime.date(2022, 1, 3)

In [None]:
# The unit columns each hold a single constant value (checked in the cells
# above), so they are dropped.
file42.drop(
    columns=["lamax_unit", "laeq_unit", "lceq_unit", "lcpeak_unit"],
    inplace=True,
)

## Meteo Data



In [None]:
# Load the four quarterly meteo files for 2022 and stack them.
# DATEUTC is parsed to datetime at load time: later cells use the .dt accessor
# on it, which fails on a plain string column after a kernel restart.
meteo = pd.concat([
    pd.read_csv(f"/content/drive/MyDrive/MDA_TIEN/LC_data/LC_2022Q{quarter}.csv",
                delimiter=",", parse_dates=["DATEUTC"])
    for quarter in (1, 2, 3, 4)
])

In [None]:
meteo.columns

Index(['DATEUTC', 'ID', 'LC_HUMIDITY', 'LC_DWPTEMP', 'LC_n', 'LC_RAD',
       'LC_RAININ', 'LC_DAILYRAIN', 'LC_WINDDIR', 'LC_WINDSPEED', 'Date',
       'Year', 'Month', 'Day', 'Hour', 'Minute', 'LC_RAD60', 'LC_TEMP_QCL0',
       'LC_TEMP_QCL1', 'LC_TEMP_QCL2', 'LC_TEMP_QCL3'],
      dtype='object')

In [None]:
meteo.shape

(5546880, 21)

In [None]:
meteo.head(4)

Unnamed: 0,DATEUTC,ID,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,...,Year,Month,Day,Hour,Minute,LC_RAD60,LC_TEMP_QCL0,LC_TEMP_QCL1,LC_TEMP_QCL2,LC_TEMP_QCL3
0,2022-01-01 00:10:00,LC-002,92.0,11.78,38.0,0.0,0.0,0.0,-169.0,0.43,...,2022,1,1,0,10,0.0,13.11,13.11,13.0515,13.048027
1,2022-01-01 00:20:00,LC-002,92.0,11.73,37.0,0.0,0.0,0.0,-170.0,0.33,...,2022,1,1,0,20,0.0,13.01,13.01,12.9515,12.985849
2,2022-01-01 00:30:00,LC-002,92.0,11.73,38.0,0.0,0.0,0.0,-167.0,0.46,...,2022,1,1,0,30,0.0,13.0,13.0,12.9415,12.950322
3,2022-01-01 00:40:00,LC-002,92.0,11.72,37.0,0.0,0.0,0.0,-160.0,0.52,...,2022,1,1,0,40,0.0,13.0,13.0,12.9415,12.94955


In [None]:
meteo.dtypes

DATEUTC         datetime64[ns]
ID                      object
LC_HUMIDITY            float64
LC_DWPTEMP             float64
LC_n                   float64
LC_RAD                 float64
LC_RAININ              float64
LC_DAILYRAIN           float64
LC_WINDDIR             float64
LC_WINDSPEED           float64
Date                    object
Year                     int64
Month                    int64
Day                      int64
Hour                     int64
Minute                   int64
LC_RAD60               float64
LC_TEMP_QCL0           float64
LC_TEMP_QCL1           float64
LC_TEMP_QCL2           float64
LC_TEMP_QCL3           float64
date                    object
hour                     int64
month                    int64
dtype: object

In [None]:
# Abbreviated weekday name (e.g. 'Sat') derived from DATEUTC.
# NOTE: the .dt accessor requires DATEUTC to already have a datetime dtype.
meteo['weekday'] = meteo['DATEUTC'].dt.strftime('%a')


Unnamed: 0,DATEUTC,ID,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,...,Minute,LC_RAD60,LC_TEMP_QCL0,LC_TEMP_QCL1,LC_TEMP_QCL2,LC_TEMP_QCL3,date,hour,month,weekday
0,2022-01-01 00:10:00,LC-002,92.0,11.78,38.0,0.0,0.0,0.0,-169.0,0.43,...,10,0.0,13.11,13.11,13.0515,13.048027,2022-01-01,0,1,Sat
1,2022-01-01 00:20:00,LC-002,92.0,11.73,37.0,0.0,0.0,0.0,-170.0,0.33,...,20,0.0,13.01,13.01,12.9515,12.985849,2022-01-01,0,1,Sat
2,2022-01-01 00:30:00,LC-002,92.0,11.73,38.0,0.0,0.0,0.0,-167.0,0.46,...,30,0.0,13.0,13.0,12.9415,12.950322,2022-01-01,0,1,Sat
3,2022-01-01 00:40:00,LC-002,92.0,11.72,37.0,0.0,0.0,0.0,-160.0,0.52,...,40,0.0,13.0,13.0,12.9415,12.94955,2022-01-01,0,1,Sat


In [None]:
meteo.head(4)

Unnamed: 0,DATEUTC,ID,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,...,Month,Day,Hour,Minute,LC_RAD60,LC_TEMP_QCL0,LC_TEMP_QCL1,LC_TEMP_QCL2,LC_TEMP_QCL3,weekday
0,2022-01-01 00:10:00,LC-002,92.0,11.78,38.0,0.0,0.0,0.0,-169.0,0.43,...,1,1,0,10,0.0,13.11,13.11,13.0515,13.048027,Sat
1,2022-01-01 00:20:00,LC-002,92.0,11.73,37.0,0.0,0.0,0.0,-170.0,0.33,...,1,1,0,20,0.0,13.01,13.01,12.9515,12.985849,Sat
2,2022-01-01 00:30:00,LC-002,92.0,11.73,38.0,0.0,0.0,0.0,-167.0,0.46,...,1,1,0,30,0.0,13.0,13.0,12.9415,12.950322,Sat
3,2022-01-01 00:40:00,LC-002,92.0,11.72,37.0,0.0,0.0,0.0,-160.0,0.52,...,1,1,0,40,0.0,13.0,13.0,12.9415,12.94955,Sat


In [None]:
meteo.isna().mean()

DATEUTC         0.000000
ID              0.000000
LC_HUMIDITY     0.056770
LC_DWPTEMP      0.056770
LC_n            0.056770
LC_RAD          0.056770
LC_RAININ       0.056770
LC_DAILYRAIN    0.056770
LC_WINDDIR      0.056770
LC_WINDSPEED    0.056770
Date            0.000000
Year            0.000000
Month           0.000000
Day             0.000000
Hour            0.000000
Minute          0.000000
LC_RAD60        0.049942
LC_TEMP_QCL0    0.056770
LC_TEMP_QCL1    0.062285
LC_TEMP_QCL2    0.062285
LC_TEMP_QCL3    0.062285
weekday         0.000000
dtype: float64

In [None]:
# Calendar date (datetime.date objects) taken from DATEUTC; used as a grouping
# key when the meteo data is aggregated per hour further down.
meteo['date'] = meteo['DATEUTC'].dt.date


In [None]:
# Keep only the rows that have a humidity reading.
meteo = meteo[meteo.LC_HUMIDITY.notna()]

In [None]:
meteo.isna().mean()

DATEUTC         0.000000
ID              0.000000
LC_HUMIDITY     0.000000
LC_DWPTEMP      0.000000
LC_n            0.000000
LC_RAD          0.000000
LC_RAININ       0.000000
LC_DAILYRAIN    0.000000
LC_WINDDIR      0.000000
LC_WINDSPEED    0.000000
Date            0.000000
Year            0.000000
Month           0.000000
Day             0.000000
Hour            0.000000
Minute          0.000000
LC_RAD60        0.000000
LC_TEMP_QCL0    0.000000
LC_TEMP_QCL1    0.005846
LC_TEMP_QCL2    0.005846
LC_TEMP_QCL3    0.005846
weekday         0.000000
dtype: float64

## Data processing 


In [None]:
file40.head(4)

Unnamed: 0,object_id,location,result_timestamp,laf005_per_hour,laf01_per_hour,laf05_per_hour,laf10_per_hour,laf25_per_hour,laf50_per_hour,laf75_per_hour,laf90_per_hour,laf95_per_hour,laf98_per_hour,laf99_per_hour,laf995_per_hour,date,hour,month,weekday
0,303910,MP 04: His & Hears,2022-10-16 17:00:00,77.6,75.7,70.6,65.2,43.7,35.6,32.1,31.3,31.0,30.8,30.7,30.6,2022-10-16,17,10,Sun
1,303910,MP 04: His & Hears,2022-10-16 18:00:00,49.5,47.3,42.1,39.1,34.8,32.4,31.2,30.8,30.6,30.5,30.4,30.4,2022-10-16,18,10,Sun
2,303910,MP 04: His & Hears,2022-10-16 19:00:00,59.5,57.1,49.9,44.8,38.4,33.8,32.2,31.4,31.1,30.8,30.7,30.6,2022-10-16,19,10,Sun
3,303910,MP 04: His & Hears,2022-10-16 20:00:00,45.4,43.1,39.6,38.3,35.6,32.1,31.1,30.8,30.7,30.6,30.5,30.5,2022-10-16,20,10,Sun


In [None]:
# Average the percentile levels per sensor and hour.
# numeric_only=True restricts the mean to the numeric columns. Without it the
# non-numeric 'location' column triggers a FutureWarning on older pandas and a
# TypeError on pandas 2.x.
file40 = (
    file40
    .groupby(["object_id", "date", "hour", "month", "weekday"])
    .mean(numeric_only=True)
    .reset_index()
)

In [None]:
file40.head(4)

Unnamed: 0,object_id,date,hour,month,weekday,laf005_per_hour,laf01_per_hour,laf05_per_hour,laf10_per_hour,laf25_per_hour,laf50_per_hour,laf75_per_hour,laf90_per_hour,laf95_per_hour,laf98_per_hour,laf99_per_hour,laf995_per_hour
0,255439,2022-01-04,0,1,Tue,77.2,75.4,70.8,68.9,66.0,62.8,60.1,58.0,56.9,55.9,55.1,54.5
1,255439,2022-01-04,1,1,Tue,78.8,77.2,73.6,72.1,69.8,67.6,65.4,63.5,62.4,61.1,60.4,59.7
2,255439,2022-01-04,2,1,Tue,79.7,77.2,72.5,70.8,68.6,66.2,64.0,62.1,60.9,59.8,59.0,58.3
3,255439,2022-01-04,3,1,Tue,75.0,73.6,69.8,68.1,65.9,63.7,61.6,59.8,58.8,57.8,57.1,56.5


In [None]:
file40.shape

(50314, 17)

In [None]:
file41.head(4)

Unnamed: 0,object_id,location,result_timestamp,noise_event_certainty,noise_event,time,date,hour,month,weekday
94,255440,MP 02: Naamsestraat 57 Xior,2022-07-03 22:22:41.166,75.0,Transport road - Passenger car,22:22:41.166000,2022-07-03,22,7,Sun
95,255440,MP 02: Naamsestraat 57 Xior,2022-08-03 01:25:51.161,75.0,Transport road - Siren,01:25:51.161000,2022-08-03,1,8,Wed
96,255440,MP 02: Naamsestraat 57 Xior,2022-08-03 07:20:38.135,98.0,Transport road - Passenger car,07:20:38.135000,2022-08-03,7,8,Wed
97,255440,MP 02: Naamsestraat 57 Xior,2022-08-03 08:37:44.126,81.0,Transport road - Passenger car,08:37:44.126000,2022-08-03,8,8,Wed


In [None]:
# Count noise events per (object_id, date, hour, month, weekday): one row per
# key combination and one column per noise_event category. Counting the
# 'location' column gives the number of rows in each group.
file41 = pd.pivot_table(
    file41,
    values="location",
    index=["object_id", "date", "hour", "month", "weekday"],
    columns="noise_event",
    aggfunc='count',
).reset_index()

In [None]:
file41.head(4)

noise_event,object_id,date,hour,month,weekday,Human voice - Shouting,Human voice - Singing,Music non-amplified,Nature elements - Wind,Transport road - Passenger car,Transport road - Siren
0,255439,2022-01-04,0,1,Tue,4.0,,,,,1.0
1,255439,2022-01-04,1,1,Tue,10.0,,,,,
2,255439,2022-01-04,2,1,Tue,8.0,,,,,
3,255439,2022-01-04,3,1,Tue,6.0,,,,,


In [None]:
# A missing count means no event of that type occurred in that hour.
file41 = file41.fillna(0)

In [None]:
file41.head(4)

noise_event,object_id,date,hour,month,weekday,Human voice - Shouting,Human voice - Singing,Music non-amplified,Nature elements - Wind,Transport road - Passenger car,Transport road - Siren
0,255439,2022-01-04,0,1,Tue,4.0,0.0,0.0,0.0,0.0,1.0
1,255439,2022-01-04,1,1,Tue,10.0,0.0,0.0,0.0,0.0,0.0
2,255439,2022-01-04,2,1,Tue,8.0,0.0,0.0,0.0,0.0,0.0
3,255439,2022-01-04,3,1,Tue,6.0,0.0,0.0,0.0,0.0,0.0


In [None]:
file42.head(4)

Unnamed: 0,object_id,location,result_timestamp,lamax,laeq,lceq,lcpeak,date,hour,month,weekday
0,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:00.349,87.6,82.7,83.61,97.17,2022-01-01,0,1,Sat
1,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:01.349,84.5,83.1,84.42,96.41,2022-01-01,0,1,Sat
2,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:02.349,84.8,82.7,84.19,96.24,2022-01-01,0,1,Sat
3,255441,MP 03: Naamsestraat 62 Taste,2022-01-01 00:00:03.349,81.9,79.3,81.08,94.03,2022-01-01,0,1,Sat


In [None]:
# Average the per-second noise levels to one row per sensor and hour.
# numeric_only=True restricts the mean to the numeric columns. Without it the
# non-numeric 'location' column triggers a FutureWarning on older pandas and a
# TypeError on pandas 2.x.
file42 = (
    file42
    .groupby(["object_id", "date", "hour", "month", "weekday"])
    .mean(numeric_only=True)
    .reset_index()
)

  file42 = file42.groupby(["object_id", "date", "hour", "month", "weekday"]).mean().reset_index()


In [None]:
file42.head(4)

Unnamed: 0,object_id,date,hour,month,weekday,lamax,laeq,lceq,lcpeak
0,255441,2022-01-01,0,1,Sat,60.322528,57.126833,63.10465,76.595981
1,255441,2022-01-01,1,1,Sat,53.033583,50.853806,58.648786,71.017533
2,255441,2022-01-01,2,1,Sat,52.173702,50.049903,58.282633,70.64055
3,255441,2022-01-01,3,1,Sat,50.821311,48.964907,57.793745,69.698255


In [None]:
file42.shape

(2964, 9)

In [None]:
# Inspect which dates are present in the aggregated noise-level data.
# (The original referenced an undefined variable `k`, which only worked because
# of leftover kernel state.)
np.unique(file42.date)

array([datetime.date(2022, 1, 1), datetime.date(2022, 1, 13),
       datetime.date(2022, 1, 14), datetime.date(2022, 1, 15),
       datetime.date(2022, 1, 16), datetime.date(2022, 1, 17),
       datetime.date(2022, 1, 18), datetime.date(2022, 1, 19),
       datetime.date(2022, 1, 20), datetime.date(2022, 1, 21),
       datetime.date(2022, 1, 22), datetime.date(2022, 1, 23),
       datetime.date(2022, 1, 24), datetime.date(2022, 1, 25),
       datetime.date(2022, 1, 26), datetime.date(2022, 1, 27),
       datetime.date(2022, 1, 28), datetime.date(2022, 1, 29),
       datetime.date(2022, 1, 30), datetime.date(2022, 1, 31),
       datetime.date(2022, 2, 1), datetime.date(2022, 3, 1),
       datetime.date(2022, 4, 1), datetime.date(2022, 5, 1),
       datetime.date(2022, 6, 1), datetime.date(2022, 7, 1),
       datetime.date(2022, 8, 1), datetime.date(2022, 9, 1),
       datetime.date(2022, 10, 1), datetime.date(2022, 11, 1),
       datetime.date(2022, 12, 1)], dtype=object)

In [None]:
meteo.shape

(5231981, 23)

In [None]:
meteo.head(2)

Unnamed: 0,DATEUTC,ID,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,...,Day,Hour,Minute,LC_RAD60,LC_TEMP_QCL0,LC_TEMP_QCL1,LC_TEMP_QCL2,LC_TEMP_QCL3,weekday,date
0,2022-01-01 00:10:00,LC-002,92.0,11.78,38.0,0.0,0.0,0.0,-169.0,0.43,...,1,0,10,0.0,13.11,13.11,13.0515,13.048027,Sat,2022-01-01
1,2022-01-01 00:20:00,LC-002,92.0,11.73,37.0,0.0,0.0,0.0,-170.0,0.33,...,1,0,20,0.0,13.01,13.01,12.9515,12.985849,Sat,2022-01-01


In [None]:
meteo.columns

Index(['DATEUTC', 'ID', 'LC_HUMIDITY', 'LC_DWPTEMP', 'LC_n', 'LC_RAD',
       'LC_RAININ', 'LC_DAILYRAIN', 'LC_WINDDIR', 'LC_WINDSPEED', 'Date',
       'Year', 'Month', 'Day', 'Hour', 'Minute', 'LC_RAD60', 'LC_TEMP_QCL0',
       'LC_TEMP_QCL1', 'LC_TEMP_QCL2', 'LC_TEMP_QCL3', 'weekday', 'date'],
      dtype='object')

In [None]:
# Average the meteo readings per date and hour.
# numeric_only=True restricts the mean to the numeric columns. Without it the
# non-numeric columns (e.g. 'ID', 'Date') trigger a FutureWarning on older
# pandas and a TypeError on pandas 2.x.
meteo = (
    meteo
    .groupby(["date", "weekday", "Month", "Hour"])
    .mean(numeric_only=True)
    .reset_index()
)

  meteo = meteo.groupby(["date", "weekday", "Month", "Hour"]).mean().reset_index()


In [None]:
# Year, Day and Minute are not needed once the data is aggregated per hour.
meteo = meteo.drop(columns=["Year", "Day", "Minute"])

In [None]:
meteo.head(4)

Unnamed: 0,date,weekday,Month,Hour,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,LC_RAD60,LC_TEMP_QCL0,LC_TEMP_QCL1,LC_TEMP_QCL2,LC_TEMP_QCL3
0,2022-01-01,Sat,1,0,90.367865,11.50186,36.353066,0.082452,0.0,0.0,-17.124736,0.153679,0.082452,13.043446,13.054252,13.039616,13.013112
1,2022-01-01,Sat,1,1,89.504394,11.35065,36.316344,0.093146,7e-06,0.0,-17.16696,0.229297,0.084359,13.032021,13.042771,13.028941,13.022762
2,2022-01-01,Sat,1,2,89.311072,11.132355,36.525483,0.094903,9e-06,0.0,-14.453427,0.224025,0.093146,12.837575,12.850302,12.836191,12.828541
3,2022-01-01,Sat,1,3,89.4,10.985421,36.463158,0.085965,0.0,0.0,-17.203509,0.183456,0.092982,12.674368,12.68633,12.671846,12.688939


## Combine noise levels, noise events and weather data

In [None]:
# Preview file40 (laf*_per_hour columns keyed by object_id, date and hour) before joining.
file40.head(4)

Unnamed: 0,object_id,date,hour,month,weekday,laf005_per_hour,laf01_per_hour,laf05_per_hour,laf10_per_hour,laf25_per_hour,laf50_per_hour,laf75_per_hour,laf90_per_hour,laf95_per_hour,laf98_per_hour,laf99_per_hour,laf995_per_hour
0,255439,2022-01-04,0,1,Tue,77.2,75.4,70.8,68.9,66.0,62.8,60.1,58.0,56.9,55.9,55.1,54.5
1,255439,2022-01-04,1,1,Tue,78.8,77.2,73.6,72.1,69.8,67.6,65.4,63.5,62.4,61.1,60.4,59.7
2,255439,2022-01-04,2,1,Tue,79.7,77.2,72.5,70.8,68.6,66.2,64.0,62.1,60.9,59.8,59.0,58.3
3,255439,2022-01-04,3,1,Tue,75.0,73.6,69.8,68.1,65.9,63.7,61.6,59.8,58.8,57.8,57.1,56.5


In [None]:
# Preview file41 (noise-event columns keyed by object_id, date and hour);
# the long event labels are shortened in the next cell.
file41.head(4)

noise_event,object_id,date,hour,month,weekday,Human voice - Shouting,Human voice - Singing,Music non-amplified,Nature elements - Wind,Transport road - Passenger car,Transport road - Siren
0,255439,2022-01-04,0,1,Tue,4.0,0.0,0.0,0.0,0.0,1.0
1,255439,2022-01-04,1,1,Tue,10.0,0.0,0.0,0.0,0.0,0.0
2,255439,2022-01-04,2,1,Tue,8.0,0.0,0.0,0.0,0.0,0.0
3,255439,2022-01-04,3,1,Tue,6.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Replace the verbose noise-event labels with short, code-friendly column names.
# Labels that are absent from the frame are simply left alone by rename().
file41 = file41.rename(
    {
        'Human voice - Shouting': 'HM_voice_shout',
        'Human voice - Singing': 'HM_voice_sing',
        'Music non-amplified': 'MS_non_amplified',
        'Nature elements - Wind': 'NE_wind',
        'Transport road - Passenger car': 'TR_passenger_car',
        'Transport road - Siren': 'TR_siren',
    },
    axis="columns",
)


In [None]:
# Confirm the shortened column names took effect.
file41.head(4)

noise_event,object_id,date,hour,month,weekday,HM_voice_shout,HM_voice_sing,MS_non_amplified,NE_wind,TR_passenger_car,TR_siren
0,255439,2022-01-04,0,1,Tue,4.0,0.0,0.0,0.0,0.0,1.0
1,255439,2022-01-04,1,1,Tue,10.0,0.0,0.0,0.0,0.0,0.0
2,255439,2022-01-04,2,1,Tue,8.0,0.0,0.0,0.0,0.0,0.0
3,255439,2022-01-04,3,1,Tue,6.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Row/column count of file41, for comparison with the merged frame later.
file41.shape

(20692, 11)

In [None]:
# Row/column count of file40, for comparison with the merged frame later.
file40.shape

(50314, 17)

In [None]:
# Distinct object_id values present in file41 (np.unique returns them sorted).
np.unique(file41["object_id"])

array([255439, 255440, 255441, 255442, 255443, 255444, 280324])

In [None]:
# Distinct object_id values present in file40 (np.unique returns them sorted);
# any id missing from file41 will not survive the inner join below.
np.unique(file40["object_id"])

array([255439, 255440, 255441, 255442, 255443, 255444, 280324, 303910])

In [None]:
# Keep only the (object_id, date, hour) rows present in both file40 and file41.
# The key columns have the same names on both sides, so a single `on` list is
# enough and each key appears once in the result.
data_model = file40.merge(
    file41,
    on=["object_id", "date", "hour", "month", "weekday"],
    how="inner",
)

In [None]:
# Rows that survived the inner join, and the combined column count.
data_model.shape

(20414, 23)

In [None]:
# Verify the merged frame holds both the laf*_per_hour and the noise-event columns.
data_model.columns

Index(['object_id', 'date', 'hour', 'month', 'weekday', 'laf005_per_hour',
       'laf01_per_hour', 'laf05_per_hour', 'laf10_per_hour', 'laf25_per_hour',
       'laf50_per_hour', 'laf75_per_hour', 'laf90_per_hour', 'laf95_per_hour',
       'laf98_per_hour', 'laf99_per_hour', 'laf995_per_hour', 'HM_voice_shout',
       'HM_voice_sing', 'MS_non_amplified', 'NE_wind', 'TR_passenger_car',
       'TR_siren'],
      dtype='object')

In [None]:
# Spot-check the first merged rows.
data_model.head(3)

Unnamed: 0,object_id,date,hour,month,weekday,laf005_per_hour,laf01_per_hour,laf05_per_hour,laf10_per_hour,laf25_per_hour,...,laf95_per_hour,laf98_per_hour,laf99_per_hour,laf995_per_hour,HM_voice_shout,HM_voice_sing,MS_non_amplified,NE_wind,TR_passenger_car,TR_siren
0,255439,2022-01-04,0,1,Tue,77.2,75.4,70.8,68.9,66.0,...,56.9,55.9,55.1,54.5,4.0,0.0,0.0,0.0,0.0,1.0
1,255439,2022-01-04,1,1,Tue,78.8,77.2,73.6,72.1,69.8,...,62.4,61.1,60.4,59.7,10.0,0.0,0.0,0.0,0.0,0.0
2,255439,2022-01-04,2,1,Tue,79.7,77.2,72.5,70.8,68.6,...,60.9,59.8,59.0,58.3,8.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Reminder of meteo's key columns (date, weekday, Month, Hour) ahead of the join.
meteo.head(2)

Unnamed: 0,date,weekday,Month,Hour,LC_HUMIDITY,LC_DWPTEMP,LC_n,LC_RAD,LC_RAININ,LC_DAILYRAIN,LC_WINDDIR,LC_WINDSPEED,LC_RAD60,LC_TEMP_QCL0,LC_TEMP_QCL1,LC_TEMP_QCL2,LC_TEMP_QCL3
0,2022-01-01,Sat,1,0,90.367865,11.50186,36.353066,0.082452,0.0,0.0,-17.124736,0.153679,0.082452,13.043446,13.054252,13.039616,13.013112
1,2022-01-01,Sat,1,1,89.504394,11.35065,36.316344,0.093146,7e-06,0.0,-17.16696,0.229297,0.084359,13.032021,13.042771,13.028941,13.022762


In [None]:
# Number of hourly weather rows available to join against.
meteo.shape

(8761, 17)

In [None]:
# Attach the hourly weather averages to every noise row.
# The key columns are spelled differently on each side (hour/Hour, month/Month),
# hence left_on/right_on; as a consequence the result keeps both spellings as
# separate columns.
# validate="many_to_one" makes pandas raise a MergeError if meteo holds more than
# one row per key (e.g. the hourly aggregation cell was skipped or run out of
# order) instead of silently multiplying the noise rows.
data_model = data_model.merge(
    meteo,
    how="inner",
    left_on=["date", "hour", "month", "weekday"],
    right_on=["date", "Hour", "Month", "weekday"],
    validate="many_to_one",
)

In [None]:
# Column labels after adding the weather columns; both hour/Hour and
# month/Month are present because the join keys were named differently.
data_model.columns

Index(['object_id', 'date', 'hour', 'month', 'weekday', 'laf005_per_hour',
       'laf01_per_hour', 'laf05_per_hour', 'laf10_per_hour', 'laf25_per_hour',
       'laf50_per_hour', 'laf75_per_hour', 'laf90_per_hour', 'laf95_per_hour',
       'laf98_per_hour', 'laf99_per_hour', 'laf995_per_hour', 'HM_voice_shout',
       'HM_voice_sing', 'MS_non_amplified', 'NE_wind', 'TR_passenger_car',
       'TR_siren', 'Month', 'Hour', 'LC_HUMIDITY', 'LC_DWPTEMP', 'LC_n',
       'LC_RAD', 'LC_RAININ', 'LC_DAILYRAIN', 'LC_WINDDIR', 'LC_WINDSPEED',
       'LC_RAD60', 'LC_TEMP_QCL0', 'LC_TEMP_QCL1', 'LC_TEMP_QCL2',
       'LC_TEMP_QCL3'],
      dtype='object')

In [None]:
# The row count should equal the pre-merge count if every noise row matched
# exactly one weather hour.
data_model.shape

(20414, 38)

In [None]:
# Inspect column dtypes before writing the table out.
data_model.dtypes

object_id             int64
date                 object
hour                  int64
month                 int64
weekday              object
laf005_per_hour     float64
laf01_per_hour      float64
laf05_per_hour      float64
laf10_per_hour      float64
laf25_per_hour      float64
laf50_per_hour      float64
laf75_per_hour      float64
laf90_per_hour      float64
laf95_per_hour      float64
laf98_per_hour      float64
laf99_per_hour      float64
laf995_per_hour     float64
HM_voice_shout      float64
HM_voice_sing       float64
MS_non_amplified    float64
NE_wind             float64
TR_passenger_car    float64
TR_siren            float64
Month                 int64
Hour                  int64
LC_HUMIDITY         float64
LC_DWPTEMP          float64
LC_n                float64
LC_RAD              float64
LC_RAININ           float64
LC_DAILYRAIN        float64
LC_WINDDIR          float64
LC_WINDSPEED        float64
LC_RAD60            float64
LC_TEMP_QCL0        float64
LC_TEMP_QCL1        

In [None]:
# Persist the combined table to Drive; index=False leaves the row index out of the file.
data_model.to_csv('/content/drive/MyDrive/MDA_TIEN/data_model.csv', index=False)
