In [1]:
import pandas as pd
from pathlib import Path
from datetime import datetime
from tqdm import tqdm

In [2]:
path_scratch = Path("/scratch/labia")
path_previsions = path_scratch / "tourniert/gelee_blanche/previsions"
path_obs = path_scratch / "tchoffoc/donnees_mises_en_forme"

## Fichier de prévision

In [3]:
df_maa = pd.read_csv(path_previsions / 'maa_20200808160000_20230808162000.txt',
                     skiprows=2, sep=' - ')

# Suppression de la colonne 'Date de réception' car similaire à 'Date du message'
df_maa = df_maa.drop('Date de réception', axis=1)

# Conversion au format datetime
df_maa['date'] = pd.to_datetime(df_maa['Date du message'], format="%d/%m/%Y %H:%M:%S")
df_maa = df_maa.drop('Date du message', axis=1)

df_maa

  df_maa = pd.read_csv(path_previsions / 'maa_20200808160000_20230808162000.txt',


Unnamed: 0,Contenu du message,date
0,LFPG AD WRNG 2 VALID 081618/082130 CNL AD WRNG...,2020-08-08 16:18:00
1,LFPG AD WRNG 1 VALID 091500/092100 TS FCST.=,2020-08-09 13:00:00
2,LFPG AD WRNG 2 VALID 091500/092000 SFC WSPD MA...,2020-08-09 13:01:00
3,LFPG AD WRNG 3 VALID 091800/092200 SFC WSPD MA...,2020-08-09 17:16:00
4,LFPG AD WRNG 4 VALID 091850/092200 CNL AD WRNG...,2020-08-09 18:50:00
...,...,...
1951,LFPG AD WRNG 1 VALID 051130/052330 SFC WSPD MA...,2023-08-05 09:34:00
1952,LFPG AD WRNG 2 VALID 051900/060230 SFC WSPD MA...,2023-08-05 18:48:00
1953,LFPG AD WRNG 3 VALID 052200/061000 SFC WSPD MA...,2023-08-05 21:34:00
1954,LFPG AD WRNG 4 VALID 052303/060230 CNL AD WRNG...,2023-08-05 23:03:00


In [4]:
# On garde uniquement les prévisions de VEHICLE RIME dans le message
df_maa['is_VEHICLE_RIME'] = df_maa['Contenu du message'].apply(lambda x: True if 'VEHICLE RIME' in x else False)
df_vehicle_rime = df_maa[df_maa['is_VEHICLE_RIME']]

df_vehicle_rime = df_vehicle_rime.drop('is_VEHICLE_RIME', axis=1)

In [5]:
def begin_date(maa: str, date: pd.Timestamp) -> (pd.Timestamp, pd.Timestamp):
    splited_maa = maa.split(' ')
    dates = splited_maa[5]
    begin_date, end_date = dates.split('/')
    begin_date  = pd.to_datetime(f"{date.year}{date.month:02}{begin_date}",
                                 format="%Y%m%d%H%M")
    end_date  = pd.to_datetime(f"{date.year}{date.month:02}{end_date}",
                               format="%Y%m%d%H%M")
    return begin_date, end_date

df_vehicle_rime[['begin_date', 'end_date']] = df_vehicle_rime.apply(lambda x: begin_date(x['Contenu du message'], x['date']), axis=1, result_type='expand')

In [6]:
df_vehicle_rime

Unnamed: 0,Contenu du message,date,begin_date,end_date
125,LFPG AD WRNG 1 VALID 292030/300830 VEHICLE RIM...,2020-11-29 18:35:00,2020-11-29 20:30:00,2020-11-30 08:30:00
126,LFPG AD WRNG 1 VALID 282230/291030 VEHICLE RIM...,2020-11-28 20:30:00,2020-11-28 22:30:00,2020-11-29 10:30:00
127,LFPG AD WRNG 1 VALID 272330/281030 VEHICLE RIM...,2020-11-27 21:33:00,2020-11-27 23:30:00,2020-11-28 10:30:00
128,LFPG AD WRNG 1 VALID 260500/260930 VEHICLE RIM...,2020-11-26 04:39:00,2020-11-26 05:00:00,2020-11-26 09:30:00
129,LFPG AD WRNG 1 VALID 252330/260630 VEHICLE RIM...,2020-11-25 21:34:00,2020-11-25 23:30:00,2020-11-26 06:30:00
...,...,...,...,...
1794,LFPG AD WRNG 1 VALID 082255/090830 VEHICLE RIM...,2023-04-08 22:53:00,2023-04-08 22:55:00,2023-04-09 08:30:00
1795,LFPG AD WRNG 1 VALID 080130/080830 VEHICLE RIM...,2023-04-07 23:34:00,2023-04-08 01:30:00,2023-04-08 08:30:00
1797,LFPG AD WRNG 1 VALID 042330/050930 VEHICLE RIM...,2023-04-04 21:32:00,2023-04-04 23:30:00,2023-04-05 09:30:00
1798,LFPG AD WRNG 3 VALID 040130/040830 VEHICLE RIM...,2023-04-03 23:33:00,2023-04-04 01:30:00,2023-04-04 08:30:00


In [7]:
min_date = min(df_vehicle_rime['begin_date'])
min_date = min_date - pd.Timedelta(min_date.minute, 'minutes')
index = pd.date_range(start=min_date,
                      end=max(df_vehicle_rime['end_date']),
                      freq='1h')
 
columns = ['prediction']
 
df_prediction = pd.DataFrame(index=index, columns=columns).fillna(0)
df_prediction

Unnamed: 0,prediction
2020-11-04 01:00:00,0
2020-11-04 02:00:00,0
2020-11-04 03:00:00,0
2020-11-04 04:00:00,0
2020-11-04 05:00:00,0
...,...
2023-04-26 03:00:00,0
2023-04-26 04:00:00,0
2023-04-26 05:00:00,0
2023-04-26 06:00:00,0


In [8]:
# On change la valeur de prédiction à 1 pour tous les intervalles cités dans les MAA par des RIME VEHICLE
for date_index in tqdm(df_prediction.index):
    for begin_date, end_date in zip(df_vehicle_rime['begin_date'], df_vehicle_rime['end_date']):
        if date_index >= begin_date and date_index <= end_date:
            df_prediction.loc[date_index, 'prediction'] = 1

df_prediction

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 21679/21679 [00:08<00:00, 2660.89it/s]


Unnamed: 0,prediction
2020-11-04 01:00:00,0
2020-11-04 02:00:00,1
2020-11-04 03:00:00,1
2020-11-04 04:00:00,1
2020-11-04 05:00:00,1
...,...
2023-04-26 03:00:00,1
2023-04-26 04:00:00,1
2023-04-26 05:00:00,1
2023-04-26 06:00:00,1


In [28]:
df_prediction.index[1000]

Timestamp('2020-12-15 17:00:00', freq='H')

## Fichier d'observation

In [37]:
df_obs = pd.read_csv(path_obs / "obs_gelee_blanche.csv")
df_obs['date'] = pd.to_datetime(df_obs['date'], format="%Y-%m-%d %H:%M:%S")
df_obs = df_obs.set_index('date')
df_obs

Unnamed: 0_level_0,gelee_blanche_vehicule_presence
date,Unnamed: 1_level_1
2011-04-12 13:00:00,0
2011-04-12 14:00:00,0
2011-04-12 15:00:00,0
2011-04-12 16:00:00,0
2011-04-12 17:00:00,0
...,...
2023-06-06 09:00:00,0
2023-06-06 10:00:00,0
2023-06-06 11:00:00,0
2023-06-06 12:00:00,0


## Fusion des observations et des prévisions

In [39]:
df_obs_pred = df_prediction.join(df_obs['gelee_blanche_vehicule_presence'])
df_obs_pred = df_obs_pred.dropna()
df_obs_pred['gelee_blanche_vehicule_presence'] = df_obs_pred['gelee_blanche_vehicule_presence'].astype('int')
df_obs_pred

Unnamed: 0,prediction,gelee_blanche_vehicule_presence
2020-11-04 01:00:00,0,0
2020-11-04 02:00:00,1,0
2020-11-04 03:00:00,1,0
2020-11-04 04:00:00,1,0
2020-11-04 05:00:00,1,0
...,...,...
2023-04-26 03:00:00,1,0
2023-04-26 04:00:00,1,0
2023-04-26 05:00:00,1,0
2023-04-26 06:00:00,1,0


## Calcul des scores

In [50]:
from sklearn.metrics import (confusion_matrix , classification_report,
                             accuracy_score, recall_score, f1_score,
                             precision_score)


accuracy = accuracy_score(df_obs_pred['gelee_blanche_vehicule_presence'],
                          df_obs_pred['prediction'])
recall = recall_score(df_obs_pred['gelee_blanche_vehicule_presence'],
                      df_obs_pred['prediction'])
precision = precision_score(df_obs_pred['gelee_blanche_vehicule_presence'],
                            df_obs_pred['prediction'])
f1 = f1_score(df_obs_pred['gelee_blanche_vehicule_presence'],
              df_obs_pred['prediction'])


print("-------------------------------------------------------")
print(classification_report(df_obs_pred['gelee_blanche_vehicule_presence'],
                            df_obs_pred['prediction']))
print("-------------------------------------------------------")
print(f"Accuracy  : {accuracy:.2}\n" +
      f"Recall    : {recall:.2}\n" +
      f"Precision : {precision:.2}\n" +
      f"F1        : {f1:.2}")
print("-------------------------------------------------------")
print(confusion_matrix(df_obs_pred['gelee_blanche_vehicule_presence'],
                       df_obs_pred['prediction']))

-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.99      0.95      0.97     20399
           1       0.52      0.90      0.66      1221

    accuracy                           0.95     21620
   macro avg       0.76      0.93      0.81     21620
weighted avg       0.97      0.95      0.95     21620

-------------------------------------------------------
Accuracy  : 0.95
Recall    : 0.9
Precision : 0.52
F1        : 0.66
-------------------------------------------------------
[[19378  1021]
 [  122  1099]]
