In [230]:
from siphon.simplewebservice.ndbc import NDBC
import time
import requests, pandas as pd
from datetime import datetime, timedelta, timezone, time
import os
from pathlib import Path
import meteomatics.api as api
import matplotlib.pyplot as plt
import pytz, folium

Get Data from APIs

Marine API

In [231]:
# Coordonnées de Buoy_51101
latitude = 24.359
longitude = -162.081

# Créer la carte centrée sur les coordonnées
m = folium.Map(location=[latitude, longitude], zoom_start=6)

# Ajouter un marqueur sur Buoy_51101
folium.Marker([latitude, longitude], popup='Buoy_51101').add_to(m)

# Sauvegarder la carte dans un fichier HTML
m.save('buoy_51101_map.html')

In [232]:
df_marine = NDBC.realtime_observations('51101')
df_marine.head()

Unnamed: 0,wind_direction,wind_speed,wind_gust,wave_height,dominant_wave_period,average_wave_period,dominant_wave_direction,pressure,air_temperature,water_temperature,dewpoint,visibility,3hr_pressure_tendency,water_level_above_mean,time
0,220.0,3.0,5.0,,,,,1015.8,22.7,24.2,,,,,2025-02-06 15:10:00+00:00
1,220.0,3.0,5.0,,,,,1015.9,22.6,24.2,,,-0.9,,2025-02-06 15:00:00+00:00
2,220.0,3.0,5.0,2.2,13.0,9.8,48.0,1015.8,22.6,24.2,,,,,2025-02-06 14:50:00+00:00
3,230.0,3.0,5.0,2.2,,9.8,48.0,1015.9,22.7,24.2,,,,,2025-02-06 14:40:00+00:00
4,230.0,4.0,6.0,,,,,1015.9,22.7,24.2,,,,,2025-02-06 14:30:00+00:00


Meteo API

In [233]:
import json

def read_credentials():
    with open('credentials.json', 'r') as file:
        credentials = json.load(file)
    
    meteo_username = credentials.get('username')
    meteo_password = credentials.get('password')
    
    return meteo_username, meteo_password


In [234]:
# Définir les paramètres
parameters = [
        'wind_speed_10m:ms',	
        'wind_dir_10m:d',
        'wind_gusts_10m_1h:ms',
        't_2m:C',
        't_max_2m_24h:C',
        't_min_2m_24h:C',
        'msl_pressure:hPa',
        'precip_1h:mm'
    ]

# Définir les coordonnées
coordinates = [(24.359, -162.081)]

# Définir le modèle
model = 'mix'
# Début URL de l'API
DEFAULT_API_BASE_URL = "https://api.meteomatics.com"
enddate = datetime.now(timezone.utc).replace(minute=0, second=0, microsecond=0)
startdate = enddate - timedelta(days=1)
interval = timedelta(minutes=10)

# Fonction de requête API Météo
def meteo_api_request(parameters, coordinates, model, DEFAULT_API_BASE_URL, startdate, enddate, interval):

    # Appel de la fonction read_credentials pour obtenir les informations d'identification
    meteo_username, meteo_password = read_credentials()
    
    # Récupérer les données
    df_meteo = api.query_time_series(coordinates, startdate, enddate, interval, parameters, meteo_username, meteo_password, model=model)
    df_meteo.reset_index(inplace=True)
    return df_meteo

df_meteo=meteo_api_request(parameters, coordinates, model, DEFAULT_API_BASE_URL, startdate, enddate, interval)
df_meteo = df_meteo.rename(columns={'validdate': 'time'})
df_meteo.head()

Unnamed: 0,lat,lon,time,wind_speed_10m:ms,wind_dir_10m:d,wind_gusts_10m_1h:ms,t_2m:C,t_max_2m_24h:C,t_min_2m_24h:C,msl_pressure:hPa,precip_1h:mm
0,24.359,-162.081,2025-02-05 15:00:00+00:00,4.0,103.9,4.2,21.2,21.6,20.8,1016.6,0.0
1,24.359,-162.081,2025-02-05 15:10:00+00:00,3.0,106.8,4.2,21.2,21.6,20.8,1016.6,0.0
2,24.359,-162.081,2025-02-05 15:20:00+00:00,3.0,110.1,4.2,21.2,21.6,20.9,1016.7,0.0
3,24.359,-162.081,2025-02-05 15:30:00+00:00,3.0,113.7,4.2,21.1,21.6,20.9,1016.8,0.0
4,24.359,-162.081,2025-02-05 15:40:00+00:00,2.0,117.7,4.2,21.2,21.6,20.9,1016.8,0.0


In [235]:
df_marine.head()

Unnamed: 0,wind_direction,wind_speed,wind_gust,wave_height,dominant_wave_period,average_wave_period,dominant_wave_direction,pressure,air_temperature,water_temperature,dewpoint,visibility,3hr_pressure_tendency,water_level_above_mean,time
0,220.0,3.0,5.0,,,,,1015.8,22.7,24.2,,,,,2025-02-06 15:10:00+00:00
1,220.0,3.0,5.0,,,,,1015.9,22.6,24.2,,,-0.9,,2025-02-06 15:00:00+00:00
2,220.0,3.0,5.0,2.2,13.0,9.8,48.0,1015.8,22.6,24.2,,,,,2025-02-06 14:50:00+00:00
3,230.0,3.0,5.0,2.2,,9.8,48.0,1015.9,22.7,24.2,,,,,2025-02-06 14:40:00+00:00
4,230.0,4.0,6.0,,,,,1015.9,22.7,24.2,,,,,2025-02-06 14:30:00+00:00


Check first rows

In [236]:
def convert_to_datetime(value):
    try:
        # Spécifier dayfirst=True pour éviter le warning avec le format %d/%m/%Y
        return pd.to_datetime(value, errors='coerce', dayfirst=True).strftime('%d/%m/%Y-%H:%M') if pd.notna(value) else None
    except Exception as e:
        return None  # Retourner None si la conversion échoue


# Appliquer la fonction sur la colonne 'time' avec apply
df_marine['time'] = df_marine['time'].apply(convert_to_datetime)
df_meteo['time'] = df_meteo['time'].apply(convert_to_datetime)

In [237]:
meteo_null_counts = df_meteo.isna().sum()
meteo_null_counts

lat                     0
lon                     0
time                    0
wind_speed_10m:ms       0
wind_dir_10m:d          0
wind_gusts_10m_1h:ms    0
t_2m:C                  0
t_max_2m_24h:C          0
t_min_2m_24h:C          0
msl_pressure:hPa        0
precip_1h:mm            0
dtype: int64

Rename Columns + Delete Useless Ones

In [238]:
try:
    df_marine.rename(columns={'wind_direction': 'Wind Dir (°)', 'wind_speed':'Wind Speed (ms)',
                          'wind_gust':'Wind Gusts 1h (ms)','wave_height':'Wave Height (m)','dominant_wave_period':'Dominant Wave Period (s)',
                          'average_wave_period':'Avg Wave Period (s)', 'dominant_wave_direction': 'Dominant Wave Direction',
                          'air_temperature':'Air Temperature','water_temperature':'Water Temperature','time': 'Datetime','pressure':'Pressure'}, inplace=True)


    df_marine = df_marine.drop(columns={'dewpoint', 'visibility','3hr_pressure_tendency','water_level_above_mean'})

    df_meteo.rename(columns = {'time': 'Datetime'}, inplace=True)
except KeyError as e:
    raise(e)

In [239]:
df_meteo.head()

Unnamed: 0,lat,lon,Datetime,wind_speed_10m:ms,wind_dir_10m:d,wind_gusts_10m_1h:ms,t_2m:C,t_max_2m_24h:C,t_min_2m_24h:C,msl_pressure:hPa,precip_1h:mm
0,24.359,-162.081,05/02/2025-15:00,4.0,103.9,4.2,21.2,21.6,20.8,1016.6,0.0
1,24.359,-162.081,05/02/2025-15:10,3.0,106.8,4.2,21.2,21.6,20.8,1016.6,0.0
2,24.359,-162.081,05/02/2025-15:20,3.0,110.1,4.2,21.2,21.6,20.9,1016.7,0.0
3,24.359,-162.081,05/02/2025-15:30,3.0,113.7,4.2,21.1,21.6,20.9,1016.8,0.0
4,24.359,-162.081,05/02/2025-15:40,2.0,117.7,4.2,21.2,21.6,20.9,1016.8,0.0


In [240]:
print(f"Marine Data Rows Amount : {df_marine.shape[0]}")
marine_null_counts = df_marine.isnull().sum()

print(f"Marine Data Null Values Amount: \n{marine_null_counts}")


Marine Data Rows Amount : 6506
Marine Data Null Values Amount: 
Wind Dir (°)                  59
Wind Speed (ms)                4
Wind Gusts 1h (ms)             4
Wave Height (m)             2894
Dominant Wave Period (s)    4363
Avg Wave Period (s)         2894
Dominant Wave Direction     2894
Pressure                      13
Air Temperature               33
Water Temperature            125
Datetime                       0
dtype: int64


In [241]:
def handle_null_values(df):
    # Itérer sur les colonnes du DataFrame
    for column in df.columns:
        # Si la colonne est numérique, on remplace les valeurs nulles par la médiane
        if pd.api.types.is_numeric_dtype(df[column]):
            df[column] = df[column].fillna(df[column].median())  # Remplacer sans inplace=True
        # Sinon, on supprime les lignes où il y a des valeurs nulles dans cette colonne
        else:
            df.dropna(subset=[column], inplace=True)
    
    return df  # Retourner le DataFrame après traitement

# Exemple d'utilisation
df_marine = handle_null_values(df_marine)
df_meteo = handle_null_values(df_meteo)

# Vérifier le résultat
df_marine.isnull().sum() # Afficher les valeurs nulles restantes
df_meteo.isnull().sum() # Afficher les valeurs nulles restantes

lat                     0
lon                     0
Datetime                0
wind_speed_10m:ms       0
wind_dir_10m:d          0
wind_gusts_10m_1h:ms    0
t_2m:C                  0
t_max_2m_24h:C          0
t_min_2m_24h:C          0
msl_pressure:hPa        0
precip_1h:mm            0
dtype: int64

In [242]:
df_meteo.head()

Unnamed: 0,lat,lon,Datetime,wind_speed_10m:ms,wind_dir_10m:d,wind_gusts_10m_1h:ms,t_2m:C,t_max_2m_24h:C,t_min_2m_24h:C,msl_pressure:hPa,precip_1h:mm
0,24.359,-162.081,05/02/2025-15:00,4.0,103.9,4.2,21.2,21.6,20.8,1016.6,0.0
1,24.359,-162.081,05/02/2025-15:10,3.0,106.8,4.2,21.2,21.6,20.8,1016.6,0.0
2,24.359,-162.081,05/02/2025-15:20,3.0,110.1,4.2,21.2,21.6,20.9,1016.7,0.0
3,24.359,-162.081,05/02/2025-15:30,3.0,113.7,4.2,21.1,21.6,20.9,1016.8,0.0
4,24.359,-162.081,05/02/2025-15:40,2.0,117.7,4.2,21.2,21.6,20.9,1016.8,0.0


In [243]:
df_marine.head()

Unnamed: 0,Wind Dir (°),Wind Speed (ms),Wind Gusts 1h (ms),Wave Height (m),Dominant Wave Period (s),Avg Wave Period (s),Dominant Wave Direction,Pressure,Air Temperature,Water Temperature,Datetime
0,220.0,3.0,5.0,2.9,13.0,8.3,308.0,1015.8,22.7,24.2,06/02/2025-15:10
1,220.0,3.0,5.0,2.9,13.0,8.3,308.0,1015.9,22.6,24.2,06/02/2025-15:00
2,220.0,3.0,5.0,2.2,13.0,9.8,48.0,1015.8,22.6,24.2,06/02/2025-14:50
3,230.0,3.0,5.0,2.2,13.0,9.8,48.0,1015.9,22.7,24.2,06/02/2025-14:40
4,230.0,4.0,6.0,2.9,13.0,8.3,308.0,1015.9,22.7,24.2,06/02/2025-14:30


Merging Dataframes

In [246]:
# Effectuer le merge entre df_marine et df_meteo sur la colonne 'time'
df_result = pd.merge(df_marine, df_meteo, on='Datetime', how='inner')  # 'inner' pour ne garder que les lignes avec des 'time' correspondants

# Afficher le nombre de lignes de la DataFrame résultante
num_rows = df_result.shape[0]
print(f"Nombre de lignes dans la DataFrame résultante : {num_rows}")
df_result.head()

Nombre de lignes dans la DataFrame résultante : 143


Unnamed: 0,Wind Dir (°),Wind Speed (ms),Wind Gusts 1h (ms),Wave Height (m),Dominant Wave Period (s),Avg Wave Period (s),Dominant Wave Direction,Pressure,Air Temperature,Water Temperature,...,lat,lon,wind_speed_10m:ms,wind_dir_10m:d,wind_gusts_10m_1h:ms,t_2m:C,t_max_2m_24h:C,t_min_2m_24h:C,msl_pressure:hPa,precip_1h:mm
0,220.0,3.0,5.0,2.9,13.0,8.3,308.0,1015.9,22.6,24.2,...,24.359,-162.081,3.2,246.8,4.7,22.8,23.2,21.1,1015.6,0.0
1,220.0,3.0,5.0,2.2,13.0,9.8,48.0,1015.8,22.6,24.2,...,24.359,-162.081,3.1,242.1,4.6,22.7,23.2,21.1,1015.4,0.0
2,230.0,3.0,5.0,2.2,13.0,9.8,48.0,1015.9,22.7,24.2,...,24.359,-162.081,3.1,237.1,4.6,22.7,23.2,21.1,1015.3,0.0
3,230.0,4.0,6.0,2.9,13.0,8.3,308.0,1015.9,22.7,24.2,...,24.359,-162.081,3.0,231.9,4.6,22.7,23.2,21.1,1015.1,0.0
4,230.0,3.0,5.0,1.9,13.0,9.1,41.0,1016.1,22.7,24.2,...,24.359,-162.081,3.0,226.6,4.6,22.7,23.2,21.1,1015.0,0.0


In [247]:
df_result.columns

Index(['Wind Dir (°)', 'Wind Speed (ms)', 'Wind Gusts 1h (ms)',
       'Wave Height (m)', 'Dominant Wave Period (s)', 'Avg Wave Period (s)',
       'Dominant Wave Direction', 'Pressure', 'Air Temperature',
       'Water Temperature', 'Datetime', 'lat', 'lon', 'wind_speed_10m:ms',
       'wind_dir_10m:d', 'wind_gusts_10m_1h:ms', 't_2m:C', 't_max_2m_24h:C',
       't_min_2m_24h:C', 'msl_pressure:hPa', 'precip_1h:mm'],
      dtype='object')

In [257]:
columns_to_drop = ['lat','lon']

def drop_columns(df, columns_to_drop):
    for col in columns_to_drop:
        if col in df.columns:  # Vérifier si la colonne existe
            df = df.drop(columns=col)  # Supprimer la colonne
        else:
            # Retourner un message si la colonne n'est pas trouvée
            return f" '{col}' is not found in the DataFrame"
    
    return df  # Retourner le DataFrame après avoir supprimé toutes les colonnes

df = drop_columns(df_result, columns_to_drop)

df_2 = drop_columns(df, columns_to_drop)

df_2

" 'lat' is not found in the DataFrame"

In [245]:
print(df_marine.isnull().sum(),df_marine.shape)

Wind Dir (°)                0
Wind Speed (ms)             0
Wind Gusts 1h (ms)          0
Wave Height (m)             0
Dominant Wave Period (s)    0
Avg Wave Period (s)         0
Dominant Wave Direction     0
Pressure                    0
Air Temperature             0
Water Temperature           0
Datetime                    0
dtype: int64 (6506, 11)
