In [24]:
%pip install -q -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
from imports import *
from functions import *

Initialize PostgreSQL Connection

In [2]:
path_postgresql_creds = r"C:\Users\f.gionnane\Documents\Data Engineering\Credentials\postgresql_creds.json"

with open(path_postgresql_creds, 'r') as file:
    content = json.load(file)
    user = content["user"]
    password = content["password"]
    host = content["host"]
    port = content["port"]

db = "Oceanography_ML_Project"
schema = "Bronze"

# Créer l'engine PostgreSQL
engine = create_engine(f"postgresql+psycopg2://{user}:{password}@{host}:{port}/{db}")
conn = engine.connect()

Get Available Stations ID List

Filter Dysfunctional Stations

In [3]:
# get all stations and some metadata as a Pandas DataFrame
stations_df = api.stations()
# parse the response as a dictionary
stations_df = api.stations(as_df=True)

print(len(stations_df))

146


In [4]:
access_error_url_list = []

# Liste de mots à rechercher dans la colonne "Remark"
blacklist = ["Failure", "ceased", "failed", "recovered", "stopped", 'adrift']
stations_id_set = set()

print(f'Avant Filtre: {stations_df.shape[0]}')

# Liste pour collecter les indices à supprimer
indices_a_supprimer = []

# Parcours des lignes de la DataFrame
for idx, row in stations_df.iterrows():
    station_id = row["Station"]
    station_Location = row["Hull No./Config and Location"]  # Extraire la valeur de la cellule pour chaque ligne
    
    # Extraction du nom de la station si un ")" est trouvé
    if ")" in station_Location:
        station_name = station_Location.split(')')[1].rstrip(" )")  # On enlève l'espace et la parenthèse en fin de chaîne
    else:
        station_name = station_Location.strip()  # Si pas de ")", on garde toute la chaîne

    station_name = station_name.rstrip(" )").replace("(", "").replace(")", "").strip()

    # Nettoyage final pour enlever toute parenthèse ou espace en fin de nom
    station_name = station_name.rstrip(" )")

    # Vérifier si "Remark" n'est pas NaN et si un des éléments de blacklist est dans "Remark"
    if isinstance(row["Remark"], str) and any(blacklist_word.lower() in row["Remark"].lower() for blacklist_word in blacklist):
        # Ajouter l'index à la liste
        indices_a_supprimer.append(idx)
    else:
        try:
            # Effectuer l'appel API
            buoy_data = NDBC.realtime_observations(station_id)
            
            # Vérifier si les données de l'API sont valides (si le DataFrame n'est pas vide)
            if not buoy_data.empty:
                print(f'Buoy {station_id}: {station_name} passed the Remarks and API Test!')
                stations_id_set.add(station_id)
            else:
                print(f'Buoy {station_id}: {station_name} did not return valid data. Deleting.')
                indices_a_supprimer.append(idx)

        except Exception as e:
            # Si l'erreur est un HTTPError, on peut essayer d'afficher le code d'erreur
            if isinstance(e, HTTPError):
                print(f'Buoy {station_id}: {station_name} API Call returned {e.code}. Deleting.')
            else:
                # Dans tous les autres cas d'exception, on affiche le message d'erreur complet
                print(f'Buoy {station_id}: {station_name} API Call encountered an error. Deleting.')
                
                if str(e).startswith("Error accessing"):
                    url = f"https://www.ndbc.noaa.gov/station_page.php?station={station_id}"
                    access_error_url_list.append([station_id, url])
            # Ajouter l'index à la liste en cas d'erreur
            indices_a_supprimer.append(idx)

# Supprimer les lignes après la boucle
stations_df.drop(index=indices_a_supprimer, inplace=True)

print(f'Après Filtre: {stations_df.shape[0]}')

Avant Filtre: 146
Buoy 41004: Edisto passed the Remarks and API Test!
Buoy 41008: Grays Reef passed the Remarks and API Test!
Buoy 41010: Canaveral East passed the Remarks and API Test!
Buoy 41013: Frying Pan Shoals, Nc buoy passed the Remarks and API Test!
Buoy 41040: North Equatorial One passed the Remarks and API Test!
Buoy 41043: Ne Puerto Rico passed the Remarks and API Test!
Buoy 41044: Ne St Martin passed the Remarks and API Test!
Buoy 41049: South Bermuda passed the Remarks and API Test!
Buoy 42001: Mid Gulf passed the Remarks and API Test!
Buoy 42002: West Gulf passed the Remarks and API Test!
Buoy 42003: East Gulf API Call encountered an error. Deleting.
Buoy 42012: Orange Beach, Al passed the Remarks and API Test!
Buoy 42019: Freeport, Tx passed the Remarks and API Test!
Buoy 42020: Corpus Christi, Tx passed the Remarks and API Test!
Buoy 42035: Galveston, Tx passed the Remarks and API Test!
Buoy 42036: West Tampa passed the Remarks and API Test!
Buoy 42056: Yucatan Basin pa

In [5]:
for item in access_error_url_list:
    print(f"Access error for buoy {item[0]}")
    print(f"{item[1]}\n")

Access error for buoy 42003
https://www.ndbc.noaa.gov/station_page.php?station=42003

Access error for buoy KTNF1
https://www.ndbc.noaa.gov/station_page.php?station=KTNF1

Access error for buoy PTGC1
https://www.ndbc.noaa.gov/station_page.php?station=PTGC1

Access error for buoy SMKF1
https://www.ndbc.noaa.gov/station_page.php?station=SMKF1



Testing get_station_metadata and parse_buoy_json Functions

In [6]:
# Parcourir les lignes du DataFrame
for idx, row in stations_df.iterrows():
    station_id_from_df = row["Station"]  # Renommer la variable ici
    metadata = get_station_metadata(station_id_from_df)
    print(f"Metadata pour la station {station_id_from_df}: {metadata}")  # Vérification de la valeur de metadata
    Name = metadata["Name"]
    # Changer le nom de la variable retournée par parse_buoy_json
    parsed_station_id, station_zone, lat_buoy, lon_buoy = parse_buoy_json(metadata)
    

Metadata pour la station 41008: {'Watch circle radius': '69 yards', 'Water depth': '16 m', 'Sea temp depth': '2 m below water line', 'Barometer elevation': '2.4 m above mean sea level', 'Anemometer height': '3.8 m above site elevation', 'Air temp height': '3.4 m above site elevation', 'Site elevation': 'sea level', 'Location': '31.400 N 80.866 W (31°24\'0" N 80°51\'59" W)', 'Statation Type': 'Owned and maintained by National Data Buoy Center, 3-meter discus buoy, SCOOP payload', 'Name': 'Station 41008 (LLNR 833) - GRAYS REEF - 40 NM Southeast of Savannah, GA'}
Metadata pour la station 41044: {'Watch circle radius': '5441 yards', 'Water depth': '5419 m', 'Sea temp depth': '2 m below water line', 'Barometer elevation': '2.4 m above mean sea level', 'Anemometer height': '3.8 m above site elevation', 'Air temp height': '3.4 m above site elevation', 'Site elevation': 'sea level', 'Location': '21.582 N 58.630 W (21°34\'57" N 58°37\'48" W)', 'Statation Type': 'Owned and maintained by National

In [7]:
for key, value in metadata.items():
    print(key)

Barometer elevation
Anemometer height
Air temp height
Site elevation
Location
Statation Type
Name


In [8]:
# list_check=[]

# stations_sans_zone = [
#     "44020",
#     "46072",
#     "BURL1",
#     "FFIA2",
#     "LONF1",
#     "MDRM1",
#     "MRKA2",
#     "POTA2",
#     "SANF1",
#     "SBIO1"
# ]

# # Parcourir les lignes du DataFrame
# for id in stations_sans_zone:
#     metadata = get_station_metadata(id)
#     Name = metadata["Name"]

#     station_id, station_zone, lat_buoy, lon_buoy, marine_data_table_name = parse_buoy_json(metadata)
#     message = f"Station {station_id}: \nNom: {Name}\nZone: {station_zone}"
#     list_check.append(message)

# for msg in list_check:
#     print(f"{msg}\n")


Build Dictionary of Stations

In [9]:
# Dictionnaire pour stocker les DataFrames, clé : ID de la bouée, valeur : DataFrame
buoy_datas = {}
buoy_list = []

# Parcours de chaque bouée dans stations_df
for index, row in stations_df.iterrows():
    buoy_id = row['Station']

    metadata = get_station_metadata(buoy_id)  # Utilise buoy_id au lieu de station_id_from_df
    # Changer le nom de la variable retournée par parse_buoy_json
    parsed_station_id, station_zone, lat_buoy, lon_buoy = parse_buoy_json(metadata)

    # Initialiser le dictionnaire pour chaque bouée s'il n'est pas encore créé
    if buoy_id not in buoy_datas:
        buoy_datas[buoy_id] = {}

    # Ajouter les informations de la bouée
    buoy_datas[buoy_id]["Zone"] = station_zone
    buoy_datas[buoy_id]["Lat"] = lat_buoy
    buoy_datas[buoy_id]["Lon"] = lon_buoy

    # Ajouter la bouée à la liste
    buoy_list.append(buoy_id)

# Affichage du nombre de bouées réussies et échouées
print(f"Nombre de bouées traitées : {len(buoy_datas)}\n")

# Afficher le contenu de buoy_datas
buoy_datas

Nombre de bouées traitées : 39



{'41008': {'Zone': 'grays reef', 'Lat': '31.40N', 'Lon': '80.87W'},
 '41044': {'Zone': 'ne st martin', 'Lat': '21.58N', 'Lon': '58.63W'},
 '42001': {'Zone': 'mid gulf', 'Lat': '25.93N', 'Lon': '89.66W'},
 '42002': {'Zone': 'west gulf', 'Lat': '25.95N', 'Lon': '93.78W'},
 '42012': {'Zone': 'orange beach', 'Lat': '30.06N', 'Lon': '87.55W'},
 '42036': {'Zone': 'west tampa', 'Lat': '28.50N', 'Lon': '84.50W'},
 '42056': {'Zone': 'yucatan basin', 'Lat': '19.82N', 'Lon': '84.98W'},
 '42058': {'Zone': 'central caribbean', 'Lat': '14.51N', 'Lon': '75.15W'},
 '44020': {'Zone': 'nantucket sound', 'Lat': '41.50N', 'Lon': '70.28W'},
 '44025': {'Zone': 'long island', 'Lat': '40.26N', 'Lon': '73.17W'},
 '44027': {'Zone': 'jonesport, me', 'Lat': '44.28N', 'Lon': '67.30W'},
 '44065': {'Zone': 'new york harbor entrance',
  'Lat': '40.37N',
  'Lon': '73.70W'},
 '46001': {'Zone': 'western gulf of alaska',
  'Lat': '56.30N',
  'Lon': '148.03W'},
 '46006': {'Zone': 'southeast papa', 'Lat': '40.76N', 'Lon': 

In [10]:
id_problem = 46072
metadata = get_station_metadata(id_problem)  # Utilise buoy_id au lieu de station_id_from_df
    # Changer le nom de la variable retournée par parse_buoy_json
parsed_station_id, station_zone, lat_buoy, lon_buoy = parse_buoy_json(metadata)

print(f'{metadata}\n {lat_buoy}\n {lon_buoy}')

{'Watch circle radius': '3629 yards', 'Water depth': '3589 m', 'Sea temp depth': '2 m below water line', 'Barometer elevation': '2.4 m above mean sea level', 'Anemometer height': '3.8 m above site elevation', 'Air temp height': '3.4 m above site elevation', 'Site elevation': 'sea level', 'Location': '51.645 N 172.145 W (51°38\'42" N 172°8\'42" W)', 'Statation Type': 'Owned and maintained by National Data Buoy Center, 3-meter discus buoy, SCOOP payload', 'Name': 'Station 46072 (LLNR 27510) - CENTRAL ALEUTIANS 230 NM SW Dutch Harbor'}
 51.65N
 172.15W


In [11]:
# Convertir le dictionnaire en DataFrame, en utilisant 'buoy_id' comme index
df_buoy_datas = pd.DataFrame.from_dict(buoy_datas, orient='index')

# Réinitialiser l'index et renommer la colonne
df_buoy_datas = df_buoy_datas.reset_index(drop=False)
df_buoy_datas.rename(columns={"index": "Station ID"}, inplace=True)

table_name="buoy_datas"
# Charger les données dans la table
try:
    load_data_in_table(conn=conn, schema=schema, df=df_buoy_datas, table_name=table_name, key_column="Station ID")
    print(f"Données chargées avec succès dans la table {table_name}.")
except Exception as e:
    print(f"Erreur lors du chargement des données : {e}")

# Affichage du DataFrame
df_buoy_datas.head()

No new data to insert.

No new data was inserted.
Rows in table before insertion: 39
Rows inserted: 0
Rows in table after insertion: 39

Données chargées avec succès dans la table buoy_datas.


Unnamed: 0,Station ID,Zone,Lat,Lon
0,41008,grays reef,31.40N,80.87W
1,41044,ne st martin,21.58N,58.63W
2,42001,mid gulf,25.93N,89.66W
3,42002,west gulf,25.95N,93.78W
4,42012,orange beach,30.06N,87.55W


Map Visualization

In [12]:
list_coords=[]

for id in buoy_list:
    metadata = get_station_metadata(id)
    
    station_id, station_zone, lat_buoy, lon_buoy = parse_buoy_json(metadata)
    lat_buoy, lon_buoy = convert_coordinates(lat_buoy,lon_buoy)
    coords = [station_id, station_zone, lat_buoy, lon_buoy]
    list_coords.append(coords)

def barycentre(coords):
    x_coords = [item[2] for item in coords]
    y_coords = [item[3] for item in coords]

    x_barycentre = sum(x_coords) / len(coords)
    y_barycentre = sum(y_coords) / len(coords)

    return [x_barycentre, y_barycentre]

# Exemple d'utilisation
coords = [[1, 2], [3, 4], [5, 6], [7, 8]]
centre = barycentre(list_coords)
print(list_coords)
print("Coordonnées du barycentre :", centre)

[['41008', 'grays reef', 31.4, -80.87], ['41044', 'ne st martin', 21.58, -58.63], ['42001', 'mid gulf', 25.93, -89.66], ['42002', 'west gulf', 25.95, -93.78], ['42012', 'orange beach', 30.06, -87.55], ['42036', 'west tampa', 28.5, -84.5], ['42056', 'yucatan basin', 19.82, -84.98], ['42058', 'central caribbean', 14.51, -75.15], ['44020', 'nantucket sound', 41.5, -70.28], ['44025', 'long island', 40.26, -73.17], ['44027', 'jonesport, me', 44.28, -67.3], ['44065', 'new york harbor entrance', 40.37, -73.7], ['46001', 'western gulf of alaska', 56.3, -148.03], ['46006', 'southeast papa', 40.76, -137.38], ['46014', 'pt arena', 39.23, -123.98], ['46022', 'eel river', 40.72, -124.54], ['46025', 'santa monica basin', 33.76, -119.05], ['46027', 'st georges', 41.84, -124.38], ['46029', 'columbia river bar', 46.16, -124.49], ['46053', 'east santa barbara', 34.24, -119.84], ['46069', 'south santa rosa', 33.68, -120.21], ['46071', 'western aleutians', 51.04, 179.76], ['46072', 'central aleutians 230 

In [13]:
map = folium.Map(location=(centre[0],centre[1]), 
                 tiles="Esri.WorldImagery", 
                 zoom_start=2.5, attr="Données fournies par Esri")
for loc in list_coords:
    lat = loc[2]
    lon = loc[3]
    station_id = loc[0]
    station_zone = loc[1]
    folium.Marker(
    location=[lat, lon],
    popup=f"Station ID: {station_id}\n\nZone: {station_zone}\nLat: {lat}\n\nLon: {lon}",
    icon=folium.Icon(icon="cloud"),
).add_to(map)

map

Columns Check Tests

In [14]:
choice = random.choice(buoy_list)
choice

'51002'

In [15]:
df_marine_test = NDBC.realtime_observations(choice)
print(f'{df_marine_test.shape[0]} rows \n\n{df_marine_test.isna().sum()}')
df_marine_test.head(1)

6510 rows 

wind_direction               13
wind_speed                   13
wind_gust                    13
wave_height                2770
dominant_wave_period       4351
average_wave_period        2774
dominant_wave_direction    2774
pressure                     13
air_temperature              39
water_temperature          1713
dewpoint                     39
visibility                 6510
3hr_pressure_tendency      5419
water_level_above_mean     6510
time                          0
dtype: int64


Unnamed: 0,wind_direction,wind_speed,wind_gust,wave_height,dominant_wave_period,average_wave_period,dominant_wave_direction,pressure,air_temperature,water_temperature,dewpoint,visibility,3hr_pressure_tendency,water_level_above_mean,time
0,70.0,10.0,12.0,2.3,,6.0,80.0,1017.1,24.4,,20.8,,,,2025-03-22 16:10:00+00:00


Check test API Open-Meteo

In [16]:
df_meteo_test = meteo_api_request(coordinates=[12, 23])
print(f'{df_meteo_test.shape[0]} rows \n{df_meteo_test.isna().sum()}')
df_meteo_test.head(1)

2376 rows 
date                        0
temperature_2m            114
relative_humidity_2m      114
dew_point_2m              114
precipitation             114
rain                      114
showers                   114
pressure_msl              114
surface_pressure          114
cloud_cover               114
cloud_cover_low           114
cloud_cover_mid           114
cloud_cover_high          114
visibility                114
wind_speed_10m            114
soil_temperature_0cm      370
soil_moisture_0_to_1cm    370
is_day                      0
dtype: int64


Unnamed: 0,date,temperature_2m,relative_humidity_2m,dew_point_2m,precipitation,rain,showers,pressure_msl,surface_pressure,cloud_cover,cloud_cover_low,cloud_cover_mid,cloud_cover_high,visibility,wind_speed_10m,soil_temperature_0cm,soil_moisture_0_to_1cm,is_day
0,2024-12-20 00:00:00+00:00,,,,,,,,,,,,,,,,,0.0


In [17]:
metadata_extracted = get_station_metadata(choice)
metadata_extracted

{'Watch circle radius': '5029 yards',
 'Water depth': '4979 m',
 'Sea temp depth': '1.5 m below water line',
 'Barometer elevation': '2.7 m above mean sea level',
 'Anemometer height': '4.1 m above site elevation',
 'Air temp height': '3.7 m above site elevation',
 'Site elevation': 'sea level',
 'Location': '17.070 N 157.755 W (17°4\'12" N 157°45\'18" W)',
 'Statation Type': 'Owned and maintained by National Data Buoy Center, 3-meter foam buoy, SCOOP payload',
 'Name': 'Station 51002 (LLNR 28005.1) - SOUTHWEST HAWAII - 215NM SSW of Hilo, HI'}

In [18]:
station_id, station_zone, lat_buoy, lon_buoy = parse_buoy_json(metadata_extracted)
print(f'{station_name}\n{station_id}\n{station_zone}\n{lat_buoy}\n{lon_buoy}\n')

West Point, Wa
51002
southwest hawaii
17.07N
157.75W



Big API Call Loop

In [None]:
# Définir le dossier où sauvegarder les fichiers
marine_output_dir = r"marine_tables"
meteo_output_dir = r"meteo_tables"
# S'assurer que le dossier existe (le créer s'il n'existe pas)
os.makedirs(marine_output_dir, exist_ok=True)
os.makedirs(meteo_output_dir, exist_ok=True)

dict_df = {}
count = 1
total = len(buoy_datas)

for buoy_id in buoy_datas:  # Boucle sur les bouées dans le dictionnaire
    dict_df[buoy_id] = {}

    # Récupérer les métadonnées de la bouée
    metadata_extracted = get_station_metadata(buoy_id)
    
    station_id, station_zone, lat_buoy, lon_buoy = parse_buoy_json(metadata_extracted)
    
    buoy_datas[buoy_id]["Station Name"] = station_name
    buoy_datas[buoy_id]["Station ID"] = station_id
    buoy_datas[buoy_id]["Zone"] = station_zone
    buoy_datas[buoy_id]["Lat"] = lat
    buoy_datas[buoy_id]["Lon"] = lon

    Bronze_Marine_Table_Name = f"station_{station_id}_marine_{station_zone}"
    Bronze_Marine_Table_Name = Bronze_Marine_Table_Name.replace('.', '_').replace('-', '_')

    Bronze_Meteo_Table_Name = f"station_{station_id}_meteo_{station_zone}"
    Bronze_Meteo_Table_Name = Bronze_Meteo_Table_Name.replace('.', '_').replace('-', '_')

    # Construire le chemin complet du fichier
    marine_csv_path = os.path.join(marine_output_dir, Bronze_Marine_Table_Name + ".csv")
    meteo_csv_path = os.path.join(meteo_output_dir, Bronze_Meteo_Table_Name + ".csv")

    duo = [Bronze_Marine_Table_Name, Bronze_Meteo_Table_Name]

    print(f"\n{'='*50}\nIteration: {count}/{total} | Bouée ID: {buoy_id}")
    print(f"{'='*50}")
    
    # NOAA API CALL (avec timeout)
    try:
        df_marine = NDBC.realtime_observations(buoy_id)  # Timeout après 30 secondes
        if df_marine is None or df_marine.empty:
            print(f"⚠️ Marine Data is empty for buoy {buoy_id}")
        else:
            df_marine["Station ID"] = buoy_id
            buoy_datas[buoy_id]["Marine Dataframe"] = df_marine
            print(f"🌊 Marine Data Successfully collected for buoy {buoy_id}")
            dict_df[buoy_id]["Marine DataFrame"] = df_marine
            df_marine.to_csv(marine_csv_path , mode='w', index=True, index_label="time")
            load_data_in_table(conn, schema, table_name=Bronze_Marine_Table_Name, df=df_marine, key_column="time")

    except Exception as e:
        print(f"❌ Failed to collect Marine Data for buoy {buoy_id}: \n{e}\n")
        
    # Open-Meteo API Call (avec timeout)
    try:
        df_meteo = meteo_api_request(coordinates=[lat, lon])  # Timeout après 30 secondes
        if df_meteo is None or df_meteo.empty:
            print(f"⚠️ Meteo Data is empty for buoy {buoy_id}")
        else:
            buoy_datas[buoy_id]["Meteo DataFrame"] = df_meteo
            print(f"🌦️ Meteo Data Successfully collected for buoy {buoy_id}")
            dict_df[buoy_id]["Meteo DataFrame"] = df_meteo
            df_meteo.to_csv(meteo_csv_path , mode='w', index=True, index_label="date")
            load_data_in_table(conn, schema, table_name=Bronze_Meteo_Table_Name, df=df_meteo, key_column="date")

    except Exception as e:
        print(f"⚠️ Bouée {buoy_id} : Erreur à l'insertion des données:\n{e}\n")

    count += 1

# Final summary
print('\n' + '='*50)
list_tables_info(conn=conn, schema=schema)
print('\n' + '='*50)

text = "Processus Bronze Du Projet Oceanography ML terminé !"
show_popup(text=text)


Iteration: 1/39 | Bouée ID: 41008
🌊 Marine Data Successfully collected for buoy 41008
New data inserted successfully!
Rows in table before insertion: 6515
Rows inserted: 29
Rows in table after insertion: 6544

🌦️ Meteo Data Successfully collected for buoy 41008
No new data to insert.

No new data was inserted.
Rows in table before insertion: 2376
Rows inserted: 0
Rows in table after insertion: 2376


Iteration: 2/39 | Bouée ID: 41044
🌊 Marine Data Successfully collected for buoy 41044
New data inserted successfully!
Rows in table before insertion: 6483
Rows inserted: 29
Rows in table after insertion: 6512

🌦️ Meteo Data Successfully collected for buoy 41044
No new data to insert.

No new data was inserted.
Rows in table before insertion: 2376
Rows inserted: 0
Rows in table after insertion: 2376


Iteration: 3/39 | Bouée ID: 42001
🌊 Marine Data Successfully collected for buoy 42001
New data inserted successfully!
Rows in table before insertion: 1336
Rows inserted: 29
Rows in table afte

In [25]:
df_marine.columns

Index(['wind_direction', 'wind_speed', 'wind_gust', 'wave_height',
       'dominant_wave_period', 'average_wave_period',
       'dominant_wave_direction', 'pressure', 'air_temperature',
       'water_temperature', 'dewpoint', 'visibility', '3hr_pressure_tendency',
       'water_level_above_mean', 'time', 'Station ID'],
      dtype='object')

In [26]:
df_meteo.columns

Index(['date', 'temperature_2m', 'relative_humidity_2m', 'dew_point_2m',
       'precipitation', 'rain', 'showers', 'pressure_msl', 'surface_pressure',
       'cloud_cover', 'cloud_cover_low', 'cloud_cover_mid', 'cloud_cover_high',
       'visibility', 'wind_speed_10m', 'soil_temperature_0cm',
       'soil_moisture_0_to_1cm', 'is_day'],
      dtype='object')

In [20]:
# def drop_schema_or_tables(conn, schema_name, drop_schema=False):
#     try:
#         if drop_schema:
#             # Supprimer le schéma si drop_schema est True
#             print(f"Dropping schema {schema_name}...")
#             conn.execute(text(f"DROP SCHEMA IF EXISTS \"{schema_name}\" CASCADE;"))
#             conn.commit()
#             print(f"Schema {schema_name} dropped successfully.")
#         else:
#             # Supprimer toutes les tables dans le schéma
#             print(f"Dropping all tables in schema {schema_name}...")
#             conn.execute(text(f"""
#                 DO $$ 
#                 DECLARE
#                     r RECORD;
#                 BEGIN
#                     FOR r IN 
#                         SELECT table_name 
#                         FROM information_schema.tables 
#                         WHERE table_schema = :schema_name
#                     LOOP
#                         EXECUTE 'DROP TABLE IF EXISTS "' || :schema_name || '".' || quote_ident(r.table_name) || ' CASCADE';
#                     END LOOP;
#                 END $$;
#             """), {'schema_name': schema_name})
#             conn.commit()
#             print(f"All tables in schema {schema_name} dropped successfully.")
#     except Exception as e:
#         print(f"An error occurred: {e}")
#         conn.rollback()
#         print("Transaction rolled back.")

# drop_schema_or_tables(conn=conn, schema_name=schema, drop_schema=False)

In [21]:
# def explore_dict(d, indent=0):
#     """ Fonction récursive pour afficher toute la structure du dictionnaire """
#     for key, value in d.items():
#         print(" " * indent + f"- {key}: {type(value)}")
#         if isinstance(value, dict):
#             explore_dict(value, indent + 4)  # Explorer récursivement avec une indentation
#         elif isinstance(value, list):
#             if len(value) > 0:
#                 print(" " * (indent + 4) + f"Liste ({len(value)} éléments), type du premier élément: {type(value[0])}")
#                 if isinstance(value[0], dict):
#                     explore_dict(value[0], indent + 8)  # Explorer si c'est une liste de dicts
#         else:
#             print(" " * (indent + 4) + f"Valeur: {value}")

# # Récupérer la première clé du dictionnaire
# first_key = next(iter(buoy_datas))
# print(f"Exploration de la première clé: {first_key}\n")

# # Exécuter la fonction sur le premier élément uniquement
# explore_dict({first_key: buoy_datas[first_key]})

In [22]:
# openmongo_creds = r'C:\Users\f.gionnane\Documents\Data Engineering\Credentials\mongo_creds.json'

# with open(openmongo_creds, 'r') as file:
#     content = json.load(file)
#     mongo_user = content["user"]
#     mongo_password = content["password"]
#     mongo_string = content["connection_string"]

# uri = mongo_string
# # Create a new client and connect to the server
# client = MongoClient(uri, server_api=ServerApi('1'))

# # Send a ping to confirm a successful connection
# try:
#     client.admin.command('ping')
#     print("Pinged your deployment. You successfully connected to MongoDB!")
# except Exception as e:
#     print(e)
#     print(f'{mongo_user}\n{mongo_password}\n{mongo_string}')

In [23]:
# Charger le fichier GeoJSON depuis l'URL
url = "https://gist.githubusercontent.com/jrrickard/8755532505a40f3b8317/raw/ecd98849d3a5f4502b773b986254f19af3b8d8fb/oceans.json"
geojson_data = requests.get(url).json()

# Créer une carte avec un fond satellite ESRI
m = folium.Map(location=[0, 0], zoom_start=3, tiles="Esri.WorldImagery")

# Ajouter le GeoJSON (les océans) à la carte
folium.GeoJson(geojson_data, name="Oceans").add_to(m)

# Fonction pour générer des coordonnées aléatoires dans l'océan
def random_ocean_coords():
    lat = random.uniform(-60, 60)  # Latitude dans les eaux de l'océan
    lon = random.uniform(-180, 180)  # Longitude dans les eaux de l'océan
    return lat, lon

# Générer un DataFrame avec des coordonnées, températures, hauteur des vagues, vitesse du vent, etc.
data = {
    "Coordinates": [],
    "Temperature (°C)": [],
    "Wave Height (m)": [],
    "Wind Speed (km/h)": []
}

# Générer 10 marqueurs avec des valeurs aléatoires
for _ in range(10):
    lat, lon = random_ocean_coords()
    
    # Générer des valeurs aléatoires pour la température, la hauteur des vagues et la vitesse du vent
    temperature = random.uniform(15, 30)  # Température entre 15 et 30°C
    wave_height = random.uniform(0.5, 5)  # Hauteur des vagues entre 0.5m et 5m
    wind_speed = random.uniform(10, 50)  # Vitesse du vent entre 10 km/h et 50 km/h
    
    # Ajouter ces valeurs dans le DataFrame
    data["Coordinates"].append((lat, lon))
    data["Temperature (°C)"].append(temperature)
    data["Wave Height (m)"].append(wave_height)
    data["Wind Speed (km/h)"].append(wind_speed)

# Créer un DataFrame pandas avec ces données
df = pd.DataFrame(data)

# Ajouter les marqueurs et cercles à la carte avec des couleurs dépendant de la température
for index, row in df.iterrows():
    lat, lon = row["Coordinates"]
    temperature = row["Temperature (°C)"]
    
    # Déterminer la couleur du cercle en fonction de la température
    if temperature < 20:
        fill_color = "blue"
    elif temperature < 25:
        fill_color = "green"
    else:
        fill_color = "red"
    
    # Ajouter un marqueur
    folium.Marker(
        location=[lat, lon],
        icon=folium.Icon(color="blue", icon="info-sign")
    ).add_to(m)
    
    # Ajouter un cercle autour du marqueur
    folium.CircleMarker(
        location=[lat, lon],
        radius=15,  # Rayon du cercle
        color="black",
        fill=True,
        fill_color=fill_color,
        fill_opacity=0.5
    ).add_to(m)

# Ajouter une couche de contrôle pour la carte
folium.LayerControl().add_to(m)

# Sauvegarder la carte dans un fichier HTML
m.save("ocean_markers_map.html")

# Affichage de la carte dans l'environnement interactif (si vous êtes dans un environnement Jupyter)
