## Test Numéro 1

In [4]:
import spacy
from date_spacy import find_dates


In [2]:
nlp = spacy.blank('fr')

# Add the component to the pipeline
nlp.add_pipe('find_dates')

<function components.find_dates(doc)>

In [3]:
doc = nlp("""The event is scheduled for 25th August 2023.
          We also have a meeting on 10 September and another one on the twelfth of October and a
          final one on January fourth.""")
for ent in doc.ents:
    if ent.label_ == "DATE":
        print(f"Text: {ent.text} -> Parsed Date: {ent._.date}")

In [4]:


doc = nlp("""L'événement est prévu pour le 25 août 2023.
          Nous avons également une réunion le 10 septembre et une autre le douze octobre et une
          dernière le quatre janvier""")
for ent in doc.ents:
    if ent.label_ == "DATE":
        print(f"Text: {ent.text} -> Parsed Date: {ent._.date}")


Text: 25 août 2023 -> Parsed Date: 2023-08-25 00:00:00
Text: 10 septembre -> Parsed Date: 2025-09-10 00:00:00
Text: douze octobre -> Parsed Date: 2025-10-12 00:00:00
Text: quatre janvier -> Parsed Date: 2025-01-04 00:00:00


## Test numéro 2

In [5]:
import re
from spacy.tokens import Span
from spacy.language import Language
from spacy.util import filter_spans
import dateparser

ordinal_to_number = {
    "premier": "1", "deux": "2", "trois": "3", "quatre": "4", "cinq": "5",
    "six": "6", "sept": "7", "huit": "8", "neuf": "9", "dix": "10",
    "onze": "11", "douze": "12", "treize": "13", "quatorze": "14",
    "quinze": "15", "seize": "16", "dix-sept": "17", "dix-huit": "18",
    "dix-neuvième": "19", "vingt": "20", "vingt-et-un": "21", "vingt-deux": "22",
    "vingt-trois": "23", "vingt-quatre": "24", "vingt-cinq": "25", "vingt-six": "26",
    "vingt-sept": "27", "vingt-huit": "28", "vingt-neuf": "29", "trente": "30", 
    "trente-et-un": "31"
}

@Language.component("find_dates")
def find_dates(doc):
    # Définir une extension de date sur le span
    Span.set_extension("date", default=None, force=True)

    # Ordinaux
    ordinals = [
        "premier", "deux", "trois", "quatre", "cinq",
        "six", "sept", "huit", "neuf", "dix",
        "onze", "douze", "treize", "quatorze",
        "quinze", "seize", "dix-sept", "dix-huit",
        "dix-neuf", "vingt", "vingt-et-un", "vingt-deux",
        "vingt-trois", "vingt-quatre", "vingt-cinq", "vingt-six",
        "vingt-sept", "vingt-huit", "vingt-neuf", "trente", "trente-et-un"  
    ]
    
    ordinal_pattern = r"\b(?:" + "|".join(ordinals) + r")\b"

    # Un modèle regex pour capturer une variété de formats de date
    date_pattern = r"""
        # Jour-Mois-Année
        (?:
            \d{1,2}(?:er|ème)?          # Jour avec suffixe optionnel er, ème
            \s+
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
        |
        # Jour/Mois/Année
        (?:
            \d{1,2}                     # Jour
            [/-]
            \d{1,2}                     # Mois
            (?:                         # Année est optionnelle
                [/-]
                \d{2,4}                 # Année
            )?
        )
        |
        # Année-Mois-Jour
        (?:
            \d{4}                       # Année
            [-/]
            \d{1,2}                     # Mois
            [-/]
            \d{1,2}                     # Jour
        )
        |
        # Mois-Jour-Année
        (?:
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            \s+
            \d{1,2}(?:er|ème)?          # Jour avec suffixe optionnel er, ème
            (?:                         # Année est optionnelle
                ,?
                \s+
                \d{4}                   # Année
            )?
        )
        |
        # Mois-Année
        (?:
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            \s+
            \d{4}                       # Année
        )
        |
        # Ordinal-Jour-Mois-Année
        (?:
            """ + ordinal_pattern + """
            \s+
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
        |
        (?:
            """ + ordinal_pattern + """
            \s+
            de
            \s+
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]*  # Nom du mois
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
        |
        # Mois Ordinal
        (?:
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]*  # Nom du mois
            \s+
            """ + ordinal_pattern + """
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
    """
    matches = list(re.finditer(date_pattern, doc.text, re.VERBOSE))
    new_ents = []
    for match in matches:
        start_char, end_char = match.span()
        # Convertir les offsets de caractères en offsets de tokens
        start_token = None
        end_token = None
        for token in doc:
            if token.idx == start_char:
                start_token = token.i
            if token.idx + len(token.text) == end_char:
                end_token = token.i
        if start_token is not None and end_token is not None:
            hit_text = doc.text[start_char:end_char]
            parsed_date = dateparser.parse(hit_text, languages=["fr"])
            if parsed_date:  # S'assurer que la chaîne correspondante est une date valide
                ent = Span(doc, start_token, end_token + 1, label="DATE")
                ent._.date = parsed_date
                new_ents.append(ent)
            else:
                # Remplacer chaque ordinal dans hit_text par sa représentation numérique
                for ordinal, number in ordinal_to_number.items():
                    hit_text = hit_text.replace(ordinal, number)

                # Supprimer le mot "de" de hit_text
                new_date = hit_text.replace(" de ", " ")

                parsed_date = dateparser.parse(new_date, languages=["fr"])
                ent = Span(doc, start_token, end_token + 1, label="DATE")
                ent._.date = parsed_date
                new_ents.append(ent)
    # Combiner les nouvelles entités avec les entités existantes, en s'assurant qu'il n'y a pas de chevauchement
    doc.ents = list(doc.ents) + new_ents
    
    return doc

  """ + ordinal_pattern + """
  """ + ordinal_pattern + """
  """ + ordinal_pattern + """


In [6]:
import spacy
from spacy.tokens import Span
import dateparser
import re

# Charger le modèle de langue française
nlp = spacy.load("fr_core_news_md")

# Définir une extension de date sur le span
Span.set_extension("date", default=None, force=True)

# Fonction pour trouver les dates
def find_dates(doc):
    ordinal_to_number = {
        "premier": "1", "deux": "2", "trois": "3", "quatre": "4", "cinq": "5",
        "six": "6", "sept": "7", "huit": "8", "neuf": "9", "dix": "10",
        "onze": "11", "douze": "12", "treize": "13", "quatorze": "14",
        "quinze": "15", "seize": "16", "dix-sept": "17", "dix-huit": "18",
        "dix-neuvième": "19", "vingt": "20", "vingt-et-un": "21", "vingt-deux": "22",
        "vingt-trois": "23", "vingt-quatre": "24", "vingt-cinq": "25", "vingt-six": "26",
        "vingt-sept": "27", "vingt-huit": "28", "vingt-neuf": "29", "trente": "30", 
        "trente-et-un": "31"
    }

    ordinal_pattern = r"\b(?:" + "|".join(ordinal_to_number.keys()) + r")\b"

    date_pattern = r"""
        # Jour-Mois-Année
        (?:
            \d{1,2}(?:er|ème)?          # Jour avec suffixe optionnel er, ème
            \s+
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
        |
        # Jour/Mois/Année
        (?:
            \d{1,2}                     # Jour
            [/-]
            \d{1,2}                     # Mois
            (?:                         # Année est optionnelle
                [/-]
                \d{2,4}                 # Année
            )?
        )
        |
        # Année-Mois-Jour
        (?:
            \d{4}                       # Année
            [-/]
            \d{1,2}                     # Mois
            [-/]
            \d{1,2}                     # Jour
        )
        |
        # Mois-Jour-Année
        (?:
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            \s+
            \d{1,2}(?:er|ème)?          # Jour avec suffixe optionnel er, ème
            (?:                         # Année est optionnelle
                ,?
                \s+
                \d{4}                   # Année
            )?
        )
        |
        # Mois-Année
        (?:
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            \s+
            \d{4}                       # Année
        )
        |
        # Ordinal-Jour-Mois-Année
        (?:
            """ + ordinal_pattern + """
            \s+
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]* # Nom du mois
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
        |
        (?:
            """ + ordinal_pattern + """
            \s+
            de
            \s+
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]*  # Nom du mois
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
        |
        # Mois Ordinal
        (?:
            (?:janv|févr|mars|avr|mai|juin|juil|août|sept|oct|nov|déc)[a-z]*  # Nom du mois
            \s+
            """ + ordinal_pattern + """
            (?:                         # Année est optionnelle
                \s+
                \d{4}                   # Année
            )?
        )
    """
    matches = list(re.finditer(date_pattern, doc.text, re.VERBOSE))
    new_ents = []
    for match in matches:
        start_char, end_char = match.span()
        start_token = None
        end_token = None
        for token in doc:
            if token.idx == start_char:
                start_token = token.i
            if token.idx + len(token.text) == end_char:
                end_token = token.i
        if start_token is not None and end_token is not None:
            hit_text = doc.text[start_char:end_char]
            parsed_date = dateparser.parse(hit_text, languages=["fr"])
            if parsed_date:
                ent = Span(doc, start_token, end_token + 1, label="DATE")
                ent._.date = parsed_date
                new_ents.append(ent)
            else:
                for ordinal, number in ordinal_to_number.items():
                    hit_text = hit_text.replace(ordinal, number)
                new_date = hit_text.replace(" de ", " ")
                parsed_date = dateparser.parse(new_date, languages=["fr"])
                ent = Span(doc, start_token, end_token + 1, label="DATE")
                ent._.date = parsed_date
                new_ents.append(ent)
    doc.ents = list(doc.ents) + new_ents
    return doc

# Ajouter le composant au pipeline
nlp.add_pipe("find_dates", last=True)

# Texte de test
text = """L'événement est prévu pour le 25 août 2023.
          Nous avons également une réunion le 10 septembre et une autre le douze octobre et une
          dernière le quatre janvier et jeudi suivant"""

# Traiter le texte
doc = nlp(text)

# Afficher les entités trouvées
for ent in doc.ents:
    print(ent.text, ent.label_, ent._.date)


  """ + ordinal_pattern + """
  """ + ordinal_pattern + """
  """ + ordinal_pattern + """


25 août 2023 DATE 2023-08-25 00:00:00
10 septembre DATE 2025-09-10 00:00:00
douze octobre DATE 2025-10-12 00:00:00
quatre janvier DATE 2025-01-04 00:00:00


## Test Api Meteo ; Open Meteo

In [5]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/meteofrance"
params = {
	"latitude": 52.52,
	"longitude": 13.41,
	"hourly": "temperature_2m",
	"models": "meteofrance_seamless"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m

hourly_dataframe = pd.DataFrame(data = hourly_data)
hourly_dataframe

Coordinates 52.52000045776367°N 13.40999984741211°E
Elevation 38.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


Unnamed: 0,date,temperature_2m
0,2025-03-04 00:00:00+00:00,2.9195
1,2025-03-04 01:00:00+00:00,2.3695
2,2025-03-04 02:00:00+00:00,2.0195
3,2025-03-04 03:00:00+00:00,2.2695
4,2025-03-04 04:00:00+00:00,1.8695
...,...,...
91,2025-03-07 19:00:00+00:00,4.8935
92,2025-03-07 20:00:00+00:00,3.7935
93,2025-03-07 21:00:00+00:00,2.9435
94,2025-03-07 22:00:00+00:00,2.2435


## test Numero 2 de l'API Open Meteo

In [8]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/meteofrance"
params = {
	"latitude": 47.3948,
	"longitude": 0.704,
	"daily": ["temperature_2m_max", "temperature_2m_min", "precipitation_sum", "rain_sum", "wind_speed_10m_max", "wind_speed_10m_min"],
	"models": "meteofrance_seamless"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()
daily_rain_sum = daily.Variables(3).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(4).ValuesAsNumpy()
daily_wind_speed_10m_min = daily.Variables(5).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}

daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["rain_sum"] = daily_rain_sum
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["wind_speed_10m_min"] = daily_wind_speed_10m_min

daily_dataframe = pd.DataFrame(data = daily_data)
daily_dataframe


Coordinates 47.38999938964844°N 0.6999998092651367°E
Elevation 51.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s


Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum,rain_sum,wind_speed_10m_max,wind_speed_10m_min
0,2025-03-03 00:00:00+00:00,11.7,0.7,0.0,0.0,11.966954,4.452954
1,2025-03-04 00:00:00+00:00,13.75,2.9,0.0,0.0,11.440978,7.729527
2,2025-03-05 00:00:00+00:00,14.1755,1.6255,0.0,0.0,11.879999,5.62338
3,2025-03-06 00:00:00+00:00,14.4755,2.9255,0.0,0.0,17.654686,9.021574


In [9]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": 52.52,
	"longitude": 13.41,
	"hourly": ["temperature_2m", "rain", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "wind_speed_10m", "is_day"],
	"past_minutely_15": 96,
	"forecast_minutely_15": 96,
	"temporal_resolution": "hourly_6",
	"models": "meteofrance_seamless"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_rain = hourly.Variables(1).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(2).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(3).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(4).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(5).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(6).ValuesAsNumpy()
hourly_is_day = hourly.Variables(7).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["rain"] = hourly_rain
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["is_day"] = hourly_is_day

hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)

Coordinates 52.52000045776367°N 13.40999984741211°E
Elevation 38.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                        date  temperature_2m  rain  cloud_cover  \
0  2025-03-03 00:00:00+00:00          4.3195   0.0        100.0   
1  2025-03-03 06:00:00+00:00          2.6195   0.0         85.0   
2  2025-03-03 12:00:00+00:00          8.2195   0.0         47.0   
3  2025-03-03 18:00:00+00:00          6.2695   0.0        100.0   
4  2025-03-04 00:00:00+00:00          2.9695   0.0        100.0   
5  2025-03-04 06:00:00+00:00          1.7695   0.0        100.0   
6  2025-03-04 12:00:00+00:00          9.2195   0.0          0.0   
7  2025-03-04 18:00:00+00:00          7.0695   0.0          0.0   
8  2025-03-05 00:00:00+00:00          2.0195   0.0          0.0   
9  2025-03-05 06:00:00+00:00          1.4195   0.0          0.0   
10 2025-03-05 12:00:00+00:00          9.4435   0.0          3.0   
11 2025-03-05 18:00:00+00:00          4.5435   0.0          0.0   
12 2

In [6]:
import requests
from typing import Tuple

def get_coordinates(city_name: str) -> Tuple[float, float]:
    geocode_url = "https://nominatim.openstreetmap.org/search"
    params = {"q": city_name, "format": "json"}
    headers = {"User-Agent": "Mozilla/5.0"}
    print(f"URL: {geocode_url}")
    print(f"Paramètres: {params}")
    print(f"En-têtes: {headers}")
    r = requests.get(geocode_url, params=params, headers=headers)
    print(r)
    data = r.json()
    print(data)
    if not data:
        raise Exception(f"Ville introuvable : {city_name}")
    lat = float(data[0]["lat"])
    lon = float(data[0]["lon"])
    return lat, lon

In [11]:
get_coordinates("Paris")

URL: https://nominatim.openstreetmap.org/search
Paramètres: {'q': 'Paris', 'format': 'json'}
En-têtes: {'User-Agent': 'Mozilla/5.0'}
<Response [200]>
[{'place_id': 88664949, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 71525, 'lat': '48.8534951', 'lon': '2.3483915', 'class': 'boundary', 'type': 'administrative', 'place_rank': 12, 'importance': 0.8845663630228834, 'addresstype': 'city', 'name': 'Paris', 'display_name': 'Paris, Île-de-France, France métropolitaine, France', 'boundingbox': ['48.8155755', '48.9021560', '2.2241220', '2.4697602']}, {'place_id': 88715228, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 7444, 'lat': '48.8588897', 'lon': '2.3200410217200766', 'class': 'boundary', 'type': 'administrative', 'place_rank': 15, 'importance': 0.8845663630228834, 'addresstype': 'suburb', 'name': 'Paris', 'display_name': 'Paris, Île-de-France, F

(48.8534951, 2.3483915)

In [12]:
get_coordinates("Los Angeles")

URL: https://nominatim.openstreetmap.org/search
Paramètres: {'q': 'Los Angeles', 'format': 'json'}
En-têtes: {'User-Agent': 'Mozilla/5.0'}
<Response [200]>
[{'place_id': 400671807, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 207359, 'lat': '34.0536909', 'lon': '-118.242766', 'class': 'boundary', 'type': 'administrative', 'place_rank': 16, 'importance': 0.8280182952483853, 'addresstype': 'city', 'name': 'Los Angeles', 'display_name': 'Los Angeles, Los Angeles County, California, United States', 'boundingbox': ['33.6595410', '34.3373060', '-118.6681798', '-118.1552983']}, {'place_id': 293551383, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 396479, 'lat': '34.3155072', 'lon': '-118.209681', 'class': 'boundary', 'type': 'administrative', 'place_rank': 12, 'importance': 0.6412251647542688, 'addresstype': 'county', 'name': 'Los Angeles County', 'd

(34.0536909, -118.242766)

In [13]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

lat, lon = get_coordinates("Tours")

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/meteofrance"
params = {
	"latitude": lat,
	"longitude": lon,
	"daily": ["temperature_2m_max", "temperature_2m_min", "precipitation_sum", "rain_sum", "wind_speed_10m_max", "wind_speed_10m_min"],
	"models": "meteofrance_seamless"
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process hourly data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(2).ValuesAsNumpy()
daily_rain_sum = daily.Variables(3).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(4).ValuesAsNumpy()
daily_wind_speed_10m_min = daily.Variables(5).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}

daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["rain_sum"] = daily_rain_sum
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["wind_speed_10m_min"] = daily_wind_speed_10m_min

daily_dataframe = pd.DataFrame(data = daily_data)
daily_dataframe

URL: https://nominatim.openstreetmap.org/search
Paramètres: {'q': 'Tours', 'format': 'json'}
En-têtes: {'User-Agent': 'Mozilla/5.0'}
<Response [200]>
[{'place_id': 86517144, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 76306, 'lat': '47.3900474', 'lon': '0.6889268', 'class': 'boundary', 'type': 'administrative', 'place_rank': 16, 'importance': 0.6553053337560605, 'addresstype': 'city', 'name': 'Tours', 'display_name': 'Tours, Indre-et-Loire, Centre-Val de Loire, France métropolitaine, France', 'boundingbox': ['47.3489171', '47.4395937', '0.6528317', '0.7373427']}, {'place_id': 93594580, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 1664345, 'lat': '47.3399839', 'lon': '0.6756459417146139', 'class': 'boundary', 'type': 'administrative', 'place_rank': 14, 'importance': 0.5120811876238771, 'addresstype': 'municipality', 'name': 'Tours', 'display_

Unnamed: 0,date,temperature_2m_max,temperature_2m_min,precipitation_sum,rain_sum,wind_speed_10m_max,wind_speed_10m_min
0,2025-03-03 00:00:00+00:00,11.224,0.874,0.0,0.0,11.966954,4.452954
1,2025-03-04 00:00:00+00:00,13.174,3.024,0.0,0.0,11.440978,7.729527
2,2025-03-05 00:00:00+00:00,14.13,1.58,0.0,0.0,11.879999,5.62338
3,2025-03-06 00:00:00+00:00,14.43,2.88,0.0,0.0,17.654686,9.021574


## Test Version 1

In [14]:
def get_coordinates_test(city_name):
    API_KEY = "b6cf1eceaa703e0b9f80b3f9453ff79a"
    GEOCODING_URL = 'http://api.openweathermap.org/geo/1.0/direct?'
    params = {
        'q': city_name,
        'appid': API_KEY,
        'limit': 1
    }
    response = requests.get(GEOCODING_URL, params=params)
    data = response.json()
    if data and len(data) > 0 and 'lat' in data[0] and 'lon' in data[0]:
        return data[0]['lat'], data[0]['lon']
    else:
        return None, None

In [15]:
get_coordinates_test('Paris')

(48.8588897, 2.3200410217200766)

## Test Fonctionnement extraction de Ville et de Date

In [16]:
import spacy
import re
from typing import Tuple
import logging

nlp = spacy.load("fr_core_news_md")



def extract_location(text: str) -> str:
    doc = nlp(text)
    location = None
    for ent in doc.ents:
        if ent.label_ in ["LOC", "GPE"] and not location:
            location = ent.text
    if not location:
        location = "Paris"
    return location

In [20]:
def get_coordinates_V2(city_name) -> Tuple[float, float]:
    geocode_url = "https://nominatim.openstreetmap.org/search"
    params = {"q": city_name, "format": "json"}
    headers = {"User-Agent": "Mozilla/5.0"}
    r = requests.get(geocode_url, params=params, headers=headers)
    data = r.json()
    print(data)
    if not data:
        raise Exception(f"Ville introuvable : {city_name}")
    lat = float(data[0]["lat"])
    lon = float(data[0]["lon"])
    return lat, lon

In [21]:
def extract_forecast_days(text: str) -> int:
    doc = nlp(text)
    forecast_days = None
    regex_match = re.search(r"sur\s+(\d+)\s+jours", text, re.IGNORECASE)
    if regex_match:
        try:
            num = int(regex_match.group(1))
            if num in [3, 5, 7]:
                forecast_days = num
        except Exception as e:
            logging.error(f"Erreur extraction jours: {e}")
    if not forecast_days:
        for token in doc:
            if token.like_num:
                try:
                    num = int(token.text)
                    if num in [3, 5, 7]:
                        forecast_days = num
                        break
                except Exception:
                    continue
    if not forecast_days:
        forecast_days = 7
    return forecast_days

In [18]:
text = "Quelle est la météo à Paris pour les 5 prochains jours ?"
location = extract_location(text)
forecast_days = extract_forecast_days(text)
print(f"Location: {location}")
print(f"Forecast Days: {forecast_days}")

Location: Paris
Forecast Days: 5


## Test Numéro Trois

### Open Météo

In [12]:
import requests_cache
from retry_requests import retry

cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)

def get_weather_forecast(city_name: str) -> pd.DataFrame:
    lat, lon = get_coordinates_V2(city_name)
    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": "temperature_2m,rain,precipitation,cloudcover,windspeed_10m",
        "timezone": "auto"
    }
    response = retry_session.get(url, params=params)
    data = response.json()
    
    # Vérification des données reçues
    if 'hourly' not in data or not all(key in data['hourly'] for key in ['time', 'temperature_2m', 'precipitation', 'cloudcover', 'windspeed_10m']):
        raise ValueError("Données manquantes dans la réponse de l'API")
    
    times = pd.to_datetime(data['hourly']['time'])
    df = pd.DataFrame({
        "date": times,
        "temperature_2m": data['hourly']['temperature_2m'],
        "rain": data['hourly']['rain'],
        "precipitation": data['hourly']['precipitation'],
        "cloudcover": data['hourly']['cloudcover'],
        "windspeed_10m": data['hourly']['windspeed_10m'],
        "pm2_5": [12.3] * len(times)
    })
    # Convertir les types de données en types natifs Python
    df = df.astype({
        "temperature_2m": float,
        "rain": float,
        "precipitation": float,
        "cloudcover": float,
        "windspeed_10m": float
    })
    return df

In [13]:
get_weather_forecast("Rennes")

[{'place_id': 261082467, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 54517, 'lat': '48.1113387', 'lon': '-1.6800198', 'class': 'boundary', 'type': 'administrative', 'place_rank': 16, 'importance': 0.668872045900498, 'addresstype': 'city', 'name': 'Rennes', 'display_name': 'Rennes, Ille-et-Vilaine, Bretagne, France métropolitaine, France', 'boundingbox': ['48.0769155', '48.1549705', '-1.7525876', '-1.6244045']}, {'place_id': 263154915, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 1655027, 'lat': '48.156836049999995', 'lon': '-1.8144255084289234', 'class': 'boundary', 'type': 'administrative', 'place_rank': 14, 'importance': 0.5255282243803496, 'addresstype': 'municipality', 'name': 'Rennes', 'display_name': 'Rennes, Ille-et-Vilaine, Bretagne, France métropolitaine, France', 'boundingbox': ['47.9340931', '48.3796360', '-2.2889840', '-1.2839440

Unnamed: 0,date,temperature_2m,rain,precipitation,cloudcover,windspeed_10m,pm2_5
0,2025-03-03 00:00:00,4.2,0.0,0.0,25.0,7.9,12.3
1,2025-03-03 01:00:00,3.6,0.0,0.0,71.0,8.3,12.3
2,2025-03-03 02:00:00,3.6,0.0,0.0,93.0,6.1,12.3
3,2025-03-03 03:00:00,2.8,0.0,0.0,0.0,7.6,12.3
4,2025-03-03 04:00:00,2.4,0.0,0.0,10.0,6.1,12.3
...,...,...,...,...,...,...,...
163,2025-03-09 19:00:00,11.8,0.0,0.2,100.0,17.7,12.3
164,2025-03-09 20:00:00,11.3,0.0,0.0,100.0,15.9,12.3
165,2025-03-09 21:00:00,10.7,0.0,0.0,100.0,15.5,12.3
166,2025-03-09 22:00:00,10.1,0.0,0.0,100.0,15.5,12.3


### Test Numéro 4

In [14]:
import pandas as pd
def get_weather_forecast(city_name: str) -> pd.DataFrame:
    lat, lon = get_coordinates_V2(city_name)
    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": "temperature_2m,rain,precipitation,cloudcover,windspeed_10m",
        "daily": "temperature_2m_min,temperature_2m_max",  # Ajout des données quotidiennes
        "timezone": "auto"
    }
    response = retry_session.get(url, params=params)
    data = response.json()
    
    # Vérification des données reçues
    if 'hourly' not in data or not all(key in data['hourly'] for key in ['time', 'temperature_2m', 'precipitation', 'cloudcover', 'windspeed_10m']):
        raise ValueError("Données manquantes dans la réponse de l'API")
    
    times = pd.to_datetime(data['hourly']['time'])
    df_hourly = pd.DataFrame({
        "date": times,
        "temperature_2m": data['hourly']['temperature_2m'],
        "rain": data['hourly']['rain'],
        "precipitation": data['hourly']['precipitation'],
        "cloudcover": data['hourly']['cloudcover'],
        "windspeed_10m": data['hourly']['windspeed_10m'],
        "pm2_5": [12.3] * len(times)
    })
    # Convertir les types de données en types natifs Python
    df_hourly = df_hourly.astype({
        "temperature_2m": float,
        "rain": float,
        "precipitation": float,
        "cloudcover": float,
        "windspeed_10m": float
    })
    
    # Traitement des données quotidiennes
    if 'daily' in data and 'temperature_2m_min' in data['daily'] and 'temperature_2m_max' in data['daily']:
        daily_times = pd.to_datetime(data['daily']['time'])
        df_daily = pd.DataFrame({
            "date": daily_times,
            "temperature_min": data['daily']['temperature_2m_min'],
            "temperature_max": data['daily']['temperature_2m_max']
        })
        df_daily = df_daily.astype({
            "temperature_min": float,
            "temperature_max": float
        })
        # Vous pouvez choisir de retourner df_hourly, df_daily, ou les deux
        return df_hourly, df_daily
    
    return df_hourly

In [None]:
get_weather_forecast("Rennes")

### Test Numéro 5 : Double fonction de prevision


In [7]:
import spacy
import re
from typing import Tuple
import logging

def get_coordinates_V2(city_name) -> Tuple[float, float]:
    geocode_url = "https://nominatim.openstreetmap.org/search"
    params = {"q": city_name, "format": "json"}
    headers = {"User-Agent": "Mozilla/5.0"}
    r = requests.get(geocode_url, params=params, headers=headers)
    data = r.json()
    print(data)
    if not data:
        raise Exception(f"Ville introuvable : {city_name}")
    lat = float(data[0]["lat"])
    lon = float(data[0]["lon"])
    return lat, lon

In [8]:
import pandas as pd

def get_hourly_weather_forecast(city_name: str) -> pd.DataFrame:
    lat, lon = get_coordinates_V2(city_name)
    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": lon,
        "hourly": "temperature_2m,rain,precipitation,cloudcover,windspeed_10m",
        "timezone": "auto"
    }
    response = retry_session.get(url, params=params)
    data = response.json()
    
    # Vérification des données reçues
    if 'hourly' not in data or not all(key in data['hourly'] for key in ['time', 'temperature_2m', 'precipitation', 'cloudcover', 'windspeed_10m']):
        raise ValueError("Données manquantes dans la réponse de l'API")
    
    times = pd.to_datetime(data['hourly']['time'])
    df_hourly = pd.DataFrame({
        "date": times,
        "temperature_2m": data['hourly']['temperature_2m'],
        "rain": data['hourly']['rain'],
        "precipitation": data['hourly']['precipitation'],
        "cloudcover": data['hourly']['cloudcover'],
        "windspeed_10m": data['hourly']['windspeed_10m'],
        "pm2_5": [12.3] * len(times)
    })
    # Convertir les types de données en types natifs Python
    df_hourly = df_hourly.astype({
        "temperature_2m": float,
        "rain": float,
        "precipitation": float,
        "cloudcover": float,
        "windspeed_10m": float
    })
    
    return df_hourly

def get_daily_weather_forecast(city_name: str) -> pd.DataFrame:
    lat, lon = get_coordinates_V2(city_name)
    url = "https://api.open-meteo.com/v1/forecast"
    params = {
        "latitude": lat,
        "longitude": lon,
        "daily": "temperature_2m_min,temperature_2m_max,sunrise,sunset,windspeed_10m_max,windspeed_10m_min",
        "timezone": "auto"
    }
    response = retry_session.get(url, params=params)
    data = response.json()
    
    # Traitement des données quotidiennes
    if 'daily' in data and 'temperature_2m_min' in data['daily'] and 'temperature_2m_max' in data['daily']:
        daily_times = pd.to_datetime(data['daily']['time'])
        df_daily = pd.DataFrame({
            "date": daily_times,
            "temperature_min": data['daily']['temperature_2m_min'],
            "temperature_max": data['daily']['temperature_2m_max'],
            "sunrise": data['daily']['sunrise'],
            "sunset": data['daily']['sunset'],
            "windspeed_10m_max": data['daily']['windspeed_10m_max'],
            "windspeed_10m_min": data['daily']['windspeed_10m_min']
        })
        df_daily = df_daily.astype({
            "temperature_min": float,
            "temperature_max": float,
            "windspeed_10m_max": float,
            "windspeed_10m_min": float
        })
        return df_daily
    
    raise ValueError("Données quotidiennes manquantes dans la réponse de l'API")

In [9]:
get_daily_weather_forecast("Tours")

[{'place_id': 85977484, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 76306, 'lat': '47.3900474', 'lon': '0.6889268', 'class': 'boundary', 'type': 'administrative', 'place_rank': 16, 'importance': 0.6553053337560605, 'addresstype': 'city', 'name': 'Tours', 'display_name': 'Tours, Indre-et-Loire, Centre-Val de Loire, France métropolitaine, France', 'boundingbox': ['47.3489171', '47.4395937', '0.6528317', '0.7373427']}, {'place_id': 85958816, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 1664345, 'lat': '47.3399839', 'lon': '0.6756459417146139', 'class': 'boundary', 'type': 'administrative', 'place_rank': 14, 'importance': 0.5120811876238771, 'addresstype': 'municipality', 'name': 'Tours', 'display_name': 'Tours, Indre-et-Loire, Centre-Val de Loire, France métropolitaine, France', 'boundingbox': ['47.1344580', '47.5458001', '0.2724505', '0.927724

Unnamed: 0,date,temperature_min,temperature_max,sunrise,sunset,windspeed_10m_max,windspeed_10m_min
0,2025-03-04,3.4,13.7,2025-03-04T07:31,2025-03-04T18:46,10.2,6.4
1,2025-03-05,4.8,17.2,2025-03-05T07:29,2025-03-05T18:48,8.5,4.2
2,2025-03-06,4.5,18.7,2025-03-06T07:27,2025-03-06T18:49,18.1,4.3
3,2025-03-07,8.9,18.5,2025-03-07T07:25,2025-03-07T18:51,16.4,7.3
4,2025-03-08,7.7,15.4,2025-03-08T07:23,2025-03-08T18:52,20.6,9.7
5,2025-03-09,8.6,16.1,2025-03-09T07:21,2025-03-09T18:53,20.8,5.0
6,2025-03-10,6.7,17.3,2025-03-10T07:19,2025-03-10T18:55,12.6,2.8


In [10]:
get_hourly_weather_forecast("Tours")

[{'place_id': 85977484, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 76306, 'lat': '47.3900474', 'lon': '0.6889268', 'class': 'boundary', 'type': 'administrative', 'place_rank': 16, 'importance': 0.6553053337560605, 'addresstype': 'city', 'name': 'Tours', 'display_name': 'Tours, Indre-et-Loire, Centre-Val de Loire, France métropolitaine, France', 'boundingbox': ['47.3489171', '47.4395937', '0.6528317', '0.7373427']}, {'place_id': 85958816, 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright', 'osm_type': 'relation', 'osm_id': 1664345, 'lat': '47.3399839', 'lon': '0.6756459417146139', 'class': 'boundary', 'type': 'administrative', 'place_rank': 14, 'importance': 0.5120811876238771, 'addresstype': 'municipality', 'name': 'Tours', 'display_name': 'Tours, Indre-et-Loire, Centre-Val de Loire, France métropolitaine, France', 'boundingbox': ['47.1344580', '47.5458001', '0.2724505', '0.927724

Unnamed: 0,date,temperature_2m,rain,precipitation,cloudcover,windspeed_10m,pm2_5
0,2025-03-04 00:00:00,6.1,0.0,0.0,88.0,7.4,12.3
1,2025-03-04 01:00:00,5.9,0.0,0.0,65.0,8.4,12.3
2,2025-03-04 02:00:00,5.4,0.0,0.0,0.0,6.6,12.3
3,2025-03-04 03:00:00,4.8,0.0,0.0,100.0,8.2,12.3
4,2025-03-04 04:00:00,4.5,0.0,0.0,90.0,8.9,12.3
...,...,...,...,...,...,...,...
163,2025-03-10 19:00:00,13.7,0.0,0.0,24.0,5.8,12.3
164,2025-03-10 20:00:00,12.5,0.0,0.0,49.0,3.8,12.3
165,2025-03-10 21:00:00,11.3,0.0,0.0,75.0,2.8,12.3
166,2025-03-10 22:00:00,10.3,0.0,0.0,100.0,2.9,12.3
