# Données SYNOP
https://donneespubliques.meteofrance.fr/?fond=produit&id_produit=90&id_rubrique=32

## Init dataset

In [1]:
import requests
import pandas as pd
import io
import numpy as np

In [2]:
# Download the list of SYNOP stations and keep it as a DataFrame whose
# 'Nom' column is renamed to 'station'.
url_stations = "https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/postesSynop.csv"
s = requests.Session()  # reused by the archive download cell below
f = s.get(url_stations)
# Fail loudly on an HTTP error instead of trying to parse an error page as CSV.
f.raise_for_status()
# A freshly created BytesIO is already positioned at offset 0; no seek needed.
buffer = io.BytesIO(f.content)
df_stations = pd.read_csv(buffer, sep=";").rename(columns={'Nom': 'station'})
df_stations

Unnamed: 0,ID,station,Latitude,Longitude,Altitude
0,7005,ABBEVILLE,50.136000,1.834000,69
1,7015,LILLE-LESQUIN,50.570000,3.097500,47
2,7020,PTE DE LA HAGUE,49.725167,-1.939833,6
3,7027,CAEN-CARPIQUET,49.180000,-0.456167,67
4,7037,ROUEN-BOOS,49.383000,1.181667,151
...,...,...,...,...,...
57,81401,SAINT LAURENT,5.485500,-54.031667,5
58,81405,CAYENNE-MATOURY,4.822333,-52.365333,4
59,81408,SAINT GEORGES,3.890667,-51.804667,6
60,81415,MARIPASOULA,3.640167,-54.028333,106


In [57]:
from tqdm.autonotebook import tqdm

# One DataFrame per monthly archive that was downloaded and parsed successfully.
l_df = []

years = range(2016, 2022)
months = range(1, 13)

# The context manager closes the progress bar even if the loop is interrupted.
with tqdm(total=len(years) * len(months)) as pbar:
    for year in years:
        for month in months:
            date_format = f"{year}{month:02d}"
            url = f"https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.{date_format}.csv.gz"
            f = s.get(url)
            try:
                # A missing month comes back as an HTTP error, not a gzip file.
                f.raise_for_status()
                df = pd.read_csv(
                    io.BytesIO(f.content),
                    compression='gzip',
                    header=0,
                    sep=';',
                    quotechar='"',
                    # Replaces `error_bad_lines=False`, which was deprecated in
                    # pandas 1.3 and removed in 2.0.
                    on_bad_lines='skip',
                )
            except Exception as err:
                # Best effort: report the archive that could not be loaded and
                # move on. Unlike a bare `except:`, this lets KeyboardInterrupt
                # and SystemExit propagate.
                print(f"{url} ({type(err).__name__}: {err})")
            else:
                l_df.append(df)
            pbar.update(1)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=72.0), HTML(value='')))

https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.202111.csv.gz
https://donneespubliques.meteofrance.fr/donnees_libres/Txt/Synop/Archive/synop.202112.csv.gz



In [4]:
# Show the column names of `df`, the last monthly archive parsed by the loop above.
df.columns

Index(['numer_sta', 'date', 'pmer', 'tend', 'cod_tend', 'dd', 'ff', 't', 'td',
       'u', 'vv', 'ww', 'w1', 'w2', 'n', 'nbas', 'hbas', 'cl', 'cm', 'ch',
       'pres', 'niv_bar', 'geop', 'tend24', 'tn12', 'tn24', 'tx12', 'tx24',
       'tminsol', 'sw', 'tw', 'raf10', 'rafper', 'per', 'etat_sol', 'ht_neige',
       'ssfrai', 'perssfrai', 'rr1', 'rr3', 'rr6', 'rr12', 'rr24', 'phenspe1',
       'phenspe2', 'phenspe3', 'phenspe4', 'nnuage1', 'ctype1', 'hnuage1',
       'nnuage2', 'ctype2', 'hnuage2', 'nnuage3', 'ctype3', 'hnuage3',
       'nnuage4', 'ctype4', 'hnuage4', 'Unnamed: 59'],
      dtype='object')

In [5]:
# Stack all monthly archives into one frame.
df_all = pd.concat(l_df, ignore_index=True)

# Keep only the measurements of interest and give them readable names.
df_all = df_all[['numer_sta', 'date', 't', 'rr3', 'pres']].rename(
    columns={
        'numer_sta': 'ID',
        't': 'temperature',
        'rr3': 'precipitations3h',
        'pres': 'pression',
    }
)

# Attach the station name; merge joins on the shared 'ID' column.
df_all = df_all.merge(df_stations[['ID', 'station']])

df_all['station'] = df_all['station'].str.lower()
df_all['date'] = pd.to_datetime(df_all['date'], format="%Y%m%d%H%M%S")

# Values that cannot be parsed as numbers become NaN.
numeric_cols = ['temperature', 'precipitations3h']
for col in numeric_cols:
    df_all[col] = pd.to_numeric(df_all[col], errors='coerce')

# Shift by 273.15 (presumably Kelvin -> Celsius; confirm against the SYNOP spec).
df_all['temperature'] = df_all['temperature'] - 273.15


df_all

Unnamed: 0,ID,date,temperature,precipitations3h,pression,station
0,7005,2016-01-01 00:00:00,7.3,0.4,101370,abbeville
1,7005,2016-01-01 03:00:00,5.2,-0.1,101540,abbeville
2,7005,2016-01-01 06:00:00,3.3,0.0,101530,abbeville
3,7005,2016-01-01 09:00:00,3.0,-0.1,101480,abbeville
4,7005,2016-01-01 12:00:00,5.1,-0.1,101250,abbeville
...,...,...,...,...,...,...
986288,61970,2018-12-24 18:00:00,27.4,,100990,juan de nova
986289,61970,2019-01-08 12:00:00,30.1,,100640,juan de nova
986290,61970,2019-01-24 15:00:00,28.2,,100770,juan de nova
986291,61970,2019-01-25 12:00:00,28.8,,100900,juan de nova


In [52]:
# Stations to load into InfluxDB (lower-cased names, as stored in df_all['station']).
station_list = [
    'toulouse-blagnac',
    'lille-lesquin',
    'orly',
    'nice',
    'strasbourg-entzheim',
    'brest-guipavas',
    'ajaccio',
]

# Display every station name present in the data, to pick from.
df_all['station'].unique()

array(['abbeville', 'lille-lesquin', 'pte de la hague', 'rouen-boos',
       'reims-prunay', 'brest-guipavas', "ploumanac'h",
       'rennes-st jacques', 'orly', 'troyes-barberey', 'nancy-ochey',
       'strasbourg-entzheim', 'belle ile-le talut', 'nantes-bouguenais',
       'tours', 'bourges', 'dijon-longvic', 'bale-mulhouse',
       'pte de chassiron', 'poitiers-biard', 'limoges-bellegarde',
       'clermont-fd', 'le puy-loudes', 'lyon-st exupery',
       'bordeaux-merignac', 'gourdon', 'millau', 'montelimar', 'embrun',
       'tarbes-ossun', 'st girons', 'toulouse-blagnac', 'montpellier',
       'marignane', 'cap cepet', 'nice', 'perpignan', 'ajaccio', 'bastia',
       'gillot-aeroport', 'pamandzi', 'st-pierre', 'lamentin-aero',
       'saint laurent', 'cayenne-matoury', 'saint georges', 'maripasoula',
       'alencon', 'mont-de-marsan', 'nouvelle amsterdam', 'crozet',
       'kerguelen', "dumont d'urville", 'caen-carpiquet', 'tromelin',
       'le raizet aero', 'trinite-caravel', '

## Inject into InfluxDB

In [53]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import influxdb_client
from influxdb_client.client.write_api import SYNCHRONOUS
from influxdb_client import InfluxDBClient, Point, WriteOptions
import rx
from rx import operators as ops
from collections import OrderedDict
from csv import DictReader
from datetime import datetime


In [54]:
# Connection settings for the local InfluxDB instance.
bucket = "synop"
org = "obd_influxdb"
# The API token is a secret and must not be committed with the notebook:
# export INFLUXDB_TOKEN before starting the kernel. A KeyError here means the
# variable is missing. Any token that was previously hard-coded in this cell
# should be revoked.
token = os.environ["INFLUXDB_TOKEN"]
url = "http://localhost:8086"

client = influxdb_client.InfluxDBClient(
    url=url,
    token=token,
    org=org,
    timeout=60_000,
)

In [55]:
def parse_row(row, station_name):
    """Build the InfluxDB "meteo" point for one observation.

    Args:
        row: Mapping with 'temperature', 'precipitations3h', 'pression'
            and 'date' entries.
        station_name: Value stored in the point's 'station' tag.

    Returns:
        The point, with the three measurements stored as float fields and
        ``row['date']`` as its timestamp.
    """
    point = influxdb_client.Point("meteo").tag('station', station_name)
    for field_name in ('temperature', 'precipitations3h', 'pression'):
        point = point.field(field_name, float(row[field_name]))
    return point.time(row['date'])

def gen_rows(df):
    """Lazily yield each row of ``df`` as the mapping returned by ``_asdict()``.

    Rows come from ``df.itertuples(index=False)``, so the index is not part
    of the yielded mappings.
    """
    yield from (record._asdict() for record in df.itertuples(index=False))
        

    
def import_one_station(df_station, station_name):
    """Clean one station's observations and write them to InfluxDB.

    Args:
        df_station: Observations for a single station. It is cleaned IN
            PLACE: empty strings and 'mq' markers become NaN, then every row
            containing a NaN is dropped. Pass a copy to keep the original.
        station_name: Value used for the 'station' tag of every point.
    """
    df_station.replace('', np.nan, inplace=True)
    df_station.replace('mq', np.nan, inplace=True)
    df_station.dropna(inplace=True)

    # Rows are turned into points lazily as the write API consumes the stream.
    data = rx.from_iterable(gen_rows(df_station)).pipe(
        ops.map(lambda row: parse_row(row, station_name))
    )

    write_api = client.write_api(write_options=WriteOptions(batch_size=50_000, flush_interval=10_000))
    try:
        write_api.write(bucket=bucket, org=org, record=data)
    finally:
        # Always release the write API, even when the write raises.
        write_api.close()
    print(f"{station_name} done !")

In [56]:
import time

# Import each selected station in turn, reporting its row count and timing.
for station in station_list:
    print(station, end=" : ")
    start_time = time.time()
    is_station = df_all['station'] == station
    # Work on a copy: import_one_station cleans its input in place.
    df_station = df_all[is_station].copy()
    print(len(df_station))
    import_one_station(df_station, station)
    elapsed = time.time() - start_time
    print("Elapsed time : ", elapsed)
    print()

toulouse-blagnac : 17001
toulouse-blagnac done !
Elapsed time :  3.875098943710327

lille-lesquin : 16995
lille-lesquin done !
Elapsed time :  4.353186130523682

orly : 16995
orly done !
Elapsed time :  3.6784350872039795

nice : 16992
nice done !
Elapsed time :  3.3189003467559814

strasbourg-entzheim : 16914
strasbourg-entzheim done !
Elapsed time :  3.4602608680725098

brest-guipavas : 16935
brest-guipavas done !
Elapsed time :  3.8106024265289307

ajaccio : 16991
ajaccio done !
Elapsed time :  3.8937127590179443



In [51]:
def delete_db():
    """Delete the "meteo" measurement from the bucket.

    Covers the window 1970-01-01 to 2022-02-01 (UTC).
    """
    time_range = ("1970-01-01T00:00:00Z", "2022-02-01T00:00:00Z")
    predicate = '_measurement="meteo"'
    client.delete_api().delete(*time_range, predicate, bucket=bucket, org=org)
    print("Done !")
    
delete_db()

Done !


In [50]:
# Re-import a single station (copy first: the import cleans its input in place).
station_name = 'lille-lesquin'
df_station = df_all[df_all['station'] == station_name].copy()
print(len(df_station))
import_one_station(df_station, station_name)

# write_api = client.write_api(write_options=SYNCHRONOUS)
# n_rows = len(df_station)
# pbar = tqdm(total=n_rows)
# for i, row in df_station.iterrows():
#     p = parse_row(row, 'lille-lesquin')
#     write_api.write(bucket=bucket, org=org, record=p)
#     pbar.update(1)
# pbar.close()
# print("Done !")

16995

lille-lesquin done !
