# Historique Météo

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import influxdb_client
from influxdb_client.client.write_api import SYNCHRONOUS
from influxdb_client import InfluxDBClient, Point, WriteOptions
import rx
from rx import operators as ops
from collections import OrderedDict
from csv import DictReader
from datetime import datetime


In [2]:
# Connection settings for the InfluxDB instance. The URL and organisation can
# be overridden through environment variables; the defaults are the values
# this notebook has always used.
bucket = "meteo_history"
org = os.environ.get("INFLUXDB_V2_ORG", "obd_influxdb")
url = os.environ.get("INFLUXDB_V2_URL", "http://localhost:8086")

# The API token is a secret and must not be stored in the notebook, so it is
# read from the environment. The token that used to be hard-coded here has been
# exposed and should be revoked on the server.
token = os.environ.get("INFLUXDB_V2_TOKEN")
if not token:
    raise RuntimeError(
        "Set the INFLUXDB_V2_TOKEN environment variable before running this notebook."
    )

client = influxdb_client.InfluxDBClient(
    url=url,
    token=token,
    org=org,
    timeout=60_000,  # request timeout; milliseconds per the influxdb-client documentation
)

### Directly from CSV files

In [10]:

def parse_row(row, tag):
    """Build the "historique-meteo" point for one record.

    `row` is indexed by column name and `tag` is stored as the point's
    "location" tag. Every measurement column is converted with float() and
    written as a field of the same name; row['Date'] becomes the timestamp.
    """
    field_names = (
        "TemperatureMin", "TemperatureMax", "TemperatureMoyenne",
        "Precipitations",
        "HygrometrieMin", "HygrometrieMax", "HygrometrieMoyenne",
    )

    point = Point("historique-meteo").tag("location", tag)
    # Fields are added in the same order as the columns listed above.
    for name in field_names:
        point = point.field(name, float(row[name]))
    return point.time(row['Date'])
    

# Import every "hist-<city>.csv" file found in ./data; <city> becomes the
# "location" tag of the points written for that file.
for filename in os.listdir('data'):
    # fullmatch with an escaped dot accepts only real "hist-<city>.csv" names,
    # so unrelated files are skipped instead of crashing on a failed match.
    name_match = re.fullmatch(r'hist-(.*)\.csv', filename)
    if name_match is None:
        continue

    # Get file
    csv_file = f"./data/{filename}"
    city = name_match.group(1)
    print("Processing", csv_file)

    # The file stays open until the write API has been closed, so every row
    # has been read and handed over before the handle is released.
    with open(csv_file, 'r', newline='') as csv_handle:
        write_api = client.write_api(write_options=WriteOptions(batch_size=50_000, flush_interval=10_000))
        try:
            data = rx.from_iterable(DictReader(csv_handle)).pipe(
                ops.map(lambda row: parse_row(row, city))
            )
            write_api.write(bucket=bucket, org=org, record=data)
        finally:
            # Close even when the write fails so the batching writer is not leaked.
            write_api.close()

    print("Done !")
        

Processing ./data/hist-chambery_aix_les_bains.csv
Done !
Processing ./data/hist-grenoble.csv
Done !
Processing ./data/hist-lille.csv
Done !
Processing ./data/hist-marseille.csv
Done !
Processing ./data/hist-paris.csv
Done !
Processing ./data/hist-toulouse.csv
Done !


### From a DataFrame

In [11]:
# Load the Toulouse history and add each row's Date as whole seconds since
# 1970-01-01 in a new 'date_unix' column.
csv_file = "data/hist-toulouse.csv"
df = pd.read_csv(csv_file)

elapsed_since_epoch = pd.to_datetime(df["Date"]) - pd.Timestamp("1970-01-01")
df['date_unix'] = elapsed_since_epoch // pd.Timedelta('1s')

df

Unnamed: 0,Date,TemperatureMin,TemperatureMax,TemperatureMoyenne,Precipitations,HygrometrieMin,HygrometrieMax,HygrometrieMoyenne,date_unix
0,2000-10-01,9.9,18.299999,14.10,1.0,64.0,90.0,77.0,970358400
1,2000-10-02,10.8,17.600000,14.20,8.0,67.0,91.0,79.0,970444800
2,2000-10-03,12.2,18.500000,15.35,1.0,60.0,94.0,77.0,970531200
3,2000-10-04,10.1,20.900000,15.50,0.0,53.0,96.0,74.5,970617600
4,2000-10-05,11.9,18.400000,15.15,0.2,78.0,94.0,86.0,970704000
...,...,...,...,...,...,...,...,...,...
6189,2021-10-11,6.7,19.700001,13.20,0.0,50.0,97.0,78.0,1633910400
6190,2021-10-12,6.1,18.799999,12.45,0.0,52.0,97.0,77.0,1633996800
6191,2021-10-13,6.7,18.299999,12.50,0.6,45.0,89.0,71.0,1634083200
6192,2021-10-14,4.5,21.100000,12.80,0.0,29.0,97.0,68.0,1634169600


In [4]:
def parse_row(row, tag):
    """Turn one record into a "historique-meteo" point tagged with `tag`.

    This repeats the parse_row defined in the CSV import cell earlier in this
    notebook. `row` is indexed by column name: each measurement column is cast
    with float() and stored as a field of the same name, `tag` is stored as the
    "location" tag, and row['Date'] is used as the point's time.
    """
    columns = [
        "TemperatureMin",
        "TemperatureMax",
        "TemperatureMoyenne",
        "Precipitations",
        "HygrometrieMin",
        "HygrometrieMax",
        "HygrometrieMoyenne",
    ]

    point = Point("historique-meteo")
    point = point.tag("location", tag)
    for column in columns:
        point = point.field(column, float(row[column]))
    point = point.time(row['Date'])
    return point


def gen_rows(df):
    """Lazily yield each row of `df` as a mapping of column name to value.

    Rows are produced by DataFrame.itertuples(index=False), so the index is
    left out, and each named tuple is converted with _asdict(). Note that
    itertuples renames column labels that are not valid Python identifiers
    to positional names (see the pandas documentation).
    """
    yield from (record._asdict() for record in df.itertuples(index=False))
        
        
# Stream the rows of `df` to InfluxDB as "historique-meteo" points tagged "toulouse".
data = rx.from_iterable(gen_rows(df)).pipe(ops.map(lambda row: parse_row(row, "toulouse")))
write_api = client.write_api(write_options=WriteOptions(batch_size=50_000, flush_interval=10_000))
try:
    write_api.write(bucket=bucket, org=org, record=data)
finally:
    # Close even when write() raises so the batching writer is not leaked.
    write_api.close()

Done


### Create clean CSV files for Postgres

In [5]:
# To use in PostgreSQL
relevant_index = ["TemperatureMin", "TemperatureMax", "TemperatureMoyenne",
                  "Precipitations",
                  "HygrometrieMin", "HygrometrieMax", "HygrometrieMoyenne"]

def export_clean_csv(df_station, station):
    """Write the PostgreSQL-ready CSV for one station.

    The output keeps the `relevant_index` measurement columns, followed by a
    'Station' column holding `station` on every row and an 'Heure' column
    copied from the input's 'Date' column.

    Args:
        df_station: DataFrame containing the `relevant_index` columns and 'Date'.
        station: station name; stored in 'Station' and used in the file name.

    The file is written without the index to
    psql_queries/meteo_hist-<station>.csv; the psql_queries directory is
    created when it does not exist yet.
    """
    df_clean = df_station[relevant_index].assign(
        Station=station,
        Heure=df_station['Date'],
    )
    # to_csv does not create missing parent directories.
    os.makedirs('psql_queries', exist_ok=True)
    df_clean.to_csv(f'psql_queries/meteo_hist-{station}.csv', index=False)
    

# Export one cleaned CSV per "hist-<city>.csv" file found in ./data.
for filename in os.listdir('data'):
    # fullmatch with an escaped dot accepts only real "hist-<city>.csv" names,
    # so unrelated files are skipped instead of crashing on a failed match.
    name_match = re.fullmatch(r'hist-(.*)\.csv', filename)
    if name_match is None:
        continue
    csv_file = f"./data/{filename}"
    city = name_match.group(1)
    df_station = pd.read_csv(csv_file)
    export_clean_csv(df_station, city)

### Delete the `historique-meteo` measurement

In [9]:
def delete_db(start="1970-01-01T00:00:00Z", stop=None):
    """Delete the "historique-meteo" points stored in `bucket` for a time range.

    Args:
        start: RFC3339 timestamp marking the beginning of the range; defaults
            to 1970-01-01T00:00:00Z.
        stop: RFC3339 timestamp marking the end of the range. When omitted,
            the current UTC time is used so that recently dated points are
            included as well.

    Only points matching the predicate _measurement="historique-meteo" are
    targeted; other measurements in the bucket are not part of the request.
    """
    if stop is None:
        # Local import: the notebook only imports the `datetime` class at file level.
        from datetime import timezone
        stop = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    delete_api = client.delete_api()
    delete_api.delete(start, stop, '_measurement="historique-meteo"', bucket=bucket, org=org)
    print("Done !")

# Destructive: running this cell removes the "historique-meteo" points from the bucket.
delete_db()

Done !
