In [2]:
# Export configuration: data folder name, export base name, and the archive
# file name derived from the export base name.
data_folder = ".data"
export_name = "coros_export_20251126"
zip_file = f"{export_name}.zip"

In [19]:
import duckdb
import os
import polars as pl

# Sub-folders to load; each one becomes a Polars DataFrame in `all_dfs` and a
# DuckDB view of the same name on `conn`.
folders = ["records", "events", "laps", "sessions"]

# Root directory that contains one sub-folder of CSV files per entry above.
# The COROS_CSV_DIR environment variable overrides the original hard-coded
# location so the notebook is not tied to a single machine.
csv_root = os.environ.get("COROS_CSV_DIR", "/home/albert/repos/coros_explo/.csv_data")

all_dfs = {}
# DuckDB connection used both to read the CSV files and to expose the views.
conn = duckdb.connect()

for folder in folders:

    # Path to the folder containing the CSV files
    dossier_csv = os.path.join(csv_root, folder)

    # All CSV files in the folder, sorted because os.listdir() returns entries
    # in arbitrary order and the combined row order should be reproducible.
    fichiers_csv = sorted(
        os.path.join(dossier_csv, f)
        for f in os.listdir(dossier_csv)
        if f.endswith('.csv')
    )

    # Fail early with a clear message instead of an opaque SQL error.
    if not fichiers_csv:
        raise FileNotFoundError(f"No CSV files found in {dossier_csv}")

    # Build the SQL list literal explicitly, doubling single quotes, so a path
    # containing an apostrophe cannot break (or alter) the statement.
    liste_sql = ", ".join("'" + chemin.replace("'", "''") + "'" for chemin in fichiers_csv)

    # SQL query reading every CSV file and combining them by column name
    query = f"""
        SELECT * FROM read_csv_auto([{liste_sql}], union_by_name=true, header=true)
    """

    # Run the query on the same connection that holds the views and
    # materialize the result as a Polars DataFrame
    all_dfs[folder] = conn.sql(query).pl()
    # Register the Polars DataFrame as a view named after the folder
    conn.register(folder, all_dfs[folder])

In [46]:
# Derive calendar, rolling-speed and cyclical time features for every record
# whose activity_type is 'running', then display the last 200 rows by timestamp.
conn.execute("""
SELECT
    timestamp
    , speed
    , distance
    , DATE(timestamp) AS activity_date
    , YEAR(timestamp) AS activity_year
    , MONTH(timestamp) AS activity_month
    , QUARTER(timestamp) AS activity_year_quarter
    , HOUR(timestamp) AS activity_hour
    , WEEK(timestamp) AS activity_week
    -- Quarter of the day: 1 = [00h, 06h), 2 = [06h, 12h), 3 = [12h, 18h), 4 = [18h, 24h)
    , CASE
        WHEN HOUR(timestamp) < 6 THEN 1
        WHEN HOUR(timestamp) >= 6 AND HOUR(timestamp) < 12 THEN 2
        WHEN HOUR(timestamp) >= 12 AND HOUR(timestamp) < 18 THEN 3
        ELSE 4
    END AS day_quarter
    -- Trailing averages within one activity; each frame ends 1 second before
    -- the current row, so the current speed is excluded from its own average.
    , AVG(speed) OVER (
        PARTITION BY activity_id
        ORDER BY timestamp
        RANGE BETWEEN INTERVAL 10 MINUTE PRECEDING AND INTERVAL 1 SECOND PRECEDING
    ) AS avg_speed_10min
    , AVG(speed) OVER (
        PARTITION BY activity_id
        ORDER BY timestamp
        RANGE BETWEEN INTERVAL 15 MINUTE PRECEDING AND INTERVAL 1 SECOND PRECEDING
    ) AS avg_speed_15min
    , AVG(speed) OVER (
        PARTITION BY activity_id
        ORDER BY timestamp
        RANGE BETWEEN INTERVAL 30 MINUTE PRECEDING AND INTERVAL 1 SECOND PRECEDING
    ) AS avg_speed_30min
    -- Running average from the first record of the activity up to and
    -- including the current row.
    , AVG(speed) OVER (
        PARTITION BY activity_id
        ORDER BY timestamp
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS avg_speed_since_start
    -- Elapsed seconds since the first record of the activity.
    , EXTRACT(EPOCH FROM (timestamp - FIRST_VALUE(timestamp) OVER (
        PARTITION BY activity_id
        ORDER BY timestamp
    ))) AS seconds_since_start

    -- Time-of-day cosine (value in [-1, 1]): 1 at midnight, 0 at 06:00 and 18:00, -1 at noon
    , COS((HOUR(timestamp) + MINUTE(timestamp)/60.0) * (2 * PI() / 24)) AS day_cos_cycle
    -- Time-of-day sine (shifted by 6 h relative to the cosine: 1 at 06:00, -1 at 18:00)
    , SIN((HOUR(timestamp) + MINUTE(timestamp)/60.0) * (2 * PI() / 24)) AS day_sin_cycle
    -- Day-of-month cosine, fixed 30-day period (approximation: day 31 lands on the same point as day 1)
    , COS((DAY(timestamp) - 1) * (2 * PI() / 30)) AS month_day_cos_cycle
    -- Day-of-month sine (same 30-day period)
    , SIN((DAY(timestamp) - 1) * (2 * PI() / 30)) AS month_day_sin_cycle
    -- Week-of-year cosine, fixed 52-week period (a week number of 53 lands on the same point as week 1)
    , COS((WEEK(timestamp) - 1) * (2 * PI() / 52)) AS year_week_cos_cycle
    -- Week-of-year sine (same 52-week period)
    , SIN((WEEK(timestamp) - 1) * (2 * PI() / 52)) AS year_week_sin_cycle
FROM records
WHERE activity_type = 'running'
ORDER BY timestamp
""").pl().tail(200)

timestamp,speed,distance,activity_date,activity_year,activity_month,activity_year_quarter,activity_hour,activity_week,day_quarter,avg_speed_10min,avg_speed_15min,avg_speed_30min,avg_speed_since_start,seconds_since_start,day_cos_cycle,day_sin_cycle,month_day_cos_cycle,month_day_sin_cycle,year_week_cos_cycle,year_week_sin_cycle
"datetime[μs, Europe/Paris]",f64,f64,date,i64,i64,i64,i64,i64,i32,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2025-11-16 11:56:25 CET,11.3508,20.611,2025-11-16,2025,11,4,11,46,2,11.492724,11.650364,11.7855,12.025866,6160.0,-0.999848,0.017452,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:56:26 CET,11.3616,20.615,2025-11-16,2025,11,4,11,46,2,11.491944,11.649764,11.785122,12.025758,6161.0,-0.999848,0.017452,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:56:27 CET,11.4084,20.6175,2025-11-16,2025,11,4,11,46,2,11.491182,11.649144,11.78475,12.025658,6162.0,-0.999848,0.017452,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:56:28 CET,11.4588,20.62,2025-11-16,2025,11,4,11,46,2,11.490498,11.648564,11.784404,12.025566,6163.0,-0.999848,0.017452,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:56:29 CET,11.4912,20.622,2025-11-16,2025,11,4,11,46,2,11.489898,11.64802,11.784086,12.025479,6164.0,-0.999848,0.017452,-1.0,1.2246e-16,0.663123,-0.748511
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2025-11-16 11:59:40 CET,11.1492,21.19,2025-11-16,2025,11,4,11,46,2,11.198424,11.371508,11.65625,11.986901,6355.0,-0.99999,0.004363,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:59:41 CET,11.0988,21.1935,2025-11-16,2025,11,4,11,46,2,11.197404,11.370552,11.656056,11.986761,6356.0,-0.99999,0.004363,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:59:42 CET,11.0592,21.197,2025-11-16,2025,11,4,11,46,2,11.196366,11.369552,11.655872,11.986615,6357.0,-0.99999,0.004363,-1.0,1.2246e-16,0.663123,-0.748511
2025-11-16 11:59:43 CET,11.0088,21.199,2025-11-16,2025,11,4,11,46,2,11.195334,11.368516,11.655704,11.986462,6358.0,-0.99999,0.004363,-1.0,1.2246e-16,0.663123,-0.748511
