In [2]:
# Imports et initialisation
import pandas as pd
import numpy as np
import plotly.express as px
from sqlmodel import select
from collections import defaultdict, Counter
import math
from sqlalchemy import func

from canlock.db.database import get_session, init_db
from canlock.db.models import SpnDefinition, AnalogAttributes, DefinedDigitalValues, Session, CanMessage, PgnDefinition
from canlock.decoder import SessionDecoder

# Create the database tables if needed (per the original note, this is a no-op when they already exist)
init_db()

In [3]:
# 1) Share of analog vs. categorical (digital) SPN definitions
with get_session() as session:
    spns = session.exec(select(SpnDefinition)).all()
    total_spns = len(spns)
    # Every definition that is not flagged analog is counted as digital.
    analog_count = len([spn_def for spn_def in spns if spn_def.is_analog])
    digital_count = total_spns - analog_count

# One row per SPN kind, with its absolute count and its share of the total.
df_prop = pd.DataFrame(
    [('analog', analog_count), ('digital', digital_count)],
    columns=['type', 'count'],
)
df_prop['proportion'] = df_prop['count'] / df_prop['count'].sum()

print(f'Total SPN definitions: {total_spns}')
display(df_prop)

# Bar height is the raw count; the bar label carries the proportion.
fig = px.bar(
    df_prop,
    x='type',
    y='count',
    text='proportion',
    title='Proportion SPN: analog vs digital',
)
fig.show()

Total SPN definitions: 13313


Unnamed: 0,type,count,proportion
0,analog,13313,1.0
1,digital,0,0.0


In [None]:
# Batch-scan parameters (tune to the available resources)
BATCH_SIZE = 100000
# For a quick test run, lower MAX_BATCHES. Set it to None to scan everything.
MAX_BATCHES = None

# 2) Proportion of messages whose PGN == 60416 (streamed in batches)
TARGET_PGN = 60416
total_msgs = 0      # every row seen, including rows without an identifier
target_count = 0    # rows whose decoded PGN equals TARGET_PGN
decode_errors = 0   # rows whose identifier could not be decoded
batch_index = 0

# Stream only the identifier column through ONE query instead of issuing an
# OFFSET/LIMIT query per batch. Without an ORDER BY, OFFSET paging has no
# guaranteed row order (rows may be skipped or counted twice between pages),
# and every page forces the database to walk past all previously read rows.
# NOTE(review): assumes `can_identifier` is a mapped column of CanMessage.
identifier_query = (
    select(CanMessage.can_identifier)
    .execution_options(yield_per=BATCH_SIZE)
)

with get_session() as session:
    partitions = session.exec(identifier_query).partitions(BATCH_SIZE)
    # Check the batch budget BEFORE fetching, so MAX_BATCHES=0 reads nothing.
    while MAX_BATCHES is None or batch_index < MAX_BATCHES:
        batch = next(partitions, None)
        if batch is None:
            break
        for can_identifier in batch:
            total_msgs += 1
            if can_identifier is None:
                continue
            try:
                pgn = SessionDecoder.extract_pgn_number_from_payload(can_identifier)
            except Exception:
                # Best-effort scan: keep going, but remember how many failed
                # so the final ratio can be interpreted correctly.
                decode_errors += 1
                continue
            if pgn == TARGET_PGN:
                target_count += 1
        batch_index += 1
        if batch_index % 10 == 0:
            print(f'Processed {batch_index} batches ({total_msgs} messages)')

print(f'PGN {TARGET_PGN}: {target_count} occurrences out of {total_msgs} messages ({(target_count/total_msgs if total_msgs else 0):.6f})')
if decode_errors:
    print(f'Identifiers that could not be decoded: {decode_errors}')

Processed 10 batches (1000000 messages)
Processed 20 batches (2000000 messages)
Processed 30 batches (3000000 messages)
Processed 40 batches (4000000 messages)
Processed 50 batches (5000000 messages)
Processed 60 batches (6000000 messages)
Processed 70 batches (7000000 messages)
Processed 80 batches (8000000 messages)
Processed 90 batches (9000000 messages)
Processed 100 batches (10000000 messages)


In [None]:
# 3) List every SPN definition (name and metadata)
SPN_LIST_COLUMNS = ['spn_id', 'spn_name', 'is_analog', 'bit_length']

with get_session() as session:
    spn_defs = session.exec(select(SpnDefinition)).all()
    # Read the ORM attributes while the session is still open (as the
    # proportion cell does). Depending on how get_session() finalises the
    # session, attribute access on detached instances may fail afterwards.
    spn_records = [
        {
            'spn_id': s.id,
            'spn_name': s.name,
            'is_analog': s.is_analog,
            'bit_length': s.bit_length,
        }
        for s in spn_defs
    ]

# Explicit columns keep the frame well-formed even when no definition exists;
# otherwise the sort and the column lookup below would raise KeyError.
df_spn_list = pd.DataFrame(spn_records, columns=SPN_LIST_COLUMNS)
display(df_spn_list.sort_values(['is_analog','spn_name']))

# How many definitions carry a (non-null) name versus the total number.
print('Named SPNs:', df_spn_list['spn_name'].notna().sum(), ' / ', len(df_spn_list))

In [None]:
# 4) Statistics (mean, variance) for analog SPNs observed while scanning the messages
# Welford's algorithm for online mean/variance
def welford_update(state, x):
    """Fold one observation into a running (count, mean, M2) accumulator.

    Args:
        state: Tuple ``(n, mean, M2)`` holding the number of samples seen so
            far, their running mean, and the running sum of squared
            deviations from that mean.
        x: The new observation.

    Returns:
        A new ``(n, mean, M2)`` tuple that includes ``x``.
    """
    count, running_mean, sq_dev_sum = state
    count += 1
    # Deviation from the OLD mean; it is reused after the mean is updated.
    dev_before = x - running_mean
    running_mean += dev_before / count
    # Multiply by the deviation from the NEW mean (Welford's update rule).
    sq_dev_sum += dev_before * (x - running_mean)
    return (count, running_mean, sq_dev_sum)

,
1