# Exploratory Data Analysis of Heavy Truck J1939 data

Ce notebook sert de base à l'analyse des données du `Heavy Truck`. Ces données reprennent la payload que l'on retrouve dans les messages du protocole **bus CAN J1939**.

**Payload d'un message sous protocole J1939:**

![payload j1939](./images/j1939-payload.png)

:warning: Attention à bien écouter, vendredi 26, la présentation de `Simon Bellemare` sur les données **bus CAN J1939** !

# Import libraries

In [None]:
from canlock.decoder import SessionDecoder
from canlock.db.database import init_db, get_session

In [None]:
# Initialise the database, then list the recorded sessions through a decoder.
init_db()

with get_session() as session:
    # The decoder is constructed on top of this database session.
    # NOTE(review): `decoder` is reused by later cells after this `with` block
    # has exited — confirm it still works once `session` is closed.
    decoder = SessionDecoder(db=session)
    sessions = decoder.list_sessions()

In [None]:
# Identifier of the session at index 12 of `sessions`.
sessions[12].id

In [None]:
# Decode the session at index 12.
# Presumably returns a DataFrame (the variable is named `df_`) — TODO confirm.
df_session = decoder.decode(session_id=sessions[12].id)

In [None]:
# Display the decoded session.
df_session

## Imports

In [None]:
from sqlmodel import Session, select
from tqdm.auto import tqdm

from canlock.db.database import get_session, init_db
from canlock.db.models import (AnalogAttributes, CanMessage, Ecu,
                               PgnDefinition, Session, SpnDefinition, Vehicle)

from canlock.decoder import SessionDecoder

In [None]:
# List the available sessions. `list_sessions` is called on a decoder instance
# bound to a database session (the way the first cell of this notebook calls
# it) instead of on the `SessionDecoder` class itself, which would fail for an
# instance method.
with get_session() as session:
    available_sessions = SessionDecoder(db=session).list_sessions()

available_sessions

1ère étape: récupérons toutes les sessions

In [None]:
# Initialise the database, then load every row of the `Session` model.
# `Session` here is the canlock model: it is imported after sqlmodel's
# `Session` and therefore shadows it.
init_db()

with get_session() as session:
    all_sessions_query = select(Session)
    sessions = session.exec(all_sessions_query).all()

print(f"Number of sessions: {len(sessions)}")

2ème étape: sélectionnons une session

In [None]:
# Pair each session's position in `sessions` with its duration in minutes.
session_durations = [
    (index, (sess.end_time - sess.start_time).total_seconds() / 60)
    for index, sess in enumerate(sessions)
]

# Show the sessions that last more than 0 and at most 10 minutes.
[
    (index, minutes)
    for index, minutes in session_durations
    if 0 < minutes <= 10
]

Prenons la session 205 qui a une durée d'environ 7 minutes, ce qui est largement suffisant pour nos tests.

In [None]:
# Load every CAN message of the session at index 205, oldest first.
selected_session_id = sessions[205].id
messages_query = (
    select(CanMessage)
    .where(CanMessage.session_id == selected_session_id)
    .order_by(CanMessage.timestamp)
)

with get_session() as session:
    can_messages = session.exec(messages_query).all()

print(f"Nombre de messages CAN disponible durant cette session: {len(can_messages)}")

# Preview the first five messages.
can_messages[:5]

Cela nous fait environ 400000 messages à décoder, ce qui est amplement suffisant.

Pour récupérer les valeurs, il faut dans un premier temps identifier le bon PGN. Puis, à partir de ce PGN, nous devons récupérer les SPNs associés et décoder les valeurs par rapport aux colonnes données.

https://gemini.google.com/share/630f89813590

In [None]:
def extract_pgn_number_from_payload(identifier: int) -> int:
    """Return the J1939 PGN encoded in a 29-bit CAN identifier.

    Despite the function name, the PGN is read from the CAN identifier, not
    from the data payload.

    The 29-bit identifier is laid out, from most to least significant bit, as
    3 priority bits, 18 PGN bits (extended data page, data page, PDU format,
    PDU specific) and 8 source-address bits.

    For PDU1 frames (PDU format below 240) the PDU-specific byte carries the
    destination address rather than being part of the PGN, so it is cleared.
    Without this, a peer-to-peer frame would yield a different "PGN" for each
    destination and would not match the PGN definition table.

    Args:
        identifier: CAN identifier as an integer. Bits above the 29-bit
            identifier (for example a frame-format flag) are ignored.

    Returns:
        The PGN as an integer.
    """
    # Drop the 8 source-address bits, then keep the 18 bits below the priority.
    pgn = (identifier >> 8) & 0x3FFFF

    pdu_format = (pgn >> 8) & 0xFF
    if pdu_format < 240:
        # PDU1: the low byte is a destination address, not part of the PGN.
        pgn &= 0x3FF00

    return pgn

In [None]:
# Look up, for every CAN message of the selected session, its PGN, the PGN
# definition, the SPN definitions of that PGN and their analog attributes.
#
# The four lists are index-aligned with `can_messages`: entry i always
# describes message i. When a PGN has no definition, both the SPN entry and
# the analog-rules entry are None, so the lists never drift apart.
pgns_from_messages = []
pgns_definitions_from_messages = []
spns_defintions_from_messages = []
spns_analogic_rules = []

# Many messages share the same PGN, so each distinct PGN is queried only once.
# Messages with the same PGN therefore share the same definition objects.
pgn_lookup_cache = {}

# One database session for the whole pass instead of one per message.
with get_session() as session:
    for message in tqdm(can_messages):
        extracted_pgn = extract_pgn_number_from_payload(message.can_identifier)
        pgns_from_messages.append(extracted_pgn)

        if extracted_pgn not in pgn_lookup_cache:
            pgn_definition = session.exec(
                select(PgnDefinition)
                .where(PgnDefinition.pgn_identifier == extracted_pgn)
            ).first()

            if pgn_definition is None:
                extracted_spns = None
                analog_rules = None
            else:
                extracted_spns = session.exec(
                    select(SpnDefinition)
                    .where(SpnDefinition.pgn_id == pgn_definition.id)
                ).all()
                # Read while the session is open so the attributes are loaded.
                analog_rules = [spn.analog_attributes for spn in extracted_spns]

            pgn_lookup_cache[extracted_pgn] = (
                pgn_definition,
                extracted_spns,
                analog_rules,
            )

        pgn_definition, extracted_spns, analog_rules = pgn_lookup_cache[extracted_pgn]
        pgns_definitions_from_messages.append(pgn_definition)
        spns_defintions_from_messages.append(extracted_spns)
        spns_analogic_rules.append(analog_rules)

In [None]:
# First SPN definition found for the message at index 1.
example_spn = spns_defintions_from_messages[1][0]

In [None]:
# Position of the SPN's first bit (used as a slice start on the payload bits).
example_spn.bit_start

In [None]:
# Number of payload bits occupied by the SPN.
example_spn.bit_length

In [None]:
# Flag on the SPN definition.
# Presumably tells whether scale/offset rules apply — TODO confirm.
example_spn.is_analog

In [None]:
def extract_spn_bits_from_payload(spn: SpnDefinition, payload: bytes) -> int:
    """Return the raw (unscaled) integer stored in the SPN's bit field.

    Bits are numbered from 0 at the most significant bit of the first payload
    byte. The field covers ``spn.bit_length`` bits starting at
    ``spn.bit_start`` and is read as a big-endian unsigned integer.

    The payload's own length is used, so frames shorter than 8 bytes keep
    their first byte at bit 0 instead of being shifted by left padding.

    NOTE(review): J1939 multi-byte parameters are usually transmitted
    little-endian; confirm that ``bit_start`` and ``bit_length`` in the
    database follow the big-endian bit numbering used here.

    Args:
        spn: SPN definition providing ``bit_start`` and ``bit_length``.
        payload: Raw data bytes of the CAN message.

    Returns:
        The unsigned integer value of the selected bits.

    Raises:
        ValueError: If the bit field is empty, starts at a negative position,
            or extends past the end of the payload.
    """
    bit_start = spn.bit_start
    bit_length = spn.bit_length
    total_bits = len(payload) * 8
    bit_end = bit_start + bit_length

    if bit_start < 0 or bit_length <= 0 or bit_end > total_bits:
        raise ValueError(
            f"SPN bit field [{bit_start}, {bit_end}) does not fit in a "
            f"{total_bits}-bit payload"
        )

    payload_as_int = int.from_bytes(payload, byteorder="big")
    # Shift the field down to the least significant bits, then mask it out.
    return (payload_as_int >> (total_bits - bit_end)) & ((1 << bit_length) - 1)

In [None]:
# Raw (unscaled) integer read from the payload of the message at index 1 for the example SPN.
value = extract_spn_bits_from_payload(example_spn, can_messages[1].payload)

In [None]:
def extract_values_from_spns(spn_list: list[SpnDefinition], analogic_rules: list[AnalogAttributes], payload: bytes) -> list[float]:
    """Decode the value of every SPN of one CAN message.

    Each SPN is paired with the analog rule at the same position. The raw bits
    are read from the payload and converted with ``scale * raw + offset``.

    An SPN whose rule is ``None`` (no analog attributes) keeps its raw integer
    value instead of raising, so the result stays index-aligned with
    ``spn_list``.

    Args:
        spn_list: SPN definitions to decode.
        analogic_rules: Analog attributes (``scale`` and ``offset``), one per
            SPN in the same order; an entry may be ``None``.
        payload: Raw data bytes of the CAN message.

    Returns:
        One decoded value per (SPN, rule) pair. If the two lists differ in
        length, the extra entries of the longer one are ignored.
    """
    spn_values = []
    for spn, analog_attr in zip(spn_list, analogic_rules):
        raw_value = extract_spn_bits_from_payload(spn, payload)

        if analog_attr is None:
            # No scaling rule available: keep the raw bits.
            spn_values.append(raw_value)
        else:
            spn_values.append(analog_attr.scale * raw_value + analog_attr.offset)

    return spn_values

In [None]:
# Decode every SPN of the message at index 1 using the SPN definitions and
# analog rules stored at index 1 of their respective lists.
extract_values_from_spns(
    spns_defintions_from_messages[1],
    spns_analogic_rules[1],
    can_messages[1].payload,
)

In [None]:
init_db()  # Ensure tables are created

with get_session() as session:
    # Fetch all vehicles
    vehicles = session.exec(select(Vehicle)).all()
    
    # Fetch at most 100 CAN messages as a sample. There is no ORDER BY, so
    # which rows come back is left to the database.
    # Note: `canmessages` (this sample) is a different variable from
    # `can_messages` (the messages of the selected session).
    canmessages = session.exec(select(CanMessage).limit(100)).all()

In [None]:
# Display the vehicles that were fetched.
vehicles

In [None]:
# Display the sampled CAN messages.
canmessages

In [None]:
# Step-by-step PGN decoding for the first sampled CAN message.

# Render the identifier as a 29-character bit string (left-padded with zeros).
binary_identifier = bin(canmessages[0].can_identifier)[2:].zfill(29)
# Characters 3 to 20: the 18 bits that follow the 3 leading (priority) bits.
pgn_identifier = binary_identifier[3:21]

# Convert the PGN bit string to an integer.
pgn_integer = int(pgn_identifier, 2)

# PDU1 frames (PDU format below 240) carry a destination address in the low
# byte. It is not part of the PGN, so clear it before the lookup.
if ((pgn_integer >> 8) & 0xFF) < 240:
    pgn_integer &= 0x3FF00

# Look up the PGN definition whose identifier equals pgn_integer.
with get_session() as session:
    pgn_definition = session.exec(select(PgnDefinition).where(PgnDefinition.pgn_identifier == pgn_integer)).first()

In [None]:
# 18-character bit string sliced out of the CAN identifier.
pgn_identifier

In [None]:
# PGN as an integer.
pgn_integer

In [None]:
# Name of the matching PGN definition.
# Raises AttributeError if the lookup found no row and returned None.
pgn_definition.name

In [None]:
# Step-by-step source-address decoding for the first sampled CAN message.

# Render the identifier as a 29-character bit string (left-padded with zeros).
binary_identifier = bin(canmessages[0].can_identifier)[2:].zfill(29)
# Characters 21 to the end: the last 8 bits of the identifier.
address_identifier = binary_identifier[21:]

# Convert the address bit string to an integer.
address_integer = int(address_identifier, 2)

# Look up the ECU whose address equals address_integer.
with get_session() as session:
    ecu_result = session.exec(select(Ecu).where(Ecu.address == address_integer)).first()

In [None]:
# Name of the matching ECU.
# Raises AttributeError if the lookup found no row and returned None.
ecu_result.name

In [None]:
# Load every SPN definition and every PGN definition from the database.
with get_session() as session:
    all_spn_definitions = session.exec(select(SpnDefinition)).all()
    all_pgn_definitions = session.exec(select(PgnDefinition)).all()

In [None]:
# Number of SPN definitions.
len(all_spn_definitions)

In [None]:
# Number of PGN definitions.
len(all_pgn_definitions)

In [None]:
# Collect the `name` of every SPN definition and of every PGN definition.
all_spn_names = [spn_def.name for spn_def in all_spn_definitions]
all_pgn_names = [pgn_def.name for pgn_def in all_pgn_definitions]

In [None]:
# Names used both by an SPN definition and by a PGN definition.
set(all_spn_names) & set(all_pgn_names)

In [None]:
# Display every SPN definition.
all_spn_definitions

In [None]:
# Display every PGN definition.
all_pgn_definitions

In [None]:
# Render the payload of the sampled message at index 7 as a bit string,
# left-padded with zeros to at least 64 characters (big-endian byte order).
payload_as_int = int.from_bytes(canmessages[7].payload, byteorder='big')
binary_payload = format(payload_as_int, "064b")
binary_payload

In [None]:
# Bits 4 to 11 of the payload bit string.
binary_payload[4:12]

In [None]:
# Bits 24 to 39 of the payload bit string (16 bits).
binary_payload[24:40]

In [None]:
# The same 16 bits read as an unsigned integer.
int(binary_payload[24:40], 2)

In [None]:
# Treat payload bits 24 to 39 as an SPN identifier and look up its definition.
candidate_spn_identifier = int(binary_payload[24:40], 2)
spn_lookup = select(SpnDefinition).where(
    SpnDefinition.spn_identifier == candidate_spn_identifier
)

with get_session() as session:
    spn_definition = session.exec(spn_lookup).first()
    # Accessed while the session is still open.
    analog_definition = spn_definition.analog_attributes

In [None]:
# Analog attributes of the SPN definition found above.
analog_definition

In [None]:
# Bits 8 to 15 of the payload bit string.
binary_payload[8:16]