In [14]:
import pandas as pd
import numpy as np
import json
from datetime import datetime, timedelta

# Load datasets
syn_data = json.load(open('artificial_ign_off_data.json', 'r'))
map_df = pd.read_csv('vehicle_pnid_mapping.csv')
trg_df = pd.read_csv('triggers_soc.csv')
tlm_df = pd.read_csv('telemetry_data.csv')

print(f"Loaded: SYN={len(syn_data)}, MAP={len(map_df)}, TRG={len(trg_df)}, TLM={len(tlm_df)}")

Loaded: SYN=411, MAP=19, TRG=68670, TLM=1833552


In [26]:
def data_quality_check():
    """Simple but effective data quality assessment"""

    print("=== DATA QUALITY ASSESSMENT ===")

    # Check TRG structure (main events source)
    print(f"TRG columns: {list(trg_df.columns)}")
    print(f"TRG event types: {trg_df['NAME'].value_counts().to_dict()}")

    # Check TLM data sparsity
    tlm_missing = tlm_df['IGNITION_STATUS'].isna().sum() / len(tlm_df) * 100
    print(f"TLM ignition data missing: {tlm_missing:.1f}%")

    # Check vehicle mappings
    mapped_vehicles = map_df[map_df['IDS'].notna() & (map_df['IDS'] != '[]')]
    print(f"Vehicle mappings available: {len(mapped_vehicles)}/{len(map_df)}")

    return None

data_quality_check()

=== DATA QUALITY ASSESSMENT ===
TRG columns: ['Unnamed: 0', 'CTS', 'PNID', 'NAME', 'VAL']
TRG event types: {'CHARGE_STATE': 31367, 'IGN_CYL': 30880, 'EV_CHARGE_STATE': 6423}
TLM ignition data missing: 87.5%
Vehicle mappings available: 13/19


In [16]:
def extract_trg_ignition():
    """Extract ignition from TRG IGN_CYL events"""

    ignition_events = []

    # Filter TRG for ignition events
    trg_ign = trg_df[trg_df['NAME'] == 'IGN_CYL']

    for _, row in trg_ign.iterrows():
        # Clean timestamp
        clean_time = pd.to_datetime(row['CTS'].replace(' IST+0530', ''))

        # Map ignition state
        event_type = 'ignitionOn' if row['VAL'] == 'ON' else 'ignitionOff'

        ignition_events.append({
            'vehicle_id': None,  # No vehicle mapping available for TRG
            'event': event_type,
            'event_ts': clean_time,
            'source': 'TRG',
            'pnid': row['PNID']
        })

    return pd.DataFrame(ignition_events)

trg_ignition = extract_trg_ignition()
print(f"TRG ignition events: {len(trg_ignition)}")

TRG ignition events: 30880


In [17]:
def extract_tlm_ignition():
    """Extract ignition state changes from TLM"""

    ignition_events = []

    # Get TLM data with valid ignition status
    tlm_valid = tlm_df[tlm_df['IGNITION_STATUS'].notna()]

    for vehicle_id in tlm_valid['VEHICLE_ID'].unique():
        vehicle_data = tlm_valid[tlm_valid['VEHICLE_ID'] == vehicle_id].sort_values('TIMESTAMP')

        # Detect state changes
        vehicle_data['prev_status'] = vehicle_data['IGNITION_STATUS'].shift(1)
        changes = vehicle_data[
            (vehicle_data['IGNITION_STATUS'] != vehicle_data['prev_status']) &
            (vehicle_data['prev_status'].notna())
        ]

        for _, change in changes.iterrows():
            event_type = 'ignitionOn' if change['IGNITION_STATUS'] == 'on' else 'ignitionOff'

            ignition_events.append({
                'vehicle_id': vehicle_id,
                'event': event_type,
                'event_ts': pd.to_datetime(change['TIMESTAMP']),
                'source': 'TLM'
            })

    return pd.DataFrame(ignition_events)

tlm_ignition = extract_tlm_ignition()
print(f"TLM ignition events: {len(tlm_ignition)}")

TLM ignition events: 9409


In [18]:
def extract_syn_ignition():
    """Extract ground truth ignition-off events"""

    ignition_events = []

    for event in syn_data:
        ignition_events.append({
            'vehicle_id': event['vehicleId'],
            'event': 'ignitionOff',  # All SYN are ignition-off
            'event_ts': pd.to_datetime(event['timestamp']),
            'source': 'SYN'
        })

    return pd.DataFrame(ignition_events)

syn_ignition = extract_syn_ignition()
print(f"SYN ignition events: {len(syn_ignition)}")

SYN ignition events: 411


In [19]:
# Combine all ignition events
all_ignition_events = pd.concat([trg_ignition, tlm_ignition, syn_ignition], ignore_index=True)
print(f"Total ignition events: {len(all_ignition_events)}")

Total ignition events: 40700


In [20]:
def extract_charging_status():
    """Extract Active/Abort/Completed events from TRG"""

    charging_events = []

    # Filter TRG for charging status events
    trg_charge = trg_df[trg_df['NAME'] == 'EV_CHARGE_STATE']

    for _, row in trg_charge.iterrows():
        # Clean timestamp
        clean_time = pd.to_datetime(row['CTS'].replace(' IST+0530', ''))

        # Map charging status
        if row['VAL'] == 'Complete':
            event = 'Completed'
        elif row['VAL'] == 'Aborted':
            event = 'Abort'
        elif row['VAL'] == 'Active':
            event = 'Active'
        else:
            continue

        charging_events.append({
            'vehicle_id': None,  # No vehicle mapping
            'event': event,
            'event_ts': clean_time,
            'pnid': row['PNID'] # Will be changed
        })

    return pd.DataFrame(charging_events)

charging_status_events = extract_charging_status()
print(f"Charging status events: {len(charging_status_events)}")

Charging status events: 6423


In [21]:
def associate_battery_levels(events_df, window_seconds=300):
    """Find closest battery reading within ±300 seconds"""

    # Get battery readings from TRG
    battery_readings = trg_df[trg_df['NAME'] == 'CHARGE_STATE'].copy()
    battery_readings['timestamp'] = pd.to_datetime(
        battery_readings['CTS'].str.replace(' IST+0530', '')
    )
    battery_readings['battery_level'] = pd.to_numeric(
        battery_readings['VAL'], errors='coerce'
    )
    battery_readings = battery_readings[battery_readings['battery_level'].notna()]

    events_with_battery = []

    for _, event in events_df.iterrows():
        event_time = pd.to_datetime(event['event_ts'])
        event_pnid = event.get('pnid', None)

        # Match by PNID if available, otherwise use all readings
        if event_pnid:
            relevant_batteries = battery_readings[battery_readings['PNID'] == event_pnid]
        else:
            relevant_batteries = battery_readings

        if len(relevant_batteries) == 0:
            continue

        # Calculate time differences
        time_diffs = abs((relevant_batteries['timestamp'] - event_time).dt.total_seconds())

        # Find readings within window
        within_window = relevant_batteries[time_diffs <= window_seconds]

        if len(within_window) > 0:
            # Get closest reading (tie-breaker: first occurrence)
            closest_idx = time_diffs[time_diffs <= window_seconds].idxmin()
            closest_battery = battery_readings.loc[closest_idx]

            event_with_battery = event.copy()
            event_with_battery['battery_level'] = closest_battery['battery_level']
            event_with_battery['battery_time_diff'] = time_diffs[closest_idx]
            events_with_battery.append(event_with_battery)

    return pd.DataFrame(events_with_battery)

# Associate battery levels
ignition_with_battery = associate_battery_levels(all_ignition_events)
charging_with_battery = associate_battery_levels(charging_status_events)

print(f"Ignition events with battery: {len(ignition_with_battery)}")
print(f"Charging events with battery: {len(charging_with_battery)}")

Ignition events with battery: 28845
Charging events with battery: 5709


In [22]:
def detect_charging_events():
    """Detect meaningful charging events using business logic"""

    charging_events = []

    # Group charging status by PNID to track sessions
    for pnid in charging_status_events['pnid'].unique():
        pnid_events = charging_status_events[
            charging_status_events['pnid'] == pnid
        ].sort_values('event_ts')

        active_start = None

        for _, event in pnid_events.iterrows():
            if event['event'] == 'Active' and active_start is None:
                active_start = event

            elif event['event'] in ['Completed', 'Abort'] and active_start is not None:
                duration_minutes = (event['event_ts'] - active_start['event_ts']).total_seconds() / 60

                # Business logic: Real charging = Completed + >5 minutes
                if event['event'] == 'Completed' and duration_minutes > 5:
                    charging_events.append({
                        'vehicle_id': None,
                        'start_ts': active_start['event_ts'],
                        'end_ts': event['event_ts'],
                        'delta_battery_pct': 'unknown',  # Would need before/after levels
                        'ignition_state': 'unknown',
                        'session_quality': 'successful',
                        'duration_minutes': duration_minutes,
                        'pnid': pnid
                    })

                active_start = None

    return pd.DataFrame(charging_events)

charging_events = detect_charging_events()
print(f"Real charging events: {len(charging_events)}")

Real charging events: 236


In [23]:
# Create final IgnitionEvents.csv
ignition_final = all_ignition_events[['vehicle_id', 'event', 'event_ts']].copy()
ignition_final.to_csv('IgnitionEvents.csv', index=False)

# Create final ChargingEvents.csv
charging_final = charging_events[['vehicle_id', 'start_ts', 'end_ts', 'session_quality']].copy()
charging_final.to_csv('ChargingEvents.csv', index=False)

print("✅ Exported:")
print(f"   - IgnitionEvents.csv: {len(ignition_final)} events")
print(f"   - ChargingEvents.csv: {len(charging_final)} events")

✅ Exported:
   - IgnitionEvents.csv: 40700 events
   - ChargingEvents.csv: 236 events


In [24]:
def generate_key_insights():
    """Generate business insights for presentation"""

    print("=== KEY BUSINESS INSIGHTS ===")

    # Ignition analysis
    print(f"🔥 IGNITION EVENTS:")
    print(f"   - Total: {len(all_ignition_events)}")
    print(f"   - TRG (sensor): {len(trg_ignition)} events")
    print(f"   - TLM (vehicle): {len(tlm_ignition)} events")
    print(f"   - SYN (ground truth): {len(syn_ignition)} events")

    # Charging analysis
    if len(charging_status_events) > 0:
        total_attempts = len(charging_status_events) // 3  # Rough estimate
        success_rate = len(charging_events) / total_attempts * 100 if total_attempts > 0 else 0

        print(f"🔋 CHARGING ANALYSIS:")
        print(f"   - Status events: {len(charging_status_events)}")
        print(f"   - Successful sessions: {len(charging_events)}")
        print(f"   - Success rate: {success_rate:.1f}%")

    # Data quality insights
    battery_coverage = len(ignition_with_battery) / len(all_ignition_events) * 100
    print(f"📊 DATA QUALITY:")
    print(f"   - Events with battery data: {battery_coverage:.1f}%")
    print(f"   - Main challenge: Vehicle mapping gaps")

generate_key_insights()

=== KEY BUSINESS INSIGHTS ===
🔥 IGNITION EVENTS:
   - Total: 40700
   - TRG (sensor): 30880 events
   - TLM (vehicle): 9409 events
   - SYN (ground truth): 411 events
🔋 CHARGING ANALYSIS:
   - Status events: 6423
   - Successful sessions: 236
   - Success rate: 11.0%
📊 DATA QUALITY:
   - Events with battery data: 70.9%
   - Main challenge: Vehicle mapping gaps
