In [6]:
import pandas as pd

# Raise pandas' display limits (number of columns / rows shown before truncation).
for option_name, option_value in (
  ('display.max_columns', 100),
  ('display.max_rows', 1000),
):
  pd.set_option(option_name, option_value)

### Load NHTSA and Ford AsBuilt Data for vehicles

Filtered to only include the Aviator and Explorer for this example.

In [8]:
from nhtsa import decode_vins
from vins import load_vins

# Collect the VINs for both models and decode them with decode_vins.
vins = (
  load_vins(filter_comment='aviator', include_openpilot=True)
  + load_vins(filter_comment='explorer', include_openpilot=True)
)
df_nhtsa = decode_vins(vins)

# Show how the decoded vehicles are distributed before any filtering.
for count_column in ('ModelYear', 'Series'):
  print(df_nhtsa[count_column].value_counts())

# Keep model years 2019 and later, and drop every Series that mentions "Police".
is_recent = df_nhtsa['ModelYear'].astype(int) >= 2019
is_police = df_nhtsa['Series'].str.contains('Police', na=False)
df_nhtsa = df_nhtsa[is_recent & ~is_police]

len(df_nhtsa)

Loaded 37 VINs (filter_comment='aviator', include_openpilot=True, skipped=61)


Downloading AsBuilt data: 100%|██████████| 1/1 [00:06<00:00,  6.56s/it]


Loaded AsBuilt data for 37 VINs
Loaded 326 VINs (filter_comment='explorer', include_openpilot=True, skipped=61)
Loaded AsBuilt data for 326 VINs


Decoding VINs: 100%|██████████| 363/363 [00:00<00:00, 966.59it/s]

Decoded 363 VINs
ModelYear
2021    151
2020     91
2022     73
2023     34
2024      7
2019      4
2017      2
2018      1
Name: count, dtype: int64
Series
Limited          144
XLT               91
Base              47
Platinum          23
Reserve           17
ST                14
Standard           9
Grand Touring      4
                   4
Police             4
Timberline         3
Livery             1
Sport              1
King Ranch         1
Name: count, dtype: int64





356

In [9]:
# Apply fixes
def _displacement_litres(raw):
  """Round a displacement value to one decimal place; falsy values become None."""
  return round(float(raw), 1) if raw else None


def _drive_type(raw):
  """Map any drive type containing '4WD' to '4WD' and everything else to 'RWD'."""
  return '4WD' if '4WD' in raw else 'RWD'


df_nhtsa['DisplacementL'] = df_nhtsa['DisplacementL'].apply(_displacement_litres)
df_nhtsa['DriveType'] = df_nhtsa['DriveType'].apply(_drive_type)

df_nhtsa.head()

Unnamed: 0,ABS,ActiveSafetySysNote,AdaptiveCruiseControl,AdaptiveDrivingBeam,AirBagLocFront,AirBagLocKnee,AirBagLocSide,AutoReverseSystem,Axles,BlindSpotMon,BodyClass,BrakeSystemType,BusType,CAN_AACN,CIB,DaytimeRunningLight,DisplacementCC,DisplacementCI,DisplacementL,Doors,DriveType,DynamicBrakeSupport,EDR,ESC,ElectrificationLevel,EngineConfiguration,EngineCylinders,EngineHP,EngineManufacturer,EngineModel,ForwardCollisionWarning,FuelInjectionType,FuelTypePrimary,FuelTypeSecondary,GVWR,KeylessIgnition,LaneCenteringAssistance,LaneDepartureWarning,LaneKeepSystem,LowerBeamHeadlampLightSource,Make,MakeID,Manufacturer,ManufacturerId,Model,ModelID,ModelYear,NCSABodyType,NCSAMake,NCSAModel,OtherEngineInfo,ParkAssist,PedestrianAutomaticEmergencyBraking,PlantCity,PlantCompanyName,PlantCountry,PlantState,RearAutomaticEmergencyBraking,RearCrossTrafficAlert,RearVisibilitySystem,SeatBeltsAll,SeatRows,Seats,SemiautomaticHeadlampBeamSwitching,Series,SteeringLocation,TPMS,TopSpeedMPH,TractionControl,TransmissionSpeeds,TransmissionStyle,Trim,Turbo,VIN,VehicleDescriptor,VehicleType,WheelBaseShort,WheelSizeFront,WheelSizeRear,Wheels
0,Standard,,,,1st Row (Driver and Passenger),1st Row (Driver and Passenger),1st and 2nd and 3rd Rows,Standard,2.0,Standard,Sport Utility Vehicle (SUV)/Multi-Purpose Vehi...,Hydraulic,Not Applicable,Standard,Standard,Standard,3000.0,183.07123228419,3.0,4,RWD,Standard,,Standard,,V-Shaped,6,400,Ford,,Standard,,Gasoline,,"Class 2E: 6,001 - 7,000 lb (2,722 - 3,175 kg)",Standard,,Standard,Standard,LED,LINCOLN,464,FORD MOTOR COMPANY,976,Aviator,2653,2020,Large utility (ANSI D16.1 Utility Vehicle Cate...,Lincoln,Aviator,Net Brake H.P.: 400 w/prem,,Standard,CHICAGO,,UNITED STATES (USA),ILLINOIS,,,Standard,Manual,3,7,Standard,Reserve,Left-Hand Drive (LHD),Direct,,Standard,,,,,5LM5J7XC9LGL15606,5LM5J7XC*LG,MULTIPURPOSE PASSENGER VEHICLE (MPV),119.1,,,4.0
1,Standard,,Optional,Optional,1st Row (Driver and Passenger),1st Row (Driver and Passenger),1st and 2nd and 3rd Rows,Standard,2.0,Standard,Sport Utility Vehicle (SUV)/Multi-Purpose Vehi...,Hydraulic,Not Applicable,Standard,Standard,Standard,3000.0,183.07123228419,3.0,4,RWD,Standard,,Standard,,V-Shaped,6,400,Ford,GTDI,Standard,Stoichiometric Gasoline Direct Injection (SGDI),Gasoline,,"Class 2E: 6,001 - 7,000 lb (2,722 - 3,175 kg)",Standard,Optional,Standard,Standard,LED,LINCOLN,464,FORD MOTOR COMPANY,976,Aviator,2653,2021,Large utility (ANSI D16.1 Utility Vehicle Cate...,Lincoln,Aviator,,Optional,Standard,CHICAGO,,UNITED STATES (USA),ILLINOIS,,Standard,Standard,Manual,3,6,Standard,Reserve,Left-Hand Drive (LHD),Direct,,Standard,10.0,Automatic,,Yes,5LM5J7XC8MGL09541,5LM5J7XC*MG,MULTIPURPOSE PASSENGER VEHICLE (MPV),119.1,20.0,20.0,4.0
2,Standard,,,,1st Row (Driver and Passenger),1st Row (Driver and Passenger),1st and 2nd and 3rd Rows,Standard,,Standard,Sport Utility Vehicle (SUV)/Multi-Purpose Vehi...,Hydraulic,Not Applicable,Standard,Standard,Standard,3000.0,183.07123228419,3.0,4,RWD,Standard,,Standard,,V-Shaped,6,400,Ford,GTDI,Standard,,Gasoline,,"Class 2E: 6,001 - 7,000 lb (2,722 - 3,175 kg)",Standard,,Standard,Standard,LED,LINCOLN,464,FORD MOTOR COMPANY,976,Aviator,2653,2022,Large utility (ANSI D16.1 Utility Vehicle Cate...,Lincoln,Aviator,,,Standard,CHICAGO,Chicago Assembly,UNITED STATES (USA),ILLINOIS,,Standard,Standard,Manual,3,7,Standard,Base,,Direct,,Standard,10.0,Automatic,,Yes,5LM5J6WC1NGL16313,5LM5J6WC*NG,MULTIPURPOSE PASSENGER VEHICLE (MPV),,,,
3,Standard,,,,1st Row (Driver and Passenger),1st Row (Driver and Passenger),1st and 2nd and 3rd Rows,Standard,2.0,Standard,Sport Utility Vehicle (SUV)/Multi-Purpose Vehi...,Hydraulic,Not Applicable,Standard,Standard,Standard,3000.0,183.07123228419,3.0,4,RWD,Standard,,Standard,,V-Shaped,6,400,Ford,,Standard,,Gasoline,,"Class 2E: 6,001 - 7,000 lb (2,722 - 3,175 kg)",Standard,,Standard,Standard,LED,LINCOLN,464,FORD MOTOR COMPANY,976,Aviator,2653,2020,Large utility (ANSI D16.1 Utility Vehicle Cate...,Lincoln,Aviator,Net Brake H.P.: 400 w/prem,,Standard,CHICAGO,,UNITED STATES (USA),ILLINOIS,,,Standard,Manual,3,7,Standard,Standard,Left-Hand Drive (LHD),Direct,,Standard,,,,,5LM5J6XC4LGL07567,5LM5J6XC*LG,MULTIPURPOSE PASSENGER VEHICLE (MPV),119.1,,,4.0
4,Standard,,,,1st Row (Driver and Passenger),1st Row (Driver and Passenger),1st and 2nd and 3rd Rows,Standard,2.0,Standard,Sport Utility Vehicle (SUV)/Multi-Purpose Vehi...,Hydraulic,Not Applicable,Standard,Standard,Standard,3000.0,183.07123228419,3.0,4,RWD,Standard,,Standard,,V-Shaped,6,400,Ford,,Standard,,Gasoline,,"Class 2E: 6,001 - 7,000 lb (2,722 - 3,175 kg)",Standard,,Standard,Standard,LED,LINCOLN,464,FORD MOTOR COMPANY,976,Aviator,2653,2020,Large utility (ANSI D16.1 Utility Vehicle Cate...,Lincoln,Aviator,Net Brake H.P.: 400 w/prem,,Standard,CHICAGO,,UNITED STATES (USA),ILLINOIS,,,Standard,Manual,3,7,Standard,Standard,Left-Hand Drive (LHD),Direct,,Standard,,,,,5LM5J6WC7LGL33162,5LM5J6WC*LG,MULTIPURPOSE PASSENGER VEHICLE (MPV),119.1,,,4.0


### Reduce to interesting columns

The NHTSA data has lots of columns, so we filter out the ones which don't change or aren't interesting to us.

In [10]:
# Columns that the property scan below never reports, even if their values
# differ between vehicles.
SKIP = [
  'AirBagLocKnee',
  'AirBagLocSide',
  'DisplacementCC',
  'DisplacementCI',
  'EngineCylinders',
  'GVWR',
  'LowerBeamHeadlampLightSource',
  'Make',
  'MakeID',
  'ModelID',
  'NCSABodyType',
  'NCSAMake',
  'NCSAModel',
  'PlantCity',
  'PlantCompanyName',
  'PlantState',
  'Seats',
  'WheelSizeFront',
  'WheelSizeRear',
  'VIN',
  'VehicleDescriptor',
]

# Columns that are still reported when some vehicles have an empty ('') value;
# any other column containing '' is ignored by the scan.
KEEP = [
  'DisplacementL',
  'FuelTypeSecondary',
  'Series',
  'Trim',
]

# Map each informative column to the set of distinct values it takes.
properties = {}

for col in df_nhtsa.columns:
  if col in SKIP:
    continue

  distinct_values = set(df_nhtsa[col].unique())
  has_blank = '' in distinct_values
  is_constant = len(distinct_values) == 1

  # Columns with blanks are ignored unless listed in KEEP; a column with a
  # single distinct value cannot tell vehicles apart.
  if (has_blank and col not in KEEP) or is_constant:
    continue

  properties[col] = distinct_values

properties

{'DisplacementL': {2.3, 3.0, 3.3, 3.5},
 'DriveType': {'4WD', 'RWD'},
 'FuelTypePrimary': {'Electric', 'Gasoline'},
 'FuelTypeSecondary': {'', 'Electric', 'Gasoline'},
 'Model': {'Aviator', 'Explorer'},
 'ModelYear': {'2019', '2020', '2021', '2022', '2023', '2024'},
 'Series': {'',
  'Base',
  'Grand Touring',
  'King Ranch',
  'Limited',
  'Livery',
  'Platinum',
  'Reserve',
  'ST',
  'Standard',
  'Timberline',
  'XLT'},
 'Trim': {'', 'Base', 'Limited', 'XLT'}}

### Combine NHTSA and Ford AsBuilt Data

We fetch the factory part numbers (software and hardware) from the Ford AsBuilt data and combine them with the NHTSA data.

In [13]:
from panda.python.uds import DATA_IDENTIFIER_TYPE

from asbuilt import AsBuiltData
from ecu import FordEcu
from settings import VehicleSetting, VehicleSettings


# Start from a copy of the NHTSA frame; the firmware columns are added to df_fw below.
df_fw = df_nhtsa.copy()


def get_ecu_identifier(ecu: FordEcu, identifier: int):
  """Build a row-wise callback that reads one identifier of ``ecu``.

  The returned function takes a row with a ``'VIN'`` entry, loads the AsBuilt
  data for that VIN and returns ``get_identifier(ecu, identifier)`` from it,
  or ``''`` when ``ecu`` is not among the AsBuilt data's ECUs.
  """
  def read_identifier(row):
    as_built = AsBuiltData.from_vin(row['VIN'])
    return as_built.get_identifier(ecu, identifier) if ecu in as_built.ecus else ''
  return read_identifier


def get_setting(setting: VehicleSetting):
  """Build a row-wise callback that reads the value of ``setting``.

  The returned function takes a row with a ``'VIN'`` entry, loads the AsBuilt
  data for that VIN and returns ``get_setting_value(setting)`` from it, or
  ``''`` when ``setting.ecu`` is not among the AsBuilt data's ECUs.
  """
  def read_setting(row):
    as_built = AsBuiltData.from_vin(row['VIN'])
    return as_built.get_setting_value(setting) if setting.ecu in as_built.ecus else ''
  return read_setting


# Decide whether 'Trim' has to be merged into 'Series'. On a fresh run that is the
# case when both are in `properties`. The merge below pops 'Trim' from
# `properties`, so when this cell is re-run without recomputing `properties` we
# fall back to checking the data itself - otherwise the column drop would silently
# discard 'Trim' and leave 'Series' empty for the vehicles that only report a Trim.
merge_trim = 'Series' in properties and (
  'Trim' in properties
  or ('Trim' in df_fw.columns and df_fw['Trim'].nunique(dropna=False) > 1)
)


# Drop columns that we don't care about (not in the properties)
keep_columns = set(properties) | {'VIN'}
if merge_trim:
  keep_columns.add('Trim')
df_fw = df_fw.drop(columns=[col for col in df_fw.columns if col not in keep_columns])


# Merge 'Series' and 'Trim' properties (they should be mutually exclusive)
if merge_trim:
  def get_series_or_trim(row):
    """Return whichever of Series / Trim is set for the row.

    Raises:
      ValueError: if both Series and Trim are non-empty for the same vehicle.
    """
    series, trim = row['Series'], row['Trim']
    if series and trim:
      raise ValueError(f'{row["VIN"]} Both Series and Trim are set: {series} and {trim}')
    return series or trim


  df_fw['Series'] = df_fw.apply(get_series_or_trim, axis=1)
  df_fw = df_fw.drop(columns=['Trim'])
  # 'Trim' no longer exists as a column, so it must not be used as a sort key later.
  properties.pop('Trim', None)


# Add the ECU identifiers
ecus = {
  'abs': FordEcu.AntiLockBrakeSystem,
  # 'engine': FordEcu.PowertrainControlModule,
  'eps': FordEcu.PowerSteeringControlModule,
  'fwdCamera': FordEcu.ImageProcessingModuleA,
  'fwdRadar': FordEcu.CruiseControlModule,
}
for name, ecu in ecus.items():
  # One callback per identifier: the software number fills '<name>_fw',
  # identifier 0xF111 fills '<name>_part'.
  read_fw = get_ecu_identifier(ecu, DATA_IDENTIFIER_TYPE.VEHICLE_MANUFACTURER_ECU_SOFTWARE_NUMBER)
  df_fw[f'{name}_fw'] = df_fw.apply(read_fw, axis=1)

  read_part = get_ecu_identifier(ecu, 0xF111)
  df_fw[f'{name}_part'] = df_fw.apply(read_part, axis=1)


# Apply filters
constants = {
  # 'ModelYear': '2020',
  # 'DisplacementL': '1.5',
  # 'DriveType': '4x2',
  # 'EngineCylinders': '3',
  # 'Series': 'Titanium',
}
for col, value in constants.items():
  # Keep only the rows matching the constant, then remove the now-constant column.
  matches_constant = df_fw[col] == value
  df_fw = df_fw[matches_constant].drop(columns=[col])


# Drop columns that are all empty
has_content = df_fw.ne('').any(axis=0)
df_fw = df_fw.loc[:, has_content]


# Add settings
settings = {
  # TODO: read this from multiple modules to check that it's consistent
  'acc': VehicleSettings.ipma_enable_adaptive_cruise,
  'lca': VehicleSettings.ipma_enable_traffic_jam_assist,
}
for name, setting in settings.items():
  # Each setting becomes a 'code_<name>' column, looked up per VIN.
  read_setting_value = get_setting(setting)
  df_fw[f'code_{name}'] = df_fw.apply(read_setting_value, axis=1)


# Drop the VIN
df_fw = df_fw.drop(columns=['VIN'])


# Sort by columns
# Build the sort keys in a fixed order: iterating `properties` (a dict) instead of
# going through a set keeps the key priority identical between kernel restarts, so
# the resulting row order and index are reproducible.
extra_columns = [col for col in properties if col not in ('ModelYear', 'Series')]

sort_columns = (
  ['ModelYear', 'Series']
  + [f'code_{name}' for name in settings]
  + extra_columns
  + [f'{name}_fw' for name in ecus]
)
# Earlier steps may have removed some of these columns (constant filters, columns
# that were entirely empty), so only sort by the ones that are still present.
sort_columns = [col for col in sort_columns if col in df_fw.columns]

df_fw = df_fw.sort_values(by=sort_columns, ascending=False, ignore_index=True)


# Drop columns that are all the same
# df_fw = df_fw.loc[:, df_fw.apply(pd.Series.nunique) != 1]


# Add asterisks to column name where the value is the same for all rows
# df_fw.rename(
#   columns={
#     col: f'*{col}' if len(set(df_fw[col].unique())) == 1 else col
#     for col in df_fw.columns
#   },
#   inplace=True,
# )

# Remove duplicate rows
count = len(df_fw)
df_fw = df_fw.drop_duplicates()
removed = count - len(df_fw)
print(f'Removed {removed} duplicate rows')


df_fw

Removed 214 duplicate rows


Unnamed: 0,DisplacementL,DriveType,FuelTypePrimary,FuelTypeSecondary,Model,ModelYear,Series,abs_fw,abs_part,eps_fw,eps_part,fwdCamera_fw,fwdCamera_part,fwdRadar_fw,fwdRadar_part,code_acc,code_lca
0,2.3,4WD,Gasoline,,Explorer,2024,XLT,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,,,Off,Off
4,2.3,4WD,Gasoline,,Explorer,2024,Timberline,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On
5,3.0,4WD,Gasoline,,Explorer,2024,ST,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On
7,2.3,RWD,Gasoline,,Explorer,2023,XLT,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On
8,2.3,4WD,Gasoline,,Explorer,2023,XLT,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On
9,2.3,RWD,Gasoline,,Explorer,2023,XLT,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,,,Off,Off
11,2.3,4WD,Gasoline,,Explorer,2023,XLT,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,,,Off,Off
19,3.0,4WD,Gasoline,,Explorer,2023,ST,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On
23,3.0,RWD,Gasoline,,Aviator,2023,Reserve,L1MC-2D053-KB,L1MC-14F065-DA,M1MC-14D003-AC,M1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On
26,3.3,4WD,Gasoline,Electric,Explorer,2023,Platinum,L1MC-2D053-KB,L1MC-14F065-DA,P1MC-14D003-AA,P1MC-14F079-AA,LB5T-14F397-AF,LB5T-14F403-CA,LB5T-14D049-AB,LB5T-14F089-AA,RadarFusion,On


In [37]:
# Copy df_fw into a separate frame that the per-ECU summary below reads from.
df_fw_analysis = df_fw.copy()

# Display order for trim levels; a trim's position in this list is its sort rank.
SORTED_TRIM_LEVELS = [
  'Base',
  'XL',
  'XLT',
  'Limited',
  'ST',
  'Platinum',
  'Timberline',
  'King Ranch',
  # Aviator
  'Standard',
  'Reserve',
  'Livery',
  'Grand Touring',
]

def sort_trim_level(trim):
  """Return the sort rank of ``trim``.

  The rank is the position of ``trim`` in ``SORTED_TRIM_LEVELS``; any value not
  in that list gets 1000 so that it sorts after every known trim.
  """
  known_ranks = (rank for rank, level in enumerate(SORTED_TRIM_LEVELS) if level == trim)
  return next(known_ranks, 1000)

# For every ECU (last one first), group the vehicles by that ECU's (part, fw)
# pair and print which vehicle configurations fall into each group.
for ecu in reversed(ecus.keys()):
  print()
  print(f'# Ecu.{ecu}')

  # Drop the fw/part columns of the other ECUs and expose this ECU's columns
  # under the generic names 'fw' and 'part' before grouping.
  fw_groups = df_fw_analysis \
    .drop(
      columns=[f'{name}_fw' for name in ecus.keys() if name != ecu] + [f'{name}_part' for name in ecus.keys() if name != ecu],
    ) \
    .rename(
      columns={
        f'{ecu}_fw': 'fw',
        f'{ecu}_part': 'part',
      },
    ) \
    .groupby(
      by=['part', 'fw'],
      dropna=False,
    )
  # print(fw_groups.nunique().to_string())

  # Details are only printed when the ECU has more than one (part, fw) combination.
  if len(fw_groups) > 1:
    for name, group in fw_groups:
      print(name)
      # print(group.groupby(
      #   by=['ModelYear', 'Model', 'Series'] + list(set(group.columns) - {'ModelYear', 'Model', 'Series', 'code_acc', 'code_lca', 'part', 'fw'}) + ['code_acc', 'code_lca'],
      #   dropna=False,
      # ).count().to_string())

      drive_type = group["DriveType"].unique()
      code_acc = group["code_acc"].unique()

      # A vehicle counts as a hybrid when it reports any secondary fuel type,
      # whichever of Gasoline / Electric is listed as the primary one. Checking
      # only for a secondary 'Electric' would miss vehicles listed with primary
      # 'Electric' and secondary 'Gasoline'.
      is_hybrid = group["FuelTypeSecondary"].fillna('') != ''
      hybrid = "Both" if is_hybrid.any() and not is_hybrid.all() else "Yes" if is_hybrid.any() else "No"

      print(f'  Model        : {group["Model"].unique()}')
      print(f'  ModelYear    : {sorted(group["ModelYear"].unique())}')
      print(f'  Series       : {", ".join(sorted(group["Series"].unique(), key=sort_trim_level))}')
      print(f'  DisplacementL: {", ".join(map(str, sorted(group["DisplacementL"].unique())))}')
      print(f'  4WD?         : {"Both" if "4WD" in drive_type and "RWD" in drive_type else "Yes" if "4WD" in drive_type else "No"}')
      print(f'  Hybrid?      : {hybrid}')
      print(f'  ACC?         : {"Both" if "RadarFusion" in code_acc and ("Off" in code_acc or "" in code_acc) else "Yes" if "RadarFusion" in code_acc else "No"}')

      print()

  print()



# Ecu.fwdRadar
('', '')
  Model        : ['Explorer' 'Aviator']
  ModelYear    : ['2019', '2020', '2021', '2022', '2023', '2024']
  Series       : Base, XLT, Standard, Reserve, Livery, Grand Touring, 
  DisplacementL: 2.3, 3.0, 3.3, 3.5
  4WD?         : Both
  Hybrid?      : No
  ACC?         : No

('LB5T-14F089-AA', 'LB5T-14D049-AB')
  Model        : ['Explorer' 'Aviator']
  ModelYear    : ['2020', '2021', '2022', '2023', '2024']
  Series       : XLT, Limited, ST, Platinum, Timberline, King Ranch, Reserve, Grand Touring
  DisplacementL: 2.3, 3.0, 3.3
  4WD?         : Both
  Hybrid?      : Both
  ACC?         : Yes



# Ecu.fwdCamera
('', '')
  Model        : ['Explorer']
  ModelYear    : ['2019']
  Series       : 
  DisplacementL: 2.3, 3.5
  4WD?         : Both
  Hybrid?      : No
  ACC?         : No

('LB5T-14F403-CA', 'LB5T-14F397-AD')
  Model        : ['Explorer' 'Aviator']
  ModelYear    : ['2020']
  Series       : XLT, Limited, ST, Platinum, Standard
  DisplacementL: 2.3, 3.0
  