In [2]:
import pandas as pd

df = pd.read_csv('2022-autonomous-vehicle-disengagement-reports-csv.csv', encoding='ISO-8859-1')
print(df.columns)


Index(['Manufacturer', 'Permit Number', 'DATE', 'VIN NUMBER',
       'VEHICLE IS CAPABLE OF OPERATING WITHOUT A DRIVER\n(Yes or No)',
       'DRIVER PRESENT\n(Yes or No)',
       'DISENGAGEMENT INITIATED BY\n(AV System, Test Driver, Remote Operator, or Passenger)',
       'DISENGAGEMENT\nLOCATION\n(Interstate, Freeway, Highway, Rural Road, Street, or Parking Facility)',
       'DESCRIPTION OF FACTS CAUSING DISENGAGEMENT'],
      dtype='object')


In [3]:
#Summary of disengagements by Manufacturer
manufacturer_summary = df.groupby('Manufacturer').size()
print("Disengagements by Manufacturer:\n", manufacturer_summary, "\n")

Disengagements by Manufacturer:
 Manufacturer
AIMOTIVE INC.                                                680
APPLE INC.                                                  5982
ARGO AI, LLC                                                   1
AUTOX TECHNOLOGIES, INC                                        1
CRUISE LLC                                                     9
DIDI RESEARCH AMERICA, LLC                                     2
GATIK AI INC                                                   3
GHOST AUTONOMY INC                                           448
IMAGRY INC                                                   204
INTEL CORPORATION                                            155
MERCEDES-BENZ RESEARCH & DEVELOPMENT NORTH AMERICA, INC.      38
MOTIONAL AD, INC.                                            135
NISSAN NORTH AMERICA, INC DBA ALLIANCE INNOVATION LAB          8
NURO, INC                                                     15
NVIDIA CORPORATION                          

In [4]:
#Percentage of vehicles capable of operating without a driver
capable_vehicles = df[df['VEHICLE IS CAPABLE OF OPERATING WITHOUT A DRIVER\n(Yes or No)'].str.upper() == 'YES'].shape[0]
total_vehicles = df.shape[0]
percentage_capable = (capable_vehicles / total_vehicles) * 100

print(f"Percentage of Vehicles Capable of Operating Without a Driver: {percentage_capable:.2f}%\n")



Percentage of Vehicles Capable of Operating Without a Driver: 0.04%



In [16]:
# Renamed column
df = df.rename(columns={
    'DISENGAGEMENT INITIATED BY\n(AV System, Test Driver, Remote Operator, or Passenger)': 'DisengagementBy'
})



#Percentage of disengagements initiated by each entity
disengagement_by_entity = df['DisengagementBy'].value_counts(normalize=True) * 100
print("Disengagements Initiated By (in %):\n", disengagement_by_entity, "\n")

Disengagements Initiated By (in %):
 Test Driver    80.294547
AV System      11.355891
Test driver     8.300876
ADS             0.036514
AV system       0.012171
Name: DisengagementBy, dtype: float64 



In [6]:
import re

def extract_info(description):
    event_pattern = r"(.*?)[.]\s*Root cause:"
    root_cause_pattern = r"Root cause:\s*(.*?)[.]\s*Conditions:"
    conditions_pattern = r"Conditions:\s*(.*?)[.]"

    event = re.search(event_pattern, description)
    root_cause = re.search(root_cause_pattern, description)
    conditions = re.search(conditions_pattern, description)

    return event.group(1) if event else None, root_cause.group(1) if root_cause else None, conditions.group(1) if conditions else None


df['Event'], df['Root Cause'], df['Conditions'] = zip(*df['DESCRIPTION OF FACTS CAUSING DISENGAGEMENT'].apply(extract_info))

print(df['Root Cause'].value_counts())


object detection issue                        225
traffic situation                             124
Navigation/Localisation issue                  82
lane detection issue                           37
bad lane detection in exit/merge lane          15
limited control actuation, detection issue      4
object or lane detection issue                  1
Name: Root Cause, dtype: int64


In [7]:
#Traffic situtation subset
traffic_situation_descriptions = df[df['Root Cause'] == 'traffic situation']['DESCRIPTION OF FACTS CAUSING DISENGAGEMENT']
print(traffic_situation_descriptions.sample(10))


172    The test vehicle could not perform the lane ch...
170    The test vehicle could not perform the lane ch...
629    The test vehicle could not perform the lane ch...
311    The test vehicle could not perform the lane ch...
443    The test vehicle could not perform the lane ch...
396    The test vehicle could not perform the lane ch...
211    The test vehicle could not perform the lane ch...
315    The test vehicle could not perform the lane ch...
428    The test vehicle could not perform the lane ch...
218    The test vehicle could not perform the lane ch...
Name: DESCRIPTION OF FACTS CAUSING DISENGAGEMENT, dtype: object


In [8]:
traffic_situation_events = df[df['Root Cause'] == 'traffic situation']['Event']
print(traffic_situation_events.value_counts().head(10))

traffic_situation_conditions = df[df['Root Cause'] == 'traffic situation']['Conditions']
print(traffic_situation_conditions.value_counts().head(10))


The test vehicle could not perform the lane change into the exit lane or merge to highway and could not follow the planned route    124
Name: Event, dtype: int64
Non-inclement weather, dry roads, no other factors involved    124
Name: Conditions, dtype: int64


In [25]:
from fuzzywuzzy import process


common_causes = [
    "object detection issue",
    "traffic situation",
    "Navigation/Localisation issue",
    "lane detection issue",
    "bad lane detection in exit/merge lane",
    "limited control actuation, detection issue",
    "object or lane detection issue"
]

# Function to classify description into one of the common causes
def classify_description(desc):
    match, score = process.extractOne(desc, common_causes)
    if score > 80:  
        return match
    return "Other"

df['Cause Category'] = df['DESCRIPTION OF FACTS CAUSING DISENGAGEMENT'].apply(classify_description)  # Replace 'DescriptionColumnName' with actual column name


print(df['Cause Category'].value_counts())


bad lane detection in exit/merge lane         4443
object detection issue                        1263
limited control actuation, detection issue    1167
Other                                          856
Navigation/Localisation issue                  335
lane detection issue                           110
traffic situation                               41
object or lane detection issue                   1
Name: Cause Category, dtype: int64


In [28]:
#Percentages of each category
percentages = (counts / counts.sum()) * 100

print(percentages)


bad lane detection in exit/merge lane         54.077410
object detection issue                        15.372444
limited control actuation, detection issue    14.203992
Other                                         10.418695
Navigation/Localisation issue                  4.077410
lane detection issue                           1.338851
traffic situation                              0.499026
object or lane detection issue                 0.012171
Name: Cause Category, dtype: float64


In [29]:
from fuzzywuzzy import process


common_causes = [
    "Unnecessary lane change",
    "test vehicle could have got too close to another vehicle",
    "SW couldnt perform the manuever safely",
    "The test vehicle attepted to lane change to shoulder/express lane",
    "During a lane change a faster car approached in the target lane",
    "limited control actuation, detection issue",
    "object or lane detection issue"
]

# Function to classify description into one of the common causes
def classify_description(desc):
    match, score = process.extractOne(desc, common_causes)
    if score > 80:  # You can adjust this threshold based on your needs
        return match
    return "Other"


df['Cause Category'] = df['DESCRIPTION OF FACTS CAUSING DISENGAGEMENT'].apply(classify_description)  # Replace 'DescriptionColumnName' with actual column name

# Print counts of each category
print(df['Cause Category'].value_counts())

test vehicle could have got too close to another vehicle             7293
Other                                                                 478
Unnecessary lane change                                               155
The test vehicle attepted to lane change to shoulder/express lane     128
limited control actuation, detection issue                             69
SW couldnt perform the manuever safely                                 44
During a lane change a faster car approached in the target lane        40
object or lane detection issue                                          9
Name: Cause Category, dtype: int64
