In [1]:
import pandas as pd
import numpy as np

# These CSV files are from the NHTSA crash data available at thier website. 
# The link is provided in the README.md file.
accident = pd.read_csv(r"../Primary_Sources/accident.csv")
person = pd.read_csv(r"../Primary_Sources/person.csv")
vehicle = pd.read_csv(r"../Primary_Sources/vehicle.csv")

merged = pd.merge(vehicle, accident)
print(merged)

  vehicle = pd.read_csv(r"../Primary_Sources/vehicle.csv")


       STATE STATENAME  ST_CASE  VEH_NO  VE_FORMS  MONTH MONTHNAME  DAY  \
0          1   Alabama    10001       1         1      1   January    4   
1          1   Alabama    10002       1         1      3     March   20   
2          1   Alabama    10003       1         1      3     March   18   
3          1   Alabama    10004       1         1      3     March   17   
4          1   Alabama    10005       1         1      3     March   17   
...      ...       ...      ...     ...       ...    ...       ...  ...   
58314     56   Wyoming   560118       1         1     12  December   26   
58315     56   Wyoming   560119       1         1     12  December   27   
58316     56   Wyoming   560120       1         2     12  December   29   
58317     56   Wyoming   560120       2         2     12  December   29   
58318     56   Wyoming   560121       1         1     12  December   31   

       DAYNAME  HOUR  ...          NOT_MINNAME  ARR_HOUR  \
0            4    22  ...              

In [None]:
# --- FINAL SCRIPT - RUN THIS IN A CLEAN NOTEBOOK ---

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# --- 1. ROBUST DATA LOADING ---
# This method builds the path from your script's location, making it reliable.
try:
    script_dir = Path(__file__).parent
except NameError:
    # Fallback for interactive environments like some notebooks
    script_dir = Path.cwd()

data_folder = script_dir / "Primary_Sources"
accident_path = data_folder / "accident.csv"
vehicle_path = data_folder / "vehicle.csv"

print(f"Attempting to load data from: {data_folder}")
try:
    accident = pd.read_csv(accident_path)
    vehicle = pd.read_csv(vehicle_path, low_memory=False)
    print("Files loaded successfully.")
except FileNotFoundError:
    print(f"ERROR: Could not find files in {data_folder}. Please check your folder structure.")
    # Exit if files aren't found, no need to proceed
    exit()

# --- 2. PREPROCESSING (Correct Order) ---
# Be explicit with the merge key
merged = pd.merge(vehicle, accident, on='ST_CASE', how='inner')

# Filter for light-duty vehicles using the code we confirmed is correct.
light_vehicles_df = merged[merged['GVWR_FROM'] == 11].copy()

# Define the body type map
body_type_map = {
    1: 'Convertible', 2: 'Minivan', 3: 'Coupe',
    5: 'Hatchback', 7: 'SUV/MPV', 8: 'CUV',
    10: 'Roadster', 13: 'Sedan/Saloon', 60: 'Pickup'
}

# Create the readable 'BodyType_Name' column
light_vehicles_df['BodyType_Name'] = light_vehicles_df['BODY_TYP'].map(body_type_map)
light_vehicles_df.dropna(subset=['BodyType_Name'], inplace=True)

# --- 3. ANALYSIS ---
# Define the correct SUV/CUV codes
suv_cuv_codes = [7, 8]

# Create the boolean 'Is_Crossover' column
light_vehicles_df['Is_Crossover_SUV'] = light_vehicles_df['BODY_TYP'].isin(suv_cuv_codes)

# Perform the final counts on the correctly filtered and prepared DataFrame
crossover_comparison = light_vehicles_df['Is_Crossover_SUV'].value_counts()
crash_counts = light_vehicles_df['BodyType_Name'].value_counts()
fatal_by_body = light_vehicles_df.groupby('BodyType_Name')['FATALS'].sum().sort_values(ascending=False)

print("\n--- Crossover/SUV vs. Other Light Vehicle Crashes ---")
print(crossover_comparison)

# --- 4. VISUALIZATION ---
print("\nGenerating plots...")

# Plot 1: Crossover/SUV vs. Other
plt.figure(figsize=(8, 6))
sns.barplot(x=crossover_comparison.index, y=crossover_comparison.values)
plt.title('Fatal Crashes: Crossovers/SUVs vs. Other Light Vehicles', fontsize=16)
plt.ylabel('Number of Fatal Crashes', fontsize=12)
plt.xticks(ticks=[0, 1], labels=['Other Light Vehicles', 'Crossovers/SUVs'])
plt.tight_layout()
plt.show()

# Plot 2: Total Crashes by Body Type
plt.figure(figsize=(12, 8))
sns.barplot(x=crash_counts.index, y=crash_counts.values)
plt.xlabel('Vehicle Body Type')
plt.ylabel('Number of Fatal Crashes')
plt.title('Fatal Crashes by Light-Duty Vehicle Body Type')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()