In [1]:
# Import necessary libraries
import pandas as pd

# Function to load and inspect NEISS main data files
def load_neiss_files(years, data_dir="."):
    """Load one NEISS CSV file per year into a dictionary of DataFrames.

    Each file is expected to be named ``NEISS_<year>.csv`` and to live in
    *data_dir*. After each file is read, a one-line summary containing the
    resulting DataFrame's shape is printed.

    Args:
        years: Iterable of years; each value is interpolated into the file
            name as-is.
        data_dir: Directory that holds the CSV files. Defaults to the
            current working directory, which is where the files were
            looked up before this parameter existed.

    Returns:
        A dict mapping each year to the DataFrame read from its file, in
        the order the years were supplied. Empty if *years* is empty.

    Raises:
        Whatever ``pandas.read_csv`` raises for an unreadable file (e.g.
        ``FileNotFoundError`` when a year's CSV is missing); loading stops
        at the first failure.
    """
    # Local import so the block does not depend on a file-level import.
    from pathlib import Path

    base_dir = Path(data_dir)
    neiss_data = {}

    # Load each NEISS file into a dictionary where the key is the year
    for year in years:
        file_path = base_dir / f'NEISS_{year}.csv'
        data = pd.read_csv(file_path)
        neiss_data[year] = data
        print(f"Loaded NEISS data for year {year}. Shape: {data.shape}")

    return neiss_data

# Function to inspect the structure and summary of the NEISS data
def inspect_neiss_data(neiss_data):
    """Print a structural summary of every yearly NEISS DataFrame.

    For each ``(year, DataFrame)`` pair the following is written to
    standard output: a header line, the ``DataFrame.info()`` report, the
    first rows (``head()``), the per-column count of missing values, and a
    separator line of 50 ``=`` characters.

    Args:
        neiss_data: Mapping of year -> pandas DataFrame, as returned by
            ``load_neiss_files``.

    Returns:
        None. The function only prints.
    """
    for year, data in neiss_data.items():
        print(f"### Inspection for NEISS {year} ###")
        # DataFrame.info() prints its report itself and returns None, so it
        # must not be wrapped in print() -- that would emit a stray "None".
        data.info()
        print("\nFirst few rows:\n", data.head())
        print("\nMissing values per column:\n", data.isnull().sum())
        print("="*50)

# Step 1: Years to process -- one NEISS file per year, 2014 through 2023
years = [*range(2014, 2024)]

# Step 2: Read every yearly NEISS file into a {year: DataFrame} mapping
neiss_data = load_neiss_files(years)

# Step 3: Print structure, sample rows and missing-value counts per year
inspect_neiss_data(neiss_data)


Loaded NEISS data for year 2014. Shape: (367492, 25)
Loaded NEISS data for year 2015. Shape: (359129, 25)
Loaded NEISS data for year 2016. Shape: (375196, 25)
Loaded NEISS data for year 2017. Shape: (386906, 25)
Loaded NEISS data for year 2018. Shape: (361667, 25)
Loaded NEISS data for year 2019. Shape: (358715, 25)
Loaded NEISS data for year 2020. Shape: (309370, 25)
Loaded NEISS data for year 2021. Shape: (340442, 25)
Loaded NEISS data for year 2022. Shape: (323343, 25)
Loaded NEISS data for year 2023. Shape: (338262, 25)
### Inspection for NEISS 2014 ###
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 367492 entries, 0 to 367491
Data columns (total 25 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   CPSC_Case_Number   367492 non-null  int64  
 1   Treatment_Date     367492 non-null  object 
 2   Age                367492 non-null  int64  
 3   Sex                367492 non-null  int64  
 4   Race               367492 