In [8]:
import pandas as pd
import numpy as np

In [9]:
# Define the function to map PM2.5 concentration to category
def pm25_category(conc):
    """Map a PM2.5 concentration to its air-quality category label.

    Each category covers a half-open range: it starts at its own lower
    breakpoint (inclusive) and ends just below the next category's lower
    breakpoint. Every real number therefore belongs to exactly one category.
    Values that fall between two one-decimal breakpoints (for example 12.05,
    which linear interpolation readily produces) get the lower category
    instead of dropping through every branch and being labelled 'Hazardous'.

    NOTE(review): the breakpoints look like the US EPA 24-hour PM2.5 AQI
    table (µg/m³), and assigning in-between values to the lower category
    mirrors that table's truncate-to-one-decimal convention — confirm.

    Parameters
    ----------
    conc : float
        PM2.5 concentration. May be NaN/None for a missing reading.

    Returns
    -------
    str or float
        One of 'Good', 'Moderate', 'Unhealthy for sensitive', 'Unhealthy',
        'Very unhealthy', 'Hazardous'; NaN when ``conc`` is missing.
    """
    # A missing reading has no category. Without this guard NaN fails every
    # comparison below and would be reported as 'Hazardous'.
    if pd.isna(conc):
        return np.nan

    # Only the lower breakpoint of the *next* category is tested, so there
    # are no gaps between consecutive ranges.
    if conc < 12.1:
        return 'Good'
    if conc < 35.5:
        return 'Moderate'
    if conc < 55.5:
        return 'Unhealthy for sensitive'
    if conc < 150.5:
        return 'Unhealthy'
    if conc < 250.5:
        return 'Very unhealthy'
    return 'Hazardous'

# Define the dataset paths
# Every input file lives in the same directory and shares the
# 'Hanoi_PM2.5_' prefix and '.csv' extension; only the year/range tag varies.
# The elements are kept as plain strings.
dataset_paths = [
    f'/home/dang032003/Final-Thesis/Hanoi-PM2.5-dataset/Hanoi_PM2.5_{tag}.csv'
    for tag in (
        '2015_YTD',
        '2016_YTD',
        '2017_YTD',
        '2018_YTD',
        '2019_YTD-(10-2019-12-2019)',  # note: hyphen before the range
        '2020_YTD',
        '2021_YTD',
        '2022_YTD',
        '2023_YTD',
        '2024_YTD_(01-2024-02-2024)',  # note: underscore before the range
    )
]

# Iterate over each dataset
# The same interpolation method is used for every file, so it is set once
# here instead of being reassigned on each iteration.
interpolation_method = 'linear'

for dataset_path in dataset_paths:
    # Read the dataset
    df = pd.read_csv(dataset_path)

    # Convert 'Date (LT)' column to datetime
    df['Date (LT)'] = pd.to_datetime(df['Date (LT)'])

    # -999 is treated as a missing-value sentinel: turn it into NaN, then
    # fill the gaps by interpolating between the neighbouring rows.
    df['Raw Conc.'] = (
        df['Raw Conc.']
        .replace(-999, np.nan)
        .interpolate(method=interpolation_method)
    )

    # With its default settings interpolate() only fills forward, so NaNs at
    # the very start of a file are still present here. na_action='ignore'
    # means the category function is never called on a missing reading and
    # that row's category stays missing rather than being assigned a label.
    df['PM2.5 Category'] = df['Raw Conc.'].map(pm25_category, na_action='ignore')

    # Save the processed dataset next to the input file
    output_path = dataset_path.replace('.csv', '_with_category.csv')
    df.to_csv(output_path, index=False)
    print(f"Processed and saved dataset with PM2.5 category to: {output_path}")

KeyboardInterrupt: 