# Fill Missing Values using MICE Imputation

In [1]:
import os
import pandas as pd
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer

# Define input and output folders
input_folder = "Input_Folder_for_GNN"
output_folder = "Input_Folder_without_Nan_MICE"

# Create the output folder if it doesn't exist
os.makedirs(output_folder, exist_ok=True)

# Initialize the IterativeImputer. fit_transform() below refits it on every
# file, so each file is imputed from its own measurements only.
imputer = IterativeImputer(max_iter=20, random_state=42)

# Loop through each CSV file in the input folder. os.listdir() returns entries
# in arbitrary order, so sort them to keep the processing order and the printed
# log reproducible between runs.
for file_name in sorted(os.listdir(input_folder)):
    if not file_name.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, file_name)

    # Read the CSV file
    df = pd.read_csv(file_path)

    # Convert 'From Date' to datetime format if not already
    df['From Date'] = pd.to_datetime(df['From Date'])

    # Separate the 'From Date' column and the air quality parameters
    from_date = df['From Date']
    data = df.drop(columns=['From Date'])

    # IterativeImputer discards columns that have no observed value at fit
    # time, which would make the returned array narrower than `data` and break
    # the DataFrame reconstruction. Impute only the columns that contain at
    # least one observation and carry the entirely empty ones through as NaN.
    has_values = data.notna().any().to_numpy()
    observed_columns = data.columns[has_values]
    empty_columns = data.columns[~has_values]

    if len(empty_columns) > 0:
        print(
            f"Warning: {file_name} has no observed values in "
            f"{list(empty_columns)}; these columns cannot be imputed "
            "and are left empty."
        )

    if len(observed_columns) > 0:
        # Apply MICE (Iterative Imputer)
        imputed_data = imputer.fit_transform(data[observed_columns])

        # Convert the imputed data back to a DataFrame, keeping the original
        # row index so 'From Date' lines up row for row.
        imputed_df = pd.DataFrame(
            imputed_data, columns=observed_columns, index=data.index
        )

        # Restore the original column order; empty columns reappear as NaN.
        imputed_df = imputed_df.reindex(columns=data.columns)
    else:
        # Nothing to fit on (no rows, or no column with any observation):
        # pass the data through unchanged instead of letting the imputer raise.
        imputed_df = data.copy()

    # Add back the 'From Date' column
    imputed_df.insert(0, 'From Date', from_date)

    # Show number of null values in the imputed DataFrame
    print(f"Number of null values in {file_name} after MICE imputation:")
    print(imputed_df.isnull().sum())

    # Save the imputed file to the output folder
    output_path = os.path.join(output_folder, file_name)
    imputed_df.to_csv(output_path, index=False)

# Report the folder from the variable so the message cannot drift from the
# actual output location.
print(f"MICE imputation completed. Files saved to '{output_folder}'.")


Number of null values in UP002.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP003.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP004.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64




Number of null values in UP008.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP009.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP012.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64




Number of null values in UP014.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64




Number of null values in UP016.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP017.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64




Number of null values in UP018.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP019.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP020.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP021.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (u



Number of null values in UP022.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64




Number of null values in UP023.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP024.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP025.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (ug/m3)     0
NO (ug/m3)       0
NO2 (ug/m3)      0
SO2 (ug/m3)      0
CO (mg/m3)       0
Ozone (ug/m3)    0
RH (%)           0
WS (m/s)         0
WD (degree)      0
AT (degree C)    0
dtype: int64
Number of null values in UP026.csv after MICE imputation:
From Date        0
PM2.5 (ug/m3)    0
PM10 (u