In [1]:
import pandas as pd

# Load the input CSV into the working DataFrame.
# NOTE(review): the preview in the next cell shows an 'Unnamed: 0' column, which
# looks like an index that was written out when the file was saved; confirm
# whether it is needed downstream before dropping it or using index_col=0.
data = pd.read_csv(filepath_or_buffer='filled_time_data.csv')

In [2]:
# Preview the first five rows of the loaded DataFrame
data.head(n=5)

Unnamed: 0,siteid,aqi,pollutant,status,so2,o3,o3_8hr,no2,windspeed,winddirec,datacreationdate,co_8hr,pm2.5_avg,pm10_avg,longitude,latitude,year
0,25.0,49.0,,良好,0.5,9.7,26.0,17.4,0.3,291.0,2022-01-01 00:00:00,0.3,15.0,22.0,120.898693,24.696907,2022
1,25.0,72.0,細懸浮微粒,普通,0.7,9.8,23.0,19.8,0.3,189.0,2022-01-01 01:00:00,0.3,24.0,32.0,120.898693,24.696907,2022
2,25.0,92.0,細懸浮微粒,普通,4.0,3.0,19.0,22.9,0.9,201.0,2022-01-01 02:00:00,0.3,32.0,41.0,120.898693,24.696907,2022
3,25.0,91.0,細懸浮微粒,普通,2.7,12.1,16.0,12.5,0.7,122.0,2022-01-01 03:00:00,0.3,32.0,41.0,120.898693,24.696907,2022
4,25.0,90.0,細懸浮微粒,普通,2.0,12.7,14.0,9.0,0.7,262.0,2022-01-01 04:00:00,0.3,31.0,41.0,120.898693,24.696907,2022


In [3]:
# Count missing (NaN) values in each pollutant / wind measurement column
columns_to_check = [
    'so2', 'o3', 'o3_8hr', 'no2',
    'windspeed', 'winddirec',
    'co_8hr', 'pm2.5_avg', 'pm10_avg',
]
nan_counts = data.loc[:, columns_to_check].isnull().sum()

# Heading line followed by the per-column counts
print("各欄位中 NaN 值的數量:", nan_counts, sep="\n")

各欄位中 NaN 值的數量:
so2          0
o3           0
o3_8hr       0
no2          0
windspeed    0
winddirec    0
co_8hr       0
pm2.5_avg    0
pm10_avg     0
dtype: int64


In [4]:
# Convert a pollutant concentration into its AQI sub-index
def calculate_aqi(concentration, breakpoints):
    """Linearly interpolate an AQI sub-index from a concentration.

    Parameters
    ----------
    concentration : number or None
        Measured concentration, expressed on the same scale as the
        ``'low'``/``'high'`` bounds of ``breakpoints``.
    breakpoints : list of dict
        Concentration bands, each with the keys ``'low'``, ``'high'``,
        ``'I_low'`` and ``'I_high'``. A band matches when
        ``low <= concentration < high`` (the upper bound is exclusive);
        bands are tried in order and the first match wins.

    Returns
    -------
    int or None
        The interpolated index, rounded with the built-in ``round``.
        ``None`` is returned when the concentration is missing
        (``None``/NaN) or does not fall inside any band; the latter case
        also emits a warning.
    """
    # Missing in, missing out: comparing None against the bounds would raise
    # TypeError, and NaN never matches a band, so handle both up front.
    if pd.isna(concentration):
        return None

    for bp in breakpoints:
        if bp['low'] <= concentration < bp['high']:
            slope = (bp['I_high'] - bp['I_low']) / (bp['high'] - bp['low'])
            return round(slope * (concentration - bp['low']) + bp['I_low'])

    # Out-of-range value: report it concisely instead of printing the whole
    # breakpoint table once per offending row.
    import warnings
    warnings.warn(
        f"calculate_aqi: concentration {concentration!r} is outside every "
        "breakpoint band; returning None"
    )
    return None

# AQI breakpoint tables, keyed by the concentration column they apply to.
# Each band maps the concentration range [low, high) onto the index range
# [I_low, I_high]; calculate_aqi treats 'high' as exclusive, so every band's
# 'high' must equal the next band's 'low' or values in the gap get no index.
breakpoints = {
    'so2': [
        {'low': 0, 'high': 21, 'I_low': 0, 'I_high': 50},
        {'low': 21, 'high': 76, 'I_low': 51, 'I_high': 100},
        {'low': 76, 'high': 186, 'I_low': 101, 'I_high': 150},
        {'low': 186, 'high': 305, 'I_low': 151, 'I_high': 200},
        {'low': 305, 'high': 605, 'I_low': 201, 'I_high': 300},
        {'low': 605, 'high': 805, 'I_low': 301, 'I_high': 400},
        {'low': 805, 'high': 1005, 'I_low': 401, 'I_high': 500}
    ],
    # NOTE(review): the bounds used here (55/71/86/106/200) look like the
    # published 8-hour ozone table, while the 'o3_8hr' entry below looks like
    # the 1-hour ozone table (125/165/205/...) -- confirm the two tables are
    # not swapped.
    'o3': [
        {'low': 0, 'high': 55, 'I_low': 0, 'I_high': 50},
        {'low': 55, 'high': 71, 'I_low': 51, 'I_high': 100},
        {'low': 71, 'high': 86, 'I_low': 101, 'I_high': 150},
        {'low': 86, 'high': 106, 'I_low': 151, 'I_high': 200},
        {'low': 106, 'high': 200, 'I_low': 201, 'I_high': 300}
    ],
    'o3_8hr': [
        {'low': 0, 'high': 125, 'I_low': 0, 'I_high': 100},
        {'low': 125, 'high': 165, 'I_low': 101, 'I_high': 150},
        {'low': 165, 'high': 205, 'I_low': 151, 'I_high': 200},
        {'low': 205, 'high': 405, 'I_low': 201, 'I_high': 300},
        {'low': 405, 'high': 505, 'I_low': 301, 'I_high': 400},
        {'low': 505, 'high': 605, 'I_low': 401, 'I_high': 500}
    ],
    'pm2.5_avg': [
        {'low': 0.0, 'high': 15.5, 'I_low': 0, 'I_high': 50},
        {'low': 15.5, 'high': 35.5, 'I_low': 51, 'I_high': 100},
        # Upper bound is 54.5 (was 54.45), so it meets the next band's lower
        # bound and values in [54.45, 54.5) no longer fall through unmatched.
        {'low': 35.5, 'high': 54.5, 'I_low': 101, 'I_high': 150},
        {'low': 54.5, 'high': 150.5, 'I_low': 151, 'I_high': 200},
        {'low': 150.5, 'high': 250.5, 'I_low': 201, 'I_high': 300},
        {'low': 250.5, 'high': 350.5, 'I_low': 301, 'I_high': 400},
        {'low': 350.5, 'high': 500.5, 'I_low': 401, 'I_high': 500}
    ],
    'pm10_avg': [
        {'low': 0, 'high': 51, 'I_low': 0, 'I_high': 50},
        {'low': 51, 'high': 101, 'I_low': 51, 'I_high': 100},
        {'low': 101, 'high': 255, 'I_low': 101, 'I_high': 150},
        {'low': 255, 'high': 355, 'I_low': 151, 'I_high': 200},
        {'low': 355, 'high': 425, 'I_low': 201, 'I_high': 300},
        {'low': 425, 'high': 505, 'I_low': 301, 'I_high': 400},
        {'low': 505, 'high': 605, 'I_low': 401, 'I_high': 500}
    ],
    'co_8hr': [
        {'low': 0, 'high': 4.5, 'I_low': 0, 'I_high': 50},
        {'low': 4.5, 'high': 9.5, 'I_low': 51, 'I_high': 100},
        {'low': 9.5, 'high': 12.5, 'I_low': 101, 'I_high': 150},
        {'low': 12.5, 'high': 15.5, 'I_low': 151, 'I_high': 200},
        {'low': 15.5, 'high': 30.5, 'I_low': 201, 'I_high': 300},
        {'low': 30.5, 'high': 40.5, 'I_low': 301, 'I_high': 400},
        {'low': 40.5, 'high': 50.5, 'I_low': 401, 'I_high': 500}
    ],
    'no2': [
        {'low': 0, 'high': 31, 'I_low': 0, 'I_high': 50},
        {'low': 31, 'high': 101, 'I_low': 51, 'I_high': 100},
        {'low': 101, 'high': 361, 'I_low': 101, 'I_high': 150},
        {'low': 361, 'high': 650, 'I_low': 151, 'I_high': 200},
        {'low': 650, 'high': 1250, 'I_low': 201, 'I_high': 300},
        {'low': 1250, 'high': 1650, 'I_low': 301, 'I_high': 400},
        {'low': 1650, 'high': 2050, 'I_low': 401, 'I_high': 500}
    ]
}

# Add one '<pollutant>_aqi' column per breakpoint table by mapping every
# concentration in the matching source column through calculate_aqi
for pollutant, table in breakpoints.items():
    concentrations = data[pollutant]
    data[f'{pollutant}_aqi'] = concentrations.apply(calculate_aqi, args=(table,))

# Preview the frame with the new sub-index columns
data.head()

Unnamed: 0,siteid,aqi,pollutant,status,so2,o3,o3_8hr,no2,windspeed,winddirec,...,longitude,latitude,year,so2_aqi,o3_aqi,o3_8hr_aqi,pm2.5_avg_aqi,pm10_avg_aqi,co_8hr_aqi,no2_aqi
0,25.0,49.0,,良好,0.5,9.7,26.0,17.4,0.3,291.0,...,120.898693,24.696907,2022,1,9,21,48,22,3,28
1,25.0,72.0,細懸浮微粒,普通,0.7,9.8,23.0,19.8,0.3,189.0,...,120.898693,24.696907,2022,2,9,18,72,31,3,32
2,25.0,92.0,細懸浮微粒,普通,4.0,3.0,19.0,22.9,0.9,201.0,...,120.898693,24.696907,2022,10,3,15,91,40,3,37
3,25.0,91.0,細懸浮微粒,普通,2.7,12.1,16.0,12.5,0.7,122.0,...,120.898693,24.696907,2022,6,11,13,91,40,3,20
4,25.0,90.0,細懸浮微粒,普通,2.0,12.7,14.0,9.0,0.7,262.0,...,120.898693,24.696907,2022,5,12,11,89,40,3,15


In [5]:
# Write the DataFrame `data` (including the added *_aqi columns) to CSV,
# leaving out the row index
data.to_csv(path_or_buf='aqi_breakpoints.csv', index=False)