In [1]:
import pandas as pd
import numpy as np

# 讀取 CSV 檔案
data = pd.read_csv('aqi_breakpoints.csv')

In [2]:
# 顯示資料框的前幾行
data.head()

Unnamed: 0,siteid,aqi,pollutant,status,so2,o3,o3_8hr,no2,windspeed,winddirec,...,longitude,latitude,year,so2_aqi,o3_aqi,o3_8hr_aqi,pm2.5_avg_aqi,pm10_avg_aqi,co_8hr_aqi,no2_aqi
0,25.0,49.0,,良好,0.5,9.7,26.0,17.4,0.3,291.0,...,120.898693,24.696907,2022,1,9,21,48,22,3,28
1,25.0,72.0,細懸浮微粒,普通,0.7,9.8,23.0,19.8,0.3,189.0,...,120.898693,24.696907,2022,2,9,18,72,31,3,32
2,25.0,92.0,細懸浮微粒,普通,4.0,3.0,19.0,22.9,0.9,201.0,...,120.898693,24.696907,2022,10,3,15,91,40,3,37
3,25.0,91.0,細懸浮微粒,普通,2.7,12.1,16.0,12.5,0.7,122.0,...,120.898693,24.696907,2022,6,11,13,91,40,3,20
4,25.0,90.0,細懸浮微粒,普通,2.0,12.7,14.0,9.0,0.7,262.0,...,120.898693,24.696907,2022,5,12,11,89,40,3,15


In [3]:
so2_max = data['so2_aqi'].max()
so2_max

180

In [4]:
# 轉換欄位
columns_to_check = ['so2_aqi', 'o3_aqi', 'o3_8hr_aqi', 'no2_aqi', 'co_8hr_aqi', 'pm2.5_avg_aqi', 'pm10_avg']


In [5]:
# siteid對應到的矩陣位置
site_positions = {
 25.0: (4, 3),
 26.0: (3, 3),
 27.0: (2, 2),
 28.0: (1, 1),
 29.0: (2, 1),
 30.0: (1, 2),
 31.0: (0, 1),
 32.0: (0, 2),
 33.0: (0, 0),
 34.0: (1, 0),
 35.0: (0, 3),
 36.0: (1, 3),
 37.0: (2, 0),
 38.0: (2, 3),
 41.0: (3, 0),
 69.0: (0, 4),
 72.0: (1, 4),
 83.0: (3, 1)}



In [6]:
import h5py
import numpy as np

In [7]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"1 污染物: so2")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['so2_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('so2_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

1 污染物: so2
開始時間: 2024-06-15 14:57:30.399562
結束時間: 2024-06-15 14:59:55.155456
執行時間: 0:02:24.755894
Time: 2022-01-01 00:00:00
[[ 3.  6.  5.  5.  6.]
 [ 5.  4.  6.  7.  3.]
 [ 3.  5.  3.  5. nan]
 [ 4.  4. nan  4. nan]
 [nan nan nan  1. nan]]


Time: 2022-01-01 01:00:00
[[ 4.  6.  4.  4.  5.]
 [ 5.  3.  5.  6.  4.]
 [ 4.  5.  3.  3. nan]
 [ 4.  4. nan  4. nan]
 [nan nan nan  2. nan]]


Time: 2022-01-01 02:00:00
[[ 5.  6.  3.  3.  3.]
 [ 7.  6.  5.  5.  3.]
 [ 5.  4.  2.  3. nan]
 [ 3.  3. nan  4. nan]
 [nan nan nan 10. nan]]


Time: 2022-01-01 03:00:00
[[ 4.  5.  4.  4.  4.]
 [ 7.  9.  5.  7.  3.]
 [ 4.  4.  2.  4. nan]
 [ 2.  3. nan  3. nan]
 [nan nan nan  6. nan]]


Time: 2022-01-01 04:00:00
[[ 4.  5.  3.  4.  5.]
 [ 6.  6.  4.  7.  3.]
 [ 5.  4.  2.  4. nan]
 [ 2.  4. nan  4. nan]
 [nan nan nan  5. nan]]




In [9]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"2 污染物: o3")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['o3_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('o3_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

2 污染物: o3
開始時間: 2024-06-15 15:00:46.351306
結束時間: 2024-06-15 15:03:12.104637
執行時間: 0:02:25.753331
Time: 2022-01-01 00:00:00
[[20. 15. 20. 24. 12.]
 [23. 11.  7. 15. 13.]
 [16. 20. 25. 25. nan]
 [36. 34. nan  8. nan]
 [nan nan nan  9. nan]]


Time: 2022-01-01 01:00:00
[[14.  8. 15. 21.  8.]
 [21.  9.  6. 12. 13.]
 [11. 15. 19. 18. nan]
 [32. 28. nan  8. nan]
 [nan nan nan  9. nan]]


Time: 2022-01-01 02:00:00
[[16.  8.  6. 20.  5.]
 [15.  6.  6.  7. 12.]
 [ 9. 17. 21. 16. nan]
 [29. 24. nan  2. nan]
 [nan nan nan  3. nan]]


Time: 2022-01-01 03:00:00
[[12. 17. 17. 15.  5.]
 [16.  5.  8.  8. 11.]
 [13. 24. 21. 15. nan]
 [25. 18. nan  9. nan]
 [nan nan nan 11. nan]]


Time: 2022-01-01 04:00:00
[[10. 17. 22. 15.  7.]
 [16.  8.  5.  9.  8.]
 [ 6. 19. 17. 16. nan]
 [18. 12. nan 12. nan]
 [nan nan nan 12. nan]]




In [10]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"3 污染物: o3_8")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['o3_8hr_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('o3_8hr_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

3 污染物: o3_8
開始時間: 2024-06-15 15:03:12.699727
結束時間: 2024-06-15 15:05:39.100739
執行時間: 0:02:26.401012
Time: 2022-01-01 00:00:00
[[21. 18. 21. 26. 19.]
 [24. 17. 17. 17. 25.]
 [20. 22. 23. 24. nan]
 [33. 32. nan 17. nan]
 [nan nan nan 21. nan]]


Time: 2022-01-01 01:00:00
[[18. 16. 19. 24. 16.]
 [23. 14. 14. 14. 21.]
 [17. 20. 22. 22. nan]
 [31. 30. nan 14. nan]
 [nan nan nan 18. nan]]


Time: 2022-01-01 02:00:00
[[18. 14. 17. 23. 13.]
 [22. 11. 11. 12. 18.]
 [14. 18. 20. 20. nan]
 [30. 28. nan 12. nan]
 [nan nan nan 15. nan]]


Time: 2022-01-01 03:00:00
[[16. 14. 16. 22. 10.]
 [20.  9. 10. 10. 14.]
 [13. 18. 19. 19. nan]
 [29. 26. nan 10. nan]
 [nan nan nan 13. nan]]


Time: 2022-01-01 04:00:00
[[15. 13. 16. 20.  8.]
 [19.  8.  9. 10. 12.]
 [11. 18. 18. 18. nan]
 [26. 24. nan  9. nan]
 [nan nan nan 11. nan]]




In [13]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"4 污染物: no2")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['no2_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('no2_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

4 污染物: no2
開始時間: 2024-06-15 15:10:32.715311
結束時間: 2024-06-15 15:12:58.523421
執行時間: 0:02:25.808110
Time: 2022-01-01 00:00:00
[[26. 31. 22. 16. 26.]
 [19. 19. 39. 29. 21.]
 [21. 22.  9. 11. nan]
 [ 4.  7. nan 35. nan]
 [nan nan nan 28. nan]]


Time: 2022-01-01 01:00:00
[[30. 39. 20. 17. 27.]
 [19. 19. 35. 29. 21.]
 [24. 25.  9. 13. nan]
 [ 5.  9. nan 33. nan]
 [nan nan nan 32. nan]]


Time: 2022-01-01 02:00:00
[[30. 43. 43. 17. 29.]
 [29. 33. 34. 33. 20.]
 [28. 27. 10. 18. nan]
 [10. 14. nan 35. nan]
 [nan nan nan 37. nan]]


Time: 2022-01-01 03:00:00
[[28. 29. 25. 18. 30.]
 [24. 33. 35. 29. 20.]
 [23. 18.  8. 18. nan]
 [ 9. 16. nan 24. nan]
 [nan nan nan 20. nan]]


Time: 2022-01-01 04:00:00
[[35. 25. 17. 18. 27.]
 [22. 24. 34. 28. 20.]
 [23. 18. 12. 17. nan]
 [14. 21. nan 18. nan]
 [nan nan nan 15. nan]]




In [11]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"5 污染物: co_8")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['co_8hr_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('co_8hr_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

5 污染物: co_8
開始時間: 2024-06-15 15:05:39.667048
結束時間: 2024-06-15 15:08:05.605009
執行時間: 0:02:25.937961
Time: 2022-01-01 00:00:00
[[ 3.  3.  2.  2.  3.]
 [ 2.  3.  4.  3.  4.]
 [ 3.  2.  2.  2. nan]
 [ 2.  1. nan  3. nan]
 [nan nan nan  3. nan]]


Time: 2022-01-01 01:00:00
[[ 3.  3.  2.  2.  3.]
 [ 2.  3.  4.  3.  4.]
 [ 3.  2.  2.  2. nan]
 [ 2.  1. nan  3. nan]
 [nan nan nan  3. nan]]


Time: 2022-01-01 02:00:00
[[ 3.  3.  2.  2.  3.]
 [ 2.  3.  4.  3.  4.]
 [ 3.  2.  2.  2. nan]
 [ 1.  1. nan  3. nan]
 [nan nan nan  3. nan]]


Time: 2022-01-01 03:00:00
[[ 3.  3.  2.  2.  3.]
 [ 2.  4.  4.  3.  4.]
 [ 3.  2.  2.  2. nan]
 [ 1.  1. nan  3. nan]
 [nan nan nan  3. nan]]


Time: 2022-01-01 04:00:00
[[ 3.  3.  2.  2.  3.]
 [ 2.  3.  4.  3.  4.]
 [ 3.  2.  2.  2. nan]
 [ 1.  2. nan  3. nan]
 [nan nan nan  3. nan]]




In [12]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"6 污染物: pm2.5")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['pm2.5_avg_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('pm2.5_avg_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

6 污染物: pm2.5
開始時間: 2024-06-15 15:08:06.181013
結束時間: 2024-06-15 15:10:32.149697
執行時間: 0:02:25.968684
Time: 2022-01-01 00:00:00
[[48. 60. 52. 45. 89.]
 [48. 55. 55. 60. 74.]
 [72. 52. 42. 55. nan]
 [45. 45. nan 42. nan]
 [nan nan nan 48. nan]]


Time: 2022-01-01 01:00:00
[[52. 79. 57. 45. 91.]
 [48. 62. 60. 62. 79.]
 [72. 55. 39. 52. nan]
 [45. 45. nan 52. nan]
 [nan nan nan 72. nan]]


Time: 2022-01-01 02:00:00
[[52. 89. 67. 45. 91.]
 [48. 67. 67. 62. 79.]
 [69. 57. 42. 52. nan]
 [42. 45. nan 62. nan]
 [nan nan nan 91. nan]]


Time: 2022-01-01 03:00:00
[[55. 87. 67. 45. 91.]
 [52. 72. 72. 67. 79.]
 [72. 55. 39. 55. nan]
 [42. 45. nan 64. nan]
 [nan nan nan 91. nan]]


Time: 2022-01-01 04:00:00
[[55. 84. 64. 45. 89.]
 [55. 72. 74. 67. 84.]
 [72. 52. 35. 55. nan]
 [42. 52. nan 64. nan]
 [nan nan nan 89. nan]]




In [14]:
import numpy as np
from datetime import datetime

# 記錄開始時間
start_time = datetime.now()
print(f"7 污染物: pm10")
print(f"開始時間: {start_time}")

# 初始化結果字典，用於存儲每小時的矩陣
hourly_matrices = {}

# 獲取所有唯一的時間戳
unique_timestamps = data['datacreationdate'].unique()

# 為每個時間戳創建一個矩陣
for timestamp in unique_timestamps:
    # 初始化 10x10 矩陣，填充為 NaN
    matrix = np.full((5, 5), np.nan)
    
    # 篩選當前時間戳的數據
    hourly_data = data[data['datacreationdate'] == timestamp]
    
    # 將濃度數據填入矩陣
    for index, row in hourly_data.iterrows():
        siteid = row['siteid']
        if siteid in site_positions:
            x, y = site_positions[siteid]
            matrix[x, y] = row['pm10_avg_aqi'] 
    
    # 將矩陣存入結果字典
    hourly_matrices[timestamp] = matrix

# 記錄結束時間
end_time = datetime.now()
print(f"結束時間: {end_time}")

# 計算並顯示執行時間
execution_time = end_time - start_time
print(f"執行時間: {execution_time}")

# 檢查前五個結果
for i, (timestamp, matrix) in enumerate(hourly_matrices.items()):
    if i < 5:  # 只打印前五個
        print(f"Time: {timestamp}")
        print(matrix)
        print("\n")
    else:
        break

# 將結果字典匯出為 HDF5 文件
with h5py.File('pm10_avg_hourly_matrices.h5', 'w') as h5f:
    for timestamp_str, matrix in hourly_matrices.items():
        h5f.create_dataset(timestamp_str, data=matrix)

1 污染物: pm10
開始時間: 2024-06-15 15:13:14.336206
結束時間: 2024-06-15 15:15:40.182514
執行時間: 0:02:25.846308
Time: 2022-01-01 00:00:00
[[33. 32. 32. 43. 53.]
 [38. 26. 33. 35. 40.]
 [51. 32. 20. 43. nan]
 [28. 35. nan 25. nan]
 [nan nan nan 22. nan]]


Time: 2022-01-01 01:00:00
[[32. 41. 33. 40. 53.]
 [35. 28. 35. 34. 42.]
 [51. 31. 19. 40. nan]
 [27. 34. nan 26. nan]
 [nan nan nan 31. nan]]


Time: 2022-01-01 02:00:00
[[30. 45. 36. 37. 52.]
 [32. 31. 37. 34. 43.]
 [49. 30. 18. 38. nan]
 [27. 33. nan 28. nan]
 [nan nan nan 40. nan]]


Time: 2022-01-01 03:00:00
[[28. 41. 34. 35. 49.]
 [29. 32. 39. 35. 44.]
 [47. 27. 16. 37. nan]
 [26. 32. nan 28. nan]
 [nan nan nan 40. nan]]


Time: 2022-01-01 04:00:00
[[28. 37. 30. 33. 48.]
 [29. 32. 39. 35. 44.]
 [47. 25. 15. 35. nan]
 [27. 31. nan 27. nan]
 [nan nan nan 40. nan]]


