# Data Understanding
## StationID 
100t คือ สถานีอุตุนิยมวิทยาจันทบุรี ต.วัดใหม่ อ.เมือง, จันทบุรี
- PM25 PM2.5 คือ ฝุ่นละอองขนาดเล็กไม่เกิน 2.5 ไมครอน
- PM10 คือ ฝุ่นละอองขนาดไม่เกิน 10 ไมครอน (รวมฝุ่นหยาบที่มีอนุภาคเส้นผ่านศูนย์กลาง 2.5–10 ไมครอน)
- O3 ก๊าซโอโซน
- CO ก๊าซคาร์บอนมอนอกไซด์
- NO2 ก๊าซไนโตรเจนไดออกไซด์
- SO2 ซัลเฟอร์ไดออกไซด์
- WS ความเร็วลม
- WD ทิศทางลม
- TEMP อุณหภูมิ
- RH ความชื้นสัมพัทธ์
- BP ความกดอากาศ
- RAIN ปริมาณน้ำฝน

In [33]:
from math import sqrt
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [2]:
# Load the station 100t dataset from CSV into a DataFrame.
# NOTE(review): the "with_Prep" file name suggests the data was already
# pre-processed — confirm what that step did.
df = pd.read_csv('data/PM25_Station100t_with_Prep.csv')
df  # bare expression so the notebook renders the frame as the cell output

Unnamed: 0,DATETIMEDATA,PM25,PM10,O3,CO,NO2,SO2,WS,WD,TEMP,RH,BP,RAIN
0,2023-08-01 00:00:00,11,32,9.0,0.31,7.000000,1.000000,1.4,183,28.3,87,750,0.0
1,2023-08-01 01:00:00,12,35,9.0,0.22,7.410755,1.067401,1.3,186,28.0,88,749,0.0
2,2023-08-01 02:00:00,15,35,10.0,0.19,6.000000,1.000000,1.3,185,28.1,88,749,0.0
3,2023-08-01 03:00:00,15,31,9.0,0.19,5.000000,1.000000,1.1,181,27.8,88,749,0.0
4,2023-08-01 04:00:00,12,33,9.0,0.17,4.000000,1.000000,1.0,192,27.9,88,749,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1901,2023-10-19 05:00:00,21,30,3.0,0.36,5.000000,1.067401,0.2,354,25.0,95,751,0.0
1902,2023-10-19 06:00:00,15,36,3.0,0.38,4.000000,1.067401,0.1,12,25.1,94,752,0.0
1903,2023-10-19 07:00:00,13,31,6.0,0.39,5.000000,1.067401,0.4,9,26.5,86,752,0.0
1904,2023-10-19 08:00:00,23,43,12.0,0.42,7.000000,1.067401,0.4,354,28.6,77,752,0.0


In [3]:
# Drop the timestamp column so it is not carried into the model inputs;
# `df` itself is left unchanged because drop() returns a new frame.
data = df.drop(columns=["DATETIMEDATA"])
data  # display the result

Unnamed: 0,PM25,PM10,O3,CO,NO2,SO2,WS,WD,TEMP,RH,BP,RAIN
0,11,32,9.0,0.31,7.000000,1.000000,1.4,183,28.3,87,750,0.0
1,12,35,9.0,0.22,7.410755,1.067401,1.3,186,28.0,88,749,0.0
2,15,35,10.0,0.19,6.000000,1.000000,1.3,185,28.1,88,749,0.0
3,15,31,9.0,0.19,5.000000,1.000000,1.1,181,27.8,88,749,0.0
4,12,33,9.0,0.17,4.000000,1.000000,1.0,192,27.9,88,749,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
1901,21,30,3.0,0.36,5.000000,1.067401,0.2,354,25.0,95,751,0.0
1902,15,36,3.0,0.38,4.000000,1.067401,0.1,12,25.1,94,752,0.0
1903,13,31,6.0,0.39,5.000000,1.067401,0.4,9,26.5,86,752,0.0
1904,23,43,12.0,0.42,7.000000,1.067401,0.4,354,28.6,77,752,0.0


In [4]:
# Separate the table into model inputs and the prediction target.
X = data.drop(columns=["PM25"])  # every column except "PM25" is an input feature
y = data["PM25"]  # "PM25" is the target value

In [5]:
# Display the feature matrix to check that "PM25" is no longer among the columns.
X

Unnamed: 0,PM10,O3,CO,NO2,SO2,WS,WD,TEMP,RH,BP,RAIN
0,32,9.0,0.31,7.000000,1.000000,1.4,183,28.3,87,750,0.0
1,35,9.0,0.22,7.410755,1.067401,1.3,186,28.0,88,749,0.0
2,35,10.0,0.19,6.000000,1.000000,1.3,185,28.1,88,749,0.0
3,31,9.0,0.19,5.000000,1.000000,1.1,181,27.8,88,749,0.0
4,33,9.0,0.17,4.000000,1.000000,1.0,192,27.9,88,749,0.0
...,...,...,...,...,...,...,...,...,...,...,...
1901,30,3.0,0.36,5.000000,1.067401,0.2,354,25.0,95,751,0.0
1902,36,3.0,0.38,4.000000,1.067401,0.1,12,25.1,94,752,0.0
1903,31,6.0,0.39,5.000000,1.067401,0.4,9,26.5,86,752,0.0
1904,43,12.0,0.42,7.000000,1.067401,0.4,354,28.6,77,752,0.0


In [6]:
# Display the target series (the "PM25" column).
y

0       11
1       12
2       15
3       15
4       12
        ..
1901    21
1902    15
1903    13
1904    23
1905    26
Name: PM25, Length: 1906, dtype: int64

In [9]:
# Hold out 20% of the rows as a test set; random_state=42 makes the split reproducible.
# NOTE(review): train_test_split shuffles rows by default and these are hourly
# readings, so train and test rows are interleaved in time — confirm that this
# is acceptable for how the error figures will be interpreted.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# Standardize the features. The scaler is fitted on the training rows only and
# then re-used as-is on the test rows, so test-set statistics do not influence
# the scaling.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [15]:
# Inspect the scaled training features (a NumPy array, not a DataFrame).
X_train_scaled

array([[-0.02768924, -0.34872666,  0.32542837, ...,  0.03579247,
         0.43198176, -0.13826791],
       [-0.75274777, -1.02331518, -0.56503234, ...,  1.25212255,
        -0.1992169 , -0.13826791],
       [-0.89775948, -0.18007953,  0.32542837, ...,  0.31648403,
         0.43198176, -0.13826791],
       ...,
       [ 1.42242781, -1.02331518,  1.01800892, ...,  0.12935632,
        -0.1992169 , -0.13826791],
       [-0.17270095, -0.01319494,  0.91906884, ..., -0.33846294,
         0.43198176, -0.13826791],
       [ 0.26233417,  0.15721473, -0.2682121 , ...,  0.50361173,
         0.43198176, -0.13826791]])

In [16]:
# Inspect the scaled test features (a NumPy array, not a DataFrame).
X_test_scaled

array([[ 0.40734587, -1.52925657, -0.16927202, ...,  1.1585587 ,
         0.43198176, -0.13826791],
       [ 1.42242781,  0.32586186, -0.66397242, ..., -1.08697376,
         0.43198176, -0.13826791],
       [-0.31771265,  2.34962742,  0.72118869, ..., -2.30330384,
        -0.83041557, -0.13826791],
       ...,
       [ 0.26233417,  0.66315612, -0.96079266, ..., -0.15133523,
        -1.46161424, -0.13826791],
       [ 0.40734587, -0.51737379,  0.42436845, ..., -0.99340991,
        -0.1992169 , -0.13826791],
       [-0.17270095,  0.15721473,  0.72118869, ..., -0.24489909,
        -2.0928129 , -0.13826791]])

In [17]:
# Fit a support vector regressor, using the estimator's default settings,
# on the scaled training data.
svr = SVR()
svr.fit(X_train_scaled, y_train)

In [18]:
# Predict PM25 for the held-out test rows with the fitted SVR.
y_pred = svr.predict(X_test_scaled)

In [21]:
# Compute the RMSE (Root Mean Squared Error) of the SVR predictions on the
# test set to evaluate the prediction error.
rmse = sqrt(mean_squared_error(y_test, y_pred))
print("Root Mean Squared Error (RMSE):", rmse)

Root Mean Squared Error (RMSE): 3.086883834973105


In [24]:
# Compute the MAE (Mean Absolute Error) of the SVR predictions on the
# test set to evaluate the prediction error.
mae = mean_absolute_error(y_test, y_pred)
print("Mean Absolute Error (MAE):", mae)

Mean Absolute Error (MAE): 2.2204278217612985


In [28]:
# Fit a multi-layer perceptron regressor with two hidden layers (100 and 50
# units) and at most 1000 training iterations; random_state=42 fixes the seed
# so repeated runs give the same model.
mlp = MLPRegressor(hidden_layer_sizes=(100, 50), max_iter=1000, random_state=42)
mlp.fit(X_train_scaled, y_train)

In [29]:
# Predict PM25 for the held-out test rows with the fitted MLP.
y_pred_mlp = mlp.predict(X_test_scaled)

In [31]:
# Compute the RMSE (Root Mean Squared Error) for the MLP on the test set.
rmse_mlp = sqrt(mean_squared_error(y_test, y_pred_mlp))
print("MLP Root Mean Squared Error (RMSE):", rmse_mlp)

MLP Root Mean Squared Error (RMSE): 3.044648987755406


In [32]:
# Compute the MAE (Mean Absolute Error) for the MLP on the test set.
mae_mlp = mean_absolute_error(y_test, y_pred_mlp)
print("MLP Mean Absolute Error (MAE):", mae_mlp)

MLP Mean Absolute Error (MAE): 2.238678432739723


In [34]:
# Fit an ordinary linear regression on the scaled training data, as a simple
# model to compare against the SVR and MLP.
lr = LinearRegression()
lr.fit(X_train_scaled, y_train)

In [35]:
# Predict PM25 for the held-out test rows with the fitted linear regression.
y_pred_lr = lr.predict(X_test_scaled)

In [36]:
# Compute the RMSE (Root Mean Squared Error) for Linear Regression on the test set.
rmse_lr = sqrt(mean_squared_error(y_test, y_pred_lr))
print("Linear Regression Root Mean Squared Error (RMSE):", rmse_lr)

Linear Regression Root Mean Squared Error (RMSE): 3.139701144881956


In [37]:
# Compute the MAE (Mean Absolute Error) for Linear Regression on the test set.
mae_lr = mean_absolute_error(y_test, y_pred_lr)
print("Linear Regression Mean Absolute Error (MAE):", mae_lr)

Linear Regression Mean Absolute Error (MAE): 2.460051363520259


# ทดสอบ Model

In [40]:
# Build a one-row DataFrame to try the fitted models on a single observation.
# Keys are the feature column names, in the same order as the columns of X;
# each value is a one-element list so pandas creates exactly one row.
# NOTE(review): these values equal row 0 of the loaded dataset (whose PM25 is
# 11), so this row may have been part of the training split — it is a sanity
# check, not an unseen sample.
test_data = {
    "PM10": [32],
    "O3": [9.0],
    "CO": [0.31],
    "NO2": [7.0],
    "SO2": [1.0],
    "WS": [1.4],
    "WD": [183],
    "TEMP": [28.3],
    "RH": [87],
    "BP": [750],
    "RAIN": [0.0]
}

df_test = pd.DataFrame(test_data)
df_test  # display the one-row frame


Unnamed: 0,PM10,O3,CO,NO2,SO2,WS,WD,TEMP,RH,BP,RAIN
0,32,9.0,0.31,7.0,1.0,1.4,183,28.3,87,750,0.0


In [42]:
# Scale the sample with the scaler that was fitted on the training data, so the
# models receive inputs on the same scale they were trained on.
test = scaler.transform(df_test)
test  # display the scaled row

array([[ 1.27741611, -0.34872666,  0.32542837, -0.11720574, -0.22727841,
         0.37836458,  0.01584159, -0.00885934,  0.50361173,  0.43198176,
        -0.13826791]])

In [44]:
# SVR prediction for the single scaled sample.
y_pred_svr = svr.predict(test)
y_pred_svr

array([10.60113865])

In [46]:
# MLP prediction for the single scaled sample.
# Stored under its own name: `y_pred_mlp` already holds the test-set
# predictions that the MLP RMSE/MAE cells read. Rebinding it to this
# one-element result would make those cells fail on a length mismatch with
# `y_test` if they were re-run afterwards.
y_pred_mlp_sample = mlp.predict(test)
y_pred_mlp_sample

array([9.90509604])

In [47]:
# Linear-regression prediction for the single scaled sample.
# Stored under its own name: `y_pred_lr` already holds the test-set
# predictions that the Linear Regression RMSE/MAE cells read. Rebinding it to
# this one-element result would make those cells fail on a length mismatch
# with `y_test` if they were re-run afterwards.
y_pred_lr_sample = lr.predict(test)
y_pred_lr_sample

array([12.53107607])