<a href="https://colab.research.google.com/github/benson1231/DeepLearning/blob/main/NeuralNetwork.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Regression

In [1]:
from google.colab import drive
# Mount Google Drive into the Colab filesystem; the Drive contents appear
# under /content/drive/MyDrive once the mount succeeds.
drive.mount('/content/drive')
# Use an absolute path anchored at the mount point so that loading the file
# does not depend on the kernel's current working directory.
data_path = "/content/drive/MyDrive/MachineLearning/insurance.csv"

Mounted at /content/drive


In [10]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from tensorflow.keras.optimizers import Adam

# Seed Python's `random`, NumPy and TensorFlow in one call. Seeding only
# TensorFlow is not sufficient for repeatable runs, because Keras also draws
# from the other generators (e.g. for default initializer seeds).
tf.keras.utils.set_random_seed(35)  # for reproducibility

def design_model(features, hidden_units=64, learning_rate=0.01):
    """Build and compile a one-hidden-layer regression network.

    Args:
        features: 2-D array-like of training features. Only
            ``features.shape[1]`` is read, to size the input layer.
        hidden_units: Number of neurons in the hidden ReLU layer.
            Defaults to 64.
        learning_rate: Learning rate passed to the Adam optimizer.
            Defaults to 0.01.

    Returns:
        A compiled ``Sequential`` model named ``"my_first_model"`` that
        minimises mean squared error and reports mean absolute error.
    """
    model = Sequential(name="my_first_model")
    # Input layer: one input per feature column. The local is called
    # `input_layer` so the built-in `input()` is not shadowed.
    input_layer = InputLayer(shape=(features.shape[1],))
    model.add(input_layer)
    # Hidden layer with `hidden_units` neurons (64 by default)
    model.add(Dense(hidden_units, activation='relu'))
    # Output layer: a single linear unit producing the regression estimate
    model.add(Dense(1))
    # Compile model
    opt = Adam(learning_rate=learning_rate)
    model.compile(loss='mse', metrics=['mae'], optimizer=opt)
    return model

# Load the dataset
dataset = pd.read_csv(data_path)

# Split the frame into features and target
features = dataset.iloc[:, 0:6]  # the first six columns are the features
labels = dataset.iloc[:, -1]  # the last column is the regression target

# One-hot encode the categorical feature columns
features = pd.get_dummies(features)

# Hold out 30% of the rows as a test set
features_train, features_test, labels_train, labels_test = train_test_split(
    features, labels, test_size=0.3, random_state=42)

# Standardize the numeric columns; every other column is passed through
# unchanged. The scaler is fitted on the training split only and then reused
# on the test split, so no test statistics leak into training.
ct = ColumnTransformer([('standardize', StandardScaler(), ['age', 'bmi', 'children'])],
                       remainder='passthrough')
features_train = ct.fit_transform(features_train)
features_test = ct.transform(features_test)

# Make sure everything handed to Keras is a float32 NumPy array
features_train = np.array(features_train, dtype=np.float32)
features_test = np.array(features_test, dtype=np.float32)
labels_train = np.array(labels_train, dtype=np.float32)
labels_test = np.array(labels_test, dtype=np.float32)

# Build the model
model = design_model(features_train)
# summary() prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line after the table.
model.summary()

# Train the model. batch_size=1 performs one weight update per sample.
model.fit(features_train, labels_train, epochs=20, batch_size=1, verbose=1)

# Evaluate on the held-out test split (returns the loss followed by the metrics)
val_mse, val_mae = model.evaluate(features_test, labels_test, verbose=0)

print("MAE: ", val_mae)

None
Epoch 1/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 275895488.0000 - mae: 12086.6592
Epoch 2/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 113062528.0000 - mae: 6810.2163
Epoch 3/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 81154560.0000 - mae: 6790.6309
Epoch 4/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 67589128.0000 - mae: 6303.7827
Epoch 5/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 56761092.0000 - mae: 5762.6953
Epoch 6/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - loss: 48590284.0000 - mae: 5378.2607
Epoch 7/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 43150412.0000 - mae: 5048.9722
Epoch 8/20
[1m936/936[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 40074000.0000 - mae: 47

# Classification

In [12]:
import pandas as pd
from collections import Counter
from sklearn.preprocessing import LabelEncoder
import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import InputLayer, Dense
from sklearn.metrics import classification_report
import numpy as np

# Load the training and test datasets
train_data = pd.read_csv("drive/MyDrive/MachineLearning/air_quality_train.csv")
test_data = pd.read_csv("drive/MyDrive/MachineLearning/air_quality_test.csv")

# Show the columns and dtypes of the training data. info() prints the report
# itself and returns None, so it must not be wrapped in print().
train_data.info()

# Check the class distribution of the target column 'Air_Quality'
print("Class distribution in training data:", Counter(train_data["Air_Quality"]))

# Feature columns: the pollutant measurements plus AQI. Defined once so the
# training and test frames are guaranteed to select the same columns in the
# same order.
feature_cols = ['PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2', 'O3', 'Benzene', 'Toluene', 'Xylene', 'AQI']

# Extract features and labels from the training data
x_train = train_data[feature_cols]
y_train = train_data["Air_Quality"]

# Extract features and labels from the test data
x_test = test_data[feature_cols]
y_test = test_data["Air_Quality"]

# Encode the string class labels as integers. The encoder is fitted on the
# training labels only; transform() raises on a test label it has not seen.
le = LabelEncoder()
y_train = le.fit_transform(y_train.astype(str))
y_test = le.transform(y_test.astype(str))
num_classes = len(le.classes_)

# Convert the integer labels to one-hot vectors. num_classes is passed
# explicitly so both matrices have one column per known class even if the
# highest-numbered class is absent from one of the splits.
y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = tensorflow.keras.utils.to_categorical(y_test, num_classes=num_classes)


# Design the neural network
model = Sequential()
# Input layer: one input per feature column
model.add(InputLayer(shape=(x_train.shape[1],)))
# Hidden layer: 10 neurons with ReLU activation
model.add(Dense(10, activation='relu'))
# Output layer: one softmax unit per class, sized from the fitted encoder
# rather than a hard-coded count
model.add(Dense(num_classes, activation='softmax'))

# Compile with categorical cross-entropy loss, the Adam optimizer, and accuracy as the metric
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model on the training data
model.fit(x_train, y_train, epochs=30, batch_size=16, verbose=0)

# Predict class probabilities for the test data
y_pred = model.predict(x_test)
# Convert predicted probabilities and one-hot ground truth back to class indices
y_pred = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# Print precision, recall and F1 per class. `labels` is given explicitly so
# the rows stay aligned with `target_names` even when a class appears in
# neither the ground truth nor the predictions.
print(classification_report(y_true, y_pred, labels=np.arange(num_classes), target_names=le.classes_))


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7782 entries, 0 to 7781
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PM2.5        7782 non-null   float64
 1   PM10         7782 non-null   float64
 2   NO           7782 non-null   float64
 3   NO2          7782 non-null   float64
 4   NOx          7782 non-null   float64
 5   NH3          7782 non-null   float64
 6   CO           7782 non-null   float64
 7   SO2          7782 non-null   float64
 8   O3           7782 non-null   float64
 9   Benzene      7782 non-null   float64
 10  Toluene      7782 non-null   float64
 11  Xylene       7782 non-null   float64
 12  AQI          7782 non-null   float64
 13  Air_Quality  7782 non-null   object 
dtypes: float64(13), object(1)
memory usage: 851.3+ KB
None
Class distribution in training data: Counter({'Very Poor': 1297, 'Poor': 1297, 'Moderate': 1297, 'Satisfactory': 1297, 'Severe': 1297, 'Good': 1297})




[1m44/44[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
              precision    recall  f1-score   support

        Good       0.68      0.97      0.80       100
    Moderate       0.90      0.50      0.64       508
        Poor       0.45      0.77      0.57       172
Satisfactory       0.76      0.85      0.80       452
      Severe       0.48      0.84      0.61        37
   Very Poor       0.67      0.57      0.61       125

    accuracy                           0.70      1394
   macro avg       0.66      0.75      0.67      1394
weighted avg       0.75      0.70      0.69      1394

