<a href="https://colab.research.google.com/github/Victor-huang1123/ADC/blob/main/hw.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# 載入資料集
data = pd.read_csv('/content/hypothyroid_modified_cjlin.arff', comment='@', skiprows=29, header=None)
data = data.apply(lambda x: x.replace("'", ""))

# 提取特徵和目標變數
# 提取特徵和目標變數
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
# LabelEncoder 將目標變數轉換為數值型
class_mapping = {'negative': 0, 'compensated_hypothyroid': 1, 'primary_hypothyroid': 2, 'secondary_hypothyroid': 3}
y = np.array([class_mapping[label] for label in y])

# 編碼類別型特徵
categorical_features = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
label_encoders = []
X_encoded_categorical = []

for feature in categorical_features:
    label_encoder = LabelEncoder()
    X_encoded = label_encoder.fit_transform(X[:, feature])
    X_encoded_categorical.append(X_encoded)
    label_encoders.append(label_encoder)

X_encoded_categorical = np.array(X_encoded_categorical).T



In [2]:
# 將數值型特徵進行標準化
numeric_features = [0,17, 18, 19, 20]
X_numeric = X[:, numeric_features].astype(float)
scaler = StandardScaler()
X_numeric = scaler.fit_transform(X_numeric)


# 合併編碼後的類別型特徵和標準化後的數值型特徵
X_encoded = np.concatenate((X_encoded_categorical, X_numeric), axis=1)

# 將資料分成訓練集和測試集
X_train, X_test, y_train, y_test = train_test_split(X_encoded, y, test_size=0.2, random_state=42)

In [3]:
nb_classifier = GaussianNB()
svm_classifier = SVC()
dt_classifier = DecisionTreeClassifier()
rf_classifier = RandomForestClassifier()

In [4]:
classifiers = [nb_classifier, svm_classifier, dt_classifier, rf_classifier]
for classifier in classifiers:
    classifier.fit(X_train, y_train)
    accuracy = classifier.score(X_test, y_test)
    print(f"{classifier.__class__.__name__} accuracy: {accuracy}")

GaussianNB accuracy: 0.375
SVC accuracy: 0.9735294117647059
DecisionTreeClassifier accuracy: 0.9970588235294118
RandomForestClassifier accuracy: 0.9970588235294118


In [8]:
# 使用Naive Bayes分類器
nb_classifier = GaussianNB()

nb_classifier.fit(X_train, y_train)
nb_predictions = nb_classifier.predict(X_test)
nb_accuracy = accuracy_score(y_test, nb_predictions)
print("Naive Bayes 分類器準確率：", nb_accuracy)

# 使用SVM分類器
svm_classifier = SVC()
svm_classifier.fit(X_train, y_train)
svm_predictions = svm_classifier.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print("SVM 分類器準確率：", svm_accuracy)

# 使用Decision Tree分類器
dt_classifier = DecisionTreeClassifier()
dt_classifier.fit(X_train, y_train)
dt_predictions = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print("Decision Tree 分類器準確率：", dt_accuracy)

# 使用Random Forest分類器
rf_classifier = RandomForestClassifier()
rf_classifier.fit(X_train, y_train)
rf_predictions = rf_classifier.predict(X_test)
rf_accuracy = accuracy_score(y_test, rf_predictions)
print("Random Forest 分類器準確率：", rf_accuracy)


Naive Bayes 分類器準確率： 0.375
SVM 分類器準確率： 0.9735294117647059
Decision Tree 分類器準確率： 0.9970588235294118
Random Forest 分類器準確率： 0.9955882352941177
