In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier


# **การเตรียมข้อมูล (Preparing Data)**

In [None]:
df  = pd.read_csv('/content/mushrooms.csv')
# Display the column names of the dataframe again
print(df.columns)
df.head(10)

Index(['class', 'cap-shape', 'cap-surface', 'cap-color', 'bruises', 'odor',
       'gill-attachment', 'gill-spacing', 'gill-size', 'gill-color',
       'stalk-shape', 'stalk-root', 'stalk-surface-above-ring',
       'stalk-surface-below-ring', 'stalk-color-above-ring',
       'stalk-color-below-ring', 'veil-type', 'veil-color', 'ring-number',
       'ring-type', 'spore-print-color', 'population', 'habitat'],
      dtype='object')


Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,p,x,s,n,t,p,f,c,n,k,...,s,w,w,p,w,o,p,k,s,u
1,e,x,s,y,t,a,f,c,b,k,...,s,w,w,p,w,o,p,n,n,g
2,e,b,s,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,n,m
3,p,x,y,w,t,p,f,c,n,n,...,s,w,w,p,w,o,p,k,s,u
4,e,x,s,g,f,n,f,w,b,k,...,s,w,w,p,w,o,e,n,a,g
5,e,x,y,y,t,a,f,c,b,n,...,s,w,w,p,w,o,p,k,n,g
6,e,b,s,w,t,a,f,c,b,g,...,s,w,w,p,w,o,p,k,n,m
7,e,b,y,w,t,l,f,c,b,n,...,s,w,w,p,w,o,p,n,s,m
8,p,x,y,w,t,p,f,c,n,p,...,s,w,w,p,w,o,p,k,v,g
9,e,b,s,y,t,a,f,c,b,g,...,s,w,w,p,w,o,p,k,s,m


* class: edible=e, poisonous=p
* cap-shape: bell=b,conical=c,convex=x,flat=f, knobbed=k,sunken=s  
* cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s  
* cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yellow=y  
* bruises: bruises=t,no=f  
* odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s  
* gill-attachment: attached=a,descending=d,free=f,notched=n  
* gill-spacing: close=c,crowded=w,distant=d  
* gill-size: broad=b,narrow=n  
* gill-color: black=k,brown=n,buff=b,chocolate=h,gray=g,green=r,orange=o,pink=p,purple=u,red=e,white=w,yellow=y  
* stalk-shape: enlarging=e,tapering=t  
* stalk-root: bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?  
* stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s  
* stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s  
* stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y  
* stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,yellow=y  
* veil-type: partial=p,universal=u  
* veil-color: brown=n,orange=o,white=w,yellow=y  
* ring-number: none=n,one=o,two=t  
* ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=z  
* spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w,yellow=y  
* population: abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y  
* habitat: grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d  


In [None]:
df.describe()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
count,8124,8124,8124,8124,8124,8124,8124,8124,8124,8124,...,8124,8124,8124,8124,8124,8124,8124,8124,8124,8124
unique,2,6,4,10,2,9,2,2,2,12,...,4,9,9,1,4,3,5,9,6,7
top,e,x,y,n,f,n,f,c,b,b,...,s,w,w,p,w,o,p,w,v,d
freq,4208,3656,3244,2284,4748,3528,7914,6812,5612,1728,...,4936,4464,4384,8124,7924,7488,3968,2388,4040,3148


In [None]:
df_uniques = pd.DataFrame([[i, len(df[i].unique())] for i in df.columns], columns=['Variable', 'Unique Values']).set_index('Variable')
df_uniques

Unnamed: 0_level_0,Unique Values
Variable,Unnamed: 1_level_1
class,2
cap-shape,6
cap-surface,4
cap-color,10
bruises,2
odor,9
gill-attachment,2
gill-spacing,2
gill-size,2
gill-color,12


In [None]:
for attr in df.columns:
    print('\n', attr)
    print(df[attr].value_counts())


 class
e    4208
p    3916
Name: class, dtype: int64

 cap-shape
x    3656
f    3152
k     828
b     452
s      32
c       4
Name: cap-shape, dtype: int64

 cap-surface
y    3244
s    2556
f    2320
g       4
Name: cap-surface, dtype: int64

 cap-color
n    2284
g    1840
e    1500
y    1072
w    1040
b     168
p     144
c      44
u      16
r      16
Name: cap-color, dtype: int64

 bruises
f    4748
t    3376
Name: bruises, dtype: int64

 odor
n    3528
f    2160
y     576
s     576
a     400
l     400
p     256
c     192
m      36
Name: odor, dtype: int64

 gill-attachment
f    7914
a     210
Name: gill-attachment, dtype: int64

 gill-spacing
c    6812
w    1312
Name: gill-spacing, dtype: int64

 gill-size
b    5612
n    2512
Name: gill-size, dtype: int64

 gill-color
b    1728
p    1492
w    1202
n    1048
g     752
h     732
u     492
k     408
e      96
y      86
o      64
r      24
Name: gill-color, dtype: int64

 stalk-shape
t    4608
e    3516
Name: stalk-shape, dtype: int64

 

In [None]:
for i in df.columns:
  print(i,  df[i].unique())

class ['p' 'e']
cap-shape ['x' 'b' 's' 'f' 'k' 'c']
cap-surface ['s' 'y' 'f' 'g']
cap-color ['n' 'y' 'w' 'g' 'e' 'p' 'b' 'u' 'c' 'r']
bruises ['t' 'f']
odor ['p' 'a' 'l' 'n' 'f' 'c' 'y' 's' 'm']
gill-attachment ['f' 'a']
gill-spacing ['c' 'w']
gill-size ['n' 'b']
gill-color ['k' 'n' 'g' 'p' 'w' 'h' 'u' 'e' 'b' 'r' 'y' 'o']
stalk-shape ['e' 't']
stalk-root ['e' 'c' 'b' 'r' '?']
stalk-surface-above-ring ['s' 'f' 'k' 'y']
stalk-surface-below-ring ['s' 'f' 'y' 'k']
stalk-color-above-ring ['w' 'g' 'p' 'n' 'b' 'e' 'o' 'c' 'y']
stalk-color-below-ring ['w' 'p' 'g' 'b' 'n' 'e' 'y' 'o' 'c']
veil-type ['p']
veil-color ['w' 'n' 'o' 'y']
ring-number ['o' 't' 'n']
ring-type ['p' 'e' 'l' 'f' 'n']
spore-print-color ['k' 'n' 'u' 'h' 'w' 'r' 'o' 'y' 'b']
population ['s' 'n' 'a' 'v' 'y' 'c']
habitat ['u' 'g' 'm' 'd' 'p' 'w' 'l']


## feature extraction

In [None]:
# เพิ่มคอลัมน์ใน DataFrame แสดงค่าก่อนการแปลง
df_before_encoding = df.copy()

# Use LabelEncoder to do the numeric transformation
le = LabelEncoder()
# Iterate over all the values of each column and extract their dtypes
for col in df:
    # Compare if the dtype is object
    if df[col].dtype == 'object':
        # Use LabelEncoder to do the numeric transformation
         df[col] = le.fit_transform(df[col])

# แสดงค่าก่อนและหลังการแปลง
for col in df_before_encoding.columns:
    print(col)
    for before, after in zip(df_before_encoding[col].unique(), df[col].unique()):
        print(before, "->", after)
    print()


class
p -> 1
e -> 0

cap-shape
x -> 5
b -> 0
s -> 4
f -> 2
k -> 3
c -> 1

cap-surface
s -> 2
y -> 3
f -> 0
g -> 1

cap-color
n -> 4
y -> 9
w -> 8
g -> 3
e -> 2
p -> 5
b -> 0
u -> 7
c -> 1
r -> 6

bruises
t -> 1
f -> 0

odor
p -> 6
a -> 0
l -> 3
n -> 5
f -> 2
c -> 1
y -> 8
s -> 7
m -> 4

gill-attachment
f -> 1
a -> 0

gill-spacing
c -> 0
w -> 1

gill-size
n -> 1
b -> 0

gill-color
k -> 4
n -> 5
g -> 2
p -> 7
w -> 10
h -> 3
u -> 9
e -> 1
b -> 0
r -> 8
y -> 11
o -> 6

stalk-shape
e -> 0
t -> 1

stalk-root
e -> 3
c -> 2
b -> 1
r -> 4
? -> 0

stalk-surface-above-ring
s -> 2
f -> 0
k -> 1
y -> 3

stalk-surface-below-ring
s -> 2
f -> 0
y -> 3
k -> 1

stalk-color-above-ring
w -> 7
g -> 3
p -> 6
n -> 4
b -> 0
e -> 2
o -> 5
c -> 1
y -> 8

stalk-color-below-ring
w -> 7
p -> 6
g -> 3
b -> 0
n -> 4
e -> 2
y -> 8
o -> 5
c -> 1

veil-type
p -> 0

veil-color
w -> 2
n -> 0
o -> 1
y -> 3

ring-number
o -> 1
t -> 2
n -> 0

ring-type
p -> 4
e -> 0
l -> 2
f -> 1
n -> 3

spore-print-color
k -> 2
n -> 3
u ->

In [None]:
for i in df.columns:
  print(i,  df[i].unique())

class [1 0]
cap-shape [5 0 4 2 3 1]
cap-surface [2 3 0 1]
cap-color [4 9 8 3 2 5 0 7 1 6]
bruises [1 0]
odor [6 0 3 5 2 1 8 7 4]
gill-attachment [1 0]
gill-spacing [0 1]
gill-size [1 0]
gill-color [ 4  5  2  7 10  3  9  1  0  8 11  6]
stalk-shape [0 1]
stalk-root [3 2 1 4 0]
stalk-surface-above-ring [2 0 1 3]
stalk-surface-below-ring [2 0 3 1]
stalk-color-above-ring [7 3 6 4 0 2 5 1 8]
stalk-color-below-ring [7 6 3 0 4 2 8 5 1]
veil-type [0]
veil-color [2 0 1 3]
ring-number [1 2 0]
ring-type [4 0 2 1 3]
spore-print-color [2 3 6 1 7 5 4 8 0]
population [3 2 0 4 5 1]
habitat [5 1 3 0 4 6 2]


##บันทึกข้อมูลที่แปลงจากตัวอักษรเป็นตัวเลข

In [None]:
df.to_csv('encode_data.csv', index=False)
df.head()

Unnamed: 0,class,cap-shape,cap-surface,cap-color,bruises,odor,gill-attachment,gill-spacing,gill-size,gill-color,...,stalk-surface-below-ring,stalk-color-above-ring,stalk-color-below-ring,veil-type,veil-color,ring-number,ring-type,spore-print-color,population,habitat
0,1,5,2,4,1,6,1,0,1,4,...,2,7,7,0,2,1,4,2,3,5
1,0,5,2,9,1,0,1,0,0,4,...,2,7,7,0,2,1,4,3,2,1
2,0,0,2,8,1,3,1,0,0,5,...,2,7,7,0,2,1,4,3,2,3
3,1,5,3,8,1,6,1,0,1,5,...,2,7,7,0,2,1,4,2,3,5
4,0,5,2,3,0,5,1,1,0,4,...,2,7,7,0,2,1,0,3,0,1


In [None]:
#เอาคอลั่ม veil-type ออกเพราะ มีแค่ค่า 0 ค่าเดียว
df.drop('veil-type', axis=1, inplace=True)

In [None]:
# แบ่งข้อมูลเป็น features (X) และ target (y)
X = df.drop('class', axis=1)
y = df['class']

##บันทึกข้อมูลสำหรับ train และ test

In [None]:
# บันทึก DataFrame ลงในไฟล์ CSV
df.to_csv('mushroom_data_processed.csv', index=False)

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# แบ่งข้อมูลเป็น Train และ Test Set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# **การเลือกโมเดล (Model Selection)**

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score


##การทดลองสร้างโมเดล

In [None]:
# Decision Tree
best_score = 0
best_depth = None

# ทดลองค่าต่างๆ ของพารามิเตอร์ max_depth
for depth in range(1, 11):  # ทดลองค่า max_depth ตั้งแต่ 1 ถึง 10
    dt_model = DecisionTreeClassifier(max_depth=depth)
    scores = cross_val_score(dt_model, X, y, cv=5)
    avg_score = scores.mean()
    if avg_score > best_score:
        best_score = avg_score
        best_depth = depth

print("Best Average Score:", best_score)
print("Best max_depth:", best_depth)

dt_model = DecisionTreeClassifier(max_depth=best_depth)
dt_model.fit(X_train, y_train)
dt_y_pred = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_y_pred)
print("Decision Tree Accuracy Score:", dt_accuracy)

# Support Vector Machine (SVM)
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_y_pred = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_y_pred)
print("SVM Accuracy Score:", svm_accuracy)

# K-Nearest Neighbors (KNN)
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
knn_y_pred = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_y_pred)
print("KNN Accuracy Score:", knn_accuracy)

# Artificial Neural Network (ANN)
ann_model = MLPClassifier()
ann_model.fit(X_train, y_train)
ann_y_pred = ann_model.predict(X_test)
ann_accuracy = accuracy_score(y_test, ann_y_pred)
print("ANN Accuracy Score:", ann_accuracy)


Best Average Score: 0.9540862447896931
Best max_depth: 6
Decision Tree Accuracy Score: 0.9993846153846154
SVM Accuracy Score: 0.9938461538461538
KNN Accuracy Score: 0.9963076923076923
ANN Accuracy Score: 1.0


##cross validation score

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score


# Decision Tree
dt_model = DecisionTreeClassifier(max_depth=best_depth)
dt_model.fit(X_train, y_train)
dt_scores = cross_val_score(dt_model, X_test, y_test, cv=5)
dt_avg_accuracy = dt_scores.mean()
print("Decision Tree Cross-Validation Scores:", dt_scores)
print("Decision Tree Average Accuracy:", dt_avg_accuracy)

# Support Vector Machine (SVM)
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_scores = cross_val_score(svm_model, X_test, y_test, cv=5)
svm_avg_accuracy = svm_scores.mean()
print("SVM Cross-Validation Scores:", svm_scores)
print("SVM Average Accuracy:", svm_avg_accuracy)

# K-Nearest Neighbors (KNN)
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
knn_scores = cross_val_score(knn_model, X_test, y_test, cv=5)
knn_avg_accuracy = knn_scores.mean()
print("KNN Cross-Validation Scores:", knn_scores)
print("KNN Average Accuracy:", knn_avg_accuracy)

# Artificial Neural Network (ANN)
ann_model = MLPClassifier()
ann_model.fit(X_train, y_train)
ann_scores = cross_val_score(ann_model, X_test, y_test, cv=5)
ann_avg_accuracy = ann_scores.mean()
print("ANN Cross-Validation Scores:", ann_scores)
print("ANN Average Accuracy:", ann_avg_accuracy)


Decision Tree Cross-Validation Scores: [1.         0.97846154 0.98769231 0.98153846 0.99384615]
Decision Tree Average Accuracy: 0.9883076923076922
SVM Cross-Validation Scores: [0.96307692 0.98769231 0.94769231 0.94769231 0.95384615]
SVM Average Accuracy: 0.96
KNN Cross-Validation Scores: [0.98461538 0.98769231 0.99076923 0.99076923 0.98461538]
KNN Average Accuracy: 0.9876923076923078




ANN Cross-Validation Scores: [1.         1.         0.99384615 0.99692308 1.        ]
ANN Average Accuracy: 0.9981538461538463




# **การประเมินโมเดล (Model Evaluation)**

##ค่า confusion matrix

In [None]:
from sklearn.metrics import confusion_matrix

# สร้างโมเดล Decision Tree
dt_model = DecisionTreeClassifier(max_depth=best_depth)
dt_model.fit(X_train, y_train)
dt_y_pred = dt_model.predict(X_test)

# สร้างโมเดล SVM
svm_model = SVC()
svm_model.fit(X_train, y_train)
svm_y_pred = svm_model.predict(X_test)

# สร้างโมเดล KNN
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)
knn_y_pred = knn_model.predict(X_test)

# สร้างโมเดล ANN
ann_model = MLPClassifier()
ann_model.fit(X_train, y_train)
ann_y_pred = ann_model.predict(X_test)

# คำนวณ Confusion Matrix สำหรับทุกโมเดล
dt_cm = confusion_matrix(y_test, dt_y_pred)
svm_cm = confusion_matrix(y_test, svm_y_pred)
knn_cm = confusion_matrix(y_test, knn_y_pred)
ann_cm = confusion_matrix(y_test, ann_y_pred)

# แสดง Confusion Matrix สำหรับทุกโมเดล
print("Confusion Matrix for Decision Tree:")
print(dt_cm)
print("\nConfusion Matrix for SVM:")
print(svm_cm)
print("\nConfusion Matrix for KNN:")
print(knn_cm)
print("\nConfusion Matrix for ANN:")
print(ann_cm)


Confusion Matrix for Decision Tree:
[[843   0]
 [  1 781]]

Confusion Matrix for SVM:
[[842   1]
 [  9 773]]

Confusion Matrix for KNN:
[[837   6]
 [  0 782]]

Confusion Matrix for ANN:
[[843   0]
 [  0 782]]


## อธิบายและวิเคราะห์ผลการประเมินประสิทธิภาพของโมเดลที่ได้



Decision Tree:

True Positives (TP): 843
False Positives (FP): 0
True Negatives (TN): 781
False Negatives (FN): 1

SVM:

True Positives (TP): 842
False Positives (FP): 1
True Negatives (TN): 773
False Negatives (FN): 9

KNN:

True Positives (TP): 837
False Positives (FP): 6
True Negatives (TN): 782
False Negatives (FN): 0

ANN:

True Positives (TP): 843
False Positives (FP): 0
True Negatives (TN): 782
False Negatives (FN): 0

ANN เป็นโมเดลที่ดีที่สุดเพราะ มีค่า TP สูงถึง 843 และ TN สูงถึง 782 และค่า FP FN เป็น 0

# **การนำโมเดลไปใช้งาน (Deploy Model)**


## บันทึกโมเดลที่ดีกว่า




In [46]:
import pickle

# บันทึกโมเดล ANN ที่ดีที่สุดไปยังไฟล์ชื่อ 'best_ann_model.pkl'
model_file = "best_ann_model.pkl"
with open(model_file, "wb") as file:
    pickle.dump(ann_model, file)

# โหลดโมเดล
loaded_model_file = "best_ann_model.pkl"
with open(loaded_model_file, "rb") as file:
    loaded_model = pickle.load(file)


## อ่านข้อมูลเข้ามา 2 ตัวอย่างและทำนายผล

In [None]:

#x_sample = [5, 2, 9, 1, 0, 1, 0, 0, 4, 0, 2, 2, 2, 7 ,7, 2, 0, 4 ,3 ,2, 2]
x_sample = df.iloc[0].values[1:]  # เรียกใช้ข้อมูลใน index ที่ 2 และเอาค่าทั้งหมดนอกเส้นแรก (เริ่มที่ index 1)
print("x_sample:", x_sample)

# ทำนายผลลัพธ์
prediction = loaded_model.predict([x_sample])

# แปลงผลลัพธ์เป็นข้อความ "กินได้" หรือ "มีพิษ"
if prediction[0] == 0:
    print("Prediction: กินได้")
else:
    print("Prediction: มีพิษ")


x_sample: [5 2 4 1 6 1 0 1 4 0 3 2 2 7 7 2 1 4 2 3 5]
Prediction: มีพิษ




In [None]:

x_sample = df.iloc[4].values[1:]  # เรียกใช้ข้อมูลใน index ที่ 2 และเอาค่าทั้งหมดนอกเส้นแรก (เริ่มที่ index 1)
print("x_sample:", x_sample)

# ทำนายผลลัพธ์
prediction = loaded_model.predict([x_sample])

# แปลงผลลัพธ์เป็นข้อความ "กินได้" หรือ "มีพิษ"
if prediction[0] == 0:
    print("Prediction: กินได้")
else:
    print("Prediction: มีพิษ")


x_sample: [5 2 3 0 5 1 1 0 4 1 3 2 2 7 7 2 1 0 3 0 1]
Prediction: กินได้


