## Dataset Breast Cancer

### Load Dataset

In [358]:
from sklearn.datasets import load_breast_cancer

breast_cancer = load_breast_cancer()

X = breast_cancer.data
y = breast_cancer.target

### Splitting Dataset

In [359]:
from sklearn.model_selection import train_test_split

X_train_set, X_test_set, y_train_set, y_test_set = train_test_split(X, y, test_size = 0.2)

### Training Dataset
#### a. DecisionTreeClassifier

In [360]:
from sklearn.tree import DecisionTreeClassifier, export_text

model = DecisionTreeClassifier(random_state=0)

model.fit(X_train_set, y_train_set)

DTL_prediction = model.predict(X_test_set)

array_feature_name = []
for i in range(len(breast_cancer["feature_names"])):
    array_feature_name.append(breast_cancer["feature_names"][i])
    
r = export_text(model, feature_names=array_feature_name)
print(r)

|--- worst concave points <= 0.14
|   |--- worst area <= 961.55
|   |   |--- worst perimeter <= 101.95
|   |   |   |--- area error <= 48.98
|   |   |   |   |--- worst texture <= 33.35
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- worst texture >  33.35
|   |   |   |   |   |--- worst texture <= 33.80
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |   |--- worst texture >  33.80
|   |   |   |   |   |   |--- class: 1
|   |   |   |--- area error >  48.98
|   |   |   |   |--- worst smoothness <= 0.11
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- worst smoothness >  0.11
|   |   |   |   |   |--- class: 0
|   |   |--- worst perimeter >  101.95
|   |   |   |--- worst texture <= 29.18
|   |   |   |   |--- worst smoothness <= 0.14
|   |   |   |   |   |--- concave points error <= 0.02
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- concave points error >  0.02
|   |   |   |   |   |   |--- worst area <= 819.55
|   |   |   |   |   |   |   |--- class: 1
|   | 

#### b. Id3Estimator

In [361]:
import six

from id3 import Id3Estimator

model_id3 = Id3Estimator()
model_id3 = model_id3.fit(X_train_set, y_train_set)

id3_prediction = model_id3.predict(X_test_set)

#### c. K Means

In [362]:
from sklearn.cluster import KMeans

model_kmeans = KMeans(n_clusters=2, random_state=0)

model_kmeans.fit(X_train_set)

kmeans_prediction = model_kmeans.predict(X_test_set)

#### d. LogisticRegression

In [363]:
from sklearn.linear_model import LogisticRegression

model_logistic = LogisticRegression(max_iter=10000)

model_logistic.fit(X_train_set, y_train_set)

logistic_prediction = model_logistic.predict(X_test_set)

#### e. Neural_network

In [364]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

clf = MLPClassifier(max_iter=700, random_state=1).fit(X_train_set, y_train_set)

neural_prediction = clf.predict(X_test_set)

#### f. SVM

In [365]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

model_svm = make_pipeline(StandardScaler(), SVC(gamma='auto'))

model_svm.fit(X_train_set, y_train_set)

svm_prediction = model_svm.predict(X_test_set)

### Accuracy and F1-Score Table

In [366]:
from sklearn.metrics import accuracy_score, f1_score

# DecisionTreeClassifier
DTL_accuracy = accuracy_score(y_test_set, DTL_prediction)
DTL_f1 = f1_score(y_test_set, DTL_prediction)

# Id3Estimator
id3_accuracy = accuracy_score(y_test_set, id3_prediction)
id3_f1 = f1_score(y_test_set, id3_prediction)

# K Means
kmeans_accuracy = accuracy_score(y_test_set, kmeans_prediction)
kmeans_f1 = f1_score(y_test_set, kmeans_prediction)

# LogisticRegression
logistic_accuracy = accuracy_score(y_test_set, logistic_prediction)
logistic_f1 = f1_score(y_test_set, logistic_prediction)

# Neural_network
neural_accuracy = accuracy_score(y_test_set, neural_prediction)
neural_f1 = f1_score(y_test_set, neural_prediction)

# SVM
svm_accuracy = accuracy_score(y_test_set, svm_prediction)
svm_f1 = f1_score(y_test_set, svm_prediction)

accuracyAll = [DTL_accuracy, id3_accuracy, kmeans_accuracy, logistic_accuracy, neural_accuracy, svm_accuracy]
f1All = [DTL_f1, id3_f1, kmeans_f1, logistic_f1, neural_f1, svm_f1]
score_dataAll = {'accuracy': accuracyAll, 'f1': f1All}
allScore = pd.DataFrame(data = score_dataAll, index=['DecisionTreeClassifier', 'Id3Estimator', 'KMeans', 'LogisticRegression', 'Neural_Network', 'SVM'])
allScore

Unnamed: 0,accuracy,f1
DecisionTreeClassifier,0.929825,0.941176
Id3Estimator,0.938596,0.950355
KMeans,0.842105,0.886076
LogisticRegression,0.95614,0.964029
Neural_Network,0.95614,0.964539
SVM,0.964912,0.971014


## Dataset Play-tennis

### Load Dataset

In [158]:
import pandas as pd

dataset_play_tennis = pd.read_csv("PlayTennis.csv")
dataset_play_tennis_data = dataset_play_tennis[{'Outlook', 'Temperature', 'Humidity', 'Wind'}]
dataset_play_tennis_target = dataset_play_tennis[{'Play Tennis'}]

# dataset_play_tennis_target
# dataset_play_tennis_data

### Label Encoder

##### Humidity : 0 = "High", 1 = "Normal"
##### Temperature : 0 = "Cool", 1 = "Hot", 2 = "Mild"
##### Outlook : 0 = "Overcast", 1 = "Rain", 2 = "Sunny"
##### Wind : 0 = "Strong", 1 = "Weak"
##### Play Tennis : 0 = "No", 1 = "Yes"

In [159]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()

x_data_encoded = dataset_play_tennis_data.copy()
if dataset_play_tennis_data is not None:
    for column in dataset_play_tennis_data.columns:
        x_data_encoded[column] = le.fit_transform(x_data_encoded[column])

y_target_encoded = dataset_play_tennis_target.copy()
if dataset_play_tennis_target is not None:
    for column in dataset_play_tennis_target.columns:
        y_target_encoded[column] = le.fit_transform(y_target_encoded[column]) 

### Splitting Data

In [160]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x_data_encoded, y_target_encoded, test_size=0.2, random_state=1)

### Training Dataset
#### a. DecisionTreeClassifier

In [272]:
from sklearn.tree import DecisionTreeClassifier

model_pandas = DecisionTreeClassifier(max_depth = 10)

model_pandas.fit(x_train, y_train)

array_feature_name = []
for i in range(len(dataset_play_tennis_data.columns)):
    array_feature_name.append(dataset_play_tennis_data.columns[i])
    
DTL_prediction = model_pandas.predict(x_test)
    
r = export_text(model_pandas, feature_names=array_feature_name)
print(r)

|--- Outlook <= 0.50
|   |--- class: 1
|--- Outlook >  0.50
|   |--- Humidity <= 0.50
|   |   |--- class: 0
|   |--- Humidity >  0.50
|   |   |--- Wind <= 0.50
|   |   |   |--- Outlook <= 1.50
|   |   |   |   |--- class: 0
|   |   |   |--- Outlook >  1.50
|   |   |   |   |--- class: 1
|   |   |--- Wind >  0.50
|   |   |   |--- class: 1



#### b. Id3Estimator

In [271]:
from id3 import Id3Estimator

model_id3_pandas = Id3Estimator()
model_id3_pandas = model_id3_pandas.fit(x_train, y_train.values.ravel())

id3_prediction = model_id3_pandas.predict(x_test)

#### c. K Means

In [270]:
from sklearn.cluster import KMeans

model_kmeans_pandas = KMeans(n_clusters=2, random_state=0)

model_kmeans_pandas.fit(x_train)

kmeans_prediction = model_kmeans_pandas.predict(x_test)

#### d. LogisticRegression

In [273]:
from sklearn.linear_model import LogisticRegression

model_logistic_pandas = LogisticRegression(max_iter=10000)

model_logistic_pandas.fit(x_train, y_train.values.ravel())

logistic_prediction = model_logistic_pandas.predict(x_test)

#### e. Neural_network

In [268]:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

clf_pandas = MLPClassifier(random_state=1, max_iter=700).fit(x_train, y_train.values.ravel())

neural_prediction = clf_pandas.predict(x_test)

#### f. SVM

In [274]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

model_svm_pandas = make_pipeline(StandardScaler(), SVC(gamma='auto'))

model_svm_pandas.fit(x_train, y_train.values.ravel())

svm_prediction = model_svm_pandas.predict(x_test)

### Accuracy and F1-Score Table

In [276]:
from sklearn.metrics import accuracy_score, f1_score

# DecisionTreeClassifier
DTL_accuracy = accuracy_score(y_test, DTL_prediction)
DTL_f1 = f1_score(y_test, DTL_prediction)

# Id3Estimator
id3_accuracy = accuracy_score(y_test, id3_prediction)
id3_f1 = f1_score(y_test, id3_prediction)

# K Means
kmeans_accuracy = accuracy_score(y_test, kmeans_prediction)
kmeans_f1 = f1_score(y_test, kmeans_prediction)

# LogisticRegression
logistic_accuracy = accuracy_score(y_test, logistic_prediction)
logistic_f1 = f1_score(y_test, logistic_prediction)

# Neural_network
neural_accuracy = accuracy_score(y_test, neural_prediction)
neural_f1 = f1_score(y_test, neural_prediction)

# SVM
svm_accuracy = accuracy_score(y_test, svm_prediction)
svm_f1 = f1_score(y_test, svm_prediction)

accuracyAll = [DTL_accuracy, id3_accuracy, kmeans_accuracy, logistic_accuracy, neural_accuracy, svm_accuracy]
f1All = [DTL_f1, id3_f1, kmeans_f1, logistic_f1, neural_f1, svm_f1]
score_dataAll = {'accuracy': accuracyAll, 'f1': f1All}
allScore = pd.DataFrame(data = score_dataAll, index=['DecisionTreeClassifier', 'Id3Estimator', 'KMeans', 'LogisticRegression', 'Neural_Network', 'SVM'])
allScore

Unnamed: 0,accuracy,f1
DecisionTreeClassifier,0.666667,0.666667
Id3Estimator,0.666667,0.666667
KMeans,1.0,1.0
LogisticRegression,0.666667,0.8
Neural_Network,1.0,1.0
SVM,1.0,1.0
