In [1]:
import pandas as pd
import scipy as sp
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn import svm
import six
import sys
sys.modules['sklearn.externals.six'] = six
import mlrose
import matplotlib as plt

In [2]:
from sklearn.datasets import load_breast_cancer

#Preparing data
breast_cancer = load_breast_cancer()
data_play_tennis  = pd.read_csv('play_tennis.csv')

#convert breast cancer data to pandas dataframe
data_breast_cancer = pd.DataFrame(breast_cancer['data'], columns=breast_cancer['feature_names'])
data_breast_cancer['target'] = breast_cancer['target']

In [3]:
#Breast Cancer Data Head
data_breast_cancer.head()


Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [4]:
# Play Tennis Data Head
from sklearn.preprocessing import LabelEncoder
Le = LabelEncoder()

data_play_tennis = data_play_tennis.drop(columns=['day'])
data_play_tennis['outlook'] = Le.fit_transform(data_play_tennis['outlook'])
data_play_tennis['temp'] = Le.fit_transform(data_play_tennis['temp'])
data_play_tennis['humidity'] = Le.fit_transform(data_play_tennis['humidity'])
data_play_tennis['wind'] = Le.fit_transform(data_play_tennis['wind'])
data_play_tennis['play'] = Le.fit_transform(data_play_tennis['play'])
data_play_tennis.head()

Unnamed: 0,outlook,temp,humidity,wind,play
0,2,1,0,1,0
1,2,1,0,0,0
2,0,1,0,1,1
3,1,2,0,1,1
4,1,0,1,1,1


In [5]:
#Split breast cancer to training and testing data
y1 = data_breast_cancer.target
x1 = data_breast_cancer.drop('target', axis = 1)

x1_train,x1_test,y1_train,y1_test=train_test_split(x1,y1,test_size=0.2)

print("shape of original dataset :", data_breast_cancer.shape)
print("shape of input - training set", x1_train.shape)
print("shape of output - training set", y1_train.shape)
print("shape of input - testing set", x1_test.shape)
print("shape of output - testing set", y1_test.shape)

shape of original dataset : (569, 31)
shape of input - training set (455, 30)
shape of output - training set (455,)
shape of input - testing set (114, 30)
shape of output - testing set (114,)


In [6]:
#Split play tennis to training and testing data
y2 = data_play_tennis.play
x2 = data_play_tennis.drop('play', axis = 1)

x2_train,x2_test,y2_train,y2_test=train_test_split(x2,y2,test_size=0.2)

print("shape of original dataset :", data_play_tennis.shape)
print("shape of input - training set", x2_train.shape)
print("shape of output - training set", y2_train.shape)
print("shape of input - testing set", x2_test.shape)
print("shape of output - testing set", y2_test.shape)

shape of original dataset : (14, 5)
shape of input - training set (11, 4)
shape of output - training set (11,)
shape of input - testing set (3, 4)
shape of output - testing set (3,)


In [7]:
# Decision Tree Classifier - Breast Cancer
from sklearn import tree
from sklearn import metrics
from sklearn.tree import DecisionTreeClassifier, export_text

clf = tree.DecisionTreeClassifier()
clf = clf.fit(x1_train, y1_train)
y_pred_1 = clf.predict(x1_test)
print("Accuracy:",metrics.accuracy_score(y1_test, y_pred_1))
print("F1:",metrics.f1_score(y1_test, y_pred_1))

#show graph
r = export_text(clf, feature_names=x1.columns.values.tolist())
print(r)

Accuracy: 0.9473684210526315
F1: 0.9594594594594594
|--- mean concave points <= 0.05
|   |--- worst radius <= 16.83
|   |   |--- area error <= 48.70
|   |   |   |--- worst smoothness <= 0.18
|   |   |   |   |--- smoothness error <= 0.00
|   |   |   |   |   |--- worst texture <= 27.76
|   |   |   |   |   |   |--- class: 1
|   |   |   |   |   |--- worst texture >  27.76
|   |   |   |   |   |   |--- class: 0
|   |   |   |   |--- smoothness error >  0.00
|   |   |   |   |   |--- class: 1
|   |   |   |--- worst smoothness >  0.18
|   |   |   |   |--- smoothness error <= 0.01
|   |   |   |   |   |--- class: 0
|   |   |   |   |--- smoothness error >  0.01
|   |   |   |   |   |--- class: 1
|   |   |--- area error >  48.70
|   |   |   |--- class: 0
|   |--- worst radius >  16.83
|   |   |--- worst texture <= 26.01
|   |   |   |--- class: 1
|   |   |--- worst texture >  26.01
|   |   |   |--- class: 0
|--- mean concave points >  0.05
|   |--- worst texture <= 20.70
|   |   |--- worst perimeter <

In [8]:
#Decision Tree Classifier - Breast Cancer

clf = tree.DecisionTreeClassifier()
clf = clf.fit(x2_train, y2_train)
y_pred_2 = clf.predict(x2_test)
print("Accuracy:",metrics.accuracy_score(y2_test, y_pred_2))
print("F1:",metrics.f1_score(y2_test, y_pred_2))

#show graph
r = export_text(clf, feature_names=x2.columns.values.tolist())
print(r)

Accuracy: 0.6666666666666666
F1: 0.6666666666666666
|--- humidity <= 0.50
|   |--- outlook <= 1.50
|   |   |--- wind <= 0.50
|   |   |   |--- outlook <= 0.50
|   |   |   |   |--- class: 1
|   |   |   |--- outlook >  0.50
|   |   |   |   |--- class: 0
|   |   |--- wind >  0.50
|   |   |   |--- class: 1
|   |--- outlook >  1.50
|   |   |--- class: 0
|--- humidity >  0.50
|   |--- class: 1



In [9]:
#Decision tree Estimator - Breast Cancer
from id3 import Id3Estimator, export_text

clf = Id3Estimator()
clf.fit(x1_train, y1_train, check_input=True)
y_pred_1 = clf.predict(x1_test)

print("Accuracy:",metrics.accuracy_score(y1_test, y_pred_1))
print("F1:",metrics.f1_score(y1_test, y_pred_1))

#show graph

print(export_text(clf.tree_, x1.columns.values.tolist()))

Accuracy: 0.9473684210526315
F1: 0.9589041095890412

mean concave points <=0.05
|   worst radius <=16.83
|   |   area error <=48.70
|   |   |   worst area <=766.45: 1 (233) 
|   |   |   worst area >766.45
|   |   |   |   mean radius <=13.56
|   |   |   |   |   mean texture <=18.17: 1 (1) 
|   |   |   |   |   mean texture >18.17: 0 (2) 
|   |   |   |   mean radius >13.56: 1 (19) 
|   |   area error >48.70: 0 (2) 
|   worst radius >16.83
|   |   mean texture <=18.68: 1 (7) 
|   |   mean texture >18.68: 0 (10) 
mean concave points >0.05
|   worst perimeter <=114.45
|   |   worst texture <=23.74
|   |   |   worst area <=810.80: 1 (16) 
|   |   |   worst area >810.80
|   |   |   |   mean radius <=14.50: 0 (3) 
|   |   |   |   mean radius >14.50: 1 (4) 
|   |   worst texture >23.74
|   |   |   worst symmetry <=0.27
|   |   |   |   mean texture <=21.49: 1 (3) 
|   |   |   |   mean texture >21.49: 0 (2) 
|   |   |   worst symmetry >0.27: 0 (23) 
|   worst perimeter >114.45: 0 (130) 



In [10]:
#Decision tree Estimator - Play Tennis
from id3 import Id3Estimator

clf = Id3Estimator()
clf.fit(x2_train, y2_train, check_input=True)
y_pred_2 = clf.predict(x2_test)

print("Accuracy:",metrics.accuracy_score(y2_test, y_pred_2))
print("F1:",metrics.f1_score(y2_test, y_pred_2))

#show graph

print(export_text(clf.tree_, x2.columns.values.tolist()))

Accuracy: 0.6666666666666666
F1: 0.6666666666666666

humidity <=0.50
|   outlook <=1.50
|   |   wind <=0.50: 0 (1/1) 
|   |   wind >0.50: 1 (1) 
|   outlook >1.50: 0 (2) 
humidity >0.50: 1 (6) 



In [11]:
#Network Algorithm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split


clf = MLPClassifier(random_state=1,max_iter=1000).fit(x2_train,y2_train)
clf.predict_proba(x2_train)
npredict = clf.predict(x2_test)
print("Accuracy:",metrics.accuracy_score(y2_test, npredict))
print("F1:",metrics.f1_score(y2_test, npredict))

Accuracy: 0.6666666666666666
F1: 0.6666666666666666


In [12]:
# Logistixc Regression Breast Cancer
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(random_state=0, solver='lbfgs', max_iter=10000).fit(x1_train, y1_train)
y_pred_1 = clf.predict(x1_test)
print("Accuracy:",metrics.accuracy_score(y1_test, y_pred_1))
print("F1:",metrics.f1_score(y1_test, y_pred_1))

Accuracy: 0.9385964912280702
F1: 0.9523809523809523


In [13]:
# Logistixc Regression Play Tennis

clf = LogisticRegression(random_state=0, solver='lbfgs', max_iter=3000).fit(x2_train, y2_train)
y_pred_2 = clf.predict(x2_test)
print("Accuracy:",metrics.accuracy_score(y2_test, y_pred_2))
print("F1:",metrics.f1_score(y2_test, y_pred_2))

Accuracy: 0.3333333333333333
F1: 0.5


In [14]:
#K Means
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

kmeans = KMeans(2)
kmeans.fit_predict(x2_train,y2_train)
kpredict = kmeans.predict(x2_test)
print("Accuracy:",metrics.accuracy_score(y2_test, kpredict))
print("F1:",metrics.f1_score(y2_test, kpredict))


Accuracy: 0.3333333333333333
F1: 0.0


In [15]:
#SVC
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

clf = make_pipeline(StandardScaler(), SVC(gamma='auto'))
clf.fit(x2_train,y2_train)
spredict = clf.predict(x2_test)
print("Accuracy:",metrics.accuracy_score(y2_test, spredict))
print("F1:",metrics.f1_score(y2_test, kpredict))


Accuracy: 0.3333333333333333
F1: 0.0
