In [224]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score

In [225]:
data_cancer = datasets.load_breast_cancer()
data = data_cancer.data
label = data_cancer.target
train_data, test_data, train_label, test_label = train_test_split(data, label, test_size = 0.25, random_state = 0)
sc = StandardScaler()
sc.fit(train_data)
train_data_std = sc.transform(train_data)
test_data_std = sc.transform(test_data)

In [226]:
logistic_model = LogisticRegression()
logistic_model.fit(train_data_std, train_label)
print(logistic_model.score(train_data_std, train_label))
print(logistic_model.score(test_data_std, test_label) == accuracy_score(test_label, logistic_model.predict(test_data_std)))

0.9906103286384976
True


In [227]:
# accuracy_score is only used in classification problems, not used in regression problems
linear_model = LinearRegression()
linear_model.fit(train_data_std, train_label)
print(linear_model.score(train_data_std, train_label))
print(linear_model.score(test_data_std, test_label))

0.7824123695930644
0.729175870611405


In [228]:
from sklearn.svm import SVC
svm = SVC()
svm.fit(train_data_std, train_label)
print(svm.score(train_data_std, train_label))
print(svm.score(test_data_std, test_label) == accuracy_score(test_label, svm.predict(test_data_std)))
# print(np.sum(svm.predict(test_data_std) == test_label)/test_label.shape[0])

0.9859154929577465
True


In [229]:
# decision_tree.score is r2_score, not accuracy_score.
from sklearn.tree import DecisionTreeRegressor
decision_tree = DecisionTreeRegressor()
decision_tree.fit(train_data_std, train_label)
print(decision_tree.score(train_data_std, train_label))
print(decision_tree.score(test_data_std, test_label))
print(r2_score(test_label, decision_tree.predict(test_data_std)))
print(accuracy_score(test_label, decision_tree.predict(test_data_std)))
print(np.sum(decision_tree.predict(test_data_std) == test_label)/test_label.shape[0])

1.0
0.550314465408805
0.550314465408805
0.8951048951048951
0.8951048951048951


In [230]:
# naive_bayes.score is r2_score, not accuracy_score.
from sklearn.naive_bayes import GaussianNB
naive_bayes = DecisionTreeRegressor()
naive_bayes.fit(train_data_std, train_label)
print(naive_bayes.score(train_data_std, train_label))
print(naive_bayes.score(test_data_std, test_label))
print(r2_score(test_label, naive_bayes.predict(test_data_std)))
print(accuracy_score(test_label, naive_bayes.predict(test_data_std),sample_weight = None))
print(np.sum(naive_bayes.predict(test_data_std) == test_label)/test_label.shape[0])

1.0
0.6702306079664571
0.6702306079664571
0.9230769230769231
0.9230769230769231


In [231]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 3)
knn.fit(train_data_std, train_label)
print(knn.score(train_data_std, train_label))
print(knn.score(test_data_std, test_label) == accuracy_score(test_label, knn.predict(test_data_std)))

0.9835680751173709
True
