In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import load_iris

In [8]:
models = {
"knn": KNeighborsClassifier(n_neighbors=1),
"naive_bayes": GaussianNB(),
"logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
"svm": SVC(kernel="rbf", gamma="auto"),
"decision_tree": DecisionTreeClassifier(),
"random_forest": RandomForestClassifier(n_estimators=100),}

In [9]:
# load the Iris dataset and perform a training and testing split
# using 75% of the data for training and 25% for evaluation
print("[INFO] loading data...")
dataset = load_iris()
(trainX, testX, trainY, testY) = train_test_split(dataset.data,
dataset.target, random_state=3, test_size=0.25)

[INFO] loading data...


In [10]:
dataset

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [11]:
# train the modelprint("[INFO] using '{}' model".format(model_name))
model_name="knn"
model = models[model_name]
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating...")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating...
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       0.92      0.92      0.92        12
   virginica       0.91      0.91      0.91        11

    accuracy                           0.95        38
   macro avg       0.94      0.94      0.94        38
weighted avg       0.95      0.95      0.95        38



In [12]:
model_name="knn"
model = models["random_forest"]
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating...")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating...
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        15
  versicolor       1.00      0.92      0.96        12
   virginica       0.92      1.00      0.96        11

    accuracy                           0.97        38
   macro avg       0.97      0.97      0.97        38
weighted avg       0.98      0.97      0.97        38



In [13]:
from sklearn.datasets import load_breast_cancer
load_breast_cancer()

{'data': array([[1.799e+01, 1.038e+01, 1.228e+02, ..., 2.654e-01, 4.601e-01,
         1.189e-01],
        [2.057e+01, 1.777e+01, 1.329e+02, ..., 1.860e-01, 2.750e-01,
         8.902e-02],
        [1.969e+01, 2.125e+01, 1.300e+02, ..., 2.430e-01, 3.613e-01,
         8.758e-02],
        ...,
        [1.660e+01, 2.808e+01, 1.083e+02, ..., 1.418e-01, 2.218e-01,
         7.820e-02],
        [2.060e+01, 2.933e+01, 1.401e+02, ..., 2.650e-01, 4.087e-01,
         1.240e-01],
        [7.760e+00, 2.454e+01, 4.792e+01, ..., 0.000e+00, 2.871e-01,
         7.039e-02]]),
 'target': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0,
        1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0

In [16]:
import pandas as pd
df = pd.read_csv(r'C:\\Users\\asus\\Anaconda3a\\lib\\site-packages\\sklearn\\datasets\\data\\breast_cancer.csv')

FileNotFoundError: [Errno 2] File b'C:\\\\Users\\\\asus\\\\Anaconda3a\\\\lib\\\\site-packages\\\\sklearn\\\\datasets\\\\data\\\\breast_cancer.csv' does not exist: b'C:\\\\Users\\\\asus\\\\Anaconda3a\\\\lib\\\\site-packages\\\\sklearn\\\\datasets\\\\data\\\\breast_cancer.csv'

In [None]:
dataset = load_breast_cancer()
(trainX, testX, trainY, testY) = train_test_split(dataset.data,
dataset.target, random_state=3, test_size=0.25)

In [18]:
model_name="knn"
model = models[model_name]
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating knn")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating knn
              precision    recall  f1-score   support

   malignant       0.90      0.87      0.88        53
      benign       0.92      0.94      0.93        90

   micro avg       0.92      0.92      0.92       143
   macro avg       0.91      0.91      0.91       143
weighted avg       0.92      0.92      0.92       143



In [19]:
#models = {
#"knn": KNeighborsClassifier(n_neighbors=1),
#"naive_bayes": GaussianNB(),
#"logit": LogisticRegression(solver="lbfgs", multi_class="auto"),
#"svm": SVC(kernel="rbf", gamma="auto"),
#"decision_tree": DecisionTreeClassifier(),
#"random_forest": RandomForestClassifier(n_estimators=100),}

In [20]:
model = models['naive_bayes']
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating naive bayes")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating naive bayes
              precision    recall  f1-score   support

   malignant       0.94      0.89      0.91        53
      benign       0.94      0.97      0.95        90

   micro avg       0.94      0.94      0.94       143
   macro avg       0.94      0.93      0.93       143
weighted avg       0.94      0.94      0.94       143



In [21]:
model = models['logit']
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating logit")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating logit
              precision    recall  f1-score   support

   malignant       0.92      0.91      0.91        53
      benign       0.95      0.96      0.95        90

   micro avg       0.94      0.94      0.94       143
   macro avg       0.93      0.93      0.93       143
weighted avg       0.94      0.94      0.94       143





In [22]:
model = models['svm']
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating svm")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating svm
              precision    recall  f1-score   support

   malignant       0.00      0.00      0.00        53
      benign       0.63      1.00      0.77        90

   micro avg       0.63      0.63      0.63       143
   macro avg       0.31      0.50      0.39       143
weighted avg       0.40      0.63      0.49       143



  'precision', 'predicted', average, warn_for)


In [23]:
model = models['decision_tree']
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating decision tree")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating decision tree
              precision    recall  f1-score   support

   malignant       0.91      0.92      0.92        53
      benign       0.96      0.94      0.95        90

   micro avg       0.94      0.94      0.94       143
   macro avg       0.93      0.93      0.93       143
weighted avg       0.94      0.94      0.94       143



In [24]:
model = models['random_forest']
model.fit(trainX, trainY)
# make predictions on our data and show a classification report
print("[INFO] evaluating random_forest")
predictions = model.predict(testX)
print(classification_report(testY, predictions,
target_names=dataset.target_names))

[INFO] evaluating random_forest
              precision    recall  f1-score   support

   malignant       0.94      0.91      0.92        53
      benign       0.95      0.97      0.96        90

   micro avg       0.94      0.94      0.94       143
   macro avg       0.94      0.94      0.94       143
weighted avg       0.94      0.94      0.94       143

