In [4]:
import time

import numpy as np
from sklearn import datasets, neighbors
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from ECDNN import ECDNNClassifier

In [12]:
def _fit_predict_timed(model, X_train, y_train, X_test):
  """Fit ``model`` on the training split and predict the test split.

  Args:
    model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    X_train: Training samples.
    y_train: Training labels.
    X_test: Samples to predict.

  Returns:
    A ``(predictions, elapsed_seconds)`` tuple. The elapsed time spans both
    ``fit`` and ``predict``, even though the report labels it "Predict time".
  """
  # perf_counter is a monotonic, high-resolution clock meant for measuring
  # intervals; time.time() can jump if the system clock is adjusted.
  start = time.perf_counter()
  model.fit(X_train, y_train)
  predictions = model.predict(X_test)
  return predictions, time.perf_counter() - start


def _run_knn_benchmark(title, load_data, k):
  """Compare ECDNN with scikit-learn kNN on one dataset and print a report.

  Args:
    title: Dataset name inserted into the report banner.
    load_data: Zero-argument callable returning an object with ``data`` and
      ``target`` attributes (e.g. ``datasets.load_digits``).
    k: Passed as the first argument to ``ECDNNClassifier`` and as
      ``n_neighbors`` to ``KNeighborsClassifier``.
  """
  print("---------------%s dataset------------------" % title)
  print("Loading data.....")
  data = load_data()
  n_classes = len(np.unique(data.target))
  print("Done loading data!\n")

  # Fixed random_state so every run evaluates on the same 80/20 split.
  X_train, X_test, y_train, y_test = train_test_split(
      data.data, data.target, test_size=0.2, random_state=1)
  data_dim = len(X_train[0])
  print("Number of classes: %d" % n_classes)
  print("Data dimension: %d" % data_dim)
  print("Number of training samples: %d" % (len(y_train)))
  print("Number of testing samples: %d\n" % (len(y_test)))

  # Predict with ECDNN.
  predict, t = _fit_predict_timed(ECDNNClassifier(k), X_train, y_train, X_test)
  acc = accuracy_score(y_test, predict)
  print("Predict time for ECDNN: %.3fs" % (t))
  print("Accuracy for ECDNN with k = %d: %.3f\n" % (k, acc))

  # Predict with scikit-learn kNN, once per weighting scheme.
  for weights in ['uniform', 'distance']:
    knn = neighbors.KNeighborsClassifier(n_neighbors=k, weights=weights)
    predict, t = _fit_predict_timed(knn, X_train, y_train, X_test)
    acc = accuracy_score(y_test, predict)
    print("Predict time for kNN with %s weights: %.3fs" % (weights, t))
    print("Accuracy for kNN with k = %d and %s weights: %.3f\n" % (k, weights, acc))

  # Closing separator so consecutive reports are visually delimited.
  print("-----------------------------------------------\n")


def test_digits(k):
  """Print the ECDNN vs. kNN benchmark report for the digits dataset."""
  _run_knn_benchmark("Digits", datasets.load_digits, k)


def test_breast_cancer(k):
  """Print the ECDNN vs. kNN benchmark report for the breast cancer dataset."""
  _run_knn_benchmark("Breast Cancer", datasets.load_breast_cancer, k)

In [13]:
# Use the same k for both dataset benchmarks.
k = 21
print("Testing with k = %d\n" % k)
for run_benchmark in (test_digits, test_breast_cancer):
  run_benchmark(k)

Testing with k = 21

---------------Digits dataset------------------
Loading data.....
Done loading data!

Number of classes: 10
Data dimension: 64
Number of training samples: 1437
Number of testing samples: 360

Predict time for ECDNN: 0.050s
Accuracy for ECDNN with k = 21: 0.992

Predict time for kNN with uniform weights: 0.025s
Accuracy for kNN with k = 21 and uniform weights: 0.978

Predict time for kNN with distance weights: 0.015s
Accuracy for kNN with k = 21 and distance weights: 0.983

-----------------------------------------------

---------------Breast Cancer dataset------------------
Loading data.....
Done loading data!

Number of classes: 2
Data dimension: 30
Number of training samples: 455
Number of testing samples: 114

Predict time for ECDNN: 0.007s
Accuracy for ECDNN with k = 21: 0.947

Predict time for kNN with uniform weights: 0.011s
Accuracy for kNN with k = 21 and uniform weights: 0.912

Predict time for kNN with distance weights: 0.003s
Accuracy for kNN with k = 2

In [14]:
def test_digits_distance_metrics(k=21,distance_metrics='euclidean'):
  """Benchmark ECDNN on the digits dataset with a chosen distance metric.

  Loads the digits dataset, makes a fixed 80/20 train/test split, fits an
  ``ECDNNClassifier`` and prints the elapsed time and test accuracy.

  Args:
    k: First argument passed to ``ECDNNClassifier``; also shown in the report.
    distance_metrics: Second argument passed to ``ECDNNClassifier``; printed
      as the metric name. Which names are accepted depends on ECDNN.
  """
  print("---------------Digits dataset------------------")
  print("Loading data.....")
  data = datasets.load_digits()
  print("Done loading data!\n")

  # Fixed random_state so every metric is evaluated on the same split.
  X_train, X_test, y_train, y_test = train_test_split(
      data.data, data.target, test_size=0.2, random_state=1)
  print("Distance metrics: %s" % distance_metrics)

  # Predict with ECDNN. The timed span covers fit + predict, although the
  # report labels it "Predict time".
  ecdnn = ECDNNClassifier(k, distance_metrics)
  # perf_counter is monotonic and intended for interval measurement.
  start_cdnn = time.perf_counter()
  ecdnn.fit(X_train, y_train)
  predict = ecdnn.predict(X_test)
  t = time.perf_counter() - start_cdnn
  acc = accuracy_score(y_test, predict)
  print("Predict time for ECDNN: %.3fs" % (t))
  print("Accuracy for ECDNN with k = %d: %.3f\n" % (k, acc))

  print("-----------------------------------------------\n")

In [15]:
# Distance metrics to evaluate, one ECDNN run on the digits dataset per entry.
distance_metrics = [
  'cosine',
  'jensenshannon',
  'sqeuclidean',
]
for dm in distance_metrics:
  print(dm)
  test_digits_distance_metrics(distance_metrics=dm, k=21)

cosine
---------------Digits dataset------------------
Loading data.....
Done loading data!

Distance metrics: cosine
Predict time for ECDNN: 0.046s
Accuracy for ECDNN with k = 21: 0.997

-----------------------------------------------

jensenshannon
---------------Digits dataset------------------
Loading data.....
Done loading data!

Distance metrics: jensenshannon
Predict time for ECDNN: 1.420s
Accuracy for ECDNN with k = 21: 0.989

-----------------------------------------------

sqeuclidean
---------------Digits dataset------------------
Loading data.....
Done loading data!

Distance metrics: sqeuclidean
Predict time for ECDNN: 0.047s
Accuracy for ECDNN with k = 21: 0.992

-----------------------------------------------

