### The Breast Cancer Wisconsin dataset provided by Scikit-learn has been used for the experiments.

For more details about the dataset visit - https://archive.ics.uci.edu/ml/datasets/breast+cancer+wisconsin+(diagnostic)

In [None]:
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
import numpy as np


In [None]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [None]:
# Load the Breast Cancer Wisconsin dataset shipped with scikit-learn; the cells
# below read its feature matrix (`cancer.data`) and labels (`cancer.target`).
cancer = datasets.load_breast_cancer()

# M = 8 (number of training samples)

In [None]:
import random

# Draw a random subset of 16 (feature row, label) pairs from the full dataset.
# A dedicated, seeded generator makes the draw repeatable under
# "Restart Kernel -> Run All"; the previous unseeded call returned a different
# subset on every run. Arrays printed further down that came from an earlier
# unseeded run will not match until the notebook is re-executed.
SAMPLE_SEED = 22
rng = random.Random(SAMPLE_SEED)

pairs = list(zip(cancer.data, cancer.target))  # pair each feature row with its label
pairs = rng.sample(pairs, 16)  # pick 16 distinct random pairs
data, target = zip(*pairs)  # split the pairs back into two parallel tuples

In [None]:
# Split the sampled points 50/50 into train and test halves; the fixed
# random_state keeps the partition identical for identical input.
(sample_train, sample_test,
 label_train, label_test) = train_test_split(
    data,
    target,
    test_size=0.5,
    random_state=22,
)

### We use PCA to reduce the number of features in each sample, which cuts down the computation time.

In [None]:
# Project every sample onto `n_dim` principal component(s). The PCA basis is
# learned from the training half only and then applied to both halves.
# NOTE: this cell overwrites sample_train / sample_test, so it must be run
# exactly once after the split cell; re-running it transforms already-reduced data.
n_dim = 1
pca = PCA(n_components=n_dim)
pca.fit(sample_train)
sample_train, sample_test = pca.transform(sample_train), pca.transform(sample_test)

# Normalise: standardise both halves with statistics fitted on the training half.
std_scale = StandardScaler()
std_scale.fit(sample_train)
sample_train, sample_test = std_scale.transform(sample_train), std_scale.transform(sample_test)

# Scale: map train and test together into the range [-1, 1].
# NOTE(review): the min/max are fitted on train + test combined, so test-set
# extremes influence the scaled training features -- confirm this is intended.
samples = np.concatenate((sample_train, sample_test), axis=0)
minmax_scale = MinMaxScaler((-1, 1))
minmax_scale.fit(samples)
sample_train, sample_test = minmax_scale.transform(sample_train), minmax_scale.transform(sample_test)

In [None]:
# Alias the training features and training labels as X and y.
X, y = sample_train, label_train

### **In order to maintain consistency across all experiments, the same data samples printed below have been copied and used as inputs in all our experiments**

In [None]:
X

array([[-0.36998041],
       [ 0.18273028],
       [-0.83346932],
       [-0.4136746 ],
       [ 0.12647021],
       [-0.74441099],
       [ 0.90043098],
       [-0.52703073]])

In [None]:
y

[0, 0, 1, 1, 1, 1, 0, 1]

In [None]:
sample_test

array([[-0.61831361],
       [ 1.        ],
       [ 0.46821185],
       [-0.85423059],
       [-0.95132588],
       [-0.75467596],
       [-1.        ],
       [-0.91779177]])

In [None]:
label_test

[0, 0, 0, 1, 1, 0, 1, 1]

# M = 16 (number of training samples)


In [None]:
import random

# Draw a random subset of 32 (feature row, label) pairs from the full dataset.
# A dedicated, seeded generator makes the draw repeatable under
# "Restart Kernel -> Run All"; the previous unseeded call returned a different
# subset on every run. Arrays printed further down that came from an earlier
# unseeded run will not match until the notebook is re-executed.
SAMPLE_SEED = 22
rng = random.Random(SAMPLE_SEED)

pairs = list(zip(cancer.data, cancer.target))  # pair each feature row with its label
pairs = rng.sample(pairs, 32)  # pick 32 distinct random pairs
data, target = zip(*pairs)  # split the pairs back into two parallel tuples

In [None]:
# Split the sampled points 50/50 into train and test halves; the fixed
# random_state keeps the partition identical for identical input.
(sample_train, sample_test,
 label_train, label_test) = train_test_split(
    data,
    target,
    test_size=0.5,
    random_state=22,
)

In [None]:
# Project every sample onto `n_dim` principal component(s). The PCA basis is
# learned from the training half only and then applied to both halves.
# NOTE: this cell overwrites sample_train / sample_test, so it must be run
# exactly once after the split cell; re-running it transforms already-reduced data.
n_dim = 1
pca = PCA(n_components=n_dim)
pca.fit(sample_train)
sample_train, sample_test = pca.transform(sample_train), pca.transform(sample_test)

# Normalise: standardise both halves with statistics fitted on the training half.
std_scale = StandardScaler()
std_scale.fit(sample_train)
sample_train, sample_test = std_scale.transform(sample_train), std_scale.transform(sample_test)

# Scale: map train and test together into the range [-1, 1].
# NOTE(review): the min/max are fitted on train + test combined, so test-set
# extremes influence the scaled training features -- confirm this is intended.
samples = np.concatenate((sample_train, sample_test), axis=0)
minmax_scale = MinMaxScaler((-1, 1))
minmax_scale.fit(samples)
sample_train, sample_test = minmax_scale.transform(sample_train), minmax_scale.transform(sample_test)

In [None]:
# no_of_Q is set here but not read anywhere else in this cell.
no_of_Q = 4

# Alias the training features and training labels as X and y.
X, y = sample_train, label_train

In [None]:
X

array([[-0.76764442],
       [-0.61776561],
       [-0.76501246],
       [ 0.13807384],
       [-0.61909054],
       [ 0.36288268],
       [-1.        ],
       [-0.61300765],
       [-0.33114908],
       [-0.65033625],
       [-0.79890335],
       [-0.56009028],
       [ 0.10323057],
       [-0.52076065],
       [-0.89158817],
       [-0.73638509]])

In [None]:
y 

[1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1]

In [None]:
sample_test

array([[-0.95872091],
       [-0.92576013],
       [-0.73496398],
       [-0.79805629],
       [-0.57538087],
       [-0.49002715],
       [ 0.72440735],
       [ 1.        ],
       [-0.56658794],
       [-0.34205072],
       [-0.83562247],
       [-0.68286875],
       [-0.74067301],
       [-0.74345532],
       [-0.9301953 ],
       [-0.81564803]])

In [None]:
label_test

[1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1]

# M = 32 (number of training samples)

In [None]:
import random

# Draw a random subset of 64 (feature row, label) pairs from the full dataset.
# A dedicated, seeded generator makes the draw repeatable under
# "Restart Kernel -> Run All"; the previous unseeded call returned a different
# subset on every run. Arrays printed further down that came from an earlier
# unseeded run will not match until the notebook is re-executed.
SAMPLE_SEED = 22
rng = random.Random(SAMPLE_SEED)

pairs = list(zip(cancer.data, cancer.target))  # pair each feature row with its label
pairs = rng.sample(pairs, 64)  # pick 64 distinct random pairs
data, target = zip(*pairs)  # split the pairs back into two parallel tuples

In [None]:
# Split the sampled points 50/50 into train and test halves; the fixed
# random_state keeps the partition identical for identical input.
(sample_train, sample_test,
 label_train, label_test) = train_test_split(
    data,
    target,
    test_size=0.5,
    random_state=22,
)

In [None]:
# Project every sample onto `n_dim` principal component(s). The PCA basis is
# learned from the training half only and then applied to both halves.
# NOTE: this cell overwrites sample_train / sample_test, so it must be run
# exactly once after the split cell; re-running it transforms already-reduced data.
n_dim = 1
pca = PCA(n_components=n_dim)
pca.fit(sample_train)
sample_train, sample_test = pca.transform(sample_train), pca.transform(sample_test)

# Normalise: standardise both halves with statistics fitted on the training half.
std_scale = StandardScaler()
std_scale.fit(sample_train)
sample_train, sample_test = std_scale.transform(sample_train), std_scale.transform(sample_test)

# Scale: map train and test together into the range [-1, 1].
# NOTE(review): the min/max are fitted on train + test combined, so test-set
# extremes influence the scaled training features -- confirm this is intended.
samples = np.concatenate((sample_train, sample_test), axis=0)
minmax_scale = MinMaxScaler((-1, 1))
minmax_scale.fit(samples)
sample_train, sample_test = minmax_scale.transform(sample_train), minmax_scale.transform(sample_test)

In [None]:
# Alias the training features and training labels as X and y.
X, y = sample_train, label_train

In [None]:
X

array([[-0.52961818],
       [-0.73694129],
       [-0.98076452],
       [ 0.30469623],
       [-0.78349866],
       [-0.48788974],
       [-0.79870995],
       [ 0.25658932],
       [-0.70611322],
       [-0.74200188],
       [-0.7363877 ],
       [ 0.31199637],
       [-0.97364873],
       [ 0.71250752],
       [-0.55861402],
       [-0.63612516],
       [-0.90847131],
       [-0.54241534],
       [-0.21227206],
       [-0.49821623],
       [-0.7787262 ],
       [-0.69581626],
       [-0.77763277],
       [ 0.38337651],
       [-0.02174895],
       [-1.        ],
       [-0.63179088],
       [-0.9240283 ],
       [-0.83105823],
       [-0.61640269],
       [-0.95954346],
       [-0.81083621]])

In [None]:
y

[1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [None]:
sample_test

array([[-0.40839483],
       [-0.3025159 ],
       [-0.56281703],
       [ 1.        ],
       [-0.51608625],
       [-0.30182866],
       [-0.89821949],
       [-0.3258409 ],
       [ 0.24403701],
       [-0.08547389],
       [-0.81397378],
       [-0.52275241],
       [-0.84506346],
       [ 0.06944435],
       [ 0.12100471],
       [-0.82295834],
       [-0.56187142],
       [-0.47338434],
       [ 0.27271194],
       [-0.40050862],
       [-0.44610167],
       [-0.69609962],
       [-0.82142116],
       [-0.79394096],
       [-0.73949103],
       [-0.27940424],
       [ 0.80421303],
       [-0.32988797],
       [-0.78661245],
       [-0.75657348],
       [-0.77425805],
       [-0.89459281]])

In [None]:
label_test

[1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1]

# M = 64 (number of training samples)

In [None]:
import random

# Draw a random subset of 128 (feature row, label) pairs from the full dataset.
# A dedicated, seeded generator makes the draw repeatable under
# "Restart Kernel -> Run All"; the previous unseeded call returned a different
# subset on every run. Arrays printed further down that came from an earlier
# unseeded run will not match until the notebook is re-executed.
SAMPLE_SEED = 22
rng = random.Random(SAMPLE_SEED)

pairs = list(zip(cancer.data, cancer.target))  # pair each feature row with its label
pairs = rng.sample(pairs, 128)  # pick 128 distinct random pairs
data, target = zip(*pairs)  # split the pairs back into two parallel tuples

In [None]:
# Split the sampled points 50/50 into train and test halves; the fixed
# random_state keeps the partition identical for identical input.
(sample_train, sample_test,
 label_train, label_test) = train_test_split(
    data,
    target,
    test_size=0.5,
    random_state=22,
)

In [None]:
# Project every sample onto `n_dim` principal component(s). The PCA basis is
# learned from the training half only and then applied to both halves.
# NOTE: this cell overwrites sample_train / sample_test, so it must be run
# exactly once after the split cell; re-running it transforms already-reduced data.
n_dim = 1
pca = PCA(n_components=n_dim)
pca.fit(sample_train)
sample_train, sample_test = pca.transform(sample_train), pca.transform(sample_test)

# Normalise: standardise both halves with statistics fitted on the training half.
std_scale = StandardScaler()
std_scale.fit(sample_train)
sample_train, sample_test = std_scale.transform(sample_train), std_scale.transform(sample_test)

# Scale: map train and test together into the range [-1, 1].
# NOTE(review): the min/max are fitted on train + test combined, so test-set
# extremes influence the scaled training features -- confirm this is intended.
samples = np.concatenate((sample_train, sample_test), axis=0)
minmax_scale = MinMaxScaler((-1, 1))
minmax_scale.fit(samples)
sample_train, sample_test = minmax_scale.transform(sample_train), minmax_scale.transform(sample_test)

In [None]:
# Imports used by this cell, gathered in one place.
from scipy.stats import mode  # not used in this cell; kept in case other cells rely on the name
from sklearn.metrics import accuracy_score, confusion_matrix

no_of_Q = 4
no_of_partitions = 2

X = sample_train
y = label_train

# Fit k-means on the first `no_of_Q` training points only, then assign every
# training point to its nearest fitted centroid.
# NOTE(review): fitting on just X[:no_of_Q] (4 points) looks deliberate, but
# confirm it is not meant to be the whole training set.
km = KMeans(n_clusters=no_of_partitions, init='random',
            n_init=1, max_iter=30, tol=1e-04, random_state=0)
fitted_km = km.fit(X[:no_of_Q])
prediction = fitted_km.predict(X)

d = prediction
print('###### the old predictions', d)

# Cluster ids are arbitrary, so align them with the class labels: for each
# cluster (a confusion-matrix column) take the true label that occurs most often.
# Passing `labels` explicitly pins the matrix shape; without it the shape depends
# on which values happen to occur, and `cm_argmax[1]` raises IndexError when only
# a single value is present in both y and d.
class_ids = list(range(max(2, no_of_partitions)))
cm = confusion_matrix(y, d, labels=class_ids)
cm_argmax = cm.argmax(axis=0)
if cm_argmax[0] == cm_argmax[1]:
    # Both clusters favour the same class: remapping would collapse every
    # prediction to one label, so the raw cluster ids are kept unchanged.
    correct_d = d
else:
    # Vectorised lookup: replace each cluster id by its majority class label.
    correct_d = cm_argmax[d]

print('###### the new predictions', correct_d)

acc = accuracy_score(y, correct_d)
print("Accuracy", acc)



###### the old predictions [1 1 0 0 0 0 0 1 1 1 0 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1
 1 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1]
###### the new predictions [1 1 0 0 0 0 0 1 1 1 0 1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 1 1 1 1
 1 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 1 0 0 1 1 1 1 1 1]
Accuracy 0.890625


In [None]:
X

array([[-0.92033957],
       [-0.62676039],
       [-0.28869688],
       [-0.4545198 ],
       [-0.5639226 ],
       [-0.13063581],
       [-0.12721978],
       [-0.84393553],
       [-0.8167348 ],
       [-0.86877809],
       [ 0.68405998],
       [-0.77438584],
       [-0.72603092],
       [-0.97403678],
       [ 0.82394382],
       [ 0.04393449],
       [-0.62029416],
       [-0.7575307 ],
       [-0.78364214],
       [-0.69133675],
       [ 0.4711372 ],
       [-0.86332019],
       [-0.7846598 ],
       [-0.97647863],
       [-0.91662612],
       [-0.9170126 ],
       [-0.79788711],
       [-0.92582589],
       [ 0.00207795],
       [-0.94046638],
       [ 0.28818783],
       [-0.85261899],
       [-0.26015489],
       [-0.81450528],
       [-0.66534136],
       [-0.99248397],
       [-0.71814302],
       [-0.85732167],
       [-0.40941704],
       [-0.26379642],
       [ 0.00921227],
       [-0.8450853 ],
       [-0.81489227],
       [-0.59669884],
       [-0.73294047],
       [-0

In [None]:
y

[1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1]

In [None]:
sample_test

array([[-0.75423437],
       [-0.65602419],
       [ 0.21333776],
       [-0.51939951],
       [-0.81243976],
       [-0.79156646],
       [-0.80595879],
       [-0.69137113],
       [-0.79167719],
       [-0.63640346],
       [-0.65655968],
       [-0.60008819],
       [ 0.08046681],
       [-0.66995772],
       [-0.9143032 ],
       [-0.7346537 ],
       [-0.83086306],
       [-0.69154759],
       [-0.32046433],
       [-0.61913998],
       [-0.99949378],
       [ 0.32645699],
       [-0.9091592 ],
       [-0.38972082],
       [-0.836701  ],
       [-0.23793032],
       [-0.6540042 ],
       [-0.86902482],
       [-0.804716  ],
       [ 0.1423963 ],
       [-0.68438072],
       [-0.90523266],
       [-0.06363657],
       [ 0.13351811],
       [-0.8986342 ],
       [-0.67632821],
       [-0.83384046],
       [-0.24302343],
       [-0.11478386],
       [-0.1748094 ],
       [-0.59596087],
       [-0.83984607],
       [-0.57101925],
       [-0.72698474],
       [-0.67562877],
       [-0

In [None]:
label_test

[1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1]