diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index e366269..1c1079a 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_k_means/__pycache__/__init__.cpython-36.pyc b/q01_k_means/__pycache__/__init__.cpython-36.pyc index 5030f09..511f9b9 100644 Binary files a/q01_k_means/__pycache__/__init__.cpython-36.pyc and b/q01_k_means/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_k_means/__pycache__/build.cpython-36.pyc b/q01_k_means/__pycache__/build.cpython-36.pyc index d257235..f926d10 100644 Binary files a/q01_k_means/__pycache__/build.cpython-36.pyc and b/q01_k_means/__pycache__/build.cpython-36.pyc differ diff --git a/q01_k_means/build.py b/q01_k_means/build.py index 925feb4..f36f773 100644 --- a/q01_k_means/build.py +++ b/q01_k_means/build.py @@ -1,18 +1,34 @@ +# %load q01_k_means/build.py # Default imports from sklearn.cluster import KMeans import matplotlib.pyplot as plt plt.switch_backend('agg') from sklearn import datasets - +import pandas as pd digits = datasets.load_digits() - X_train = digits.images y_train = digits.target - -# Write your solution here : - - +def k_means(X_train, y_train, cluster=10, random_state=9): + X = X_train.reshape((len(X_train), -1)) + kmeans = KMeans(n_clusters=cluster, random_state=random_state).fit(X, y_train) + a = X_train[(y_train == 0) & (kmeans.labels_ == 0)][0:20] + b = X_train[(y_train == 1) & (kmeans.labels_ == 1)][0:20] + c = X_train[(y_train == 2) & (kmeans.labels_ == 2)][0:20] + d = X_train[(y_train == 3) & (kmeans.labels_ == 3)][0:20] + e = X_train[(y_train == 4) & (kmeans.labels_ == 4)][0:20] + f = X_train[(y_train == 5) & (kmeans.labels_ == 5)][0:20] + g = X_train[(y_train == 6) & (kmeans.labels_ == 6)][0:20] + h = X_train[(y_train == 7) & (kmeans.labels_ == 7)][0:20] + i = X_train[(y_train == 8) & (kmeans.labels_ == 8)][0:20] + j = X_train[(y_train == 9) & (kmeans.labels_ == 9)][0:20] + for char in (a, b, c, d, e, f, g, h, i, j): + for index in range(0, len(char)): + plt.subplot(10, 20, index + 1) + plt.axis('off') + plt.imshow(char[index]) + plt.show() +k_means(X_train, y_train, cluster=10, random_state=9) diff --git a/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc b/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc index 4d0c118..16ea746 100644 Binary files a/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc and b/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc b/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc index c05e1f0..df58898 100644 Binary files a/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc and b/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc b/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc index 8510bd5..f538941 100644 Binary files a/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc and b/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc b/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc index 5171cbf..f7c4037 100644 Binary files a/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc and b/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/build.py b/q02_hierarchy_clustering/build.py index 724237c..cf7e9d2 100644 --- a/q02_hierarchy_clustering/build.py +++ b/q02_hierarchy_clustering/build.py @@ -1,3 +1,4 @@ +# %load q02_hierarchy_clustering/build.py # Default imports import pandas as pd @@ -11,5 +12,18 @@ df = pd.DataFrame(scale(digits.data), index=digits.target) # Write your solution here : +def hierarchy_clustering(df): + Z = hierarchy.linkage(df, 'average') + plt.figure(figsize=(25, 10)) + plt.title('Hierarchical Clustering Dendrogram') + plt.xlabel('sample index') + plt.ylabel('distance') + hierarchy.dendrogram( + Z, + leaf_rotation=90., # rotates the x axis labels + leaf_font_size=8., # font size for the x axis labels + ) + plt.show() +hierarchy_clustering(df) diff --git a/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc b/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc index c783cf0..3b6f5e0 100644 Binary files a/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc and b/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc b/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc index 180ff26..17c4bbe 100644 Binary files a/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc and b/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc differ