diff --git a/__pycache__/__init__.cpython-36.pyc b/__pycache__/__init__.cpython-36.pyc index e366269..57af355 100644 Binary files a/__pycache__/__init__.cpython-36.pyc and b/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_k_means/__pycache__/__init__.cpython-36.pyc b/q01_k_means/__pycache__/__init__.cpython-36.pyc index 5030f09..ca90628 100644 Binary files a/q01_k_means/__pycache__/__init__.cpython-36.pyc and b/q01_k_means/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_k_means/__pycache__/build.cpython-36.pyc b/q01_k_means/__pycache__/build.cpython-36.pyc index d257235..c15eb02 100644 Binary files a/q01_k_means/__pycache__/build.cpython-36.pyc and b/q01_k_means/__pycache__/build.cpython-36.pyc differ diff --git a/q01_k_means/build.py b/q01_k_means/build.py index 925feb4..948eed8 100644 --- a/q01_k_means/build.py +++ b/q01_k_means/build.py @@ -1,8 +1,18 @@ +# %load q01_k_means/build.py # Default imports from sklearn.cluster import KMeans import matplotlib.pyplot as plt plt.switch_backend('agg') from sklearn import datasets +import matplotlib.pyplot as plt +import seaborn as sns +import sklearn.cluster as cluster +import time +import numpy as np + +sns.set_context('poster') +sns.set_color_codes() +plot_kwds = {'alpha' : 0.25, 's' : 80, 'linewidths':0} digits = datasets.load_digits() @@ -11,8 +21,20 @@ y_train = digits.target # Write your solution here : - - - +def k_means(X_train,y_train,cluster=10,random_state=9): + X_train=X_train.reshape(14376,8) + start_time = time.time() + km = KMeans( n_clusters=cluster,random_state=random_state) + km.fit(X_train) + labels=km.predict(X_train) + print(km.cluster_centers_) + print(km.labels_) + print(labels) + palette = sns.color_palette('deep', np.unique(labels).max() + 1) + colors = [palette[x] if x >= 0 else (0.0, 0.0, 0.0) for x in labels] + plt.scatter(X_train.T[0], X_train.T[1], c=colors, **plot_kwds) + frame = plt.gca() + frame.axes.get_xaxis().set_visible(False) + frame.axes.get_yaxis().set_visible(False) diff --git a/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc b/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc index 4d0c118..cf680f6 100644 Binary files a/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc and b/q01_k_means/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc b/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc index c05e1f0..be5125f 100644 Binary files a/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc and b/q01_k_means/tests/__pycache__/test_q01_k_means.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc b/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc index 8510bd5..885f139 100644 Binary files a/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc and b/q02_hierarchy_clustering/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc b/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc index 5171cbf..7215569 100644 Binary files a/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc and b/q02_hierarchy_clustering/__pycache__/build.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/build.py b/q02_hierarchy_clustering/build.py index 724237c..602855d 100644 --- a/q02_hierarchy_clustering/build.py +++ b/q02_hierarchy_clustering/build.py @@ -1,3 +1,4 @@ +# %load q02_hierarchy_clustering/build.py # Default imports import pandas as pd @@ -6,10 +7,30 @@ from sklearn.preprocessing import scale from scipy.cluster import hierarchy from sklearn import datasets - +from scipy.cluster.hierarchy import dendrogram, linkage digits = datasets.load_digits() df = pd.DataFrame(scale(digits.data), index=digits.target) + + + +X_train = df +y_train = df.index # Write your solution here : +def hierarchy_clustering(X_train): + # generate the linkage matrix + Z = linkage(X_train, 'ward') + # calculate full dendrogram + plt.figure(figsize=(25, 10)) + plt.title('Hierarchical Clustering Dendrogram') + plt.xlabel('sample index') + plt.ylabel('distance') + dendrogram( + Z, + leaf_rotation=90., # rotates the x axis labels + leaf_font_size=8., # font size for the x axis labels + ) + + plt.show() diff --git a/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc b/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc index c783cf0..341cd45 100644 Binary files a/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc and b/q02_hierarchy_clustering/tests/__pycache__/__init__.cpython-36.pyc differ diff --git a/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc b/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc index 180ff26..d0cb4bb 100644 Binary files a/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc and b/q02_hierarchy_clustering/tests/__pycache__/test_q02_hierarchy_clustering.cpython-36.pyc differ