COSMIT + MISC pep8, pyflakes, typos and some other cleanup of examples.
amueller committed Apr 29, 2012
1 parent 692e07d commit dcc1c74
Showing 26 changed files with 159 additions and 198 deletions.
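
Nearly all of the changes below are mechanical PEP 8 / pyflakes fixes: spaces around binary operators, two spaces before inline comments, spaces after commas, and removal of dead code and trailing blank lines. A minimal before/after sketch of the dominant pattern (illustrative only, not a line from the commit):

    a, b = 1.0, 3.0
    # Before: pep8 flags the missing operator spacing and the '#mean' comment.
    m = .5*(a+b)  #mean
    # After: spaced operators and a '# ' comment, as applied throughout below.
    m = .5 * (a + b)  # mean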
examples/cluster/plot_cluster_comparison.py (2 changes: 1 addition & 1 deletion)

@@ -85,7 +85,7 @@
     elif algorithm == affinity_propagation:
         # Set a low preference to avoid creating too many
         # clusters. This parameter is hard to set in practice
-        algorithm.fit(-distances, p=-50*distances.max())
+        algorithm.fit(-distances, p=-50 * distances.max())
     else:
         algorithm.fit(X)
     t1 = time.time()
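The p keyword in this old fit call is the affinity-propagation preference; the current scikit-learn API passes it to the estimator instead. A hedged sketch of the same idea with today's API (synthetic data; names are illustrative):

    import numpy as np
    from sklearn.cluster import AffinityPropagation
    from sklearn.metrics import euclidean_distances

    X = np.random.RandomState(0).rand(60, 2)
    distances = euclidean_distances(X)
    # Negated distances act as similarities; a strongly negative preference
    # discourages points from becoming exemplars, so fewer clusters form.
    af = AffinityPropagation(affinity='precomputed',
                             preference=-50 * distances.max())
    af.fit(-distances)
    print('%d clusters' % len(af.cluster_centers_indices_))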
examples/cluster/plot_cluster_iris.py (10 changes: 4 additions & 6 deletions)

@@ -76,9 +76,9 @@
 for name, label in [('Setosa', 0),
                     ('Versicolour', 1),
                     ('Virginica', 2)]:
-    ax.text3D(X[y==label, 3].mean(),
-              X[y==label, 0].mean()+1.5,
-              X[y==label, 2].mean(), name,
+    ax.text3D(X[y == label, 3].mean(),
+              X[y == label, 0].mean() + 1.5,
+              X[y == label, 2].mean(), name,
               horizontalalignment='center',
               bbox=dict(alpha=.5, edgecolor='w', facecolor='w'),
               )

@@ -92,6 +92,4 @@
 ax.set_xlabel('Petal width')
 ax.set_ylabel('Sepal length')
 ax.set_zlabel('Petal length')
-#pl.savefig('cluster_iris_truth.png')
-
-
 pl.show()
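The y == label fixes above are pure PEP 8 spacing, but the idiom deserves a note: boolean-mask indexing selects the rows of one class before averaging, which is how each species name is placed at its cluster's centroid. A minimal sketch with synthetic data:

    import numpy as np

    X = np.arange(12.).reshape(6, 2)  # 6 samples, 2 features
    y = np.array([0, 0, 1, 1, 2, 2])  # class label per sample
    # Mean of feature 0 over the samples whose label is 1:
    print(X[y == 1, 0].mean())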
examples/cluster/plot_digits_agglomeration.py (10 changes: 4 additions & 6 deletions)

@@ -5,8 +5,8 @@
 =========================================================
 Feature agglomeration
 =========================================================
-These images how similiar features are merged together using
-feautre agglomeration.
+These images how similiar features are merged together using
+feature agglomeration.
 """

@@ -40,14 +40,14 @@
 pl.clf()
 pl.subplots_adjust(left=.01, right=.99, bottom=.01, top=.91)
 for i in range(4):
-    pl.subplot(3, 4, i+1)
+    pl.subplot(3, 4, i + 1)
     pl.imshow(images[i], cmap=pl.cm.gray,
               vmax=16, interpolation='nearest')
     pl.xticks(())
     pl.yticks(())
     if i == 1:
         pl.title('Original data')
-    pl.subplot(3, 4, 4+i+1)
+    pl.subplot(3, 4, 4 + i + 1)
     pl.imshow(images_restored[i],
               cmap=pl.cm.gray, vmax=16, interpolation='nearest')
     if i == 1:

@@ -61,5 +61,3 @@
 pl.xticks(())
 pl.yticks(())
 pl.title('Labels')
-
-
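For context on the docstring being fixed above: feature agglomeration clusters similar features (here, pixels of the digits images) and can map the reduced representation back, which is what images_restored shows. A hedged sketch with the current scikit-learn class (the 2012 example used a Ward-based predecessor):

    import numpy as np
    from sklearn import datasets, cluster

    digits = datasets.load_digits()
    X = digits.data  # (n_samples, 64) flattened 8x8 images
    agglo = cluster.FeatureAgglomeration(n_clusters=32)
    X_reduced = agglo.fit_transform(X)  # merge 64 pixel features into 32 groups
    X_restored = agglo.inverse_transform(X_reduced)  # pixels get their group value
    images_restored = X_restored.reshape(-1, 8, 8)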
examples/cluster/plot_lena_compress.py (15 changes: 7 additions & 8 deletions)

@@ -32,7 +32,7 @@
 # Newer versions of scipy have lena in misc
 from scipy import misc
 lena = misc.lena()
-X = lena.reshape((-1, 1)) # We need an (n_sample, n_feature) array
+X = lena.reshape((-1, 1))  # We need an (n_sample, n_feature) array
 k_means = cluster.KMeans(k=n_clusters, n_init=4)
 k_means.fit(X)
 values = k_means.cluster_centers_.squeeze()

@@ -47,19 +47,19 @@
 
 # original lena
 pl.figure(1, figsize=(3, 2.2))
-pl.imshow(lena , cmap=pl.cm.gray, vmin=vmin, vmax=256)
+pl.imshow(lena, cmap=pl.cm.gray, vmin=vmin, vmax=256)
 
 # compressed lena
 pl.figure(2, figsize=(3, 2.2))
 pl.imshow(lena_compressed, cmap=pl.cm.gray, vmin=vmin, vmax=vmax)
 
 # equal bins lena
-regular_values = np.linspace(0, 256, n_clusters+1)
+regular_values = np.linspace(0, 256, n_clusters + 1)
 regular_labels = np.searchsorted(regular_values, lena) - 1
-regular_values = .5*(regular_values[1:] + regular_values[:-1]) #mean
+regular_values = .5 * (regular_values[1:] + regular_values[:-1])  # mean
 regular_lena = np.choose(regular_labels.ravel(), regular_values)
 regular_lena.shape = lena.shape
-pl.figure(3, figsize=(3,2.2))
+pl.figure(3, figsize=(3, 2.2))
 pl.imshow(regular_lena, cmap=pl.cm.gray, vmin=vmin, vmax=vmax)
 
 # histogram

@@ -71,10 +71,9 @@
 pl.xticks(regular_values)
 values = np.sort(values)
 for center_1, center_2 in zip(values[:-1], values[1:]):
-    pl.axvline(.5*(center_1+center_2), color='b')
+    pl.axvline(.5 * (center_1 + center_2), color='b')
 
 for center_1, center_2 in zip(regular_values[:-1], regular_values[1:]):
-    pl.axvline(.5*(center_1+center_2), color='b', linestyle='--')
+    pl.axvline(.5 * (center_1 + center_2), color='b', linestyle='--')
 
 pl.show()
-
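The "equal bins" block above is a compact quantization recipe: bin edges from linspace, a bin label per pixel from searchsorted, and bin midpoints as the reconstruction values. The same idea as a standalone sketch (random image instead of the Lena array):

    import numpy as np

    n_clusters = 5
    image = np.random.RandomState(0).randint(1, 256, size=(8, 8))
    edges = np.linspace(0, 256, n_clusters + 1)  # n_clusters equal-width bins
    labels = np.searchsorted(edges, image) - 1   # bin index for every pixel
    midpoints = .5 * (edges[1:] + edges[:-1])    # reconstruction value per bin
    compressed = np.choose(labels.ravel(), midpoints).reshape(image.shape)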
examples/decomposition/plot_pca_3d.py (33 changes: 13 additions & 20 deletions)

@@ -13,41 +13,41 @@
 """
 print __doc__
 
 
 # Code source: Gael Varoqueux
 # Modified for Documentation merge by Jaques Grobler
 # License: BSD
 
 
 import pylab as pl
 import numpy as np
 from scipy import stats
 from mpl_toolkits.mplot3d import Axes3D
 
 e = np.exp(1)
 
 np.random.seed(4)
 
+
+def pdf(x):
+    return 0.5 * (stats.norm(scale=0.25 / e).pdf(x)
+                  + stats.norm(scale=4 / e).pdf(x))
+
 y = np.random.normal(scale=0.5, size=(30000))
 x = np.random.normal(scale=0.5, size=(30000))
-z = np.random.normal(scale=0.1, size=(len(x)), )
-def pdf(x):
-    return 0.5*( stats.norm(scale=0.25/e).pdf(x)
-                + stats.norm(scale=4/e).pdf(x))
+z = np.random.normal(scale=0.1, size=len(x))
 
 density = pdf(x) * pdf(y)
-pdf_z = pdf(5*z)
+pdf_z = pdf(5 * z)
 
 density *= pdf_z
 
-a = x+y
-b = 2*y
-c = a-b+z
+a = x + y
+b = 2 * y
+c = a - b + z
 
 norm = np.sqrt(a.var() + b.var())
 a /= norm
 b /= norm
 
+
 ###############################################################################
 # Plot the figures
 def plot_figs(fig_num, elev, azim):

@@ -57,18 +57,11 @@ def plot_figs(fig_num, elev, azim):
 
     pl.set_cmap(pl.cm.hot_r)
 
     pts = ax.scatter(a[::10], b[::10], c[::10], c=density,
                      marker='+', alpha=.4)
 
     Y = np.c_[a, b, c]
     U, pca_score, V = np.linalg.svd(Y, full_matrices=False)
-    x_pca_axis, y_pca_axis, z_pca_axis = V.T*pca_score/pca_score.min()
-
-    #ax.quiver(0.1*x_pca_axis, 0.1*y_pca_axis, 0.1*z_pca_axis,
-    #          x_pca_axis, y_pca_axis, z_pca_axis,
-    #          color=(0.6, 0, 0))
+    x_pca_axis, y_pca_axis, z_pca_axis = V.T * pca_score / pca_score.min()
 
-    x_pca_axis, y_pca_axis, z_pca_axis = 3*V.T
+    x_pca_axis, y_pca_axis, z_pca_axis = 3 * V.T
     x_pca_plane = np.r_[x_pca_axis[:2], - x_pca_axis[1::-1]]
     y_pca_plane = np.r_[y_pca_axis[:2], - y_pca_axis[1::-1]]
     z_pca_plane = np.r_[z_pca_axis[:2], - z_pca_axis[1::-1]]
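The PCA axes drawn by plot_figs come straight out of an SVD: the rows of V are the principal directions and pca_score holds the singular values. A minimal sketch of that relationship (assumes zero-mean data; names are illustrative):

    import numpy as np

    rng = np.random.RandomState(0)
    Y = rng.randn(1000, 3) * [3., 1., .2]  # anisotropic point cloud
    Y -= Y.mean(axis=0)                    # SVD-based PCA needs centered data
    U, s, Vt = np.linalg.svd(Y, full_matrices=False)
    components = Vt                # rows are the principal directions
    variances = s ** 2 / len(Y)    # variance along each direction (n convention)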
examples/decomposition/plot_pca_iris.py (19 changes: 6 additions & 13 deletions)

@@ -38,12 +38,10 @@
 pca.fit(X)
 X = pca.transform(X)
 
-for name, label in [('Setosa', 0),
-                    ('Versicolour', 1),
-                    ('Virginica', 2)]:
-    ax.text3D(X[y==label, 0].mean(),
-              X[y==label, 1].mean()+1.5,
-              X[y==label, 2].mean(), name,
+for name, label in [('Setosa', 0), ('Versicolour', 1), ('Virginica', 2)]:
+    ax.text3D(X[y == label, 0].mean(),
+              X[y == label, 1].mean() + 1.5,
+              X[y == label, 2].mean(), name,
               horizontalalignment='center',
               bbox=dict(alpha=.5, edgecolor='w', facecolor='w'),
               )

@@ -52,7 +50,7 @@
 ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y)
 
 x_surf = [X[:, 0].min(), X[:, 0].max(),
-          X[:, 0].min(), X[:, 0].max(),]
+          X[:, 0].min(), X[:, 0].max()]
 y_surf = [X[:, 0].max(), X[:, 0].max(),
           X[:, 0].min(), X[:, 0].min()]
 x_surf = np.array(x_surf)

@@ -62,13 +60,8 @@
 v1 = pca.transform(pca.components_[1])
 v1 /= v1[-1]
 
-#ax.plot_surface(x_surf, y_surf,
-
-
-
-
 ax.w_xaxis.set_ticklabels([])
 ax.w_yaxis.set_ticklabels([])
 ax.w_zaxis.set_ticklabels([])
 
-
 pl.show()
examples/exercises/plot_cv_digits.py (15 changes: 5 additions & 10 deletions)

@@ -3,10 +3,8 @@
 Cross-validation on Digits Dataset Exercise
 =============================================
 
-This exercise is used in the
-:ref:`cv_generators_tut` part of the
-:ref:`model_selection_tut` section of the
-:ref:`stat_learn_tut_index`.
+This exercise is used in the :ref:`cv_generators_tut` part of the
+:ref:`model_selection_tut` section of the :ref:`stat_learn_tut_index`.
 """
 print __doc__

@@ -24,7 +22,7 @@
 scores = list()
 scores_std = list()
 for C in C_s:
-    svc.C = C 
+    svc.C = C
     this_scores = cross_validation.cross_val_score(svc, X, y, n_jobs=-1)
     scores.append(np.mean(this_scores))
     scores_std.append(np.std(this_scores))

@@ -41,9 +39,6 @@
 pl.xlabel('Parameter C')
 pl.ylim(0, 1.1)
 #pl.axhline(np.max(scores), linestyle='--', color='.5')
-pl.text(C_s[np.argmax(scores)], .9*np.max(scores), '%.3f' % np.max(scores),
-        verticalalignment='top',
-        horizontalalignment='center',
-        )
+pl.text(C_s[np.argmax(scores)], .9 * np.max(scores), '%.3f' % np.max(scores),
+        verticalalignment='top', horizontalalignment='center',)
 pl.show()
-
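The loop above sweeps the SVM regularization parameter C and records cross-validated scores. A self-contained sketch of the same pattern with the modern module layout (sklearn.model_selection replaced the old sklearn.cross_validation; the logspace grid is assumed to match the exercise):

    import numpy as np
    from sklearn import datasets, svm
    from sklearn.model_selection import cross_val_score

    digits = datasets.load_digits()
    X, y = digits.data, digits.target

    svc = svm.SVC(kernel='linear')
    C_s = np.logspace(-10, 0, 10)  # candidate regularization strengths
    scores = []
    for C in C_s:
        svc.C = C
        scores.append(cross_val_score(svc, X, y, n_jobs=-1).mean())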
examples/exercises/plot_digits_classification_exercise.py (15 changes: 8 additions & 7 deletions)

@@ -17,14 +17,15 @@
 
 n_samples = len(X_digits)
 
-X_train = X_digits[:.9*n_samples]
-y_train = y_digits[:.9*n_samples]
-X_test = X_digits[.9*n_samples:]
-y_test = y_digits[.9*n_samples:]
+X_train = X_digits[:.9 * n_samples]
+y_train = y_digits[:.9 * n_samples]
+X_test = X_digits[.9 * n_samples:]
+y_test = y_digits[.9 * n_samples:]
 
 knn = neighbors.KNeighborsClassifier()
 logistic = linear_model.LogisticRegression()
 
-print 'KNN score:', knn.fit(X_train, y_train).score(X_test, y_test)
-print 'LogisticRegression score:', logistic.fit(X_train, y_train).score(X_test, y_test)
-
+print('KNN score: %f' %
+      knn.fit(X_train, y_train).score(X_test, y_test))
+print('LogisticRegression score: %f' %
+      logistic.fit(X_train, y_train).score(X_test, y_test))
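One caveat on the hunk above: slicing with a float, as in X_digits[:.9 * n_samples], relied on old numpy behavior and is rejected by modern numpy. A hedged modern equivalent casts the split point to an integer first:

    n_split = int(.9 * n_samples)  # explicit integer index for the 90/10 split
    X_train, X_test = X_digits[:n_split], X_digits[n_split:]
    y_train, y_test = y_digits[:n_split], y_digits[n_split:]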
examples/linear_model/plot_iris_logistic.py (20 changes: 9 additions & 11 deletions)

@@ -5,11 +5,9 @@
 =========================================================
 Logistic Regression 3-class Classifier
 =========================================================
 
-Show below is a logistic-regression classifiers decision
-boundaries on the
-`iris <http://en.wikipedia.org/wiki/Iris_flower_data_set>`_
-dataset. The datapoints are colored according to their
-labels.
+Show below is a logistic-regression classifiers decision boundaries on the
+`iris <http://en.wikipedia.org/wiki/Iris_flower_data_set>`_ dataset. The
+datapoints are colored according to their labels.
 """
 print __doc__

@@ -25,20 +23,20 @@
 
 # import some data to play with
 iris = datasets.load_iris()
-X = iris.data[:, :2] # we only take the first two features.
+X = iris.data[:, :2]  # we only take the first two features.
 Y = iris.target
 
-h = .02 # step size in the mesh
+h = .02  # step size in the mesh
 
-logreg=linear_model.LogisticRegression(C=1e5)
+logreg = linear_model.LogisticRegression(C=1e5)
 
 # we create an instance of Neighbours Classifier and fit the data.
 logreg.fit(X, Y)
 
 # Plot the decision boundary. For that, we will asign a color to each
 # point in the mesh [x_min, m_max]x[y_min, y_max].
-x_min, x_max = X[:,0].min() - .5, X[:,0].max() + .5
-y_min, y_max = X[:,1].min() - .5, X[:,1].max() + .5
+x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
+y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
 xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
 Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

@@ -49,7 +47,7 @@
 pl.pcolormesh(xx, yy, Z)
 
 # Plot also the training points
-pl.scatter(X[:,0], X[:,1],c = Y, edgecolors='k' )
+pl.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k')
 pl.xlabel('Sepal length')
 pl.ylabel('Sepal width')
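The mesh recipe in this hunk generalizes to any estimator with a predict method: predict every grid point, reshape to the grid, and draw the result. A condensed sketch of that recipe (modern matplotlib import; otherwise the same names as the example):

    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn import datasets, linear_model

    iris = datasets.load_iris()
    X, Y = iris.data[:, :2], iris.target
    logreg = linear_model.LogisticRegression(C=1e5).fit(X, Y)

    h = .02  # step size in the mesh
    xx, yy = np.meshgrid(np.arange(X[:, 0].min() - .5, X[:, 0].max() + .5, h),
                         np.arange(X[:, 1].min() - .5, X[:, 1].max() + .5, h))
    Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
    plt.pcolormesh(xx, yy, Z)
    plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k')
    plt.show()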
