In [93]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn import svm
from sklearn.semi_supervised import label_propagation
from sklearn.decomposition import PCA
import pandas as pd

In [94]:
rng = np.random.RandomState(0)

iris = datasets.load_iris()

In [95]:
X = iris.data[:, :5]
y = iris.target

In [96]:
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(X)
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['pca1', 'pca2'])

In [97]:
y.shape

(150,)

In [98]:
X.shape

(150, 4)

In [99]:
principalDf.shape

(150, 2)

In [100]:
principalDf.columns

Index([u'pca1', u'pca2'], dtype='object')

In [101]:
# step size in the mesh
h = .02

In [102]:
y_30 = np.copy(y)
y_30[rng.rand(len(y)) < 0.3] = -1

In [103]:
len(y_30[rng.rand(len(y)) < 0.3])

51

In [104]:
len([i for i in y_30 if i == -1])

43

In [105]:
y_50 = np.copy(y)
y_50[rng.rand(len(y)) < 0.5] = -1

In [106]:
len([i for i in y_50 if i == -1])

84

In [107]:
ls30 = (label_propagation.LabelSpreading().fit(principalDf, y_30),
        y_30)
ls50 = (label_propagation.LabelSpreading().fit(principalDf, y_50),
        y_50)
ls100 = (label_propagation.LabelSpreading().fit(principalDf, y), y)

In [108]:
rbf_svc = (svm.SVC(kernel='rbf').fit(principalDf, y), y)

In [109]:
# create a mesh to plot in
x_min, x_max = principalDf.pca1.min() - 1, principalDf.pca1.max() + 1
y_min, y_max = principalDf.pca2.min() - 1, principalDf.pca2.max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                     np.arange(y_min, y_max, h))


In [110]:
len(yy.ravel())

104632

In [111]:
len(xx.ravel())

104632

In [112]:
x_min

-4.2252004462749841

In [113]:
x_max

4.7946868612099678

In [114]:
y_min

-2.2624919538621397

In [115]:
y_max

2.3705240359763318

In [116]:
# title for the plots
titles = ['Label Spreading 30% data',
          'Label Spreading 50% data',
          'Label Spreading 100% data',
          'SVC with rbf kernel']

In [117]:
color_map = {-1: (1, 1, 1), 0: (0, 0, .9), 1: (1, 0, 0), 2: (.8, .6, 0)}


In [118]:
for i, (clf, y_train) in enumerate((ls30, ls50, ls100, rbf_svc)):
    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    plt.subplot(2, 2, i + 1)
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired)
    plt.axis('off')

    # Plot also the training points
    colors = [color_map[y] for y in y_train]
    plt.scatter(principalDf.pca1, principalDf.pca2, c=colors, edgecolors='black')

    plt.title(titles[i])
plt.suptitle("Unlabeled points are colored white", y=0.1)
plt.show()    