In [None]:
# This is just a preamble that sets a bunch of options up.
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Cosmetics: switch to the 'ggplot' style sheet first, then override the
# default figure size so every plot is drawn on a square canvas.
DEFAULT_FIGSIZE = (5, 5)
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = DEFAULT_FIGSIZE

In [None]:
# Build a 100x100 grid of integer pixel coordinates, one row per (x, y)
# pair, with x varying slowest (row k holds x = k // 100, y = k % 100).
data = [[x, y] for x in range(0, 100) for y in range(0, 100)]
df = pd.DataFrame(data, columns=['x', 'y'])

# Three regions are painted black; everything else stays yellow.
# 1) Disc of radius 10 (squared radius 100) centred on (30, 80)
left_disc = (df['x'] - 30)**2 + (df['y'] - 80)**2 <= 100
# 2) Disc of the same size centred on (70, 80)
right_disc = (df['x'] - 70)**2 + (df['y'] - 80)**2 <= 100
# 3) The part strictly below y = 40 of a larger disc (squared radius 1000)
#    centred on (50, 40)
lower_half_disc = (
    ((df['x'] - 50)**2 + (df['y'] - 40)**2 <= 1000) & (df['y'] < 40)
)

# A pixel is black as soon as it belongs to any region, so one OR-ed mask
# replaces the step-by-step "keep black, otherwise recolor" cascade.
idx = left_disc | right_disc | lower_half_disc
df['color'] = np.where(idx, 'black', 'yellow')

print(df.head())

In [None]:
# Draw the complete picture as two scatter layers, yellow first and black
# on top, using square pixel markers without outlines.
for shade in ('yellow', 'black'):
    pixels = df.loc[df['color'] == shade]
    plt.scatter(pixels['x'], pixels['y'],
                c=shade, marker=',', edgecolors='none')

plt.show()

In [None]:
# First, let's create our train and test sets.
from sklearn.model_selection import train_test_split

# Fix the shuffle seed so that restarting the kernel and re-running the
# notebook reproduces the same split, and therefore the same figures and
# predictions downstream.
SEED = 42

# Half of the pixels go to each set.
train, test = train_test_split(df, test_size=0.5, random_state=SEED)

# Here's what the train set looks like now
for color in ['yellow', 'black']:
    subset = train[train.color == color]
    plt.scatter(subset.x, subset.y,
                c=color, marker=',', edgecolors='none')
plt.show()

In [None]:
# Just to make sure no one's cheating, let's get rid of the color
# field in the test set.
# Rebind `test` to a new frame with the labels blanked rather than writing
# into the object returned by the split: this does not depend on whether
# that object is a view or a copy of `df`, and so cannot trigger pandas'
# chained-assignment warning.
test = test.assign(color='')

print(test.head())

In [None]:
# Now, can we figure out what the original image should look like?

# 1) Import the right classifier
from sklearn.neighbors import KNeighborsClassifier

# 2) Train the classifier using fit(): the features are the pixel
#    coordinates and the target is the pixel color. The classifier is
#    created with its default settings.
clf = KNeighborsClassifier()
clf.fit(train[['x', 'y']], train['color'])

# 3) Now, we can make predictions using predict().
#    The result is stored with assign() instead of `test.color = ...`:
#    attribute-style assignment only updates a column that already exists
#    and does not create one, so it silently depends on an earlier cell
#    having added 'color' to the frame.
test = test.assign(color=clf.predict(test[['x', 'y']]))

print(test.head())

In [None]:
# Overlay the known training pixels and the predicted test pixels on one
# figure. Layers are drawn train first, then test, and yellow before black
# within each frame.
for frame in (train, test):
    for shade in ('yellow', 'black'):
        pixels = frame[frame['color'] == shade]
        plt.scatter(pixels['x'], pixels['y'],
                    c=shade, marker=',', edgecolors='none')

plt.show()

In [None]:
# This example is adapted from http://blog.yhat.com/posts/classification-using-knn-and-python.html