In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib

matplotlib.style.use('ggplot') # Look Pretty
%matplotlib inline

def plotDecisionBoundary(model, X, y):
  fig = plt.figure()
  ax = fig.add_subplot(111)

  padding = 0.6
  resolution = 0.0025
  colors = ['royalblue','forestgreen','ghostwhite']

  # Calculate the boundaris
  x_min, x_max = X[:, 0].min(), X[:, 0].max()
  y_min, y_max = X[:, 1].min(), X[:, 1].max()
  x_range = x_max - x_min
  y_range = y_max - y_min
  x_min -= x_range * padding
  y_min -= y_range * padding
  x_max += x_range * padding
  y_max += y_range * padding

  # Create a 2D Grid Matrix. The values stored in the matrix
  # are the predictions of the class at at said location
  xx, yy = np.meshgrid(np.arange(x_min, x_max, resolution),
                       np.arange(y_min, y_max, resolution))

  # What class does the classifier say?
  Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
  Z = Z.reshape(xx.shape)

  # Plot the contour map
  cs = plt.contourf(xx, yy, Z, cmap=plt.cm.terrain)

  # Plot the test original points as well...
  for label in range(len(np.unique(y))):
    indices = np.where(y == label)
    plt.scatter(X[indices, 0], X[indices, 1], c=colors[label], label=str(label), alpha=0.8)

  p = model.get_params()
  plt.axis('tight')
  plt.title('K = ' + str(p['n_neighbors']))

In [2]:
# 
# TODO: Load up the dataset into a variable called X. Check the .head and
# compare it to the file you loaded in a text editor. Make sure you're
# loading your data properly--don't fail on the 1st step!
#
# .. your code here ..
datafile = "./Datasets/wheat.data"
X = pd.read_csv(datafile, header=0)
X.head()

Unnamed: 0,id,area,perimeter,compactness,length,width,asymmetry,groove,wheat_type
0,0,15.26,14.84,0.871,5.763,3.312,2.221,5.22,kama
1,1,14.88,14.57,0.8811,5.554,3.333,1.018,4.956,kama
2,2,14.29,14.09,0.905,5.291,3.337,2.699,4.825,kama
3,3,13.84,13.94,0.8955,5.324,3.379,2.259,4.805,kama
4,4,16.14,14.99,0.9034,5.658,3.562,1.355,5.175,kama


In [3]:
#
# TODO: Copy the 'wheat_type' series slice out of X, and into a series
# called 'y'. Then drop the original 'wheat_type' column from the X
#
# .. your code here ..
y = pd.DataFrame(X.wheat_type)
X = X.drop(labels=['wheat_type'], axis=1)
y

Unnamed: 0,wheat_type
0,kama
1,kama
2,kama
3,kama
4,kama
5,kama
6,kama
7,canadian
8,kama
9,kama


In [4]:
# TODO: Do a quick, "ordinal" conversion of 'y'. In actuality our
# classification isn't ordinal, but just as an experiment...
#
# .. your code here ..
y['wheat_type'] = y.wheat_type.astype("category").cat.codes
y

Unnamed: 0,wheat_type
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,0
8,1
9,1


In [5]:
#
# TODO: Basic nan munging. Fill each row's nans with the mean of the feature
#
# .. your code here ..
X.describe()



Unnamed: 0,id,area,perimeter,compactness,length,width,asymmetry,groove
count,210.0,210.0,210.0,207.0,210.0,209.0,210.0,206.0
mean,104.5,14.847524,14.559286,0.87128,5.563918,3.28144,3.69353,5.407529
std,60.765944,2.909699,1.305959,0.023306,0.719594,0.419907,1.495112,0.53233
min,0.0,10.59,12.41,0.8081,0.8189,2.63,0.7651,3.485
25%,52.25,12.27,13.45,,5.24475,,2.60025,
50%,104.5,14.355,14.32,,5.518,,3.599,
75%,156.75,17.305,15.715,,5.97975,,4.76875,
max,209.0,21.18,17.25,0.9183,6.675,5.325,8.456,6.735


In [8]:
X.compactness.fillna(X.compactness.mean())
X.width.fillna(X.width.mean())
X.groove.fillna(X.groove.mean())
X.describe()

AttributeError: 'Series' object has no attribute 'compactness'