# Demo: KNN

In [1]:
#Load datasets
from sklearn.datasets import load_wine
wine = load_wine()
print(wine.DESCR)

Wine Data Database

Notes
-----
Data Set Characteristics:
    :Number of Instances: 178 (50 in each of three classes)
    :Number of Attributes: 13 numeric, predictive attributes and the class
    :Attribute Information:
 		- 1) Alcohol
 		- 2) Malic acid
 		- 3) Ash
		- 4) Alcalinity of ash  
 		- 5) Magnesium
		- 6) Total phenols
 		- 7) Flavanoids
 		- 8) Nonflavanoid phenols
 		- 9) Proanthocyanins
		- 10)Color intensity
 		- 11)Hue
 		- 12)OD280/OD315 of diluted wines
 		- 13)Proline
        	- class:
                - class_0
                - class_1
                - class_2
		
    :Summary Statistics:
    
                                   Min   Max   Mean     SD
    Alcohol:                      11.0  14.8    13.0   0.8
    Malic Acid:                   0.74  5.80    2.34  1.12
    Ash:                          1.36  3.23    2.36  0.27
    Alcalinity of Ash:            10.6  30.0    19.5   3.3
    Magnesium:                    70.0 162.0    99.7  14.3
    Total Phenols:     

In [2]:
import pandas as pd

#Create dataframe for features
df = pd.DataFrame(wine.data,
                  columns = ['Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash',
                            'Magnesium', 'Total phenols', 'Flavanoids', 'Nonflavanoid phenols',
                            'Proanthocyanins', 'Color intensity', 'Hue',
                            'OD280/OD315 of diluted wines', 'Proline']) 

df['targets'] = pd.DataFrame(wine.target)

df.head()

Unnamed: 0,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315 of diluted wines,Proline,targets
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0,0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0,0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0,0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0,0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0,0


In [3]:
# Get features and targets from a dataframe 
X = df.iloc[:,:13]
y = df['targets']

In [4]:
#Split train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [5]:
#Import KNN model, and train
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=19)
knn.fit(X_train, y_train) 

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=19, p=2,
           weights='uniform')

In [6]:
#Let the KNN make predictions
knn.predict(X_test)

array([0, 1, 1, 0, 1, 1, 0, 2, 1, 1, 2, 1, 0, 2, 1, 1, 0, 0, 1, 0, 1, 0,
       1, 2, 2, 1, 1, 1, 2, 2, 0, 0, 2, 0, 0, 0, 2, 1, 1, 1, 0, 0, 1, 2,
       1])

In [7]:
#Evaluate the KNN model
knn.score(X_test, y_test)

0.7555555555555555

# Keras demo

In [8]:
#Load dataset
from keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()

Using TensorFlow backend.


In [9]:
X_train.shape

(60000, 28, 28)

In [10]:
X_train[0].max()

255

In [11]:
X_train[0].min()

0

In [12]:
X_test.shape

(10000, 28, 28)

In [13]:
y_train.shape

(60000,)

In [14]:
y_train[0:20]

array([5, 0, 4, 1, 9, 2, 1, 3, 1, 4, 3, 5, 3, 6, 1, 7, 2, 8, 6, 9],
      dtype=uint8)

In [15]:
y_test.shape

(10000,)

In [16]:
#Transform data into desired shape
import numpy as np
from keras.utils import to_categorical

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

X_train /= 255
X_test /= 255

X_train = np.reshape(X_train, (60000, 784))
X_test = np.reshape(X_test, (10000, 784))

In [17]:
X_train.shape

(60000, 784)

In [18]:
X_train[0].max()

1.0

In [19]:
X_train[0].min()

0.0

In [20]:
#One-hot encode the targets
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)

In [21]:
y_train[0:20]

array([[0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [22]:
#Build the neural network model
from keras.models import Sequential
from keras.layers.core import Dense, Activation

model = Sequential()
model.add(Dense(10, activation='softmax', input_shape=(28 * 28, )))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 10)                7850      
Total params: 7,850
Trainable params: 7,850
Non-trainable params: 0
_________________________________________________________________


In [23]:
#Compile and fit the model
model.compile(optimizer='adam', 
              loss='categorical_crossentropy', 
              metrics=['accuracy'])

model.fit(x=X_train, y=y_train, epochs=10, batch_size=128)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x22db3e8e908>

In [24]:
#Returns the loss value & metrics values for the model in test mode.
model.evaluate(X_test, y_test)



[0.26707916164398193, 0.9257]

# Simple Neural Network using Tensorflow

In [25]:
def get_batch(inputs, targets, batch_size):
    for i in range(0, inputs.shape[0], batch_size):
        yield inputs[i:i+batch_size], targets[i:i+batch_size]

In [26]:
import tensorflow as tf

#Build the computational graph for the neural network
x = tf.placeholder(tf.float32, shape=[None, 784], name='input')
y = tf.placeholder(tf.float32, shape=[None, 10], name='output')

W = tf.Variable(tf.zeros([784,10]))
b = tf.Variable(tf.zeros([10]))

y_out = tf.nn.softmax(tf.matmul(x,W) + b)

In [27]:
# Define the loss function
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y * tf.log(y_out), reduction_indices=[1]))

# Training step
train = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

In [28]:
# Accuracy calculation
correct_prediction = tf.equal(tf.argmax(y_out, axis=1), tf.argmax(y, axis=1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [29]:
#Initialize and train
init = tf.global_variables_initializer()

In [30]:
epoches=10

with tf.Session() as sess:
    sess.run(init)
    for step in range(epoches):
        for x_in_batch, y_in_batch in get_batch(X_train, y_train, batch_size=128):
            sess.run(train, feed_dict={x: x_in_batch, y: y_in_batch})
            
        print('step {0} of {1}'.format(step+1, epoches))
        print(sess.run(accuracy, feed_dict={x: X_test, y: y_test}))

step 1 of 10
0.9052
step 2 of 10
0.9112
step 3 of 10
0.9122
step 4 of 10
0.9123
step 5 of 10
0.9138
step 6 of 10
0.9151
step 7 of 10
0.9153
step 8 of 10
0.9152
step 9 of 10
0.9159
step 10 of 10
0.9166


# Pytorch

In [31]:
import torch
from torchvision import datasets
import torchvision.transforms as transforms

# how many samples per batch to load
batch_size = 20

# convert data to torch.FloatTensor
transform = transforms.ToTensor()

# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False, download=True, transform=transform)

# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size)

In [32]:
from torch import nn

class Network(nn.Module):
    def __init__(self):
        super().__init__()
        
        # Inputs to hidden layer linear transformation
        self.hidden = nn.Linear(784, 10)
        
        # Define softmax output 
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self, x):
        # Change the shape of the input tensor
        x = x.view(-1, 28 * 28)
        
        # Pass the input tensor through each of our operations
        x = self.hidden(x)
        x = self.softmax(x)
        
        return x
    
model = Network()
print(model)

Network(
  (hidden): Linear(in_features=784, out_features=10, bias=True)
  (softmax): Softmax()
)


In [33]:
# specify loss function (cross-entropy)
criterion = nn.CrossEntropyLoss()

# specify optimizer (stochastic gradient descent) and learning rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

In [34]:
# number of epochs to train the model
n_epochs = 10

model.train() # prep model for training

for epoch in range(n_epochs):
    # monitor training loss
    train_loss = 0.0
    
    ###################
    # train the model #
    ###################
    for data, target in train_loader:
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss
        train_loss += loss.item()*data.size(0)
             
    # print training statistics 
    # calculate average loss over an epoch
    train_loss = train_loss/len(train_loader.dataset)

    print('Epoch: {} \tTraining Loss: {:.6f}'.format(
        epoch+1, 
        train_loss
        ))

Epoch: 1 	Training Loss: 1.993917
Epoch: 2 	Training Loss: 1.762465
Epoch: 3 	Training Loss: 1.715749
Epoch: 4 	Training Loss: 1.695773
Epoch: 5 	Training Loss: 1.683876
Epoch: 6 	Training Loss: 1.675620
Epoch: 7 	Training Loss: 1.668864
Epoch: 8 	Training Loss: 1.651830
Epoch: 9 	Training Loss: 1.631660
Epoch: 10 	Training Loss: 1.618137


In [44]:
test_loss = 0
accuracy = 0

for data, target in test_loader:
    output = model(data)
    loss = criterion(output, target)
    test_loss += loss.item()*data.size(0)
    
    ps = torch.exp(output)
    _, top_class = ps.topk(1, dim=1)
    equals = top_class == target.view(*top_class.shape)
    accuracy += torch.mean(equals.type(torch.FloatTensor))
    
test_loss = test_loss/len(test_loader.dataset)
accuracy = accuracy/len(test_loader)

print('Test Loss: {:.6f}, accuracy: {}'.format(test_loss, accuracy))

Test Loss: 1.606325, accuracy: 0.8968005776405334
