# KNN using One Hot Encoding

In [2]:
import pandas as pd 
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing

## Loading Dataset

In [3]:
dataset = pd.read_csv("../../../Datasets/balloons.txt", sep=",")
dataset.columns = ['color','size','action','age','inflated']
dataset.head()

Unnamed: 0,color,size,action,age,inflated
0,YELLOW,SMALL,STRETCH,ADULT,T
1,YELLOW,SMALL,STRETCH,CHILD,F
2,YELLOW,SMALL,DIP,ADULT,F
3,YELLOW,SMALL,DIP,CHILD,F
4,YELLOW,LARGE,STRETCH,ADULT,T


In [4]:
samples_count, features_count = dataset.shape
samples_count, features_count

(19, 5)

## Separating target column from dataset

In [5]:
target = dataset['inflated']
dataset = dataset.drop('inflated', axis=1)

## One-Hot Encoding of Dataset & Target column

### Dataset

In [6]:
ohe_dataset = preprocessing.OneHotEncoder()
ohe_dataset.fit(dataset.values)  #One-Hot Encoder learns categories 
dataset_encoded = ohe_dataset.transform(dataset.values).toarray()

### Target Column

In [7]:
ohe_target = preprocessing.OneHotEncoder()
target_2D = target.values.reshape(-1,1)
ohe_target.fit(target_2D)
target_encoded = ohe_target.transform(target_2D).toarray()

## Train & Test split

In [8]:
x = dataset_encoded
y = target_encoded

In [9]:
train_x, test_x, train_y, test_y = train_test_split(x, y, random_state=0)

## KNN

In [10]:
knn = KNeighborsClassifier(n_neighbors=2,weights='distance')
knn.fit(train_x, train_y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=2, p=2,
                     weights='distance')

## Check Accuracy

In [11]:
predictions = knn.predict(test_x)

In [12]:
# We divided by 2 because there are 2 true values for each correct prediction
correct_predictions = np.sum(predictions==test_y) / 2

In [13]:
accuracy = correct_predictions / float(len(test_y))
print("%.2f" % accuracy)

1.00


## Prediction by using input data from user

In [14]:
color = input("Enter Color(YELLOW,PURPLE)::")
size = input("Enter Size(LARGE,SMALL)::")
action = input("Enter Action(STRETCH,DIP)::")
age = input("Enter Age(ADULT,CHILD)::")

Enter Color(YELLOW,PURPLE)::YELLOW
Enter Size(LARGE,SMALL)::SMALL
Enter Action(STRETCH,DIP)::DIP
Enter Age(ADULT,CHILD)::CHILD


In [19]:
user_sample = [[color, size, action, age]]    
user_sample_encoded = ohe_dataset.transform(user_sample).toarray()

In [20]:
result_encoded = knn.predict(user_sample_encoded)
result_decoded = ohe_target.inverse_transform(result_encoded)

In [21]:
result = result_decoded[0]

In [22]:
if result == 'T':
    print("Balloon is inflated")
else:
    print("Balloon is not inflated")

Balloon is not inflated
