## We can also predict a product that you might like...

based on new information never seen before!

In [42]:
import pandas as pd

# read the labeled cereal table and preview its first rows
labeled = pd.read_csv(filepath_or_buffer='cereal_labeled.csv')
labeled.head(5)

Unnamed: 0,A,B,name,y
0,0.320354,0.761119,100% Bran,0
1,-0.039224,0.648311,100% Natural Bran,0
2,0.821615,-0.250014,All-Bran,1
3,1.027443,0.091117,All-Bran with Extra Fiber,1
4,-0.097316,0.179982,Almond Delight,0


In [43]:
# keep the label column `y` on its own so it can be attached to the normalized data
y = labeled['y']

In [44]:
# read the normalized feature table
data = pd.read_csv('cereal_norm.csv')
# set the cereal names aside; they are kept out of the model input
name = data['name']
# attach the labels (pandas aligns them on the row index)
data['y'] = y
# keep every column except 'name'; Index.difference returns the labels sorted,
# so the resulting frame has its columns in alphabetical order
kept_columns = data.columns.difference(['name'])
data = data.loc[:, kept_columns]

data.head()

Unnamed: 0,calories,carbo,cups,fat,fiber,mfr_G,mfr_K,mfr_N,mfr_P,mfr_Q,mfr_R,potass,protein,rating,sodium,sugars,type_H,vitamins,weight,y
0,0.181818,0.25,0.064,0.2,0.714286,0.0,0.0,1.0,0.0,0.0,0.0,0.848943,0.6,0.665593,0.40625,0.4375,0.0,0.25,0.5,0
1,0.636364,0.375,0.6,1.0,0.142857,0.0,0.0,0.0,0.0,1.0,0.0,0.410876,0.4,0.210685,0.046875,0.5625,0.0,0.0,0.5,0
2,0.181818,0.333333,0.064,0.2,0.642857,0.0,1.0,0.0,0.0,0.0,0.0,0.969789,0.6,0.546941,0.8125,0.375,0.0,0.25,0.5,1
3,0.0,0.375,0.2,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.6,1.0,0.4375,0.0625,0.0,0.25,0.5,1
4,0.545455,0.625,0.4,0.4,0.071429,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.2,0.215987,0.625,0.5625,0.0,0.25,0.5,0


### Train KNN Classifier with Sklearn

In [45]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Select the features by name instead of by position. `data.iloc[:, :-1]` is only
# correct while 'y' happens to be the last column: once the 'name' column is
# re-attached to `data` (done further down in this notebook) a re-run of this cell
# would silently use the label 'y' as a feature.
features = data.drop(columns=['y', 'name'], errors='ignore')

# hold out 33% of the cereals for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(features, data.y, test_size=0.33, random_state=42)

# fit a 5-nearest-neighbours classifier on the training split
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)

# predicted labels for the held-out cereals
y_pred = knn.predict(X_test)

In [46]:
from sklearn.metrics import accuracy_score

# share of held-out cereals whose label was predicted correctly, as a percentage
acc = accuracy_score(y_test, y_pred) * 100

# put the percent sign after the (rounded) number instead of before it
print("The model is able to predict any cereal with an accuracy of {:.2f}%".format(acc))

The model is able to predict any cereal with an accuracy of % 96.15384615384616


### Recommend a product based on some features!

<img src="naranitas.jpg" />

In [47]:
from sklearn.preprocessing import MinMaxScaler

# let's describe Naranitas with a dict of features taken from its nutritional table
# (keys are the column names of cereal.csv)

naranitas = {
    'calories' : 100,
    'carbo' : 22,
    'cups' : 1,
    'fat' : 0.5,
    'fiber' : 1,
    'mfr': 'K',
    'potass' : 0,
    'protein' : 1,
    'rating' : 50,
    'sodium' : 51,
    'sugars' : 5,
    'type' : 'C',
    'vitamins' : 25,
    'weight' : 0.88,
    'name': 'Naranitas'
}

# load data and add the new product as the last row
# (DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent)
cereal = pd.read_csv('cereal.csv')
cereal = pd.concat([cereal, pd.DataFrame([naranitas])], ignore_index=True)

# clean data and one-hot encode the categorical columns; encoding the combined frame
# guarantees the new row gets exactly the same dummy columns as the reference rows
cereal = cereal[cereal.columns.difference(['shelf','name'])]
cereal = pd.get_dummies(cereal, prefix=['mfr'], columns=['mfr'], drop_first=True, dtype=float)
cereal = pd.get_dummies(cereal, prefix=['type'], columns=['type'], drop_first=True, dtype=float)

# normalize: fit the scaler on the reference rows only (everything but the appended
# last row) so the product being classified cannot shift the min/max of the scale.
# NOTE(review): presumably cereal_norm.csv was produced the same way from cereal.csv - confirm
scaler = MinMaxScaler().fit(cereal.iloc[:-1])
cereal = pd.DataFrame(scaler.transform(cereal), columns=cereal.columns, index=cereal.index)

# the last row is the normalized Naranitas feature vector
naran_row = cereal.iloc[-1]
naran_row

calories    0.454545
carbo       0.958333
cups        0.600000
fat         0.100000
fiber       0.071429
potass      0.003021
protein     0.000000
rating      0.422367
sodium      0.159375
sugars      0.375000
vitamins    0.250000
weight      0.380000
mfr_G       0.000000
mfr_K       1.000000
mfr_N       0.000000
mfr_P       0.000000
mfr_Q       0.000000
mfr_R       0.000000
type_H      0.000000
Name: 77, dtype: float64

In [48]:
# `knn` was fit on X_train, whose columns are in alphabetical order, while `naran_row`
# carries the one-hot columns at the end. Passing the raw values would feed the
# features in the wrong positions, so align them by column name first
# (.loc raises a KeyError if a training column is missing from the new row).
naran_features = naran_row.loc[X_train.columns].to_frame().T

label_naranitas = knn.predict(naran_features)
print("Naranitas belongs to group:", label_naranitas[0])

Naranitas belongs to group: 0


In [49]:
# re attach name
data['name'] = name
# list the cereals that share the group predicted for Naranitas, using the
# prediction itself rather than a hard-coded label
data[data.y == label_naranitas[0]]

Unnamed: 0,calories,carbo,cups,fat,fiber,mfr_G,mfr_K,mfr_N,mfr_P,mfr_Q,...,potass,protein,rating,sodium,sugars,type_H,vitamins,weight,y,name
0,0.181818,0.25,0.064,0.2,0.714286,0.0,0.0,1.0,0.0,0.0,...,0.848943,0.6,0.665593,0.40625,0.4375,0.0,0.25,0.5,0,100% Bran
1,0.636364,0.375,0.6,1.0,0.142857,0.0,0.0,0.0,0.0,1.0,...,0.410876,0.4,0.210685,0.046875,0.5625,0.0,0.0,0.5,0,100% Natural Bran
4,0.545455,0.625,0.4,0.4,0.071429,0.0,0.0,0.0,0.0,0.0,...,0.0,0.2,0.215987,0.625,0.5625,0.0,0.25,0.5,0,Almond Delight
8,0.363636,0.666667,0.336,0.2,0.285714,0.0,0.0,0.0,0.0,0.0,...,0.380665,0.2,0.41074,0.625,0.4375,0.0,0.25,0.5,0,Bran Chex
9,0.363636,0.583333,0.336,0.0,0.357143,0.0,0.0,0.0,1.0,0.0,...,0.577039,0.4,0.466164,0.65625,0.375,0.0,0.25,0.5,0,Bran Flakes
10,0.636364,0.541667,0.4,0.4,0.0,0.0,0.0,0.0,0.0,1.0,...,0.108761,0.0,0.0,0.6875,0.8125,0.0,0.25,0.5,0,Cap'n'Crunch
15,0.545455,0.958333,0.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.07855,0.2,0.309299,0.875,0.25,0.0,0.25,0.5,0,Corn Chex
20,0.454545,0.916667,0.6,0.0,0.071429,0.0,0.0,1.0,0.0,0.0,...,0.0,0.4,0.614455,0.25,0.0625,1.0,0.0,0.5,0,Cream of Wheat (Quick)
23,0.454545,0.791667,0.4,0.0,0.071429,0.0,0.0,0.0,0.0,0.0,...,0.244713,0.2,0.34744,0.59375,0.375,0.0,0.25,0.5,0,Double Chex
27,0.636364,0.541667,0.336,0.4,0.357143,0.0,0.0,0.0,1.0,0.0,...,0.607251,0.4,0.302321,0.5,0.6875,0.0,0.25,0.75,0,Fruit & Fibre Dates; Walnuts; and Oats
