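# KNN example: classifying zoo animals

Fits a k-nearest-neighbours classifier that predicts each animal's `Class_Type` from its traits, then compares its train/test accuracy with a logistic-regression baseline.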

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.linear_model import LogisticRegression

In [2]:
# Load the per-animal trait table and the class lookup table in one pass.
zoo, classes = (
    pd.read_csv(path) for path in ('data/zoo.csv', 'data/class.csv')
)

In [3]:
# Preview a single row to check the column layout of the animal table.
zoo.head(1)

Unnamed: 0,animal_name,hair,feathers,eggs,milk,airborne,aquatic,predator,toothed,backbone,breathes,venomous,fins,legs,tail,domestic,catsize,class_type
0,aardvark,1,0,0,1,0,0,1,1,1,1,0,0,4,0,0,1,1


In [4]:
# Keep only the numeric class id and its human-readable label; these are the
# only two lookup columns the merge below needs.
classes = classes.loc[:, ['Class_Number', 'Class_Type']]

In [5]:
# Attach the readable class label to every animal via its numeric class id.
# validate='many_to_one' makes pandas raise MergeError if a Class_Number
# appears more than once in the lookup table, instead of silently duplicating
# animal rows in the left join.
df = pd.merge(
    zoo,
    classes,
    how='left',
    left_on='class_type',
    right_on='Class_Number',
    validate='many_to_one',
)

In [6]:
# Remove the two numeric class ids (which would leak the label into the
# features) and the animal name in a single call.
df = df.drop(columns=['Class_Number', 'animal_name', 'class_type'])

In [7]:
# Separate the label column from the feature matrix.
target = 'Class_Type'
y = df[target]
X = df.drop(columns=[target])

In [8]:
# Hold out a test split (default size: 25% of the rows). The seed is fixed so
# the split, and therefore every prediction and score below, is identical on
# each Restart & Run All; without it the shuffle differs on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [9]:
# Fit a 4-nearest-neighbours classifier on the training split; fit() returns
# the estimator itself, so construction and fitting can be chained.
model = KNeighborsClassifier(n_neighbors=4).fit(X_train, y_train)
model

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=4, p=2,
                     weights='uniform')

In [10]:
# Predicted class label for each row of the held-out test split.
model.predict(X_test)

array(['Bird', 'Fish', 'Mammal', 'Fish', 'Bug', 'Mammal', 'Mammal', 'Bug',
       'Mammal', 'Bird', 'Bird', 'Fish', 'Bug', 'Fish', 'Bird', 'Mammal',
       'Invertebrate', 'Mammal', 'Bug', 'Bird', 'Mammal', 'Mammal',
       'Bird', 'Mammal', 'Mammal', 'Bird'], dtype=object)

In [11]:
# Class labels the fitted model knows about; predict_proba reports its
# columns in this same order.
model.classes_

array(['Amphibian', 'Bird', 'Bug', 'Fish', 'Invertebrate', 'Mammal',
       'Reptile'], dtype=object)

In [12]:
# Per-class probability for each test row, one column per entry of
# model.classes_. With 4 uniformly weighted neighbours each value is the
# fraction of neighbours voting for that class, i.e. a multiple of 0.25.
model.predict_proba(X_test)

array([[0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
       [0.  , 0.  , 0.  , 0.75, 0.  , 0.  , 0.25],
       [0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
       [0.  , 0.  , 0.5 , 0.  , 0.5 , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 1.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.75, 0.  , 0.25],
       [0.  , 0.  , 0.  , 0.  , 0.  , 1.  , 0.  ],
       [0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 1.  , 0.  , 0.  ,

In [13]:
# Side-by-side view of true vs. predicted labels, indexed like y_test so each
# row can be traced back to the original animal.
y_test.to_frame('ytrue').assign(yhat=model.predict(X_test))

Unnamed: 0,ytrue,yhat
83,Bird,Bird
73,Fish,Fish
64,Mammal,Mammal
80,Reptile,Fish
40,Bug,Bug
44,Mammal,Mammal
75,Mammal,Mammal
46,Invertebrate,Bug
9,Mammal,Mammal
79,Bird,Bird


In [14]:
# Mean accuracy of the KNN model as a (train, test) pair.
tuple(
    model.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

(0.9333333333333333, 0.8846153846153846)

In [15]:
# Logistic-regression baseline trained on the same split for comparison with
# KNN; fit() returns the estimator, so it is chained onto the constructor.
model2 = LogisticRegression(solver='lbfgs').fit(X_train, y_train)
model2



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [16]:
# Mean accuracy of the logistic-regression baseline as a (train, test) pair.
tuple(
    model2.score(features, labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

(0.9866666666666667, 0.9230769230769231)