# ESA 10: Instance Based Methods
## Exercise 1

## Initialization

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import preprocessing
import numpy as np

# Toy data set for the k-nearest-neighbour walkthrough.
# Source: https://www.listendata.com/2017/12/k-nearest-neighbor-step-by-step-tutorial.html
# The three lists below are parallel: position i in each list describes the same sample.

# Feature 1: height (stored as strings, exactly as given in the source data)
height = [
    '158', '158', '158', '160', '160', '163',
    '163', '160', '163', '165', '165', '165',
    '168', '168', '168', '170', '170', '170',
]
# Feature 2: weight (also stored as strings)
weight = [
    '58', '59', '63', '59', '60', '60',
    '61', '64', '64', '61', '62', '65',
    '62', '63', '66', '63', '64', '68',
]

# Target variable: the shirt size that belongs to each (height, weight) pair
shirt_size = [
    'M', 'M', 'M', 'M', 'M', 'M',
    'M', 'L', 'L', 'L', 'L', 'L',
    'L', 'L', 'L', 'L', 'L', 'L',
]

# Put the two feature lists side by side: one row per sample, one column per feature
features = np.column_stack((height, weight))

print(features)

[['158' '58']
 ['158' '59']
 ['158' '63']
 ['160' '59']
 ['160' '60']
 ['163' '60']
 ['163' '61']
 ['160' '64']
 ['163' '64']
 ['165' '61']
 ['165' '62']
 ['165' '65']
 ['168' '62']
 ['168' '63']
 ['168' '66']
 ['170' '63']
 ['170' '64']
 ['170' '68']]


## Normalize the data

In [8]:
# `features` stores the measurements as strings. Convert them to floats explicitly
# before handing them to scikit-learn: newer scikit-learn versions reject string
# arrays where numeric input is expected instead of silently converting them.
numeric_features = np.asarray(features, dtype=float)

# Note: with its default arguments `preprocessing.normalize` rescales every *row*
# (sample) to unit L2 length; it does not rescale the individual feature columns.
normalized_features = preprocessing.normalize(numeric_features)
print(normalized_features)

[[0.93874823 0.34460378]
 [0.93681561 0.34982355]
 [0.92888159 0.37037683]
 [0.93824296 0.34597709]
 [0.93632918 0.35112344]
 [0.93844137 0.34543854]
 [0.93656509 0.35049369]
 [0.92847669 0.37139068]
 [0.93082089 0.36547569]
 [0.93795431 0.34675887]
 [0.93609582 0.3517451 ]
 [0.93040839 0.36652452]
 [0.93815229 0.34622287]
 [0.93632918 0.35112344]
 [0.93075149 0.36565237]
 [0.93768221 0.34749399]
 [0.93587592 0.35232976]
 [0.92847669 0.37139068]]


## Simple Euclidean distance function

```
def distance(instance1, instance2):
    # just in case, if the instances are lists or tuples:
    instance1 = np.array(instance1) 
    instance2 = np.array(instance2)
    
    return np.linalg.norm(instance1 - instance2)
```
(Source: https://colab.research.google.com/drive/1DnD_RRAZuanLlJSCmJjRbGtuloZVOirX?usp=drive_open#scrollTo=xEcFguiEynCS)

## Take a point and calculate the distance to all points

In [9]:
def distance(instance1, instance2):
    """Return the Euclidean distance between two points.

    Both arguments may be lists, tuples or numpy arrays; they are converted
    to arrays first so that the subtraction works element-wise.
    """
    difference = np.array(instance1) - np.array(instance2)
    return np.linalg.norm(difference)

# Query point whose shirt size should be predicted, given as (height, weight)
point = (170, 70)
# Scale the query point the same way as the training samples so that all
# distances are measured in the same space. The input is a one-element list,
# so the result is a 2-D array with a single row.
normalized_point = preprocessing.normalize([point])

# Distance from the query point to every training sample, in row order.
# Iterating over the rows directly avoids deriving the row count from
# `features.size / 2`, which hard-coded the number of feature columns.
distances = [distance(sample, normalized_point) for sample in normalized_features]

print(distances)

[0.038787932315880354, 0.033222769493538896, 0.01119231078938203, 0.037324885668345055, 0.03183502894810439, 0.037898720634011944, 0.032507436791182995, 0.010100623647668021, 0.016463067125007627, 0.03649166667004819, 0.031171100803768604, 0.015336070547418148, 0.0370629622842436, 0.0318350289481044, 0.01627324812396743, 0.03570792346923691, 0.030546526610812537, 0.01010062364766798]


## Take the list and sort it

In [10]:
# Important step: merge the calculated distances with the target classes.
# np.stack coerces both columns to strings, so `targets` is suitable for
# display and label lookup only, not for numeric comparisons.
targets = np.stack((distances, shirt_size), axis=-1)

# Derive the row order from the *numeric* distances. Sorting the string column of
# `targets` would compare text lexicographically (e.g. '1e-05' sorts after '0.5').
# A stable sort keeps samples with equal distance in their original order.
nearest_first = np.argsort(np.asarray(distances, dtype=float), kind="stable")

# Reorder the rows by ascending distance without breaking the distance/label pairing
sorted_targets = targets[nearest_first]
sorted_targets

array([['0.01010062364766798', 'L'],
       ['0.010100623647668021', 'L'],
       ['0.01119231078938203', 'M'],
       ['0.015336070547418148', 'L'],
       ['0.01627324812396743', 'L'],
       ['0.016463067125007627', 'L'],
       ['0.030546526610812537', 'L'],
       ['0.031171100803768604', 'L'],
       ['0.03183502894810439', 'M'],
       ['0.0318350289481044', 'L'],
       ['0.032507436791182995', 'M'],
       ['0.033222769493538896', 'M'],
       ['0.03570792346923691', 'L'],
       ['0.03649166667004819', 'L'],
       ['0.0370629622842436', 'L'],
       ['0.037324885668345055', 'M'],
       ['0.037898720634011944', 'M'],
       ['0.038787932315880354', 'M']], dtype='<U32')

## Take the nearest neighbour (smallest distance) to determine the target class

In [12]:

# The first row of the sorted table is the closest training sample;
# its second column holds that sample's label (1-nearest-neighbour decision).
nearest_row = sorted_targets[0]
target_class = nearest_row[1]

message = "The data point {} can be assigned to the target class {}."
print(message.format(point, target_class))

The data point (170, 70) can be assigned to the target class L.


## Exercise 2
### Initialization

In [13]:
from sklearn.datasets import load_iris
# Load the Iris data set. The returned object is accessed by key in the
# following cells: 'data', 'target' and 'target_names'.
iris = load_iris()

## Determine KNN value
### Initialization of model

In [14]:
# Split the data set into a training part and a held-out test part.
# random_state=0 pins the split so that it is reproducible.
iris_samples, iris_labels = iris['data'], iris['target']
X_train, X_test, y_train, y_test = train_test_split(
    iris_samples,
    iris_labels,
    random_state=0,
)

# Nearest-neighbour classifier that consults only the single closest sample (k = 1)
model = KNeighborsClassifier(n_neighbors=1)

# Kept as the last expression of the cell so the fitted estimator is shown as output
model.fit(X_train, y_train)

KNeighborsClassifier(n_neighbors=1)

### Feeding the sample data

In [15]:
# A single sample to classify. It is wrapped in an outer list so the result is a
# 2-D array with one row and four feature values.
# NOTE(review): the values are presumably in the same feature order as iris['data'] - confirm.
sample_measurements = [4.8, 2.5, 5.3, 2.4]
X_sample = np.array([sample_measurements])

### Prediction

In [16]:
# Classify the sample with the fitted model. `prediction` is used further down
# to index iris['target_names'] and obtain the readable class name.
prediction = model.predict(X_sample)

### Accuracy

In [17]:
# Score of the fitted model on the held-out test split
accuracy = model.score(X_test, y_test)

# The same value expressed as a percentage, rounded to two decimals for display
accuracyInPercent = round(100 * accuracy, 2)

### Result

In [18]:
# Translate the predicted class index into its readable name.
# `prediction` is an array, so `result` is an array of names as well.
class_names = iris['target_names']
result = class_names[prediction]

# The percentage reported here is the model's score on the test split,
# not a confidence value for this individual prediction.
summary = "According to this learner, the given sample is a flower of the type {} with an accuracy of {}%."
print(summary.format(result, accuracyInPercent))

According to this learner, the given sample is a flower of the type ['virginica'] with an accuracy of 97.37%.
