### Pichu/Pikachu Discriminator - Supervised ML Model (k-NN)

--- 

The datapoints, consisting of the features __width__ and __height__ and categorized by __label__, are imported as raw data.

After cleaning the original data, we convert it into test and training data by randomly selecting a subset of the datapoints as training data (80%), and use the remaining datapoints as test data.

We then classify each newly added Pokémon using the k-nearest-neighbour classification method.

The result is plotted and represented with blue datapoints as *Pichus* and red as *Pikachus*.

In [195]:
from __future__ import annotations
import re, math, pprint
import numpy as np
import matplotlib.pyplot as plt

# Function to extract Pokémon data from datapoints.txt
def _parse_fields(line: str) -> list[float]:
    """Return the comma separated numbers on *line* as floats.

    All whitespace (spaces, tabs, line breaks) is removed before splitting,
    so "1.0, 2.0, 1" and "1.0,2.0,1" parse identically. A line that is
    empty or only whitespace yields an empty list.
    """
    compact = "".join(line.split())
    if not compact:
        return []
    return [float(field) for field in compact.split(",")]


def extract_data(data_list: list, dictionary_return: bool) -> None | dict:
    """Clean raw "width, height, label" lines using one of two methods.

    Args:
        data_list: Strings holding comma separated numeric values. Blank
            lines are skipped.
        dictionary_return: Selects the cleaning method.
            True  -> build and return a nested dictionary (unrounded).
            False -> append rounded values to the module-level lists
                     PokeWidth, PokeHeight and PokeLabel, which must exist
                     before the call.

    Returns:
        The dictionary when dictionary_return is true, otherwise None.

    Raises:
        ValueError: If a field is not a number, or (list mode) a row does
            not hold exactly three values.
        IndexError: If (dictionary mode) a row holds fewer than three values.
    """
    # Both methods share one parser so they accept exactly the same input.
    rows = (fields for fields in map(_parse_fields, data_list) if fields)

    # Method 1: Dictionary output: Inspired by @Andreas-Svensson

    if dictionary_return:  # Create and return dictionary
        item = list(rows)

        # Label 1 is stored under "Pikachu", label 0 under "Pichu".
        # NOTE(review): the Pikachu keys end with ":" while the Pichu keys do
        # not. Kept as-is so existing lookups keep working; confirm whether
        # the keys should be unified.
        Pokedex = {
            "Pikachu" : {
            "Width (cm):" :[i[0] for i in item if i[2] == 1],
            "Height (cm):":[i[1] for i in item if i[2] == 1]},
            "Pichu"   : {
            "Width (cm)"  :[i[0] for i in item if i[2] == 0],
            "Height (cm)" :[i[1] for i in item if i[2] == 0]} }

        return Pokedex

    # Method 2: List append: Inspired by @Kevy69

    for width, height, label in rows:  # Append to lists if dictionary_return = False
        # Round (1 decimal) and append data to lists
        PokeWidth.append(round(width, 1))
        PokeHeight.append(round(height, 1))
        PokeLabel.append(int(label))

# Function to calculate distance between points
def get_distance(P: tuple, Q: tuple) -> float:
    """Return the euclidean distance between the points P and Q.

    Only the first two coordinates of each point are used.
    """
    delta_x = P[0] - Q[0]
    delta_y = P[1] - Q[1]
    squared_length = delta_x**2 + delta_y**2

    return math.sqrt(squared_length)

if __name__ == "__main__":

    # Module-level lists that extract_data() appends to in list mode
    PokeWidth, PokeHeight, PokeLabel = [], [], []

    # Read every line of datapoints.txt
    file_path = "../../Data/datapoints.txt"
    with open(file_path) as test_point:
        data_list = list(test_point)
    del data_list[0]  # First line is the title row, not a datapoint

    # Run both cleaning methods on the same lines:
    # clean1 receives the dictionary, clean2 the list-mode return value
    # (the lists above are filled as a side effect of the second call).
    clean1 = extract_data(data_list, True)
    clean2 = extract_data(data_list, False)

    # Show the rounded heights and the full dictionary
    print(f"All {PokeHeight=}")
    pprint.pprint(clean1)
        

All PokeHeight=[31.2, 36.5, 31.4, 33.2, 36.6, 32.3, 35.3, 38.1, 36.7, 35.1, 35.6, 34.5, 34.1, 34.3, 34.6, 31.4, 34.8, 33.2, 32.8, 28.9, 37.3, 35.2, 30.5, 31.9, 35.3, 32.5, 31.0, 33.0, 31.4, 29.9, 30.7, 34.4, 31.3, 35.4, 35.4, 33.2, 32.7, 32.5, 31.2, 32.1, 35.4, 35.6, 35.7, 35.1, 35.1, 31.4, 33.1, 32.6, 35.6, 36.8, 32.6, 32.2, 36.1, 33.8, 30.6, 32.3, 33.9, 31.6, 33.3, 34.7, 31.1, 34.7, 36.8, 30.8, 33.9, 33.6, 36.2, 36.4, 33.3, 30.7, 37.2, 36.1, 30.8, 31.2, 34.6, 37.0, 33.9, 35.3, 35.6, 32.5, 36.8, 32.2, 34.8, 37.1, 30.3, 34.1, 33.5, 33.1, 30.9, 33.4, 31.9, 35.2, 32.6, 35.2, 31.4, 33.2, 30.9, 32.1, 36.5, 36.5, 32.2, 29.9, 33.1, 30.8, 36.3, 37.1, 31.8, 34.7, 31.3, 36.0, 35.6, 32.6, 34.6, 35.4, 31.0, 32.5, 32.0, 29.0, 33.3, 35.1, 31.7, 32.6, 35.5, 30.8, 32.8, 40.1, 34.2, 29.0, 32.8, 32.3, 34.8, 32.2, 36.1, 36.0, 33.9, 34.3, 35.0, 35.0, 35.5, 38.2, 34.5, 32.3, 31.9, 31.7, 32.0, 32.4, 32.5, 30.3, 33.6, 32.7]
{'Pichu': {'Height (cm)': [31.23956701424158,
                           31.44170391

---
First we convert the original data into test and training data by randomly selecting a subset (75%) of the datapoints as training data, and use the remaining datapoints as test data.