# In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter


def plot_data(X, Y, Z):
    """Scatter-plot every ``(X, Y)`` pair and display the figure.

    The figure is prepared by ``style_plot``. Each point is drawn in black,
    one ``plt.scatter`` call per point; the marker is a circle when the
    entry of ``Z`` at the same index equals ``'male'`` and a cross otherwise.

    Args:
        X: Iterable of horizontal coordinates.
        Y: Iterable of vertical coordinates, paired with ``X`` via ``zip``.
        Z: Labels, looked up as ``Z[index]`` for each plotted pair.
    """
    style_plot()

    for index, (x_value, y_value) in enumerate(zip(X, Y)):
        if Z[index] == 'male':
            marker = 'o'
        else:
            marker = 'x'
        plt.scatter(x_value, y_value, c='k', marker=marker)

    plt.show()


def style_plot():
    """Start a new figure with a white-grid style and labelled axes.

    Selects the seaborn "whitegrid" style sheet, opens a fresh figure and
    sets the x/y axis labels on it.
    """
    # Matplotlib 3.6 renamed the bundled seaborn style sheets to
    # ``seaborn-v0_8-*`` and 3.8 removed the old names, where
    # ``plt.style.use('seaborn-whitegrid')`` raises OSError. Prefer the new
    # name when this installation provides it, otherwise use the legacy one.
    modern_style = 'seaborn-v0_8-whitegrid'
    legacy_style = 'seaborn-whitegrid'
    if modern_style in plt.style.available:
        plt.style.use(modern_style)
    else:
        plt.style.use(legacy_style)

    plt.figure()
    plt.xlabel('Height in foot.')
    plt.ylabel('Age in years.')

    
# Call this function to read a CSV file into a pandas DataFrame.
def read_csv_frame(path='', delimiter=None, header='infer'):
    """Load a CSV file into a pandas DataFrame.

    Args:
        path: Location of the CSV file. When it is the empty string, the
            user is prompted for the location on standard input instead.
        delimiter: Passed through to ``pandas.read_csv`` unchanged.
        header: Passed through to ``pandas.read_csv`` unchanged.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    # Resolve the source first so there is a single read_csv call site.
    source = path if path != '' else input('Enter path to CSV: ')
    return pd.read_csv(source, delimiter=delimiter, header=header)
    

# Reads the named columns from a CSV file, yielding one column at a time.
def read_columns(path, *headers):
    """Yield the requested columns of a CSV file, one at a time.

    This is a generator: the file is only read once iteration starts, and
    each column is looked up when it is requested.

    Args:
        path: Location of the CSV file, handed to ``pandas.read_csv``.
        *headers: Column names to extract, in the order they should be
            yielded.

    Yields:
        ``frame[header]`` for every name in ``headers``.
    """
    table = pd.read_csv(path)
    yield from (table[column_name] for column_name in headers)


def read_test_input():
    """Prompt for a height and an age and return the entered numbers.

    The entered line is split on whitespace and every token is converted
    with ``float``; the number of tokens is not checked.

    Returns:
        A list with one float per token entered.
    """
    raw_line = input('Enter Height and Age for evaluation: ')
    return list(map(float, raw_line.split()))


def evaluate_nearest(X, Y, x, y, verbose=False, *, labels=None, k=3):
    """Classify the point ``(x, y)`` by a vote among its nearest neighbours.

    Args:
        X: First coordinate of every known sample. Must support element-wise
            arithmetic with a scalar (e.g. a NumPy array or pandas Series).
        Y: Second coordinate of every known sample, aligned with ``X``.
        x: First coordinate of the point to classify.
        y: Second coordinate of the point to classify.
        verbose: When true, print the computed distances and the labels of
            the selected neighbours.
        labels: Label of every known sample, aligned with ``X`` and ``Y``.
            When omitted, the module-level ``Z`` is used, which is what
            earlier versions of this function always did.
        k: Number of nearest neighbours taking part in the vote.

    Returns:
        Whatever ``most_common`` returns for the labels of the ``k`` samples
        closest to ``(x, y)``.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f'k must be at least 1, got {k}')

    if labels is None:
        # Backward compatibility: the labels used to be read from a global
        # named ``Z``; keep doing so when the caller does not pass them.
        labels = Z  # noqa: F821

    euclidean_distances = np.sqrt((X - x)**2 + (Y - y)**2)
    if verbose:
        print(f'Distances: {euclidean_distances}')

    # argsort orders sample positions by ascending distance; keep the k closest.
    nearest_neighbor_indices = euclidean_distances.argsort()[:k]
    nearest_neighbor_labels = np.take(labels, nearest_neighbor_indices)

    if verbose:
        print(f'K Nearest: {nearest_neighbor_labels}')

    return most_common(nearest_neighbor_labels)


def most_common(nearest_neighbors):
    """Return the value that occurs most often in ``nearest_neighbors``.

    When several values share the highest count, the one encountered first
    wins. An empty input raises ``IndexError``.
    """
    top_value, _occurrences = Counter(nearest_neighbors).most_common(1)[0]
    return top_value



def separate_by_class(dataset):
    """Group the rows of a dataset by their class value.

    The class value is assumed to be the last element of every row.

    Args:
        dataset: Iterable of rows. Each row must support ``row[-1]`` and that
            element must be hashable. Any iterable works, including
            generators, not only indexable sequences.

    Returns:
        A dict mapping each class value to the list of complete rows
        (class element included) carrying it. Rows keep their original
        order and keys appear in the order the classes are first seen.
    """
    separated = {}
    for vector in dataset:
        # setdefault creates the bucket the first time a class is seen.
        separated.setdefault(vector[-1], []).append(vector)

    return separated
    
    
    
def summarize_dataset(dataset):
    """Summarise every feature column of a dataset.

    The rows are transposed into columns. The last column is treated as the
    class label and gets no summary.

    Args:
        dataset: Iterable of equally long rows whose last element is the
            class value.

    Returns:
        A list with one ``(average(column), std_dev(column), len(column))``
        tuple per feature column, in column order. An empty dataset yields
        an empty list.
    """
    # Drop the label column before computing anything. Previously the
    # statistics were computed for every column and the last result was then
    # deleted, so the label values were also fed to average()/std_dev()
    # (NOTE(review): presumably that fails for non-numeric labels; those
    # helpers are not visible here), and an empty dataset raised IndexError.
    feature_columns = list(zip(*dataset))[:-1]

    return [(average(column), std_dev(column), len(column)) for column in feature_columns]



def summarize_by_class(dataset):
    """Compute the per-column summaries separately for every class.

    The dataset is first split with ``separate_by_class``; each group of
    rows is then passed to ``summarize_dataset``.

    Returns:
        A dict mapping each class value to the summaries of its rows.
    """
    grouped_rows = separate_by_class(dataset)
    return {
        label: summarize_dataset(members)
        for label, members in grouped_rows.items()
    }







