In [None]:
from datascience import *
import numpy as np
import matplotlib
from mpl_toolkits.mplot3d import Axes3D
%matplotlib inline
import matplotlib.pyplot as plots
# Use the 'fivethirtyeight' matplotlib style sheet for the figures drawn below.
plots.style.use('fivethirtyeight')

In [None]:
def standard_units(x):
    """Convert an array of numbers to standard units.

    Each value has the mean of `x` subtracted and is then divided by the
    standard deviation of `x` (as computed by `np.std`).
    """
    centered = x - np.mean(x)
    spread = np.std(x)
    return centered / spread

## Classifying Patients CKD ##

In [None]:
# Load the CKD data, shortening the 'Blood Glucose Random' label to 'Glucose',
# then show the first 3 rows.
ckd = Table.read_table('ckd.csv').relabeled('Blood Glucose Random', 'Glucose')
ckd.show(3)

In [None]:
# How many rows fall into each value of 'Class'?
ckd.group('Class')

In [None]:
# Glucose against white blood cell count, with points colored by 'Class'.
ckd.scatter('White Blood Cell Count', 'Glucose', group='Class')

In [None]:
# Glucose against hemoglobin, with points colored by 'Class'.
ckd.scatter('Hemoglobin', 'Glucose', group='Class')

In [None]:
# We want a way to predict the class of someone
# without having to plot & eyeball this graph every time.
#
# One way to do this is to put some thresholds into code.

# Filter down to the Class 0 rows once, then read both thresholds from them:
# the largest Glucose and the smallest Hemoglobin seen among Class 0 rows.
class_0_rows = ckd.where('Class', are.equal_to(0))
max_glucose_for_0 = class_0_rows.column('Glucose').max()
min_hemoglobin_for_0 = class_0_rows.column('Hemoglobin').min()

max_glucose_for_0, min_hemoglobin_for_0

In [None]:
def classify(hemoglobin, glucose):
    """Threshold-based classifier on raw hemoglobin and glucose values.

    Returns 1 when `hemoglobin` is below `min_hemoglobin_for_0` or `glucose`
    is above `max_glucose_for_0` (both module-level thresholds); otherwise
    returns 0.
    """
    # Either out-of-range measurement is enough on its own to predict class 1.
    if hemoglobin < min_hemoglobin_for_0:
        return 1
    if glucose > max_glucose_for_0:
        return 1
    return 0

In [None]:
# Let's try our classifier!
# Arguments are (hemoglobin, glucose); the result is 1 or 0.
classify(15, 100)

In [None]:
# Overlay the test point (hemoglobin 15, glucose 100) in red on the labeled
# scatter plot to see where it falls relative to the two classes.
ckd.scatter('Hemoglobin', 'Glucose', group='Class')
plots.scatter(15, 100, color='red');

In [None]:
# A second test point: hemoglobin 10, glucose 300.
classify(10, 300)

In [None]:
# Overlay the second test point (hemoglobin 10, glucose 300) in red on the
# labeled scatter plot.
ckd.scatter('Hemoglobin', 'Glucose', group='Class')
plots.scatter(10, 300, color='red');

## Classifying Banknotes ##

In [None]:
# Load the banknote data and display the table.
banknotes = Table.read_table('banknote.csv')
banknotes

In [None]:
# How many banknotes fall into each value of 'Class'?
banknotes.group('Class')

In [None]:
# WaveletCurt against WaveletVar, with points colored by 'Class'.
banknotes.scatter('WaveletVar', 'WaveletCurt', group='Class')

In [None]:
# Entropy against WaveletSkew, with points colored by 'Class'.
banknotes.scatter('WaveletSkew', 'Entropy', group='Class')

In [None]:
# 3-D scatter of three banknote features, with points colored by 'Class'.
#
# The 3-D axes are created through the figure rather than by constructing
# Axes3D(fig) directly: on recent matplotlib releases a directly constructed
# Axes3D is no longer attached to the figure automatically, which leaves the
# rendered output blank.
fig = plots.figure(figsize=(8,8))
ax = fig.add_subplot(projection='3d')
ax.scatter(banknotes.column('WaveletSkew'),
           banknotes.column('WaveletVar'),
           banknotes.column('WaveletCurt'),
           c=banknotes.column('Class'),
           cmap='viridis',
           s=50);

## Back to Classifying Patients CKD ##

In [None]:
# Glucose against hemoglobin in the original units, colored by 'Class'.
ckd.scatter('Hemoglobin', 'Glucose', group='Class')

In [None]:
# Add standardized copies of both features ('Hemoglobin SU', 'Glucose SU'),
# computed with standard_units, then re-draw the scatter plot in standard units.
ckd = ckd.with_columns(
    'Hemoglobin SU', standard_units(ckd.column('Hemoglobin')),
    'Glucose SU', standard_units(ckd.column('Glucose'))
)

ckd.scatter('Hemoglobin SU', 'Glucose SU', group='Class')

In [None]:
# Build a regular grid of (x, y) test points, flattened into two parallel
# arrays: x runs over [-4, 2) and y over [-1, 6), both in steps of 0.2.
#
# Every x is paired with every y, in the same order a nested
# `for x: for y:` loop would produce. Using np.repeat / np.tile avoids
# growing the arrays with np.append, which copies the whole array on
# every iteration.
x_values = np.arange(-4, 2, 0.2)
y_values = np.arange(-1, 6, 0.2)
x_array = np.repeat(x_values, len(y_values))  # each x held for a full sweep of y
y_array = np.tile(y_values, len(x_values))    # the y sweep repeated once per x
        


In [None]:
# Inspect the flattened grid coordinates.
x_array, y_array

In [None]:
# Put the grid points into a two-column table, one row per point.
test_grid = Table().with_columns(
    'Hemoglobin SU', x_array,
    'Glucose SU', y_array
)

# With 'Hemoglobin SU' on the x-axis, the only other column ('Glucose SU')
# is plotted on the y-axis.
test_grid.scatter('Hemoglobin SU')

In [None]:
# Filter down to the Class 0 rows once, then read both standard-unit
# thresholds from them: the largest 'Glucose SU' and the smallest
# 'Hemoglobin SU' seen among Class 0 rows.
class_0_rows_su = ckd.where('Class', are.equal_to(0))
max_glucose_SU_for_0 = class_0_rows_su.column('Glucose SU').max()
min_hemoglobin_SU_for_0 = class_0_rows_su.column('Hemoglobin SU').min()

max_glucose_SU_for_0, min_hemoglobin_SU_for_0

In [None]:
def classify_SU(hemoglobin_SU, glucose_SU):
    """Threshold-based classifier on hemoglobin and glucose in standard units.

    Returns 1 when `hemoglobin_SU` is below `min_hemoglobin_SU_for_0` or
    `glucose_SU` is above `max_glucose_SU_for_0` (both module-level
    thresholds); otherwise returns 0.
    """
    # Either out-of-range measurement is enough on its own to predict class 1.
    if hemoglobin_SU < min_hemoglobin_SU_for_0:
        return 1
    if glucose_SU > max_glucose_SU_for_0:
        return 1
    return 0

In [None]:
# Classify every grid point: apply classify_SU to each row's
# ('Hemoglobin SU', 'Glucose SU') pair and store the result in a
# 'Classification' column.
test_grid = test_grid.with_column(
    'Classification', test_grid.apply(classify_SU, 'Hemoglobin SU', 'Glucose SU'))

In [None]:
# Display the grid table, now including the 'Classification' column.
test_grid

In [None]:
# Color each grid point by its predicted class to show the regions the
# threshold classifier assigns to 0 and to 1.
test_grid.scatter('Hemoglobin SU', 'Glucose SU', group='Classification')

## Nearest Neighbor ##

In [None]:
# The labeled CKD data again: glucose against hemoglobin, colored by 'Class'.
ckd.scatter('Hemoglobin', 'Glucose', group='Class')

In [None]:
# The labeled banknote data again: WaveletCurt against WaveletVar, colored by 'Class'.
banknotes.scatter('WaveletVar', 'WaveletCurt', group='Class')

In [None]:
# Overlay a test point at (WaveletVar -1, WaveletCurt -2) in red on the
# labeled banknote scatter plot.
banknotes.scatter('WaveletVar', 'WaveletCurt', group='Class')
plots.scatter(-1, -2, color='red');

In [None]:
# Same plot, zoomed in to a 1-by-1 window centered on the red point (-1, -2)
# so the points closest to it are visible.
banknotes.scatter('WaveletVar', 'WaveletCurt', group='Class')
plots.scatter(-1, -2, color='red');
plots.ylim(-2.5, -1.5)
plots.xlim(-1.5, -0.5)