In [0]:
## Install utilities (uncomment the lines you need)
# !pip install pandas
# !pip install numpy
# !pip install tensorflow
# !pip install matplotlib
# !pip install altair
# !pip install scikit-learn

## Update utilities (-U upgrades to the latest version, -q keeps pip's output quiet)
# !pip install -U -q pandas
# !pip install -U -q numpy
# !pip install -U -q tensorflow
# !pip install -U -q matplotlib
# !pip install -U -q altair
# !pip install -U -q scikit-learn

In [11]:
import pandas as pd
import numpy as np
import tensorflow as tf

print(pd.__version__)

## allows intellisense-like feel to the notebook
%config IPCompleter.greedy=True
## Used by matplotlib to remove duplicate legend handles
from collections import OrderedDict
## used for interactive charts
import altair as alt

## converts the dictionary from google.colab's upload file into a stream
import io
from sklearn.datasets import load_iris

0.25.3


# Load the Iris dataset into a pandas DataFrame

In [0]:
# Fetch the Iris data set bundled with scikit-learn.
iris = load_iris()

# Start from the feature matrix, one column per entry of iris['feature_names'] ...
frame = pd.DataFrame(data=iris['data'], columns=iris['feature_names'])

# ... then append the class labels as the last column, stored as int8.
# The column name is arbitrary; the original data set would probably call it 'Species'.
frame['target'] = iris['target'].astype(np.int8)

In [123]:
# Sanity check: display the first rows of the assembled frame (features plus 'target').
frame.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


# HELPER FUNCTIONS

In [0]:
def complexCharts(frame: pd.DataFrame, col : 'List[string]') -> list:
  '''
  Build one brushable Altair chart row per feature column, for threshold-based
  classification of a data set that is already labelled (class 1, 2, 3, ...).

  What it does: chart i plots col[i] on x (quantitative) against the class
  label on y (nominal) and owns an x-interval brush. The samples selected in
  chart 0 are fed to chart 1, the samples selected in charts 0 and 1 are fed
  to chart 2, and so on, so the user progressively culls the data. Next to
  every chart sits one text mark per class showing the count of that class
  inside all brushes up to and including that chart's own.

  How this works: Altair selections can be combined with boolean operators,
  so the cumulative filters are built as
    chart 0 -> brush[0]
    chart 1 -> brush[0] & brush[1]
    chart 2 -> brush[0] & brush[1] & brush[2]
  Chart i is filtered by the cumulative brush of chart i-1 (chart 0 is
  unfiltered); the class counters of chart i use the cumulative brush of
  chart i.

  Parameters:
    frame: data to plot; its LAST column is used as the class label.
    col: names of the feature columns to chart, in display order.

  Returns:
    A list with one (feature chart | class counters) chart per entry of col.
    Returns None, after printing a message, when frame is None or col is
    None or empty.
  '''
  # Bail out before touching Altair when there is nothing to chart.
  if frame is None or col is None or len(col) == 0:
    print('Brushcount is 0 or frame is null, returning')
    return

  _maxlen = len(col)
  _classification = frame.columns[-1]

  # setup class_id_lst, each list item 'i' displays the count of 'class_i'
  # for example: should we select 50 class_0, and 25 class_1, our class counter object will display 50, 25
  class_id_lst = [
    alt.Chart(frame[frame[_classification] == class_id]).mark_text().encode(
      y = alt.Y('count:O', axis = None), text = 'count():N', key = _classification
    ).properties(title = 'Class_' + str(class_id) + 'Count')
    for class_id in frame[_classification].unique()
  ]

  # setup brush_lst, the number of brushes == number of feature columns
  brush_lst = [alt.selection_interval(encodings=['x']) for _ in range(_maxlen)]

  # cumulative brushes: pbrush_lst[i] == brush[0] & brush[1] & ... & brush[i]
  pbrush_lst = [brush_lst[0]]
  for i in range(1, _maxlen):
    pbrush_lst.append(pbrush_lst[i-1] & brush_lst[i])

  chart_lst = []
  for i in range(_maxlen):
    # points inside this chart's own brush keep their class colour, the rest turn gray
    chart = alt.Chart(frame).mark_circle(opacity = 0.2, size = 100).encode(
      x = col[i] + ':Q',
      y = alt.Y(_classification + ':N', title = 'Target ID'),
      color = alt.condition(brush_lst[i], _classification + ':N', alt.value('lightgray'), legend = None),
    ).add_selection(
      brush_lst[i]
    )
    # the first chart shows everything; later charts only show what survived the earlier brushes
    if i > 0:
      chart = chart.transform_filter(pbrush_lst[i-1])

    # Concatenates the plot with the counter objects
    # essentially => (display of selection chart) (display of class count)
    if class_id_lst:
      counters = class_id_lst[0].transform_filter(pbrush_lst[i])
      for class_counter in class_id_lst[1:]:
        counters = counters & class_counter.transform_filter(pbrush_lst[i])
      chart = chart | counters
    # a frame without rows has no classes to count, so the chart is returned on its own
    chart_lst.append(chart)
  return chart_lst

def removeColumn(frame : pd.DataFrame, columnsToRemove : 'List[string]') -> pd.DataFrame:
  '''
  Returns a dataframe without the listed columns.

  Names in columnsToRemove that are not columns of frame are ignored, and a
  name listed more than once is dropped once. The input frame is not modified:
  a new dataframe is returned when at least one column is dropped, otherwise
  the very same frame object is handed back.

  Parameters:
    frame: the dataframe to strip columns from.
    columnsToRemove: column names to drop; None or empty means "drop nothing".

  Returns:
    The dataframe without the listed columns, or None (after printing a
    message) when frame is None.
  '''
  if frame is None:
    print('Frame is null')
    return None
  if columnsToRemove is None or len(columnsToRemove) == 0:
    return frame
  # dict.fromkeys de-duplicates the request while keeping the caller's order
  present = [c for c in dict.fromkeys(columnsToRemove) if c in frame.columns]
  if not present:
    return frame
  # keyword form on purpose: drop(label, 1) with a positional axis is rejected by pandas 2.x
  return frame.drop(columns=present)

# Remove unwanted columns

In [0]:
# Add the names of the columns to remove to this list (leave it empty to keep every column)
columnsToRemove = []
frame = removeColumn(frame, columnsToRemove)

# Ordered Correlation Table

In [154]:
# Correlation of every column with the class label, computed once.
target_corr = frame.corr()['target']

# Order by absolute correlation, strongest first. sort_values puts NaN
# correlations last, whereas argsort flags them with -1, which iloc would
# silently read as "the last row".
corr_order = target_corr.abs().sort_values(ascending=False).index
sorted_corr_frame = pd.DataFrame(target_corr.loc[corr_order])

# Remove the label's correlation with itself by name rather than by position,
# then lay the features out as columns of a one-row table.
corrframe = sorted_corr_frame.drop(index='target').transpose()
corrframe

Unnamed: 0,petal width (cm),petal length (cm),sepal length (cm),sepal width (cm)
target,0.956547,0.949035,0.782561,-0.426658


# Classifier with thresholding chart

In [150]:
# Feature names in the column order of corrframe; one chart row is built per name.
col = corrframe.columns.to_list()

print('Top(start) bottom(end)')
print('\n')
print('You have to select boundaries at the very top chart first, \n then consecutively move down')

test = complexCharts(frame, col)

# Stack the chart rows vertically, first feature on top, and display the result.
t = test[0]
for chart_row in test[1:len(col)]:
  t &= chart_row
t

Top(start) bottom(end)


You have to select boundaries at the very top chart first, 
 then the next chart, rinse and repeat
gets here 0
