In [2]:
from data import Data
import numpy as np
# Directory prefix for the CSV files loaded below (train.csv and test.csv).
DATA_DIR = 'data/'

### Loading the data:

You can construct a `Data` object either from a NumPy array (the `data` argument) or from a path to a data file (the `fpath` argument).

In [3]:
# Read every field of the training CSV as a string, then wrap the array in a Data object.
data = np.loadtxt(
    DATA_DIR + 'train.csv',
    dtype=str,
    delimiter=',',
)
data_obj = Data(data=data)

In [4]:
# Alternative construction: let Data read the file itself from a path.
data_obj2 = Data(fpath=DATA_DIR + 'test.csv')

### Attribute information

Each attribute is represented by a custom `Attribute` object with the following properties:

- Column name
- Column Index
- Possible values


#### Get the list of attributes:

`data_obj.attributes` is a dictionary that maps each attribute name to its corresponding `Attribute` object.

In [6]:
# The dictionary is keyed by attribute name (e.g. 'cap-color'), not by integer position.
data_obj.attributes

{'bruises': <data.Attribute at 0x7fe5adccff60>,
 'cap-color': <data.Attribute at 0x7fe5cc059b38>,
 'cap-shape': <data.Attribute at 0x7fe5cc0591d0>,
 'cap-surface': <data.Attribute at 0x7fe5cc059ac8>,
 'gill-attachment': <data.Attribute at 0x7fe5adccff98>,
 'gill-color': <data.Attribute at 0x7fe5ad979080>,
 'gill-size': <data.Attribute at 0x7fe5ad979048>,
 'gill-spacing': <data.Attribute at 0x7fe5adccffd0>,
 'habitat': <data.Attribute at 0x7fe5ad979358>,
 'population': <data.Attribute at 0x7fe5ad979320>,
 'ring-number': <data.Attribute at 0x7fe5ad979278>,
 'ring-type': <data.Attribute at 0x7fe5ad9792b0>,
 'spore-print-color': <data.Attribute at 0x7fe5ad9792e8>,
 'stalk-color-above-ring': <data.Attribute at 0x7fe5ad979198>,
 'stalk-color-below-ring': <data.Attribute at 0x7fe5ad9791d0>,
 'stalk-root': <data.Attribute at 0x7fe5ad9790f0>,
 'stalk-shape': <data.Attribute at 0x7fe5ad9790b8>,
 'stalk-surface-above-ring': <data.Attribute at 0x7fe5ad979128>,
 'stalk-surface-below-ring': <data.At

#### Fetch information for a specific attribute:

##### Column index:

In [5]:
# Look up the Attribute by name, then read its column index.
data_obj.attributes['cap-color'].index

2

##### Possible values:

In [6]:
# Look up the Attribute by name, then read the values it can take.
data_obj.attributes['cap-color'].possible_vals

array(['b', 'c', 'e', 'g', 'n', 'p', 'r', 'u', 'w', 'y'],
      dtype='<U24')

### Data selection:

#### Select rows:

Selects all rows whose value for the given attribute equals the given value. Returns a new `Data` object that contains only the selected rows.

In [7]:
# Keep only the rows where the 'cap-color' attribute has the value 'b'.
data_subset = data_obj.get_row_subset('cap-color', 'b')

In [8]:
# Displaying the result shows that the subset is itself a Data object.
data_subset

<data.Data at 0x1066fdef0>

In [9]:
# Inspect the array of rows held by the subset.
data_subset.raw_data

array([['e', 'x', 'y', ..., 'w', 'c', 'w'],
       ['p', 'x', 's', ..., 'h', 's', 'u'],
       ['p', 'x', 's', ..., 'h', 's', 'g'],
       ..., 
       ['p', 'x', 's', ..., 'h', 'v', 'u'],
       ['p', 'b', 'y', ..., 'r', 'v', 'g'],
       ['p', 'f', 's', ..., 'h', 'v', 'u']],
      dtype='<U24')

#### Select Columns:

Selects the column that corresponds to the attribute name. We can use this function to verify that the row-subset call gave us the right result: every value in the `cap-color` column of the subset should be `'b'`.

In [10]:
# Pass a single attribute name to get that column of the subset.
data_subset.get_column('cap-color')

array(['b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b',
       'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b', 'b'],
      dtype='<U24')

We can also select multiple columns by passing a list of attribute names:

In [13]:
# Pass a list of attribute names to get one output column per name, in the order given.
data_subset.get_column(['cap-color', 'label'])

array([['b', 'e'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'e'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'p'],
       ['b', 'e'],
       ['b', 'p'],
       ['b',