In [1]:
import numpy as np

In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from pandas import read_csv

In [3]:
def load_data(path_to_csv, has_header = True):
    '''
    Loads a csv file, the last column is assumed to be the output
    label 'yes'/'no'

    The first column is not part of the returned features (X is taken from
    columns 1 .. second-to-last) -- presumably it is an id/index column;
    confirm against the data file.

    path_to_csv: path (or any object accepted by pandas.read_csv)
    has_header:  True if the first row holds column names, False if the
                 file starts directly with data

    returns: X - numpy array of size (n,m) of input features
             Y - numpy array of size (n,) with the raw values of the
                 last column
    '''

    # header=None tells pandas that the first row is data, not column names
    header = 'infer' if has_header else None
    frame = read_csv(path_to_csv, header=header)

    # DataFrame.as_matrix() was removed in pandas 1.0; .values is available
    # in both old and current pandas versions and returns the same ndarray.
    data = frame.values

    X = data[:,1:-1]
    Y = data[:,-1]
    return X,Y

In [59]:
# Read the example csv, then turn the textual labels into a boolean mask
# that is True exactly where the label equals 'yes'.
X, Y = load_data("data_1.csv")
Y = (Y == 'yes')

In [108]:
class FindS:
    '''
    Find-S style learner over categorical attributes.

    For every attribute the learner records which values occur in the
    positive training examples. An attribute whose positive examples cover
    every value present in the training data does not constrain the concept
    and is reported as '?'.
    '''

    # (num_attributes, max_unique) table of strings; row j lists the values of
    # attribute j seen in positive examples, padded on the right with '-'.
    _rule = None
    # Number of distinct values of each attribute over the whole training set.
    _uniqueShapes = None
    # Number of leading slots in each row of `_rule` that hold real values.
    # Tracking this separately keeps a data value that happens to be '-' from
    # being mistaken for padding.
    _acceptedCounts = None

    def fit(self,training_data, class_label):
        '''
        Collects, per attribute, the values seen in the positive examples.
        input: training_data - array-like (n,m), m is the number of features
                                n is the number of training samples;
                                values are compared by their string form
               class_label - array-like (n,), an entry counts as positive
                             when it compares equal to True
        raises: ValueError if class_label contains no positive example
        '''
        # Work on a fixed-width string array that is wide enough for the
        # longest value, so no value is truncated when stored in the table.
        values = np.asarray(training_data).astype(str)

        hypothesis = self._init_hypothesis(values, class_label)
        positives = values[self._positive_mask(class_label)]

        num_of_attrib = values.shape[1]
        accepted = np.zeros(num_of_attrib, dtype=int)
        for j in range(num_of_attrib):
            # distinct values of attribute j among the positive examples,
            # kept in the order in which they are first encountered
            uniq, first_seen = np.unique(positives[:, j], return_index=True)
            ordered = uniq[np.argsort(first_seen)]
            hypothesis[j, :ordered.size] = ordered
            accepted[j] = ordered.size

        self._acceptedCounts = accepted
        self._rule = hypothesis

    def get_rule(self):
        '''
        Prints one line per attribute: '?' when the positive examples cover
        every value of that attribute, otherwise the array of accepted values.
        Returns None.
        raises: RuntimeError if fit() has not been called yet
        '''
        if self._rule is None:
            raise RuntimeError("fit() must be called before get_rule()")

        for i in range(self._rule.shape[0]):
            accepted = self._rule[i, :self._acceptedCounts[i]]
            # Compare the number of accepted values directly; relying on a
            # padding '-' being present fails for the attribute that has the
            # largest number of distinct values (its row has no padding).
            if accepted.size >= self._uniqueShapes[i]:
                print('?')
            else:
                print(accepted)

    def _init_hypothesis(self,training_data,class_label):
        '''
        Builds the empty hypothesis table and records how many distinct
        values every attribute has (stored in self._uniqueShapes).
        input: training_data - array-like (n,m), m is the number of features
                                n is the number of training samples
               class_label - array-like (n,)
        returns: hypothesis - string array (m, max_unique) filled with '-',
                              max_unique being the largest number of distinct
                              values found in any single attribute
        raises: ValueError if class_label contains no positive example
        '''
        values = np.asarray(training_data).astype(str)

        if not self._positive_mask(class_label).any():
            raise ValueError("No positive example provided")

        unique_counts = [np.unique(values[:, j]).size
                         for j in range(values.shape[1])]
        self._uniqueShapes = np.array(unique_counts, dtype=int)

        # Reuse the dtype of the string-converted data so that the table
        # can hold the longest attribute value without truncation.
        return np.full((values.shape[1], max(unique_counts, default=0)),
                       '-', dtype=values.dtype)

    @staticmethod
    def _positive_mask(class_label):
        '''Boolean array that is True where class_label equals True.'''
        labels = np.asarray(class_label).ravel()
        # `== True` (not truthiness) on purpose: non-boolean labels such as
        # the string 'no' must not count as positive.
        return np.asarray(labels == True, dtype=bool)

    def predict(self, data=None):
        '''Prediction is not implemented; always raises NotImplementedError.'''
        # NotImplemented is a comparison sentinel, not an exception class;
        # raising it produces a confusing TypeError.
        raise NotImplementedError("FindS.predict is not implemented")

    def replaceFirstEmpty(self, array, value):
        '''
        Writes value into the first slot of array that equals '-'.
        The array is modified in place; nothing happens if no slot is free.
        '''
        m = array.size
        for i in range(m):
            if array[i]=='-':
                array[i] = value
                break

In [79]:
# Read the comma-separated mushroom table as strings, skipping the first
# row (presumably the header line -- confirm against the file).
data = np.loadtxt(
    'mushrooms.csv',
    delimiter=',',
    skiprows=1,
    dtype=str,
)

In [102]:
# Column 0 holds the class; the label is True where that column equals 'p'.
Ym = data[:, 0] == 'p'

# Every remaining column is used as an input feature.
Xm = data[:, 1:]

In [109]:
find_s = FindS()

find_s.fit(Xm,Ym)

print("Final Hypothesis:" )
# get_rule() prints the rule itself and returns None, so it is called
# directly; wrapping it in print() only appended a stray "None" line.
find_s.get_rule()

Final Hypothesis:
['x' 'f' 'b' 'k' 'c']
?
['n' 'w' 'g' 'p' 'y' 'e' 'b' 'c']
?
['p' 'f' 'c' 'y' 'n' 's' 'm']
?
?
?
['k' 'n' 'p' 'w' 'h' 'g' 'u' 'b' 'r' 'y']
?
['e' 'b' '?' 'c']
?
?
['w' 'n' 'b' 'p' 'c' 'y']
['w' 'b' 'p' 'n' 'y' 'c']
?
['w' 'y']
?
['p' 'l' 'e' 'n']
['k' 'n' 'h' 'w' 'r']
['s' 'v' 'y' 'c']
['u' 'g' 'd' 'p' 'l' 'm']
None
