In [2]:
%run dataset.ipynb

In [3]:
class AbaloneDataset(Dataset):
    """Dataset built from 'abalone.data.csv', set up as ('abalone', 'regression').

    Each input vector has 10 columns: columns 0-2 are a one-hot encoding of
    the code found in the first CSV field ('I', 'M' or 'F'), and columns 3-9
    hold the remaining fields except the last one. The last field of every
    row is the single-column target.
    """

    def __init__(self):
        """Read the CSV, assemble the input/target matrices and hand them to
        ``shuffle_data``.

        NOTE(review): ``load_csv`` is defined elsewhere; it is assumed to
        return ``(rows, headers)`` with the header row already removed --
        confirm. The 0.8 passed to ``shuffle_data`` is presumably the
        training-split ratio -- confirm against ``Dataset.shuffle_data``.
        """
        super(AbaloneDataset, self).__init__('abalone', 'regression')

        rows, _ = load_csv('abalone.data.csv')
        count = len(rows)

        xs = np.zeros((count, 10))
        ys = np.zeros((count, 1))

        # Position in this tuple == one-hot column for that code
        # (presumably the abalone sex code -- confirm). A row whose first
        # field matches none of them keeps all three columns at zero.
        codes = ('I', 'M', 'F')

        for n, row in enumerate(rows):
            for slot, code in enumerate(codes):
                if row[0] == code:
                    xs[n, slot] = 1
            # Fields between the code and the target fill the rest of the row.
            xs[n, 3:] = row[1:-1]
            ys[n, :] = row[-1:]

        self.shuffle_data(xs, ys, 0.8)

    def visualize(self, xs, estimates, answers):
        """Print one line per sample: the input vector, then the first
        element of the estimate and of the answer (one decimal place each).
        """
        template = '{} => 추정 {:4.1f} : 정답 {:4.1f}'
        for n, x in enumerate(xs):
            est, ans = estimates[n], answers[n]
            xstr = vector_to_str(x, '%4.2f')
            print(template.format(xstr, est[0], ans[0]))

In [4]:
class PulsarDataset(Dataset):
    """Dataset built from 'pulsar_stars.csv', set up as ('pulsar', 'binary').

    All columns but the last are inputs; the last column is kept as a
    single-column target. ``target_names`` maps label 0 to '별' (star) and
    label 1 to '펄서' (pulsar).
    """

    def __init__(self):
        """Read the CSV as float32, split inputs from the label column and
        pass both to ``shuffle_data``.

        NOTE(review): the 0.8 argument is presumably the training-split
        ratio -- confirm against ``Dataset.shuffle_data``.
        """
        super(PulsarDataset, self).__init__('pulsar', 'binary')

        rows, _ = load_csv('pulsar_stars.csv')
        data = np.asarray(rows, dtype='float32')

        inputs = data[:, :-1]
        labels = data[:, -1:]   # slice (not index) keeps the column axis
        self.shuffle_data(inputs, labels, 0.8)

        self.target_names = ['별', '펄서']

    def visualize(self, xs, estimates, answers):
        """Print one line per sample with the input, the estimated class and
        its raw value, the true class, and an 'O'/'X' match marker.
        """
        template = '{} => 추정 {}(확률 {:4.2f}) : 정답 {} => {}'
        for n, x in enumerate(xs):
            est, ans = estimates[n], answers[n]
            xstr = vector_to_str(x, '%5.1f', 3)
            # Rounding the first element gives the index into target_names.
            estr = self.target_names[int(round(est[0]))]
            astr = self.target_names[int(round(ans[0]))]
            rstr = 'O' if estr == astr else 'X'
            print(template.format(xstr, estr, est[0], astr, rstr))

In [5]:
class SteelDataset(Dataset):
    """Dataset built from 'faults.csv', set up as ('steel', 'select').

    The last 7 columns of every row form the target vector and everything
    before them is the input. ``target_names`` is taken from the last 7
    entries of the CSV headers.
    """

    def __init__(self):
        """Read the CSV as float32, split off the 7 target columns and pass
        inputs and targets to ``shuffle_data``.

        NOTE(review): the 0.8 argument is presumably the training-split
        ratio -- confirm against ``Dataset.shuffle_data``.
        """
        super(SteelDataset, self).__init__('steel', 'select')

        rows, headers = load_csv('faults.csv')
        data = np.asarray(rows, dtype='float32')

        inputs = data[:, :-7]
        targets = data[:, -7:]
        self.shuffle_data(inputs, targets, 0.8)

        self.target_names = headers[-7:]

    def visualize(self, xs, estimates, answers):
        """Delegate to ``show_select_results``; ``xs`` is accepted but unused."""
        show_select_results(estimates, answers, self.target_names)

In [6]:
class PulsarSelectDataset(Dataset):
    """Dataset built from 'pulsar_stars.csv', set up as
    ('pulsarselect', 'select').

    Inputs are all columns but the last; the last column is passed through
    ``onehot(..., 2)`` to form the target. ``target_names`` is
    ['별' (star), '펄서' (pulsar)].
    """

    def __init__(self):
        """Read the CSV as float32, encode the label column with ``onehot``
        and pass inputs and targets to ``shuffle_data``.

        NOTE(review): ``onehot`` is defined elsewhere; it presumably returns
        a two-column one-hot matrix -- confirm. The 0.8 argument is
        presumably the training-split ratio -- confirm.
        """
        super(PulsarSelectDataset, self).__init__('pulsarselect', 'select')

        rows, _ = load_csv('pulsar_stars.csv')
        data = np.asarray(rows, dtype='float32')

        inputs = data[:, :-1]
        targets = onehot(data[:, -1], 2)
        self.shuffle_data(inputs, targets, 0.8)

        self.target_names = ['별', '펄서']

    def visualize(self, xs, estimates, answers):
        """Delegate to ``show_select_results``; ``xs`` is accepted but unused."""
        show_select_results(estimates, answers, self.target_names)

In [8]:
###########################################################################
class BinaryDataset(Dataset):
    """Dataset built from 'classification.csv', set up as ('binary', 'binary').

    All columns but the last are inputs; the last column is kept as a
    single-column target. ``target_names`` maps label 0 to '실패' (failure)
    and label 1 to '성공' (success).
    """

    def __init__(self):
        """Read the CSV as float32, split inputs from the label column and
        pass both to ``shuffle_data``.

        NOTE(review): the 0.8 argument is presumably the training-split
        ratio -- confirm against ``Dataset.shuffle_data``.
        """
        super(BinaryDataset, self).__init__('binary', 'binary')

        rows, _ = load_csv('classification.csv')
        data = np.asarray(rows, dtype='float32')

        # The last element of each row read from the file is the answer
        # label, so the inputs are data[:, :-1] and the target is the
        # remaining final column.
        inputs = data[:, :-1]
        labels = data[:, -1:]
        self.shuffle_data(inputs, labels, 0.8)

        self.target_names = ['실패', '성공']

    def visualize(self, xs, estimates, answers):
        """Print one line per sample with the input, the estimated class and
        its raw value, the true class, and an 'O'/'X' match marker.
        """
        template = '{} => 추정 {}(확률 {:4.2f}) : 정답 {} => {}'
        for n, x in enumerate(xs):
            est, ans = estimates[n], answers[n]
            # vector_to_str() renders part of the input as text so the data
            # being fed in can be inspected.
            xstr = vector_to_str(x, '%5.1f', 3)
            # Rounding turns the estimate and the answer each into an index
            # selecting one of the two target names.
            estr = self.target_names[int(round(est[0]))]
            astr = self.target_names[int(round(ans[0]))]
            rstr = 'O' if estr == astr else 'X'
            print(template.format(xstr, estr, est[0], astr, rstr))

In [10]:
class BinarySelectDataset(Dataset):
    """Dataset built from 'classification.csv', set up as
    ('binaryselect', 'select').

    Inputs are all columns but the last; the last column is passed through
    ``onehot(..., 2)`` to form the target. ``target_names`` is
    ['실패' (failure), '성공' (success)].
    """

    def __init__(self):
        """Read the CSV as float32, encode the label column with ``onehot``
        and pass inputs and targets to ``shuffle_data``.

        NOTE(review): ``onehot`` is defined elsewhere; it presumably returns
        a two-column one-hot matrix -- confirm. The 0.8 argument is
        presumably the training-split ratio -- confirm.
        """
        super(BinarySelectDataset, self).__init__('binaryselect', 'select')

        rows, _ = load_csv('classification.csv')
        data = np.asarray(rows, dtype='float32')

        inputs = data[:, :-1]
        targets = onehot(data[:, -1], 2)
        self.shuffle_data(inputs, targets, 0.8)

        self.target_names = ['실패', '성공']

    def visualize(self, xs, estimates, answers):
        """Delegate to ``show_select_results``; ``xs`` is accepted but unused."""
        show_select_results(estimates, answers, self.target_names)

In [12]:
class CreditCardDataset(Dataset):
    """Dataset built from 'creditcard.csv', set up as ('creditcard', 'binary').

    All columns but the last are inputs; the last column is kept as a
    single-column target. ``target_names`` maps label 0 to '사기X'
    (not fraud) and label 1 to '사기O' (fraud).
    """

    def __init__(self):
        """Read the CSV as float32, split inputs from the label column and
        pass both to ``shuffle_data``.

        NOTE(review): the 0.8 argument is presumably the training-split
        ratio -- confirm against ``Dataset.shuffle_data``.
        """
        super(CreditCardDataset, self).__init__('creditcard', 'binary')

        rows, _ = load_csv('creditcard.csv')
        data = np.asarray(rows, dtype='float32')

        # The last element of each row read from the file is the answer
        # label, so the inputs are data[:, :-1] and the target is the
        # remaining final column.
        inputs = data[:, :-1]
        labels = data[:, -1:]
        self.shuffle_data(inputs, labels, 0.8)

        self.target_names = ['사기X', '사기O']

    def visualize(self, xs, estimates, answers):
        """Print one line per sample with the input, the estimated class and
        its raw value, the true class, and an 'O'/'X' match marker.
        """
        template = '{} => 추정 {}(확률 {:4.2f}) : 정답 {} => {}'
        for n, x in enumerate(xs):
            est, ans = estimates[n], answers[n]
            # vector_to_str() renders part of the input as text so the data
            # being fed in can be inspected.
            xstr = vector_to_str(x, '%5.1f', 3)
            # Rounding turns the estimate and the answer each into an index
            # selecting one of the two target names.
            estr = self.target_names[int(round(est[0]))]
            astr = self.target_names[int(round(ans[0]))]
            rstr = 'O' if estr == astr else 'X'
            print(template.format(xstr, estr, est[0], astr, rstr))

In [14]:
class CreditSelectDataset(Dataset):
    """Dataset built from 'creditcard.csv', set up as
    ('creditselect', 'select').

    Inputs are all columns but the last; the last column is passed through
    ``onehot(..., 2)`` to form the target. ``target_names`` is
    ['사기X' (not fraud), '사기O' (fraud)].
    """

    def __init__(self):
        """Read the CSV as float32, encode the label column with ``onehot``
        and pass inputs and targets to ``shuffle_data``.

        NOTE(review): ``onehot`` is defined elsewhere; it presumably returns
        a two-column one-hot matrix -- confirm. The 0.8 argument is
        presumably the training-split ratio -- confirm.
        """
        super(CreditSelectDataset, self).__init__('creditselect', 'select')

        rows, _ = load_csv('creditcard.csv')
        data = np.asarray(rows, dtype='float32')

        inputs = data[:, :-1]
        targets = onehot(data[:, -1], 2)
        self.shuffle_data(inputs, targets, 0.8)

        self.target_names = ['사기X', '사기O']

    def visualize(self, xs, estimates, answers):
        """Delegate to ``show_select_results``; ``xs`` is accepted but unused."""
        show_select_results(estimates, answers, self.target_names)