In [137]:
import pandas as pd

# Read the dataset from a CSV file in the current working directory.
diabetes = pd.read_csv("diabetes.csv")

# Bare expression: the frame is rendered as this cell's output.
diabetes

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [138]:
# Print a per-column summary (non-null counts and dtypes) of the loaded frame.
diabetes.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [139]:
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    ``x`` may be a scalar or a NumPy array; arrays are processed element-wise
    by ``np.exp`` and the result has the same shape.
    """
    denominator = 1. + np.exp(-x)
    return 1. / denominator

def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` with respect to ``x`` by central differences.

    Every element of ``x`` is perturbed *in place* by ``+delta_x`` and then
    ``-delta_x``, ``f(x)`` is evaluated at both points, and the element is put
    back to its original value. The in-place perturbation is deliberate:
    a caller may pass an ``f`` that ignores its argument and reads the same
    array through another reference.

    Parameters
    ----------
    f : callable
        Called as ``f(x)``; should return a scalar.
    x : numpy.ndarray
        Writable array of any shape (zero-sized arrays are allowed). It should
        have a floating dtype: assigning the perturbed value into an integer
        array truncates it, which makes both evaluations identical.
    delta_x : float, optional
        Half-width of the finite-difference step. Defaults to ``1e-4``.

    Returns
    -------
    numpy.ndarray
        Array created with ``np.zeros_like(x)`` whose entries are
        ``(f(x + d) - f(x - d)) / (2 * d)`` for the corresponding element.
    """
    grad = np.zeros_like(x)

    # np.ndindex yields nothing for a zero-sized array, so empty input simply
    # returns an empty gradient instead of raising.
    for idx in np.ndindex(x.shape):
        original = x[idx]
        try:
            x[idx] = float(original) + delta_x
            fx_plus = f(x)

            x[idx] = float(original) - delta_x
            fx_minus = f(x)
        finally:
            # Restore the element even if f raised, so x is never left perturbed.
            x[idx] = original

        grad[idx] = (fx_plus - fx_minus) / (2 * delta_x)

    return grad

In [140]:
class LogicGate:
    """Small fully connected sigmoid network for binary classification.

    The network has two hidden sigmoid layers and one sigmoid output unit.
    It is trained by gradient descent on the summed binary cross-entropy,
    with gradients obtained from ``numerical_derivative``.
    """

    def __init__(self, gate_name, xdata, tdata,
                 input_nodes=8, hidden1_nodes=15, hidden2_nodes=3,
                 learning_rate=0.002):
        """Store the training data and draw random initial parameters.

        Parameters
        ----------
        gate_name : str
            Label stored on the instance as ``name``.
        xdata : array-like
            Training inputs; reshaped to ``(n_samples, input_nodes)``.
        tdata : array-like
            Training targets; reshaped to ``(n_samples, 1)``.
        input_nodes, hidden1_nodes, hidden2_nodes : int, optional
            Layer widths. The defaults (8, 15, 3) are the original sizes.
        learning_rate : float, optional
            Gradient-descent step size. Defaults to ``0.002``.

        Raises
        ------
        ValueError
            If ``xdata`` and ``tdata`` do not describe the same number of samples.
        """
        self.name = gate_name
        # The sample count is inferred (-1) rather than hard-coded, so any
        # train/test split size works.
        self.xdata = np.asarray(xdata).reshape(-1, input_nodes)
        self.tdata = np.asarray(tdata).reshape(-1, 1)
        if len(self.xdata) != len(self.tdata):
            raise ValueError(
                "xdata has %d samples but tdata has %d"
                % (len(self.xdata), len(self.tdata))
            )

        # Draw order (W2, W3, W4, b2, b3, b4) is kept so that a seeded global
        # RNG produces the same initial parameters as before.
        self.W2 = np.random.rand(input_nodes, hidden1_nodes)
        self.W3 = np.random.rand(hidden1_nodes, hidden2_nodes)
        self.W4 = np.random.rand(hidden2_nodes, 1)

        self.b2 = np.random.rand(hidden1_nodes)
        self.b3 = np.random.rand(hidden2_nodes)
        self.b4 = np.random.rand(1)

        self.learning_rate = learning_rate

    def _forward(self, xdata):
        """Return the network's sigmoid output for ``xdata`` (one row or a batch)."""
        a2 = sigmoid(np.dot(xdata, self.W2) + self.b2)
        a3 = sigmoid(np.dot(a2, self.W3) + self.b3)
        return sigmoid(np.dot(a3, self.W4) + self.b4)

    def feed_forward(self):
        """Return the summed binary cross-entropy loss over the stored training data."""
        delta = 1e-7  # keeps the log arguments strictly positive
        y = self._forward(self.xdata)
        return -np.sum(self.tdata * np.log(y + delta) + (1 - self.tdata) * np.log((1 - y) + delta))

    def train(self, steps=5555, log_every=100):
        """Run gradient descent using numerical gradients.

        Parameters
        ----------
        steps : int, optional
            Number of update steps. Defaults to ``5555``.
        log_every : int, optional
            Print the loss every ``log_every`` steps; a falsy value disables
            the per-step log. Defaults to ``100``.

        Note: each step evaluates the full loss twice per parameter element,
        because that is how ``numerical_derivative`` estimates the gradient.
        """
        # The loss ignores its argument: numerical_derivative perturbs the
        # parameter array in place, and feed_forward reads it through self.
        loss = lambda _: self.feed_forward()
        print("Initial loss value = ", self.feed_forward())

        for step in range(steps):
            # Parameters are updated one after another, in the same order as
            # before; each `-=` modifies the stored array in place.
            for param in (self.W2, self.b2, self.W3, self.b3, self.W4, self.b4):
                param -= self.learning_rate * numerical_derivative(loss, param)

            if log_every and step % log_every == 0:
                print("step = ", step, "loss value = ", self.feed_forward())

    def predict(self, input_data):
        """Predict for a single sample or a batch.

        The stored training data is left untouched.

        Returns
        -------
        tuple
            ``(y, result)`` where ``y`` is the sigmoid output array. When ``y``
            holds a single value, ``result`` is the int ``1`` if that value is
            ``>= 0.5`` and ``0`` otherwise; for a batch, ``result`` is an int
            array of the same shape as ``y``.
        """
        y = self._forward(np.asarray(input_data))

        if y.size == 1:
            result = 1 if y.item() >= 0.5 else 0
        else:
            result = (y >= 0.5).astype(int)

        return y, result

    def accuracy(self, test_xdata, test_tdata):
        """Return the fraction of samples whose thresholded prediction equals the target.

        ``test_tdata`` is converted to a NumPy array first, so labels are
        matched by position. Returns ``0.0`` when there are no samples.
        """
        targets = np.asarray(test_tdata).reshape(-1, 1)
        if len(targets) == 0:
            return 0.0

        inputs = np.asarray(test_xdata).reshape(len(targets), -1)
        predicted = (self._forward(inputs) >= 0.5).astype(int)
        return float(np.mean(predicted == targets))

In [141]:
# Features: every column of the frame except the 'Outcome' column.
X = diabetes.drop(columns=['Outcome'])
# Target: the 'Outcome' column on its own.
y = diabetes['Outcome']

In [145]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Standardise the features; fit_transform fits the scaler on X and returns
# the transformed array in one call.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# performed further down, so test-set statistics influence the scaling of the
# training data. Consider fitting on the training split only.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [143]:
from sklearn.model_selection import train_test_split

# Split the data (80:20 ratio); random_state is fixed so the split is repeatable.
train_xdata, test_xdata, train_tdata, test_tdata = train_test_split(X, y, test_size=0.2, random_state=42)

# Check the result: number of rows in each split.
print("Train 데이터 개수:", len(train_xdata))
print("Test 데이터 개수:", len(test_xdata))

Train 데이터 개수: 614
Test 데이터 개수: 154


In [144]:
import numpy as np

# Seed NumPy's global RNG so the initial weights and biases, which
# LogicGate.__init__ draws with np.random.rand, are the same on every run.
np.random.seed(42)

# Convert the training split to plain NumPy arrays before handing it to the model.
xdata = np.array(train_xdata)
tdata = np.array(train_tdata)

# Build the network on the training data and run gradient descent.
diabete_obj = LogicGate("DIABETES_GATE", xdata, tdata)
diabete_obj.train()

Initial loss value =  975.0218717877452
step =  0 loss value =  423.143688858214
step =  100 loss value =  396.41380097391414


KeyboardInterrupt: 

In [None]:
# Convert the held-out split to NumPy arrays so rows and labels are indexed
# by position.
test_xdata = np.array(test_xdata)
test_tdata = np.array(test_tdata)

# accuracy() runs predict on every test row itself, so no separate
# prediction loop is needed here.
print('--------------------------')
accuracy_ret = diabete_obj.accuracy(test_xdata, test_tdata)
print('Accuracy => ', accuracy_ret)

In [None]:
# predict returns (sigmoid output, thresholded label); keep only the label
# for each test row.
pred = [diabete_obj.predict(data)[1] for data in test_xdata]

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the collected predictions against the test labels, computed by
# scikit-learn; the value is shown as this cell's output.
accuracy_score(test_tdata, pred)