In [198]:
import numpy as np
import math
import matplotlib.pyplot as plt

In [199]:
def splitData(data, ratio):
    np.random.shuffle(data)
    train_size = int(len(data) * ratio)
    train_set = data[:train_size]
    test_set = data[train_size:]
    return train_set, test_set

In [200]:
def timeToMinutes(time):
    splitted = time.split(":")
    if int(splitted[0]) > 23 or int(splitted[1]) > 59:
        raise ValueError
    return int(splitted[0]) * 60 + int(splitted[1])

In [201]:
def processTime(time):
    minutes = timeToMinutes(time)
    return  math.sin(minutes / (24 * 60)*math.pi)

In [202]:
def loadData(codesToIndexes):
    data = []
    for i in range(1, 70):
        filePrefix = 'Diabetes-Data/data-'
        if i < 10:
            filePrefix += '0'
        fileHandle = open(filePrefix+str(i), 'r')
        for line in fileHandle:
            line = line.strip()
            if line:
                try:
                    parts = line.split('\t')
                    time = processTime(parts[1])
                    code = int(parts[2])
                    if code not in codesToIndexes.keys():
                        raise ValueError
                    number = int(parts[3])
                    data.append([time, code, number])
                except ValueError:
                    pass
                except IndexError:
                    pass
        
        fileHandle.close()
    return data


In [203]:
codesToIndexes = {33:0, 34:1, 35:2, 48:3, 57:4, 58:5, 59:6, 60:7, 61:8, 62:9, 63:10, 64:11, 65:12, 66:13, 67:14, 68:15, 69:16, 70:17, 71:18, 72:19}
data = np.array(loadData(codesToIndexes))
data, test = splitData(data, 0.8)
# print(processTime("00:00"))
# print(processTime("12:00"))
# 33 = Regular insulin dose
# 34 = NPH insulin dose
# 35 = UltraLente insulin dose
# 48 = Unspecified blood glucose measurement
# 57 = Unspecified blood glucose measurement
# 58 = Pre-breakfast blood glucose measurement
# 59 = Post-breakfast blood glucose measurement
# 60 = Pre-lunch blood glucose measurement
# 61 = Post-lunch blood glucose measurement
# 62 = Pre-supper blood glucose measurement
# 63 = Post-supper blood glucose measurement
# 64 = Pre-snack blood glucose measurement
# 65 = Hypoglycemic symptoms
# 66 = Typical meal ingestion
# 67 = More-than-usual meal ingestion
# 68 = Less-than-usual meal ingestion
# 69 = Typical exercise activity
# 70 = More-than-usual exercise activity
# 71 = Less-than-usual exercise activity
# 72 = Unspecified special event
X = data[:, [0, 2]]
X_test = test[:, [0, 2]]
# X = data[:, [2]]
Y = data[:, 1]
Y_test = test[:, 1]
Y_encoded = np.zeros((Y.shape[0], 20))
Y_test_encoded = np.zeros((Y_test.shape[0], 20))
for i, code in enumerate(Y):
    index = codesToIndexes[code]
    Y_encoded[i, index] = 1
for i, code in enumerate(Y_test):
    index = codesToIndexes[code]
    Y_test_encoded[i, index] = 1

In [204]:
def initializeParameters(nX, nH, nY):
    W1 = np.random.randn(nX, nH)
    b1 = np.zeros((1,nH))
    b1 = np.random.randn(1,nH)
    W2 = np.random.randn(nH,nY)
    b2 = np.zeros((1,nY))
    b2 = np.random.randn(1,nY)
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters

In [205]:
def costFunction(Y, Y_hat):
    n = Y.shape[0]
    return - np.sum(Y * np.log(Y_hat))


In [206]:
def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

In [207]:
def softmax(Z):
    return np.exp(Z) / np.sum(np.exp(Z), axis=1, keepdims=True)

In [208]:
def propagate(X, Y, W1, b1, W2, b2):
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = softmax(Z2)
    cost = costFunction(Y, A2)
    # pochodne funkcji straty:
    dA2 = A2 - Y
    dW2 = np.dot(A1.T,dA2) 
    db2 = np.sum(dA2, axis=0,keepdims=True)
    dA1 = np.dot(dA2, W2.T)
    dZ1 = dA1 * (A1 * (1 - A1))
    dW1 = np.dot(X.T, dZ1)
    db1 = np.sum(dZ1, axis=0)
    gradients = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return gradients, cost

In [209]:
def updateParameters(W1, b1, W2, b2, gradients, learningRate):
    W1 = W1 - learningRate * gradients["dW1"]
    b1 = b1 - learningRate * gradients["db1"]
    W2 = W2 - learningRate * gradients["dW2"]
    b2 = b2 - learningRate * gradients["db2"]
    return W1, b1, W2, b2

In [210]:
def optimize(X, Y, W1, b1, W2, b2, learningRate, numberOfIterations, printCost=False):
    costs = []
    for i in range(numberOfIterations):
        gradients, cost = propagate(X, Y, W1, b1, W2, b2)
        W1, b1, W2, b2 = updateParameters(W1, b1, W2, b2, gradients, learningRate)
        costs.append(cost)
        if i % 100 == 0 and printCost:
            print("Cost after iteration {}: {}".format(i, cost))
    parameters = {"W1": W1, "b1": b1, "W2": W2, "b2": b2}
    return parameters, costs

In [211]:
def predict(X, parameters):
    Z1 = np.dot(X, parameters["W1"]) + parameters["b1"]
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, parameters["W2"]) + parameters["b2"]
    A2 = softmax(Z2)
    return np.argmax(A2, axis=1)

In [223]:
parameters = initializeParameters(2, 15, 20)
params, costs = optimize(X,Y_encoded, parameters["W1"], parameters["b1"], parameters["W2"], parameters["b2"], 0.00001, 7000, True)

  return 1 / (1 + np.exp(-Z))


Cost after iteration 0: 149648.3886977894
Cost after iteration 100: 37903.78064005859
Cost after iteration 200: 40096.270301320896
Cost after iteration 300: 39029.42315391776
Cost after iteration 400: 44175.21099511389
Cost after iteration 500: 37074.033848058956
Cost after iteration 600: 34514.85448499656
Cost after iteration 700: 32648.9940280673
Cost after iteration 800: 39412.49400523389
Cost after iteration 900: 37075.23787438951
Cost after iteration 1000: 36392.6930221142
Cost after iteration 1100: 32546.36908784695
Cost after iteration 1200: 30842.27410304198
Cost after iteration 1300: 30223.06602085005
Cost after iteration 1400: 30158.785361785925
Cost after iteration 1500: 29938.074553643426
Cost after iteration 1600: 29727.032683122758
Cost after iteration 1700: 29585.05416301838
Cost after iteration 1800: 29454.55567176825
Cost after iteration 1900: 29310.81410074856
Cost after iteration 2000: 29118.43295863782
Cost after iteration 2100: 29019.59837697008
Cost after iteratio

In [224]:
# parameters = initializeParameters(2, 5, 20)
# params, costs = optimize(X,Y_encoded, parameters["W1"], parameters["b1"], parameters["W2"], parameters["b2"], 0.00001, 100, True)
a = predict(X, params)
i=18
print(a[i])
sum =0
timesGuessedA = {}
timesGuessedY = {}
trafione = {}
for i in range(a.shape[0]):
    timesGuessedA[a[i]] = timesGuessedA.get(a[i], 0) + 1    
    timesGuessedY[np.argmax(Y_encoded[i])] = timesGuessedY.get(np.argmax(Y_encoded[i]), 0) + 1
    if a[i] == np.argmax(Y_encoded[i]):
        trafione[a[i]] = trafione.get(a[i], 0) + 1
        sum += 1

print(sum,a.shape[0])
print(timesGuessedA)
print(timesGuessedY)
print(trafione)
print(sum/a.shape[0]*100)
# e_sum = np.sum(e) / e.shape[0]

# print(e_sum)
# print(np.argmax(Y_encoded[i]))
# print(a.T)

  return 1 / (1 + np.exp(-Z))


0
11656 22988
{0: 8563, 5: 10274, 1: 2894, 9: 227, 14: 353, 12: 677}
{0: 7467, 4: 795, 7: 2224, 9: 2505, 1: 2929, 10: 169, 11: 725, 5: 2781, 14: 261, 3: 1466, 2: 844, 8: 51, 12: 273, 13: 120, 17: 116, 19: 82, 15: 25, 18: 80, 16: 57, 6: 18}
{0: 6929, 5: 2714, 14: 92, 1: 1686, 12: 186, 9: 49}
50.704715503741085


In [225]:
# parameters = initializeParameters(2, 5, 20)
# params, costs = optimize(X,Y_encoded, parameters["W1"], parameters["b1"], parameters["W2"], parameters["b2"], 0.00001, 100, True)
a = predict(X_test, params)
i=18
print(a[i])
# e = np.argmax(Y_encoded, axis=1) -a
sum =0
timesGuessedA = {}
timesGuessedY = {}
trafione = {}
for i in range(a.shape[0]):
    timesGuessedA[a[i]] = timesGuessedA.get(a[i], 0) + 1    
    timesGuessedY[np.argmax(Y_test_encoded[i])] = timesGuessedY.get(np.argmax(Y_test_encoded[i]), 0) + 1
    if a[i] == np.argmax(Y_test_encoded[i]):
        trafione[a[i]] = trafione.get(a[i], 0) + 1
        sum += 1

print(sum,a.shape[0])
print(timesGuessedA)
print(timesGuessedY)
print(trafione)
print(sum/a.shape[0]*100)
# e_sum = np.sum(e) / e.shape[0]

# print(e_sum)
# print(np.argmax(Y_encoded[i]))
# print(a.T)

5
2881 5747
{5: 2561, 1: 737, 0: 2154, 14: 77, 12: 156, 9: 62}
{7: 534, 1: 751, 9: 613, 0: 1879, 10: 50, 14: 65, 4: 194, 5: 698, 11: 179, 2: 209, 3: 393, 8: 15, 18: 18, 17: 23, 16: 11, 19: 12, 12: 58, 6: 2, 13: 34, 15: 9}
{1: 413, 0: 1728, 14: 19, 5: 674, 9: 15, 12: 32}
50.130502871063165


  return 1 / (1 + np.exp(-Z))


$$
\sum_{i=1}^{\infty} \frac{1}{i} =\infty
$$