In [274]:
import numpy as np
import math
import matplotlib.pyplot as plt

In [275]:
def splitData(data, ratio):
    np.random.shuffle(data)
    train_size = int(len(data) * ratio)
    train_set = data[:train_size]
    test_set = data[train_size:]
    return train_set, test_set

In [276]:
def timeToMinutes(time):
    splitted = time.split(":")
    if int(splitted[0]) > 23 or int(splitted[1]) > 59:
        raise ValueError
    return int(splitted[0]) * 60 + int(splitted[1])

In [277]:
def processTime(time):
    minutes = timeToMinutes(time)
    return  math.sin(minutes / (24 * 60)*math.pi)

In [278]:
def loadData(codesToIndexes):
    data = []
    for i in range(1, 70):
        filePrefix = 'Diabetes-Data/data-'
        if i < 10:
            filePrefix += '0'
        fileHandle = open(filePrefix+str(i), 'r')
        for line in fileHandle:
            line = line.strip()
            if line:
                try:
                    parts = line.split('\t')
                    time = processTime(parts[1])
                    code = int(parts[2])
                    if code not in codesToIndexes.keys():
                        raise ValueError
                    number = int(parts[3])
                    data.append([time, code, number])
                except ValueError:
                    pass
                except IndexError:
                    pass
        
        fileHandle.close()
    return data


In [279]:
def encodeOutput(Y,codesToIndexes):
    codesToIndexes = {33:0, 34:1, 35:2, 48:3, 57:4, 58:5, 59:6, 60:7, 61:8, 62:9, 63:10, 64:11, 65:12, 66:13, 67:14, 68:15, 69:16, 70:17, 71:18, 72:19}
    Y_encoded = np.zeros((Y.shape[0], 20))
    for i, code in enumerate(Y):
        index = codesToIndexes[code]
        Y_encoded[i, index] = 1
    return Y_encoded

In [280]:
codesToIndexes = {33:0, 34:1, 35:2, 48:3, 57:4, 58:5, 59:6, 60:7, 61:8, 62:9, 63:10, 64:11, 65:12, 66:13, 67:14, 68:15, 69:16, 70:17, 71:18, 72:19}
data = np.array(loadData(codesToIndexes))
data, test = splitData(data, 0.8)
X = data[:, [0, 2]]
X_test = test[:, [0, 2]]
Y = data[:, 1]
Y_test = test[:, 1]
Y_encoded = encodeOutput(Y,codesToIndexes)
Y_test_encoded = encodeOutput(Y_test,codesToIndexes)

In [281]:
def initializeParameters(nX, nH, nY, numberOfHiddenLayers=1):
    parameters = {}
    if numberOfHiddenLayers < 1:
        raise ValueError   
    if numberOfHiddenLayers == 1:
        parameters["W1"] = np.random.randn(nX, nH)
        parameters["b1"] = np.zeros((1,nH))
    if numberOfHiddenLayers > 1:
        parameters["W1"] = np.random.randn(nX, nH[0])
        parameters["b1"] = np.zeros((1,nH[0]))
        for i in range(2, numberOfHiddenLayers+1):
            parameters["W"+str(i)] = np.random.randn(nH[i-2], nH[i-1])
            parameters["b"+str(i)] = np.zeros((1,nH[i-1]))
    if numberOfHiddenLayers == 1:
        parameters["W"+str(numberOfHiddenLayers+1)] = np.random.randn(nH, nY)
    else:
        parameters["W"+str(numberOfHiddenLayers+1)] = np.random.randn(nH[-1], nY)
    parameters["b"+str(numberOfHiddenLayers+1)] = np.zeros((1,nY))
    return parameters

### Architektura sieci neuronowej

- **Warstwa wejściowa:** N neurony (wejścia)
- **Warstwy ukryte:** L warstw, każda z dowolną liczbą neuronów
- **Warstwa wyjściowa:** K neuronów (klasyfikacja na K kategorii)

### Funkcje aktywacji

- **Warstwy ukryte:** Sigmoid
  - $$\sigma(z) = \frac{1}{1 + e^{-z}}$$
- **Warstwa wyjściowa:** Softmax
  - $$\text{Softmax}(z_i) = \frac{e^{z_i}}{\sum_{j=1}^{K} e^{z_j}}$$

### Propagacja w przód

Dla każdej warstwy l (od 1 do L+1, gdzie L+1 to warstwa wyjściowa):

1. **Sygnał wejściowy:** $$Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}$$
2. **Aktywacja:** 
   - Dla warstw ukrytych: $$A^{[l]} = \sigma(Z^{[l]})$$
   - Dla warstwy wyjściowej: $$A^{[L+1]} = \text{Softmax}(Z^{[L+1]})$$

### Wsteczna propagacja błędu

1. **Błąd na wyjściu (warstwa L+1):**
   - $$\delta^{[L+1]} = A^{[L+1]} - Y$$

Dla każdej warstwy l od L do 1:

2. **Błąd dla warstwy l:**
   - $$\delta^{[l]} = (W^{[l+1]T} \delta^{[l+1]}) \odot \sigma'(Z^{[l]})$$
3. **Gradient dla wag i biasów:**
   - $$\nabla W^{[l]} = \delta^{[l]} A^{[l-1]T}$$
   - $$\nabla b^{[l]} = \sum(\delta^{[l]}, \text{axis} = 0)$$



In [282]:
def costFunction(Y, Y_hat):
    return - np.sum(Y * np.log(Y_hat))

In [283]:
def sigmoid(Z):
    return 1 / (1 + np.exp(-Z))

In [284]:
def softmax(Z):
    return np.exp(Z) / np.sum(np.exp(Z), axis=1, keepdims=True)

In [285]:
def propagate(X, Y, parameters):
    # if len(parameters)//2 > 1:            
    A = X
    history = {"A0": X}
    gradients = {}
    for i in range(1, len(parameters)//2+1):
        Z = np.dot(A, parameters["W"+str(i)]) + parameters["b"+str(i)]
        history["Z"+str(i)] = Z
        if i != len(parameters)//2:
            A = sigmoid(Z)
        else:
            A = softmax(Z)
        history["A"+str(i)] = A
    
    cost = costFunction(Y, A)
    gradients = {}
    
    for i in range(len(parameters)//2, 0, -1):
        if i == len(parameters)//2:
            history["dA"+str(i)]= history["A"+str(i)] - Y
        else:
            history["dA"+str(i)] = np.dot(history["dA"+str(i+1)], parameters["W"+str(i+1)].T) * history["A"+str(i)] * (1 - history["A"+str(i)])
        
        gradients["dW"+str(i)] = np.dot(history["A"+str(i-1)].T,history["dA"+str(i)])
        gradients["db"+str(i)] = np.sum(history["dA"+str(i)], axis=0)
    # if len(parameters)//2 == 1:

    #     Z1 = np.dot(X, parameters["W1"]) + parameters["b1"]
    #     A1 = sigmoid(Z1)
    #     Z2 = np.dot(A1, parameters["W2"]) + parameters["b2"]
    #     A2 = softmax(Z2)
    #     cost = costFunction(Y, A2)
    #     # # pochodne funkcji straty:
    #     dA2 = A2 - Y
    #     dW2 = np.dot(A1.T,dA2) 
    #     db2 = np.sum(dA2, axis=0,keepdims=True)
    #     dA1 = np.dot(dA2, parameters["W2"].T)
    #     dZ1 = dA1 * (A1 * (1 - A1))
    #     dW1 = np.dot(X.T, dZ1)
    #     db1 = np.sum(dZ1, axis=0)
    #     gradients = {"dW1": dW1, "db1": db1, "dW2": dW2, "db2": db2}
    return gradients, cost


### Aktualizacja wag

- Wagi i biasy są aktualizowane za pomocą metody spadku gradientu:
  - $$W^{[l]} = W^{[l]} - \alpha \nabla W^{[l]}$$
  - $$b^{[l]} = b^{[l]} - \alpha \nabla b^{[l]}$$

In [286]:
def updateParameters(parameters, gradients, learningRate):
    for key in parameters.keys():
        parameters[key] += - learningRate * gradients["d"+key]
    return parameters

In [287]:
def optimize(X, Y, parameters, learningRate, numberOfIterations, printCost=False):
    costs = []
    for i in range(numberOfIterations):
        gradients, cost = propagate(X, Y, parameters)
        parameters = updateParameters(parameters, gradients, learningRate)
        costs.append(cost)
        if i % 100 == 0 and printCost:
            print("Cost after iteration {}: {}".format(i, cost))
    return parameters, costs

In [288]:
def predict(X, parameters):
    Z = X
    for i in range(1, len(parameters)//2):
        Z = np.dot(Z, parameters["W"+str(i)]) + parameters["b"+str(i)]
        Z = sigmoid(Z)
    Z = np.dot(Z, parameters["W"+str(len(parameters)//2)]) + parameters["b"+str(len(parameters)//2)]
    Z = softmax(Z)
    return np.argmax(Z, axis=1)

In [289]:
parameters = initializeParameters(2, [5,15], 20,2)
params, costs = optimize(X,Y_encoded, parameters, 0.00001, 50000, True)

  return 1 / (1 + np.exp(-Z))


Cost after iteration 0: 68461.71302725516
Cost after iteration 100: 48895.52312702399
Cost after iteration 200: 46266.51046246252
Cost after iteration 300: 42667.934803006196
Cost after iteration 400: 40625.05827299473
Cost after iteration 500: 37273.30271670071
Cost after iteration 600: 37347.549026134126
Cost after iteration 700: 34393.69309505231
Cost after iteration 800: 35323.62326836101
Cost after iteration 900: 34505.05860423376
Cost after iteration 1000: 33918.27708062716
Cost after iteration 1100: 33010.09122065229
Cost after iteration 1200: 32729.555652650906
Cost after iteration 1300: 31140.27112016399
Cost after iteration 1400: 31479.3323444023
Cost after iteration 1500: 30584.64758055823
Cost after iteration 1600: 30255.041546927267
Cost after iteration 1700: 30136.795054345595
Cost after iteration 1800: 30832.65691638661
Cost after iteration 1900: 29954.141860498145
Cost after iteration 2000: 30835.399294984654
Cost after iteration 2100: 31743.883810432002
Cost after iter

In [296]:
a = predict(X, params)
i=18
print(a[i])
sum =0
timesGuessedA = {}
timesGuessedY = {}
trafione = {}
for i in range(a.shape[0]):
    timesGuessedA[a[i]] = timesGuessedA.get(a[i], 0) + 1    
    timesGuessedY[np.argmax(Y_encoded[i])] = timesGuessedY.get(np.argmax(Y_encoded[i]), 0) + 1
    if a[i] == np.argmax(Y_encoded[i]):
        trafione[a[i]] = trafione.get(a[i], 0) + 1
        sum += 1

print(sum,a.shape[0])
print(timesGuessedA)
print(timesGuessedY)
print(trafione)
print(sum/a.shape[0]*100)

  return 1 / (1 + np.exp(-Z))


1
11604 22988
{1: 3215, 5: 10312, 0: 8292, 14: 743, 7: 152, 12: 274}
{1: 2924, 3: 1487, 2: 852, 0: 7478, 7: 2214, 5: 2770, 9: 2494, 19: 75, 12: 262, 10: 173, 4: 810, 11: 716, 8: 50, 13: 119, 18: 83, 14: 272, 17: 112, 16: 53, 6: 17, 15: 27}
{1: 1797, 0: 6806, 5: 2702, 7: 46, 12: 69, 14: 184}
50.4785105272316


In [295]:
a = predict(X_test, params)
i=18
print(a[i])
sum =0
timesGuessedA = {}
timesGuessedY = {}
trafione = {}
for i in range(a.shape[0]):
    timesGuessedA[a[i]] = timesGuessedA.get(a[i], 0) + 1    
    timesGuessedY[np.argmax(Y_test_encoded[i])] = timesGuessedY.get(np.argmax(Y_test_encoded[i]), 0) + 1
    if a[i] == np.argmax(Y_test_encoded[i]):
        trafione[a[i]] = trafione.get(a[i], 0) + 1
        sum += 1

print(sum,a.shape[0])
print(timesGuessedA)
print(timesGuessedY)
print(trafione)
print(sum/a.shape[0]*100)

0
2924 5747
{5: 2571, 0: 2136, 14: 190, 1: 752, 12: 56, 7: 42}
{5: 709, 1: 756, 0: 1868, 14: 54, 3: 372, 9: 624, 7: 544, 4: 179, 2: 201, 10: 46, 11: 188, 12: 69, 13: 35, 15: 7, 17: 27, 18: 15, 8: 16, 19: 19, 16: 15, 6: 3}
{5: 691, 0: 1729, 14: 42, 1: 435, 12: 16, 7: 11}
50.878719331825295


  return 1 / (1 + np.exp(-Z))


In [294]:
pars = initializeParameters(2, [1,2], 20, 2)
print("done", pars)

done {'W1': array([[ 0.39808898],
       [-0.33005263]]), 'b1': array([[0.]]), 'W2': array([[ 0.24790405, -0.67366659]]), 'b2': array([[0., 0.]]), 'W3': array([[-0.97676816,  0.43539934,  1.23338626, -1.25266705, -0.35052616,
        -0.60873379,  0.12813045,  1.09825451, -0.17759404,  0.49698365,
        -0.05785868, -0.44283365,  1.52300677, -1.10539908, -2.95527893,
         0.62572173, -1.22709148, -1.23186265,  0.52922551, -0.0298159 ],
       [ 1.68951384,  0.37848429, -0.08758142,  0.01171019, -0.47589574,
        -0.93086109, -0.42911548, -1.41237191,  0.98990527, -1.61679561,
        -1.11309222, -0.26369733,  0.28600374,  0.24784717,  0.07841771,
        -0.4659612 ,  0.81590848, -0.51917199, -0.13305246, -0.40752705]]), 'b3': array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]])}
