In [3]:
# Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import h5py
from scipy.special import expit

In [4]:
def load_dataset():
    """Load the cat / non-cat train and test sets from their HDF5 files.

    Reads 'datasets/train_catvnoncat.h5' and 'datasets/test_catvnoncat.h5'
    (paths relative to the current working directory) and copies every
    dataset into an in-memory NumPy array.

    Returns:
        A 5-tuple ``(train_set_x_orig, train_set_y_orig, test_set_x_orig,
        test_set_y_orig, classes)`` where

        * ``train_set_x_orig`` / ``test_set_x_orig`` are the raw feature
          arrays exactly as stored in the files (presumably image batches of
          shape (m, num_px, num_px, 3) -- confirm against the data files),
        * ``train_set_y_orig`` / ``test_set_y_orig`` are the labels reshaped
          to a single row of shape (1, m),
        * ``classes`` is the content of the test file's "list_classes" entry.

    Raises:
        OSError: propagated from h5py when a file is missing or unreadable.
        KeyError: propagated from h5py when an expected dataset is absent.
    """
    # The context managers close the HDF5 handles even if a read fails.
    # Closing early is safe because `[:]` + np.array copy the data to memory.
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])  # train set features
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])  # train set labels

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])  # test set features
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])  # test set labels
        classes = np.array(test_dataset["list_classes"][:])  # the list of classes

    # Turn the label vectors into row vectors of shape (1, m).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes

<h2>Loading the data (cat/non-cat)</h2> 
<p>We added <b>"_orig"</b> at the end of image datasets (train and test) because we are going to preprocess them. <br>After preprocessing, we will end up with train_set_x and test_set_x (the labels train_set_y and test_set_y don't need any preprocessing).<br> Each line of your train_set_x_orig and test_set_x_orig is an array representing an image.</p>

In [5]:
# Unpack the five values returned by load_dataset(): raw train/test features,
# their label rows, and the class names.
(
    train_set_x_orig,
    train_set_y_orig,
    test_set_x_orig,
    test_set_y_orig,
    classes,
) = load_dataset()

<h2>How is an image stored on a computer?</h2>
<img style="display:block;" src="./img/1.png" alt="Photo 1"/>
<p>An image is stored in the computer as three separate matrices corresponding to the Red, Green, and Blue
color channels of the image. The three matrices have the same size as the image; for example, the
resolution of the cat image is 64 pixels X 64 pixels, so the three matrices (RGB) are 64 X 64 each.
The value in a cell represents the pixel intensity, which will be used to create an n-dimensional feature vector. In pattern recognition and machine learning, a feature vector represents an object, in this
case, a cat or no cat.
To create a feature vector, 𝑥, the pixel intensity values will be “unrolled” or “reshaped” for each color. The
dimension of the input feature vector 𝑥 is <b>$n_x$</b> = 64 × 64 × 3 = 12288.</p>
<img style="display:block;" src="./img/2.png" alt="Photo 2"/>

In [6]:
# The first axis of each image array indexes the examples.
m_train = len(train_set_x_orig)
m_test = len(test_set_x_orig)
# The second axis is taken as the image side length (height = width).
num_px = train_set_x_orig.shape[1]

- m_train (number of training examples)
- m_test (number of test examples)
- num_px (= height = width of a training image)

<h2>Reshape the training and test data sets</h2>
<p>so that images of size (num_px, num_px, 3) are flattened into single vectors of shape (num_px $*$ num_px $*$ 3, 1).</p>
<img src="./img/3.png" style="display: block;width:55%;" alt="Photo 3"/>

In [7]:
# Collapse every example into one row of values, then transpose so that each
# column of the result holds one flattened example.
train_set_x_flatten = train_set_x_orig.reshape(m_train, -1).transpose()
test_set_x_flatten = test_set_x_orig.reshape(m_test, -1).transpose()

<h2>Standardize dataset</h2>
<p>To represent color images, the red, green and blue channels (RGB) must be specified for each pixel, and so the pixel value is actually a vector of three numbers ranging from 0 to 255.

One common preprocessing step in machine learning is to center and standardize your dataset, meaning that you subtract the mean of the whole numpy array from each example, and then divide each example by the standard deviation of the whole numpy array. But for picture datasets, it is simpler and more convenient and works almost as well to just divide every row of the dataset by 255 (the maximum value of a pixel channel).</p>

In [8]:
# Scale by 255, the maximum value of a pixel channel, as described above.
train_set_x = train_set_x_flatten / 255.0
test_set_x = test_set_x_flatten / 255.0

<h2>General Architecture of the learning algorithm</h2>
<p>The following figure explains why <b>Logistic Regression</b> is actually a very simple <b>Neural Network</b>!<br>Logistic regression is a learning algorithm used in supervised learning problems where the output labels 𝑦 are
all either zero or one. The goal of logistic regression is to minimize the error between its predictions and
the training data.</p>
<p><b>Example:</b> <br>Cat vs No - cat
Given an image represented by a feature vector 𝑥, the algorithm will evaluate the probability of a cat
being in that image.</p>
<img src="./img/0.png" style="display: block;width:65%;" alt="Photo 3"/>

<h3 style="text-align:center">Mathematical expression of the algorithm:</h3>
<p style="text-align:center; font-size:20px">Given $\{(x^{(1)}, y^{(1)}), \cdots, (x^{(m)}, y^{(m)})\}$, we want $\hat{y}^{(i)} \approx y^{(i)}$</p>
<p style="text-align:center">${z}^{(i)} = w^{T} x^{(i)} + b$</p>
<p style="text-align:center">$\hat{y}^{(i)} = \sigma({z}^{(i)}) = \frac{1}{1  +  e^{-{z}^{(i)}}} = P({y}^{(i)} = 1 | x);0 \leq \hat{y}^{(i)} \leq 1 $</p>
<!-- <p style="text-align:center; font-size:15px">
Loss (error) function:<br>
The loss function measures the discrepancy between the prediction (𝑦̂
(𝑖)
) and the desired output (𝑦
(𝑖)
).
In other words, the loss function computes the error for a single training example.</p> -->
<p style="text-align:center">$\mathcal{L}(\hat{y}^{(i)}, y^{(i)}) = - (y^{(i)}\log{\hat{y}^{(i)}} + (1 - y^{(i)})\log({1 - \hat{y}^{(i)}}))$</p>
<p style="text-align:center">$\mathcal{J}(w, b) = \frac {1}{m}\left( \sum_{i=1}^m \mathcal{L}(\hat{y}^{(i)}, y^{(i)}) \right) $</p>
<p style="text-align:center">$\frac{\partial \mathcal{J}(w, b)}{\partial w} = \frac{\partial \mathcal{J}(w, b)}{\partial \mathcal{L}(\hat{y}^{(i)}, y^{(i)})} \times \frac{\partial \mathcal{L}(\hat{y}^{(i)}, y^{(i)})}{\partial \hat{y}} \times \frac{\partial \hat{y}}{\partial z} \times \frac{\partial z}{\partial w} = (\hat{y} - y).x$</p>
<p style="text-align:center">$\frac{\partial \mathcal{J}(w, b)}{\partial b} = \frac{\partial \mathcal{J}(w, b)}{\partial \mathcal{L}(\hat{y}^{(i)}, y^{(i)})} \times \frac{\partial \mathcal{L}(\hat{y}^{(i)}, y^{(i)})}{\partial \hat{y}} \times \frac{\partial \hat{y}}{\partial z} \times \frac{\partial z}{\partial b} = (\hat{y} - y)$</p>

<h2>Implement sigmoid</h2>

In [9]:
def sigmoid(z):
    """Apply the logistic sigmoid 1 / (1 + exp(-z)) element-wise.

    The computation is delegated to ``scipy.special.expit`` instead of
    spelling the formula out with ``np.exp``.

    Args:
        z: A scalar or array-like of pre-activation values.

    Returns:
        The sigmoid of ``z``, as returned by ``expit`` (same shape as ``z``).
    """
    activation = expit(z)
    return activation

<h2>Initializing parameters</h2>

In [10]:
def init_with_zero(dim):
    """Create zero-valued starting parameters.

    Args:
        dim: Number of rows of the weight vector.

    Returns:
        A tuple ``(w, b)`` where ``w`` is a NumPy array of zeros with shape
        ``(dim, 1)`` and ``b`` is the integer ``0``.
    """
    weights = np.zeros(shape=(dim, 1))
    bias = 0
    return weights, bias

<h2>Forward and Backward propagation</h2>
<p>Forward Propagation:</p>
<ul>
<li>You get X</li>
<li>You compute $A = \sigma(w^T X + b) = (a^{(1)}, a^{(2)}, ..., a^{(m-1)}, a^{(m)})$</li>
<li>You calculate the cost function: $J = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log(a^{(i)})+(1-y^{(i)})\log(1-a^{(i)})\right]$</li>
</ul>
<p>Here are the two formulas you will be using:</p>
<span class="MathJax_Preview" style="color: inherit; display: none;"></span>
$$\frac{\partial J}{\partial w} = \frac{1}{m} X (A - Y)^T \tag{7}$$
$$\frac{\partial J}{\partial b} = \frac{1}{m} \sum_{i=1}^{m} \left(a^{(i)} - y^{(i)}\right) \tag{8}$$

In [11]:
def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    Forward pass: A = sigmoid(w.T X + b), then the averaged cross-entropy cost
        J = -(1/m) * sum(Y * log(A) + (1 - Y) * log(1 - A)).
    Backward pass: the gradients of J,
        dw = (1/m) * X (A - Y).T    and    db = (1/m) * sum(A - Y).

    Parameters
    ----------
    w : weights; its transpose is multiplied with X (np.dot(w.T, X)).
    b : bias added to np.dot(w.T, X).
    X : input data; X.shape[1] is used as the number of examples m.
    Y : labels, combined element-wise with the activations A.

    Returns
    -------
    (cost, dw, db) : the cost and the gradients with respect to w and b.
    """
    # Forward
    m = X.shape[1]
    Z = np.dot(w.T, X) + b
    A = sigmoid(Z)
    epsilon = 1e-5  # keeps the log arguments away from 0 when A saturates at 0 or 1
    # The second term is weighted by (1 - Y), not (1 - A): it is the loss
    # contribution of the examples whose label is 0.
    cost = -np.sum(Y * np.log(A + epsilon) + (1 - Y) * np.log(1 - A + epsilon)) / m
    # Backward
    dZ = A - Y
    dw = np.dot(X, dZ.T) / m
    db = np.sum(dZ) / m

    return cost, dw, db

<h2>Optimization: Gradient descent</h2>
<p>The goal is to learn w and b by minimizing the cost function J. For a parameter θ, the update rule is θ=θ−α dθ, where α is the learning rate.</p>

In [12]:
def optimize(w, b, X, Y, iteration_num, learning_rate):
    """Fit w and b with batch gradient descent.

    Performs `iteration_num` steps. Each step calls propagate on the full
    (X, Y) and moves both parameters against their gradient, scaled by
    `learning_rate`.

    Returns
    -------
    (costs, w, b) : costs holds one entry per step (the cost computed before
    that step's update); w and b are the parameters after the last step.
    """
    cost_history = []
    for _ in range(iteration_num):
        step_cost, grad_w, grad_b = propagate(w, b, X, Y)
        cost_history.append(step_cost)
        # Rebind rather than update in place so the caller's arrays are untouched.
        w = w - learning_rate * grad_w
        b = b - learning_rate * grad_b

    return cost_history, w, b

In [13]:
# Initial parameters with 3 * num_px * num_px entries
# (presumably one per RGB pixel value of a flattened image — confirm against init_with_zero).
w, b = init_with_zero(3 * num_px * num_px)
# Short aliases for the training inputs and labels used by the next cell.
X = train_set_x
Y = train_set_y_orig

In [14]:
# Train for 1000 gradient-descent steps with a learning rate of 0.01.
# w and b are overwritten here, so re-running this cell resumes from the
# already-trained values instead of starting from the initial parameters.
costs, w, b = optimize(w, b, X, Y, iteration_num=1000, learning_rate=0.01)

<h2>Predict</h2>

In [15]:
def predict(w, b, x):
    """Turn the model's activations into hard 0/1 labels.

    Computes sigmoid(np.dot(w.T, x) + b) and returns an int array that is 1
    where the activation is strictly greater than 0.5 and 0 elsewhere.
    """
    activation = sigmoid(np.dot(w.T, x) + b)
    is_positive = activation > 0.5
    return is_positive.astype(int)

<h2>Merge all functions into a model</h2>

In [26]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def model(x_train, y_train, x_test, y_test, iteration_num, learning_rate):
    """Train logistic regression and score it on the test data.

    Parameters are initialised with init_with_zero(x_train.shape[0]), fitted
    with optimize, and used by predict on x_test. The first row of the
    predictions is compared with the first row of y_test.

    Returns
    -------
    (costs, y_pred, w, b, iteration_num, learning_rate, e) where e is a dict
    with the keys "accuracy", "precision", "recall" and "f1".
    """
    w_init, b_init = init_with_zero(x_train.shape[0])
    costs, w, b = optimize(w_init, b_init, x_train, y_train, iteration_num, learning_rate)

    # predict returns a 2-D result and y_test is indexed the same way; take row 0 of each.
    y_pred = predict(w, b, x_test)[0]
    y_true = y_test[0]

    scorers = (
        ("accuracy", accuracy_score),
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    e = {metric_name: scorer(y_true, y_pred) for metric_name, scorer in scorers}

    return costs, y_pred, w, b, iteration_num, learning_rate, e

In [None]:
# End-to-end run: 2000 iterations at learning rate 0.005.
# The returned tuple is left as the cell's displayed output.
model(
    train_set_x,
    train_set_y_orig,
    test_set_x,
    test_set_y_orig,
    iteration_num=2000,
    learning_rate=0.005,
)