In [83]:
import datetime

import numpy as np
import pandas as pd
from scipy import optimize

import plotly
import plotly.express as px
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
from plotly.graph_objs.scatter import Line
from plotly.graph_objs import Scatter

In [4]:
data = np.loadtxt('ex2data1.txt', delimiter=',')
data

array([[34.62365962, 78.02469282,  0.        ],
       [30.28671077, 43.89499752,  0.        ],
       [35.84740877, 72.90219803,  0.        ],
       [60.18259939, 86.3085521 ,  1.        ],
       [79.03273605, 75.34437644,  1.        ],
       [45.08327748, 56.31637178,  0.        ],
       [61.10666454, 96.51142588,  1.        ],
       [75.02474557, 46.55401354,  1.        ],
       [76.0987867 , 87.42056972,  1.        ],
       [84.43281996, 43.53339331,  1.        ],
       [95.86155507, 38.22527806,  0.        ],
       [75.01365839, 30.60326323,  0.        ],
       [82.30705337, 76.4819633 ,  1.        ],
       [69.36458876, 97.71869196,  1.        ],
       [39.53833914, 76.03681085,  0.        ],
       [53.97105215, 89.20735014,  1.        ],
       [69.07014406, 52.74046973,  1.        ],
       [67.94685548, 46.67857411,  0.        ],
       [70.66150955, 92.92713789,  1.        ],
       [76.97878373, 47.57596365,  1.        ],
       [67.37202755, 42.83843832,  0.   

In [16]:
px.scatter(data, x=0, y=1, symbol=2, color=2)

In [37]:
X = np.c_[np.ones(data.shape[0]), data[:, [0,1]]]
y = data[:, 2]
m = X.shape[0]
n = X.shape[1] - 1
initial_theta = np.zeros(X.shape[1])

m, n, X, y, initial_theta

(100, 2, array([[ 1.        , 34.62365962, 78.02469282],
        [ 1.        , 30.28671077, 43.89499752],
        [ 1.        , 35.84740877, 72.90219803],
        [ 1.        , 60.18259939, 86.3085521 ],
        [ 1.        , 79.03273605, 75.34437644],
        [ 1.        , 45.08327748, 56.31637178],
        [ 1.        , 61.10666454, 96.51142588],
        [ 1.        , 75.02474557, 46.55401354],
        [ 1.        , 76.0987867 , 87.42056972],
        [ 1.        , 84.43281996, 43.53339331],
        [ 1.        , 95.86155507, 38.22527806],
        [ 1.        , 75.01365839, 30.60326323],
        [ 1.        , 82.30705337, 76.4819633 ],
        [ 1.        , 69.36458876, 97.71869196],
        [ 1.        , 39.53833914, 76.03681085],
        [ 1.        , 53.97105215, 89.20735014],
        [ 1.        , 69.07014406, 52.74046973],
        [ 1.        , 67.94685548, 46.67857411],
        [ 1.        , 70.66150955, 92.92713789],
        [ 1.        , 76.97878373, 47.57596365],
        [ 1.

In [73]:
z = lambda x, th: x @ th
sigmoid = lambda z: 1/(1 + np.exp(-1 * z))
h = lambda x, th: sigmoid(z(x, th))

# J = lambda m, x, th, y: 1/(2 * m) * ( (h(x, theta) - y) ** 2 ).sum()
J = lambda m, x, th, y: 1/m * (-y * np.log(h(x, th)) - (1-y) * np.log(1-h(x, th))).sum()

test_theta = np.array([-24, 0.2, 0.2])
cost = J(m, X, test_theta, y)
print(cost, 'approx 0.218')

# J(m, X, theta, y)

0.21833019382659774 approx 0.218


In [81]:
grad = lambda m, x, th, y: 1/m * x.T @ ( h(x, th) - y )

print(grad(m, X, test_theta, y), 'approx: 0.043 2.566 2.648')

[0.04290299 2.56623412 2.64679737] approx: 0.043 2.566 2.648


In [92]:
## Octave
# theta, cost = fminunc(@(t) (costFunction(t, X, y)), initial_theta, optimset('GradObj', 'on', 'MaxIter', 400))

cost = lambda th, x, y: J(m, x, th, y)
result = optimize.fmin(cost, initial_theta, args=(X, y), full_output=True, disp=True)

theta_opt, j_opt = result[0], result[1]

print(j_opt, 'approx 0.203')
print(theta_opt, 'approx -25.161 0.206 0.201')

Optimization terminated successfully.
         Current function value: 0.203498
         Iterations: 157
         Function evaluations: 287
0.20349770159021513 approx 0.203
[-25.16130062   0.20623142   0.20147143] approx -25.161 0.206 0.201


In [172]:
fig = go.Figure()

neg = data[data[:, 2] == 0]
pos = data[data[:, 2] == 1]

plot_x = np.array([np.min(X[:, 1]), np.max(X[:, 1])])
plot_y = (-theta_opt[0] - theta_opt[1] * plot_x)  / theta_opt[2]

fig.add_trace(Scatter(x=neg[:, 0], y=neg[:,1], mode='markers', marker={'symbol': 0}))
fig.add_trace(Scatter(x=pos[:, 0], y=pos[:,1], mode='markers', marker={'symbol': 100}))
fig.add_trace(Scatter(x=plot_x, y=plot_y))
fig.show()

In [184]:
prob = h(np.array([1, 45, 85]), theta_opt)
print(prob, 'approx 0.775')

pred = h(X, theta_opt)

print(((pred >= 0.5) == y).mean() * 100, 'approx 89.0')

0.7762915904112411 approx 0.775
89.0 approx 89.0
