In [2]:
import pandas as pd
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Load the penguins dataset, keep the Adelie rows only, and drop any row
# that still has a missing value.
df = (
    sns.load_dataset("penguins")
    .loc[lambda frame: frame["species"] == "Adelie"]
    .dropna()
)
df

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,Male
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,Female
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,Female
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,Female
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,Male
...,...,...,...,...,...,...,...
147,Adelie,Dream,36.6,18.4,184.0,3475.0,Female
148,Adelie,Dream,36.0,17.8,195.0,3450.0,Female
149,Adelie,Dream,37.8,18.1,193.0,3750.0,Male
150,Adelie,Dream,36.0,17.1,187.0,3700.0,Female


In [4]:
# Reload from scratch so the cell is self-contained, then keep only the
# response (bill depth) and the two predictors used below.
df = (
    sns.load_dataset("penguins")
    .loc[lambda frame: frame["species"] == "Adelie"]
    .dropna()
    .loc[:, ["bill_depth_mm", "flipper_length_mm", "body_mass_g"]]
)
df

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g
0,18.7,181.0,3750.0
1,17.4,186.0,3800.0
2,18.0,195.0,3250.0
4,19.3,193.0,3450.0
5,20.6,190.0,3650.0
...,...,...,...
147,18.4,184.0,3475.0
148,17.8,195.0,3450.0
149,18.1,193.0,3750.0
150,17.1,187.0,3700.0


In [5]:
# Design matrix: the two predictors plus a constant "bias" column for the
# intercept. .assign() returns a new frame, so `df` is left untouched and
# pandas does not raise SettingWithCopyWarning (which assigning a column on
# a slice of `df` does).
X = df[["flipper_length_mm", "body_mass_g"]].assign(bias=1)
X

Unnamed: 0,flipper_length_mm,body_mass_g,bias
0,181.0,3750.0,1
1,186.0,3800.0,1
2,195.0,3250.0,1
4,193.0,3450.0,1
5,190.0,3650.0,1
...,...,...,...
147,184.0,3475.0,1
148,195.0,3450.0,1
149,193.0,3750.0,1
150,187.0,3700.0,1


In [6]:
# Target vector: the observed bill depth (mm) for each row of df.
y = df["bill_depth_mm"]
y

Unnamed: 0,bill_depth_mm
0,18.7
1,17.4
2,18.0
4,19.3
5,20.6
...,...
147,18.4
148,17.8
149,18.1
150,17.1


In [7]:
# Closed-form least-squares fit via the normal equation:
#   theta = (X^T X)^{-1} X^T y
# The entries follow X's column order: flipper_length_mm, body_mass_g, bias.
theta_using_normal_equation = np.linalg.inv(X.T @ X) @ X.T @ y
theta_using_normal_equation

Unnamed: 0,0
0,0.009828
1,0.001477
2,11.002995


In [8]:
# Same estimate, obtained by solving (X^T X) theta = X^T y directly rather
# than forming the inverse explicitly.
np.linalg.solve(X.T @ X,X.T @ y)

array([9.82848689e-03, 1.47749591e-03, 1.10029953e+01])

In [9]:
# Re-evaluates the normal-equation estimate (identical expression to the
# earlier cell), this time without displaying it.
theta_using_normal_equation = np.linalg.inv(X.T @ X) @ X.T @ y

In [10]:
# Fitted values y_hat = X @ theta, stored next to the observed bill depths.
df["pred_bill_depth_mm"] = X.to_numpy() @ theta_using_normal_equation
df

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g,pred_bill_depth_mm
0,18.7,181.0,3750.0,18.322561
1,17.4,186.0,3800.0,18.445578
2,18.0,195.0,3250.0,17.721412
4,19.3,193.0,3450.0,17.997254
5,20.6,190.0,3650.0,18.263268
...,...,...,...,...
147,18.4,184.0,3475.0,17.945735
148,17.8,195.0,3450.0,18.016911
149,18.1,193.0,3750.0,18.440503
150,17.1,187.0,3700.0,18.307657


In [11]:
from sklearn.linear_model import LinearRegression

In [12]:
from sklearn.linear_model import LinearRegression
# scikit-learn estimator for the same linear model; nothing is fitted yet
# at this point (fit is called in a later cell).
model = LinearRegression()
model

In [13]:
# Fit the linear model: predictors first, response second (positional form
# of fit(X, y)).
model.fit(df[["flipper_length_mm", "body_mass_g"]], df["bill_depth_mm"])

In [14]:
# Predict for one hypothetical penguin (flipper 185 mm, body mass 3750 g).
# The model was fitted on a DataFrame, so the query is passed as a DataFrame
# with the same column names; a bare nested list carries no feature names
# and makes scikit-learn emit a "does not have valid feature names" warning.
model.predict(pd.DataFrame({"flipper_length_mm": [185], "body_mass_g": [3750.0]}))



array([18.36187501])

In [15]:
# In-sample predictions from the scikit-learn model, for comparison with
# the normal-equation column pred_bill_depth_mm.
df["sklearn_preds"] = model.predict(df[["flipper_length_mm", "body_mass_g"]])
df

Unnamed: 0,bill_depth_mm,flipper_length_mm,body_mass_g,pred_bill_depth_mm,sklearn_preds
0,18.7,181.0,3750.0,18.322561,18.322561
1,17.4,186.0,3800.0,18.445578,18.445578
2,18.0,195.0,3250.0,17.721412,17.721412
4,19.3,193.0,3450.0,17.997254,17.997254
5,20.6,190.0,3650.0,18.263268,18.263268
...,...,...,...,...,...
147,18.4,184.0,3475.0,17.945735,17.945735
148,17.8,195.0,3450.0,18.016911,18.016911
149,18.1,193.0,3750.0,18.440503,18.440503
150,17.1,187.0,3700.0,18.307657,18.307657


In [16]:
# Fitted intercept; corresponds to the "bias" entry of the normal-equation theta.
model.intercept_

11.002995277447067

In [17]:
# Fitted slopes, one per feature column passed to fit().
model.coef_

array([0.00982849, 0.0014775 ])

In [18]:
# Normal-equation estimate again, for side-by-side comparison with the
# scikit-learn intercept_ / coef_ shown above.
theta_using_normal_equation

Unnamed: 0,0
0,0.009828
1,0.001477
2,11.002995


In [19]:
# 3-D scatter of the observations with the linear model's prediction
# surface drawn on top.
fig = px.scatter_3d(df, x="flipper_length_mm", y="body_mass_g", z="bill_depth_mm")

# The linear model's predictions form a plane, so evaluating it on a 2x2
# grid spanning the observed feature ranges is enough to draw it.
grid_resolution = 2
(u,v) = np.meshgrid(
    np.linspace(df["flipper_length_mm"].min(), df["flipper_length_mm"].max(), grid_resolution),
    np.linspace(df["body_mass_g"].min(), df["body_mass_g"].max(), grid_resolution))
# Flatten the grid into one row per (flipper, mass) pair, using the same
# column names the model was fitted with.
features = pd.DataFrame({"flipper_length_mm": u.flatten(),
                         "body_mass_g": v.flatten()})
zs = model.predict(features)

# Reshape the flat predictions back onto the grid for the surface trace.
fig.add_trace(go.Surface(x=u, y=v, z= zs.reshape(u.shape), opacity=0.9, showscale=False))
fig.update_layout(autosize=False, width=800, height=600)

In [20]:
from sklearn.metrics import mean_squared_error
# Mean squared error of the linear model on the same rows it was fitted to.
mean_squared_error(df["bill_depth_mm"], df["sklearn_preds"])

0.9764070438843998

In [21]:
from sklearn.tree import DecisionTreeRegressor

# Decision tree with default hyperparameters, fitted on the same features
# and target as the linear model.
tree_model = DecisionTreeRegressor()

tree_model.fit(
    X= df[["flipper_length_mm", "body_mass_g"]],
    y= df["bill_depth_mm"])

In [22]:
# In-sample predictions from the decision tree.
df["sklearn_dt_preds"] = tree_model.predict(df[["flipper_length_mm", "body_mass_g"]])

In [23]:
# MSE of the tree on the rows it was fitted to (a training error, not a
# held-out estimate).
mean_squared_error(df["bill_depth_mm"], df["sklearn_dt_preds"])

0.051107305936073065

In [24]:
# 3-D scatter of the observations with the decision tree's prediction
# surface drawn on top.
fig = px.scatter_3d(df, x="flipper_length_mm", y="body_mass_g", z="bill_depth_mm")

# Evaluate the tree on a 20x20 grid spanning the observed feature ranges.
grid_resolution = 20
(u,v) = np.meshgrid(
    np.linspace(df["flipper_length_mm"].min(), df["flipper_length_mm"].max(), grid_resolution),
    np.linspace(df["body_mass_g"].min(), df["body_mass_g"].max(), grid_resolution))
# One row per grid point, with the column names used when fitting.
features = pd.DataFrame({"flipper_length_mm": u.flatten(),
                         "body_mass_g": v.flatten()})
zs = tree_model.predict(features)

# Reshape the flat predictions back onto the grid for the surface trace.
fig.add_trace(go.Surface(x=u, y=v, z= zs.reshape(u.shape), opacity=0.9, showscale=False))
fig.update_layout(autosize=False, width=800, height=600)

In [25]:
def f(x):
  """Quartic example objective (x^4 - 15x^3 + 80x^2 - 180x + 144) / 10.

  Works element-wise on anything supporting arithmetic, e.g. a float or a
  NumPy array.
  """
  quartic = x**4 - 15*x**3 + 80*x**2 - 180*x + 144
  return quartic / 10

In [26]:
# Draw f on [1, 6.75] using 200 evenly spaced sample points.
x = np.linspace(1, 6.75, 200)
fig = px.line(x=x, y=f(x))
fig.update_layout(font_size=16, autosize=False, width=800, height=600)

In [27]:
def simple_minimize(f,xs):
  """Return the element of `xs` at which `f` is smallest.

  `f` is evaluated once per candidate; on ties the first minimizing
  candidate is returned (np.argmin picks the first occurrence).
  """
  values = list(map(f, xs))
  best_index = np.argmin(values)
  return xs[best_index]

In [28]:
# Brute-force minimization over a short, hand-picked list of candidates.
guesses = [5.3, 5.31, 5.32, 5.33, 5.34, 5.35]
simple_minimize(f, guesses)

5.33

In [29]:
# Dense grid for drawing the curve and a sparse 5-point grid to show what a
# coarse brute-force search actually "sees".
xs = np.linspace(1,7,200)
sparse_xs = np.linspace(1.5,6.5,5)

# Evaluate f once per grid and reuse the results in the plot (previously
# these were computed and then ignored while f was evaluated again).
ys = f(xs)
sparse_ys = f(sparse_xs)

fig = px.line(x = xs, y = ys)
fig.add_scatter(x=sparse_xs, y=sparse_ys, mode="markers", marker_size=16)
fig.update_layout(showlegend=False, autosize=False, width=800, height=600)
fig.show()

In [30]:
from scipy.optimize import minimize

# General-purpose numerical minimization of f, started from x0 = 3.5.
minimize(f, x0=3.5)

  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: -0.13827491292966557
        x: [ 2.393e+00]
      nit: 3
      jac: [ 6.486e-06]
 hess_inv: [[ 7.385e-01]]
     nfev: 20
     njev: 10

In [31]:
def grad_f(x):
  """Derivative of f: (4x^3 - 45x^2 + 160x - 180) / 10, element-wise."""
  cubic = 4*x**3 - 45*x**2 + 160*x - 180
  return (1/10) * cubic

In [32]:
# Curve of f and of its derivative over the dense grid xs.
f_line = go.Scatter(x=xs, y=f(xs), mode="lines", name="f")
derivative_line = go.Scatter(x=xs, y=grad_f(xs),
                             mode="lines", name="df", line={"dash":"dash"})

# Approximate locations where the derivative is zero. They are drawn as
# markers on the y = 0 line; without an explicit y and mode the trace would
# be plotted against the point index (y = 0, 1, 2) and joined by a line.
roots = np.array([2.3927, 3.5309, 5.3263])
root_markers = go.Scatter(x=roots, y=np.zeros_like(roots), mode="markers",
                          name="df = zero", marker_size = 12)

fig = go.Figure()
fig.add_traces([f_line, derivative_line, root_markers])
fig.update_layout(font_size=20, yaxis_range=[-1, 3])
fig.update_layout(autosize=False, width=800, height=600)
fig.show()

In [33]:
# Illustrate one un-scaled gradient step from x = 4.3: the arrow trace runs
# at constant height f(x) from x to x - grad_f(x).
x = 4.3
fig = go.Figure()
fig.add_trace(f_line)

fig.add_trace(go.Scatter(
    x = [x, x - grad_f(x)], y = [f(x), f(x)],
    marker = dict(size=10, symbol='arrow-bar-up', angleref='previous'),
    name="arrow-bar-up"
    ))
# Mark the starting point itself.
fig.add_trace(go.Scatter(x=[x], y=[f(x)],
                         marker_color="green", marker_size=12,
                         mode="markers", name="x0"))
fig.update_layout(font_size=20, yaxis_range=[-1,3])
fig.update_layout(autosize=False, width=800, height=600)
fig

In [34]:
def take_one_step(x, derivative):
  """Move from `x` against the derivative, with an implicit step size of 1."""
  return x - derivative(x)

In [35]:
# Ten gradient steps from x = 4.0 with no learning rate (step = -grad_f(x)).
# The printed path keeps bouncing between roughly 5.1 and 5.5 rather than
# settling on a single value.
x = 4.0
steps = [x]
for i in range(10):
  x = take_one_step(x, grad_f)
  steps.append(x)

print(steps)

[4.0, 4.4, 5.0464000000000055, 5.496730601062393, 5.0808624852305115, 5.489980392167775, 5.092824872119241, 5.486755386070718, 5.0984728528436225, 5.485072693208349, 5.101402551267881]


In [36]:
def plot_steps(steps, f = f, f_line = f_line):
  """Plot an optimization path on top of a curve.

  Parameters
  ----------
  steps : sequence of numbers
      Successive iterates, in the order they were visited.
  f : callable
      Function used to compute the height of each iterate. The default is
      the module-level `f` as it existed when this def was executed.
  f_line : plotly trace
      Pre-built trace of the curve to draw underneath the path. The default
      is likewise bound when this def is executed.

  Returns
  -------
  The plotly Figure containing the curve, a dashed red arrowed path and a
  red marker at every iterate.
  """
  # Evaluate f once per step and share the heights between both traces.
  path_heights = [f(s) for s in steps]

  fig = go.Figure()
  fig.add_trace(f_line)
  # Dashed path whose arrow markers point from each iterate to the next.
  fig.add_trace(go.Scatter(x= steps, y = path_heights,
                           mode = "lines+markers", line = {"dash" : "dash", "color": "red"},
                           name = "Path",
                           marker_symbol="arrow",
                           marker_angleref="previous",
                           marker_standoff=4,
                           marker_size=16))
  # Plain dots on the iterates themselves; hidden from the legend so "Path"
  # appears only once.
  fig.add_trace(go.Scatter(x=steps, y=path_heights,
                           mode = "markers",
                           name = "Path",
                           marker_color = "red",
                           showlegend=False,
                           marker_size=8))
  fig.update_layout(font_size=20, autosize=False, width=800, height=600)

  return fig

In [37]:
# Draw the path stored in `steps` on top of f.
plot_steps(steps)

In [38]:
def take_one_step_lr(x, alpha, derivative):
  """One gradient step from `x`, scaled by the learning rate `alpha`."""
  return x - alpha * derivative(x)

In [39]:
# Same experiment with a learning rate of 0.3 and 15 steps; each iterate is
# printed as it is produced, then the whole path is plotted.
x = 4.0
steps = [x]
for i in range(15):
  x = take_one_step_lr(x, alpha=0.3, derivative=grad_f)
  print(x)
  steps.append(x)

plot_steps(steps)

4.12
4.267296639999997
4.442725838159953
4.640926244829146
4.846183704850335
5.032118544823421
5.17201478493924
5.2564844894138165
5.297911492494514
5.315427176589101
5.322260602055931
5.324832983472768
5.325787650752968
5.3261400404400865
5.326269854338316


In [40]:
def gradient_descent(grad_f, initial_guess, alpha, n):
  """Run fixed-step gradient descent and return every iterate.

  Parameters
  ----------
  grad_f : callable
      Gradient of the objective; called with the current guess.
  initial_guess : number or array
      Starting point; it is the first entry of the result.
  alpha : float
      Learning rate applied to every step.
  n : int
      Total number of iterates to return, including the initial guess.

  Returns
  -------
  np.ndarray stacking the iterates in the order they were produced.
  """
  trajectory = [initial_guess]
  while len(trajectory) < n:
    latest = trajectory[-1]
    trajectory.append(latest - alpha * grad_f(latest))
  return np.array(trajectory)

In [41]:
# Start at 1.6 with alpha = 0.75. n = 20 counts the initial guess, so 19
# updates are taken.
trajectory = gradient_descent(grad_f, 1.6, 0.75, 20)
print(trajectory)
plot_steps(trajectory)

[1.6        3.3112     3.18920918 3.01472352 2.79207742 2.56776716
 2.42826486 2.39421613 2.39274816 2.39274798 2.39274798 2.39274798
 2.39274798 2.39274798 2.39274798 2.39274798 2.39274798 2.39274798
 2.39274798 2.39274798]


In [42]:
# Same settings from a different start (6). The printed trajectory ends
# near 5.326, not at the value reached by the run started at 1.6.
trajectory = gradient_descent(grad_f, 6, 0.75, 20)
print(trajectory)
plot_steps(trajectory)

[6.         4.2        4.6086     5.12279483 5.38817984 5.28497822
 5.34793725 5.31315502 5.33375146 5.32197109 5.32885604 5.32488006
 5.32719254 5.32585303 5.32663079 5.32617982 5.32644152 5.32628973
 5.32637779 5.32632671]


In [43]:
# Switch to the tips dataset. Note that `df` is rebound here: from this
# cell on it no longer refers to the penguins frame.
df = sns.load_dataset("tips")
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [44]:
# Single feature and observed response for the one-parameter tip model.
# `x` is rebound here from the scalar used in earlier cells to a column of df.
x = df["total_bill"]
y_obs = df["tip"]

In [45]:
def mse_single_arg(theta_1):
  """Mean squared error of the no-intercept model y_hat = theta_1 * x.

  Reads the module-level `x` (feature) and `y_obs` (observed response) at
  call time.
  """
  errors = theta_1 * x - y_obs
  return np.mean(errors ** 2)

# Evaluate the MSE on 100 evenly spaced theta_1 values in [-1.5, 1] and
# draw the resulting loss curve.
thetas = np.linspace(-1.5, 1, 100)

mse_line = go.Scatter(x = thetas, y = [mse_single_arg(theta_1) for theta_1 in thetas], mode = "lines", name = "MSE")
fig = go.Figure()
fig.add_trace(mse_line)
fig.update_layout(autosize=False, width=800, height=600, xaxis_title="theta_1", yaxis_title="MSE")

In [46]:
def grad_mse_single_arg(theta_1):
  """Derivative of mse_single_arg with respect to theta_1.

  Reads the module-level `x` and `y_obs` at call time.
  """
  residuals = y_obs - theta_1 * x
  return np.mean(-2 * residuals * x)

In [47]:
# Minimize the one-parameter MSE by gradient descent from theta_1 = -0.5
# with a small step size (alpha = 1e-4); n = 100 includes the start point.
trajectory = gradient_descent(grad_mse_single_arg, -0.5, 0.0001, 100)
print(f"Final guess for theta_1: {trajectory[-1]}")
plot_steps(trajectory, mse_single_arg, mse_line)

Final guess for theta_1: 0.14369554654231262


In [48]:
# Two-column design matrix: a constant bias column followed by total_bill.
# .assign() works on a copy, so `df` itself is not modified.
tips_with_bias = df.assign(bias=1)[["bias", "total_bill"]]
tips_with_bias.head()

Unnamed: 0,bias,total_bill
0,1,16.99
1,1,10.34
2,1,21.01
3,1,23.68
4,1,24.59


In [49]:
# Design matrix (bias column + total_bill) and response. mse_loss and the
# one-argument mse_gradient read these module-level names.
X = tips_with_bias
y = df["tip"]

In [50]:
def mse_loss(theta):
  """Mean squared error of the linear model X @ theta against y.

  Reads the module-level design matrix `X` and response `y` at call time.
  """
  errors = y - X @ theta
  return np.mean(errors ** 2)

In [53]:
import plotly.graph_objects as go

# Grid of candidate (theta0, theta1) pairs: 20 values per axis.
uvalues = np.linspace(-1, 5, 20)
vvalues = np.linspace(-0.1, 0.35, 20)
(u,v) = np.meshgrid(uvalues, vvalues)
# Row 0 holds theta0 and row 1 holds theta1, so each column is one pair.
thetas = np.vstack((u.flatten(), v.flatten()))

# Loss at every grid point (one mse_loss call per pair).
MSE = np.array([mse_loss(t) for t in thetas.T])

loss_surface = go.Surface(x=u,
                          y=v, z=np.reshape(MSE, u.shape),
                          contours = {"z": {"show": True, "start": 0, "end": 50, "size":2, "color":"white"}})
# Best grid point, highlighted in red. This is the minimum over the grid,
# not the exact minimizer of the loss.
ind = np.argmin(MSE)
optimal_point = go.Scatter3d(name="Optimal Point",
                             x = [thetas.T[ind, 0]], y = [thetas.T[ind, 1]],
                             z = [MSE[ind]],
                             marker = dict(size=10, color="red"))

fig = go.Figure(data=[loss_surface, optimal_point])
fig.update_layout(scene = dict(
    xaxis_title = "theta0",
    yaxis_title = "theta1",
    zaxis_title = "MSE"), autosize=False, width=800, height=600)

# No trailing comma here: `fig.show(),` turns the cell's value into the
# tuple (None,), which Jupyter then echoes as output.
fig.show()


(None,)

In [54]:
# Top-down (contour) view of the same MSE grid. u[0] and v[:, 0] are the
# 1-D theta0 / theta1 axes of the meshgrid.
contour = go.Contour(x=u[0], y=v[:, 0], z=np.reshape(MSE, u.shape),
                     contours=dict(start=0, end=70, size=2))
fig = go.Figure(contour)
fig.update_layout(
    xaxis_title = "theta0",
    yaxis_title = "theta1", autosize=False, width=800, height=600)

fig.show()

In [55]:
def mse_gradient(theta):
  """Gradient of the MSE for the model theta[0] + theta[1] * x1.

  Reads the module-level `X` (second column is used as x1) and `y` at call
  time. Returns np.array([d/dtheta0, d/dtheta1]).
  """
  feature = X.iloc[:, 1]
  residuals = y - (theta[0] + theta[1]*feature)
  return np.array([np.mean(-2 * residuals),
                   np.mean(-2 * residuals * feature)])

In [56]:
# Batch gradient descent on both parameters, starting from (1, 0.5); the
# last ten iterates are shown.
# NOTE(review): this relies on the one-argument mse_gradient(theta). A later
# cell redefines mse_gradient as (theta, X, y); re-running this cell after
# that one would call it with a single argument and fail.
guesses = gradient_descent(mse_gradient, np.array([1, .5]), 0.001, 10000)

pd.DataFrame(guesses, columns=["theta_0", "theta_1"]).tail(10)

Unnamed: 0,theta_0,theta_1
9990,0.922487,0.104931
9991,0.922486,0.104931
9992,0.922485,0.104931
9993,0.922484,0.104931
9994,0.922484,0.104931
9995,0.922483,0.104931
9996,0.922482,0.104931
9997,0.922481,0.104931
9998,0.922481,0.104931
9999,0.92248,0.104932


In [57]:
# Let scipy minimize the two-parameter MSE from (0, 0). No jac= is given,
# so scipy has to approximate the gradient itself.
minimize(mse_loss, x0 = [0,0])

  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 1.0360194420114932
        x: [ 9.203e-01  1.050e-01]
      nit: 3
      jac: [-4.470e-08 -2.980e-08]
 hess_inv: [[ 2.980e+00 -1.253e-01]
            [-1.253e-01  6.335e-03]]
     nfev: 15
     njev: 5

In [58]:
# Same minimization, supplying the analytic gradient via jac=.
# NOTE(review): needs the one-argument mse_gradient(theta); a later cell
# rebinds the name to a (theta, X, y) version, so run order matters.
minimize(mse_loss, x0 = [0,0], jac=mse_gradient)

  message: Optimization terminated successfully.
  success: True
   status: 0
      fun: 1.036019442011377
        x: [ 9.203e-01  1.050e-01]
      nit: 3
      jac: [ 1.456e-16 -5.358e-15]
 hess_inv: [[ 2.980e+00 -1.253e-01]
            [-1.253e-01  6.335e-03]]
     nfev: 5
     njev: 5

In [59]:
def mse_gradient(theta, X, y):
  """Gradient of the MSE for the model theta[0]*x0 + theta[1]*x1.

  Parameters
  ----------
  theta : indexable with two entries
  X : DataFrame-like supporting .iloc; columns 0 and 1 are x0 and x1
  y : response aligned with the rows of X

  Returns
  -------
  np.ndarray [d/dtheta0, d/dtheta1].
  """
  first_col, second_col = X.iloc[:, 0], X.iloc[:, 1]
  residuals = y - theta[0]*first_col - theta[1]*second_col
  return np.array([np.mean(-2 * residuals * first_col),
                   np.mean(-2 * residuals * second_col)])

In [60]:
def sgd(grad, X, y, initial_theta, eta = 0.3, max_iter=5000, batch_size=50,
        random_state=None):
  """Minibatch stochastic gradient descent with a 1/t step-size decay.

  Parameters
  ----------
  grad : callable
      Called as grad(theta, X_batch, y_batch); returns the gradient at theta.
  X : pandas.DataFrame
      Design matrix; batches are drawn from its rows with X.sample.
  y : pandas.Series
      Response sharing X's index; batch rows are looked up with y.loc.
  initial_theta : array-like
      Starting parameters; also the first entry of the returned list.
  eta : float
      Base learning rate; the step taken at iteration t is eta / t.
  max_iter : int
      Iterations run over t = 1 .. max_iter - 1, so the returned list has
      max_iter entries including the starting point.
  batch_size : int
      Number of rows sampled (without replacement) per step.
  random_state : None, int, or np.random.RandomState, optional
      Seed or generator for the batch sampling. None keeps the previous
      behavior of drawing from NumPy's global random state.

  Returns
  -------
  list of parameter vectors, one per iterate.
  """
  # Build the generator once: handing the same integer seed to every
  # X.sample call would draw the identical batch at every step.
  if random_state is None or isinstance(random_state, np.random.RandomState):
    rng = random_state
  else:
    rng = np.random.RandomState(random_state)

  theta = initial_theta
  thetas = [theta]
  for t in range(1, max_iter):
    X_sample = X.sample(batch_size, random_state=rng)
    # Pick the responses belonging to the sampled rows by index label.
    y_sample = y.loc[X_sample.index]
    theta = theta - eta/t * grad(theta, X_sample, y_sample)
    thetas.append(theta)
  return thetas

In [61]:
# sgd picks its batches with DataFrame.sample, which falls back to NumPy's
# global random state here; seed it so this cell produces the same
# trajectory on every run.
np.random.seed(42)

# One-row batches (batch_size=1) from the same start and base step size as
# the batch gradient-descent run; the last five iterates are shown.
thetas = sgd(mse_gradient, X, y,
             initial_theta = np.array([1, .5]),
             eta = 0.001,
             max_iter = 10000,
             batch_size=1)
thetas[-5:]

[array([0.98310537, 0.10143783]),
 array([0.98310539, 0.10143816]),
 array([0.98310603, 0.10145284]),
 array([0.98310595, 0.10145159]),
 array([0.98310603, 0.1014531 ])]