In [66]:
import numpy as np
from typing import Any, Union, List, Tuple

import numpy.typing as npt
import plotly.graph_objects as go
import plotly.express as px

In [3]:
# Scratch check that a Python float literal is an instance of `float`.
isinstance(4.0, float)

True

In [4]:
def assert_np_shape(a, shape: Union[List, Tuple]) -> bool:
    """
    assert_np_shape check that an array matches a shape specification

    despite the name this function does not raise; it returns a bool so the
    caller can wrap it in an ``assert``

    :param a: object exposing a ``.shape`` tuple (e.g. np.array)
    :type a: np.array
    :param shape: expected shape, one entry per dimension; integer entries
        (Python or NumPy integers) must match exactly, any other entry
        (e.g. the string "n") is a wildcard that matches any size
    :type shape: Union[List, Tuple]
    :return: True when the rank and every integer entry match, else False
    :rtype: bool
    """
    if len(a.shape) != len(shape):
        return False

    # NumPy integer scalars are not instances of `int`; accept them explicitly
    # so they are compared instead of being silently treated as wildcards.
    return all(
        actual == expected
        for actual, expected in zip(a.shape, shape)
        if isinstance(expected, (int, np.integer))
    )


# Smoke tests for assert_np_shape on a 2x2 array.
a = np.array([[1, 2], [3, 4]])
assert assert_np_shape(a, [2, 2])  # exact match
assert assert_np_shape(a, [2, "n"])  # non-integer entry acts as a wildcard
assert not assert_np_shape(a, [2])  # rank mismatch must be rejected

# Part 1

## 1.1 Linear Regression

### Q1

In [191]:
# x as a column vector: the integers 1..4 stacked into shape (4, 1)
x = np.arange(1, 5).reshape(-1, 1)
print(x, x.shape)

[[1]
 [2]
 [3]
 [4]] (4, 1)


In [192]:
# y as a column vector: the four target values stacked into shape (4, 1)
y = np.array([3, 2, 0, 5]).reshape(-1, 1)
print(y, y.shape)

[[3]
 [2]
 [0]
 [5]] (4, 1)


In [193]:
def make_linear_X_matrix(x, k: int):
    """
    make_linear_X_matrix build the polynomial design matrix X of shape (m, k)

    column j of X holds x^j, so the number of columns (and the highest
    power, k-1) grows with k

    :param x: np.array of shape (m,1)
    :type x: np.array
    :param k: number of basis functions, i.e. number of columns of X
        k=1 {1}
        k=2 {1, x}
        k=3 {1, x, x^2}
        k=4 {1, x, x^2, x^3}
    :type k: int
    :return: Matrix X of shape (m,k) with float entries
    :rtype: np.array
    """

    assert assert_np_shape(x, ["n", 1])

    # Raise to powers in floating point: with an integer-typed x, np.power
    # works in fixed-width integers and can wrap silently for large exponents.
    base = x[:, 0].astype(float)

    X = np.ones(shape=(x.shape[0], k))

    for col in range(k):
        X[:, col] = np.power(base, col)

    assert assert_np_shape(X, ["n", k])
    return X


# Example: design matrix for k=3 on the current x; the printed columns are x^0, x^1, x^2.
X = make_linear_X_matrix(x, k=3)

print(X, X.shape)

[[ 1.  1.  1.]
 [ 1.  2.  4.]
 [ 1.  3.  9.]
 [ 1.  4. 16.]] (4, 3)


In [194]:
def write_equation(k: int, W) -> str:
    """
    write_equation format the fitted polynomial as a readable string

    the result looks like "a0 +a1 x^1 -a2 x^2 ..." with every coefficient
    printed to two decimal places and an explicit sign on each term after
    the constant

    :param k: (k-1) is the maximum power of x
    :type k: int
    :param W: Weight vector has the shape (k, 1)
    :type W: np.array
    :return: equation as a string
    :rtype: str
    """
    assert assert_np_shape(W, [k, 1])
    coeffs = W[:, 0]

    terms = [f"{coeffs[0]:.02f}"]
    # The "+" format flag emits exactly one sign per term. Testing `w < 0` by
    # hand printed "+-0.00" for a negative-zero weight.
    terms.extend(f"{coeffs[x_pow]:+.02f} x^{x_pow}" for x_pow in range(1, k))
    return " ".join(terms)

In [195]:
def calculate_MSE(orignal_y, predicted_y) -> float:
    """
    calculate_MSE mean squared error between targets and predictions

    the squared element-wise differences are summed and divided by the
    number of elements in orignal_y; no rounding is applied

    :param orignal_y: target values; must have the same shape as predicted_y
    :type orignal_y: np.array
    :param predicted_y: predicted values; must have the same shape as orignal_y
    :type predicted_y: np.array
    :return: the mean squared error
    :rtype: float
    """
    assert assert_np_shape(orignal_y, predicted_y.shape)

    residuals = orignal_y - predicted_y
    sum_squared_error = np.sum(residuals**2)

    return sum_squared_error / np.size(orignal_y)


# Sanity check on 1-D inputs: the squared errors are 0.04, 0.01 and 0.25, so the
# mean is 0.1 (the displayed value differs only by floating-point rounding).
calculate_MSE(
    orignal_y=np.array([1, 1.3, 1]), predicted_y=np.array([1.2, 1.4, 1.5])
)  # return 0.1

0.09999999999999998

#### a

#### b
 
#### c

In [196]:
# Scatter of the raw (x, y) data; the fitted polynomials are added as traces below.
org_pts = go.Figure(
    data=go.Scatter(
        x=x[:, 0],
        y=y[:, 0],
        mode="markers",
        hovertemplate="<br>x=%{x}</br>y=%{y}<extra></extra>",
        name="data points",
    )
)

org_pts.update_layout(title="Data points")
org_pts.show()

MSE_k = []

# Dense grid used only for drawing the fitted curves. It does not depend on k,
# so it is built once instead of on every iteration.
pts_x = np.linspace(0, 6, 100, dtype=float).reshape((100, 1))

for k in range(1, 5):
    X = make_linear_X_matrix(x, k=k)

    # Least-squares weights via the normal equations, with a pseudo-inverse of X^T X.
    W = np.linalg.pinv(X.T @ X) @ X.T @ y

    pts_X = make_linear_X_matrix(pts_x, k=k)
    pts_y = pts_X @ W

    equation = write_equation(k, W)

    # Training predictions reuse the design matrix built at the top of the loop.
    predicted_y = X @ W

    MSE_k.append(calculate_MSE(y, predicted_y))
    print(f"k={k} : ", equation)
    print(f"k={k} : MSE {MSE_k[-1]}")

    org_pts.add_trace(
        go.Scatter(
            x=pts_x[:, 0], y=pts_y[:, 0], name="k=" + str(k), hovertemplate=f"k={k}"
        )
    )


org_pts.update_layout(
    title="Fitted polynomials", yaxis_range=[-5, 8], xaxis_range=[0, 5]
)

org_pts.show()

# Training MSE for each basis dimension k = 1..4.
MSE_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 5, 1),
        y=MSE_k,
        mode="markers",
        hovertemplate="<br>x=%{x}</br>y=%{y}<extra></extra>",
    )
)
MSE_vs_k.update_layout(
    title="MSE vs k",
    xaxis_title="k",
    yaxis_title="MSE",
)
MSE_vs_k.show()

k=1 :  2.50
k=1 : MSE 3.25
k=2 :  1.50 +0.40 x^1
k=2 : MSE 3.05
k=3 :  9.00 -7.10 x^1 +1.50 x^2
k=3 : MSE 0.7999999999999984
k=4 :  -5.00 +15.17 x^1 -8.50 x^2 +1.33 x^3
k=4 : MSE 1.1533033153905203e-22


### Q2

#### a.i

In [175]:
def g_func(x, sigma: float):
    """
    g_func evaluate sin^2(2 pi x) plus Gaussian noise

    the noise term is drawn element-wise with np.random.normal (mean 0,
    standard deviation sigma), so it consumes numpy's global random state

    :param x: column vector, shape (n,1)
    :type x: np.array
    :param sigma: standard deviation of the normal distribution for the noise
    :type sigma: float
    :return: vector with the same shape as x, (n,1)
    :rtype: np.array
    """

    assert assert_np_shape(x, ["n", 1])

    noise = np.random.normal(loc=0.0, scale=sigma, size=x.shape)
    clean_signal = np.sin(2 * np.pi * x) ** 2
    g = clean_signal + noise

    assert assert_np_shape(g, x.shape)

    return g

In [179]:
# Seed numpy's global RNG so this sample, and every later draw made through
# np.random (including the noise inside g_func), is reproducible when the
# notebook is run top to bottom on a fresh kernel.
np.random.seed(0)

# 30 training inputs from np.random.random_sample and their noisy targets (sigma = 0.07).
x = np.random.random_sample((30, 1))
g = g_func(x, 0.07)


# Scatter of the noisy training samples.
sin2_points = go.Figure(
    data=go.Scatter(
        x=x[:, 0],
        y=g[:, 0],
        mode="markers",
        name="data point",
        hovertemplate="<br>x=%{x}</br>y=%{y}<extra></extra>",
    )
)

In [180]:
# Noise-free reference curve: g evaluated with sigma = 0 on 100 evenly spaced
# points of [0, 1], arranged as a column vector.
x_pts = np.linspace(0, 1, 100).reshape(-1, 1)
g_pts = g_func(x_pts, 0.0)

sin2_points.add_trace(
    go.Scatter(
        x=x_pts[:, 0],
        y=g_pts[:, 0],
        name="g(x) w/o noise",
    )
)
sin2_points.show()

#### a.ii

In [181]:
# Rebuild the scatter of noisy samples so re-running this cell starts from a
# figure without previously added traces.
sin2_points = go.Figure(
    data=go.Scatter(
        x=x[:, 0],
        y=g[:, 0],
        mode="markers",
        name="data point",
        hovertemplate="<br>x=%{x}</br>y=%{y}<extra></extra>",
    )
)

# Evaluation grid for the fitted curves; independent of k, so built once.
pts_x = np.linspace(0, 1, 100, dtype=float).reshape((100, 1))

for k in [2, 5, 10, 14, 15, 18, 30]:
    X = make_linear_X_matrix(x, k=k)

    W = np.linalg.pinv(X.T @ X) @ X.T @ g  # find W using original data points

    pts_X = make_linear_X_matrix(pts_x, k=k)
    pts_y = pts_X @ W

    sin2_points.add_trace(
        go.Scatter(
            x=pts_x[:, 0],
            y=pts_y[:, 0],
            name="k=" + str(k),
            hovertemplate=f"k={k}",
            mode="lines",
        )
    )

# Title and axis labels so the figure can be read on its own.
sin2_points.update_layout(
    title="Polynomial fits for selected k",
    xaxis_title="x",
    yaxis_title="g(x)",
    yaxis_range=[-0.5, 1.5],
    xaxis_range=[-0.1, 1.1],
)
sin2_points.show()

#### b


In [182]:
# Training MSE of the polynomial fit for every basis dimension k = 1..18.
train_err = []

for k in range(1, 19):
    X = make_linear_X_matrix(x, k=k)

    W = np.linalg.pinv(X.T @ X) @ X.T @ g  # calculate W using original points

    # Predictions on the same points that were used for fitting.
    predicted_y = X @ W

    train_err.append(calculate_MSE(g, predicted_y))

In [183]:
# Plot the natural log of the training MSE against k.
train_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),  # k = 1..18, the same range the errors were computed over
        y=np.log(train_err),
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

train_err_vs_k.update_layout(title="Ln(train_error) vs K")
train_err_vs_k.show()

#### c

In [184]:
# Held-out test set: 1000 inputs from np.random.random_sample with noisy
# targets from g_func (sigma = 0.07, the same value used for the training data).
test_x = np.random.random_sample((1000, 1))
test_g = g_func(test_x, 0.07)

In [185]:
# Test MSE for every basis dimension k = 1..18: fit on (x, g), evaluate on (test_x, test_g).
test_err = []

for k in range(1, 19):
    X = make_linear_X_matrix(x, k=k)

    W = np.linalg.pinv(X.T @ X) @ X.T @ g  # calculate W using original points

    # X is rebound to the design matrix of the test inputs from here on.
    X = make_linear_X_matrix(test_x, k=k)

    predicted_y = X @ W

    test_err.append(calculate_MSE(test_g, predicted_y))

In [186]:
# Plot the natural log of the test MSE against k.
test_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(test_err),
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

# Title and axis labels so the figure can be read on its own.
test_err_vs_k.update_layout(
    title="Ln(test_error) vs K",
    xaxis_title="k",
    yaxis_title="ln(MSE)",
)
test_err_vs_k.show()

#### d

In [187]:
# Repeat the train/test experiment 100 times for each k = 1..18.
# Rows index the run, columns index k - 1.
test_err = np.zeros((100, 18), dtype=float)
train_err = np.zeros((100, 18), dtype=float)

for k in range(1, 19):
    for step in range(100):
        # Fresh training set for this run. Run-local names are used so the
        # notebook-level `x`, `g`, `test_x` and `test_g` that earlier cells
        # rely on are not overwritten by this loop.
        x_run = np.random.random_sample((30, 1))
        g_run = g_func(x_run, 0.07)
        X = make_linear_X_matrix(x_run, k=k)

        W = np.linalg.pinv(X.T @ X) @ X.T @ g_run  # calculate W using this run's points
        predicted_y = X @ W

        train_err[step, k - 1] = calculate_MSE(g_run, predicted_y)

        # Fresh test set for this run, evaluated with the weights fitted above.
        test_x_run = np.random.random_sample((1000, 1))
        test_g_run = g_func(test_x_run, 0.07)

        X = make_linear_X_matrix(test_x_run, k=k)

        predicted_y = X @ W

        test_err[step, k - 1] = calculate_MSE(test_g_run, predicted_y)

In [188]:
# Collapse the run axis (axis 0): one mean training / test error per column.
train_err_avg = np.mean(train_err, axis=0)
test_err_avg = np.mean(test_err, axis=0)

In [189]:
# Plot the natural log of the run-averaged training MSE against k.
train_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(train_err_avg),
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

train_err_vs_k.update_layout(title="Ln(train_error) vs K")
train_err_vs_k.show()

In [190]:
# Plot the natural log of the run-averaged test MSE against k.
test_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(test_err_avg),
        # Same trace style as the averaged training-error plot.
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

# Title and axis labels so the figure can be read on its own.
test_err_vs_k.update_layout(
    title="Ln(test_error) vs K",
    xaxis_title="k",
    yaxis_title="ln(MSE)",
)
test_err_vs_k.show()

### Q3

In [128]:
def make_sin_X_matrix(x, k: int):
    """
    make_sin_X_matrix build the sine-basis design matrix X of shape (m, k)

    column j (0-based) of X holds sin((j + 1) * pi * x)

    :param x: np.array of shape (m,1)
    :type x: np.array
    :param k: number of sine basis functions, i.e. number of columns of X
    :type k: int
    :return: Matrix X of shape (m,k)
    :rtype: np.array
    """
    assert assert_np_shape(x, ["n", 1])

    # Every column is assigned below, so the initial contents are irrelevant.
    X = np.empty((x.shape[0], k))

    for harmonic in range(1, k + 1):
        X[:, harmonic - 1] = np.sin(harmonic * np.pi * x[:, 0])

    assert assert_np_shape(X, [x.size, k])
    return X


# Shape check with k=4. `x` is whatever the notebook last bound to that name;
# the printed (30, 4) shows it had 30 rows when this ran.
print(make_sin_X_matrix(x, 4).shape)

(30, 4)


#### a

In [130]:
# Draw a fresh training set for the sine-basis fits: 30 inputs and their noisy targets.
x = np.random.random_sample((30, 1))
g = g_func(x, 0.07)

In [133]:
# Scatter of the noisy training samples; the sine-basis fits are added as traces below.
sin2_points = go.Figure(
    data=go.Scatter(
        x=x[:, 0],
        y=g[:, 0],
        mode="markers",
        name="data point",
        hovertemplate="<br>x=%{x}</br>y=%{y}<extra></extra>",
    )
)

# Evaluation grid for the fitted curves; independent of k, so built once.
pts_x = np.linspace(0, 1, 100, dtype=float).reshape((100, 1))

for k in [2, 5, 10, 14, 15, 18]:
    X = make_sin_X_matrix(x, k=k)

    W = np.linalg.pinv(X.T @ X) @ X.T @ g  # find W using original data points

    pts_X = make_sin_X_matrix(pts_x, k=k)
    pts_y = pts_X @ W

    sin2_points.add_trace(
        go.Scatter(
            x=pts_x[:, 0],
            y=pts_y[:, 0],
            name="k=" + str(k),
            hovertemplate=f"k={k}",
            mode="lines",
        )
    )

# Title and axis labels so the figure can be read on its own.
sin2_points.update_layout(
    title="Sine-basis fits for selected k",
    xaxis_title="x",
    yaxis_title="g(x)",
    yaxis_range=[-0.5, 1.5],
    xaxis_range=[-0.1, 1.1],
)
sin2_points.show()

In [131]:
# Training MSE of the sine-basis fit for every basis dimension k = 1..18.
train_err = []

for k in range(1, 19):
    X = make_sin_X_matrix(x, k=k)

    W = np.linalg.pinv(X.T @ X) @ X.T @ g  # calculate W using original points

    # Predictions on the same points that were used for fitting.
    predicted_y = X @ W

    train_err.append(calculate_MSE(g, predicted_y))

In [132]:
# Plot the natural log of the sine-basis training MSE against k.
train_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(train_err),
        # NOTE(review): the other error plots in this notebook use "markers+lines";
        # confirm whether markers-only is intentional here.
        mode="markers",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

train_err_vs_k.update_layout(title="Ln(train_error) vs K")
train_err_vs_k.show()

#### b

In [135]:
# Held-out test set for the sine-basis fits: 1000 inputs and their noisy targets.
test_x = np.random.random_sample((1000, 1))
test_g = g_func(test_x, 0.07)

In [136]:
# Test MSE for every basis dimension k = 1..18: fit on (x, g), evaluate on (test_x, test_g).
test_err = []

for k in range(1, 19):
    X = make_sin_X_matrix(x, k=k)

    W = np.linalg.pinv(X.T @ X) @ X.T @ g  # calculate W using original points

    # X is rebound to the design matrix of the test inputs from here on.
    X = make_sin_X_matrix(test_x, k=k)

    predicted_y = X @ W

    test_err.append(calculate_MSE(test_g, predicted_y))

In [143]:
# Plot the natural log of the sine-basis test MSE against k.
test_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(test_err),
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

test_err_vs_k.update_layout(title="Log(test_err) vs k")

test_err_vs_k.show()

In [144]:
# Repeat the sine-basis train/test experiment 100 times for each k = 1..18.
# Rows index the run, columns index k - 1.
test_err = np.zeros((100, 18), dtype=float)
train_err = np.zeros((100, 18), dtype=float)

for k in range(1, 19):
    for step in range(100):
        # Fresh training set for this run. Run-local names are used so the
        # notebook-level `x`, `g`, `test_x` and `test_g` that earlier cells
        # rely on are not overwritten by this loop.
        x_run = np.random.random_sample((30, 1))
        g_run = g_func(x_run, 0.07)
        X = make_sin_X_matrix(x_run, k=k)

        W = np.linalg.pinv(X.T @ X) @ X.T @ g_run  # calculate W using this run's points
        predicted_y = X @ W

        train_err[step, k - 1] = calculate_MSE(g_run, predicted_y)

        # Fresh test set for this run, evaluated with the weights fitted above.
        test_x_run = np.random.random_sample((1000, 1))
        test_g_run = g_func(test_x_run, 0.07)

        X = make_sin_X_matrix(test_x_run, k=k)

        predicted_y = X @ W

        test_err[step, k - 1] = calculate_MSE(test_g_run, predicted_y)

In [145]:
# Collapse the run axis (axis 0): one mean training / test error per column.
train_err_avg = np.mean(train_err, axis=0)
test_err_avg = np.mean(test_err, axis=0)

In [146]:
# Plot the natural log of the run-averaged sine-basis training MSE against k.
train_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(train_err_avg),
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

train_err_vs_k.update_layout(title="Ln(train_error) vs K")
train_err_vs_k.show()

In [148]:
# Plot the natural log of the run-averaged sine-basis test MSE against k.
test_err_vs_k = go.Figure(
    data=go.Scatter(
        x=np.arange(1, 19, 1),
        y=np.log(test_err_avg),
        mode="markers+lines",
        name="data point",
        hovertemplate="<br>k=%{x}</br>y=%{y}<extra></extra>",
    )
)

# Title and axis labels so the figure can be read on its own.
test_err_vs_k.update_layout(
    title="Ln(test_error) vs K",
    xaxis_title="k",
    yaxis_title="ln(MSE)",
)
test_err_vs_k.show()

### K-NN

In [13]:
# 100 random 2-D points from np.random.random; print every 20th row and the shape.
x = np.random.random(size=(100, 2))
print(x[::20, :], x.shape)

[[0.80224664 0.01671817]
 [0.46352839 0.70010123]
 [0.16514985 0.85272295]
 [0.71622864 0.59354046]
 [0.62479305 0.49346975]] (100, 2)


In [29]:
# Random integer labels from np.random.randint(0, 2) as a (100, 1) column vector;
# print every 20th row and the shape.
y = np.random.randint(0, 2, size=(100, 1))

print(y[::20, :], y.shape)

[[0]
 [0]
 [0]
 [0]
 [0]] (100, 1)


In [44]:
# numpy and plotly.graph_objects are already imported in the notebook's first
# cell, so they are not re-imported here.

# Colour each point by its binary label: 0 -> black, 1 -> red.
color_map = {0: "black", 1: "red"}
colors = [color_map[val] for val in y[:, 0]]

# Scatter of the two input coordinates.
scatter = go.Scatter(
    x=x[:, 0],
    y=x[:, 1],
    mode="markers",
    marker=dict(color=colors),
    hovertemplate="<b>X1</b>: %{x:.2f}<br><b>X2</b>: %{y:.2f}<br><extra></extra>",
)

layout = go.Layout(title="|S|= 100", xaxis=dict(title="x_1"), yaxis=dict(title="x_2"))

fig = go.Figure(data=[scatter], layout=layout)
fig.show()

In [119]:
# NOTE(review): scratch assignment; `test` is rebound to a function further down
# and this value is not read anywhere in the visible notebook.
test = 4

In [89]:
# NOTE(review): scratch assignment; rebinds `a`, which earlier held the 2x2
# array used for the assert_np_shape checks.
a = 12

In [90]:
# NOTE(review): scratch assignment; `b` is not read anywhere in the visible notebook.
b = 23

In [91]:
# NOTE(review): scratch assignment; `c` is not read anywhere in the visible notebook.
c = 34

In [92]:
# NOTE(review): scratch assignment; `d` is not read anywhere in the visible notebook.
d = 34

In [93]:
# NOTE(review): scratch assignment; `e` is not read anywhere in the visible notebook.
e = 23

In [94]:
# NOTE(review): scratch assignment; `f` is not read anywhere in the visible notebook.
f = 23

In [15]:
from nptyping import NDArray, Int, Shape


def test(a: NDArray[Shape["2, 2"], Int]):
    """Print ``a``; the nptyping annotation describes a 2x2 integer array."""
    print(a)


# A 1-D float array of length 5 is still printed (see the output below), so the
# annotation is not checked when the function is called.
test(np.array([1, 1, 2, 3.8, 3]))

[1.  1.  2.  3.8 3. ]


In [18]:
# Explicit isinstance check of a 2x2 integer array against the nptyping type.
isinstance(np.array([[1, 2], [3, 4]]), NDArray[Shape["2, 2"], Int])

True