In [None]:
# Question: 1
# Split (speaker-disjoint, stratified)

# Train: 60 speakers

# Validation (Dev-GEN): 20 speakers

# Test (Test-GEN): 20 speakers

# (Any 60/20/20 split is fine; choose speakers so the phone inventory and demographics are balanced across splits.)

# If you have session metadata (5 days/person), keep all days of each speaker together within a split to avoid session leakage (same mic/room/noise profile).

# Sampling & balance

# Within each split, ensure phone coverage is adequate (e.g., class-balanced sampling or loss reweighting for the 44 phones).

# Use speaker-balanced batching during training (draw mini-batches across many speakers, not dominated by a few).

# Leakage controls

# Compute any global transforms (e.g., feature normalization, CMVN, PCA) on train only.

# Hyperparameter selection and early stopping use Dev-GEN only.

# Final numbers reported on Test-GEN once.

In [None]:
# Question: 2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Toy training set: three positive and two negative 2-D points.
pos = np.array([[1, 2], [1, 4], [5, 4]])
neg = np.array([[3, 1], [3, 2]])

# Draw both classes on a single explicit Axes.
fig_pts, ax_pts = plt.subplots(figsize=(6, 5))
ax_pts.scatter(pos[:, 0], pos[:, 1], color='blue', label='Positive (+)')
ax_pts.scatter(neg[:, 0], neg[:, 1], color='red', label='Negative (-)')

# Tag every point with its class prefix and 1-based index (P1, P2, ..., N1, ...),
# nudged slightly to the right so the text does not sit on the marker.
for prefix, group in (('P', pos), ('N', neg)):
    for rank, (px, py) in enumerate(group, start=1):
        ax_pts.text(px + 0.1, py, prefix + str(rank))

ax_pts.set_title("1-NN decision regions (conceptual)")
ax_pts.set_xlabel("Feature 1")
ax_pts.set_ylabel("Feature 2")
ax_pts.legend()
ax_pts.grid(True)
plt.show()

from sklearn.preprocessing import MinMaxScaler
from scipy.spatial import distance

# Training points whose first feature is on a much larger scale than the second.
X = np.array([[100, 2], [100, 4], [500, 4], [300, 1], [300, 2]])
y = np.array(['+', '+', '+', '-', '-'])
query = np.array([[500, 1]])


def _distances_to(target, rows):
    """Return the Euclidean distance from `target` to every row, as a list."""
    return [distance.euclidean(target, row) for row in rows]


# 1-NN on the raw features: the large-scale first feature dominates the distance.
dist_raw = _distances_to(query[0], X)
print("Nearest (before scaling):", y[np.argmin(dist_raw)])

# Rescale each feature to [0, 1]; the scaler is fitted on the training points
# only and then applied unchanged to the query.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)
query_scaled = scaler.transform(query)

# 1-NN again, now with both features contributing on a comparable scale.
dist_scaled = _distances_to(query_scaled[0], X_scaled)
print("Nearest (after scaling):", y[np.argmin(dist_scaled)])

# Example: distance using only observed features
def masked_distance(a, b):
    """Root-mean-square distance over the features observed in both vectors.

    A feature counts as observed when it is not NaN in either input. The
    Euclidean norm of the difference over those shared features is divided by
    sqrt(number of shared features), so vectors with different amounts of
    missing data stay comparable.

    Parameters
    ----------
    a, b : array-like of numbers
        Feature vectors of the same length. Lists, tuples and integer arrays
        are accepted; both are converted to float arrays so NaN detection and
        boolean-mask indexing work for any of them.

    Returns
    -------
    float
        The normalized distance, or ``np.inf`` when the two vectors share no
        observed feature.
    """
    # Coerce up front: plain Python sequences cannot be indexed with a boolean
    # mask, so without this only ndarray inputs would work.
    a = np.asarray(a, dtype=float)
    b = np.asarray(b, dtype=float)

    mask = ~np.isnan(a) & ~np.isnan(b)
    n_shared = mask.sum()
    if n_shared == 0:
        # Nothing to compare on: treat the pair as infinitely far apart.
        return np.inf
    return np.linalg.norm(a[mask] - b[mask]) / np.sqrt(n_shared)

# Query whose first feature is missing: only the second feature is compared.
test = np.array([np.nan, 2])
for row_idx in range(min(len(X), len(y))):
    print(y[row_idx], masked_distance(test, X[row_idx]))

# Training data: positives are labelled +1.0, negatives -1.0.
pos = np.array([[1, 2], [1, 4], [5, 4]])
neg = np.array([[3, 1], [3, 2]])
X = np.concatenate((pos, neg), axis=0)
y = np.concatenate((np.full(len(pos), 1.0), np.full(len(neg), -1.0)))

# 1-NN classifier with Euclidean distance.
knn = KNeighborsClassifier(n_neighbors=1, metric="euclidean").fit(X, y)

# Dense 400x400 grid covering the data with a margin of 1 on every side;
# the classifier is evaluated at each grid node to get the decision regions.
x_min, y_min = X.min(axis=0) - 1
x_max, y_max = X.max(axis=0) + 1
grid_x = np.linspace(x_min, x_max, 400)
grid_y = np.linspace(y_min, y_max, 400)
xx, yy = np.meshgrid(grid_x, grid_y)
grid_points = np.column_stack((xx.ravel(), yy.ravel()))
Z = knn.predict(grid_points).reshape(xx.shape)

# Single figure, matplotlib only, default colors; classes differ by marker.
fig_regions, ax_regions = plt.subplots(figsize=(6, 5))
# Shaded background = predicted class at each grid node (the decision regions).
ax_regions.contourf(xx, yy, Z, alpha=0.25)
for group, marker, label in (
    (pos, 'o', 'Positive (+)'),
    (neg, 'x', 'Negative (-)'),
):
    ax_regions.scatter(group[:, 0], group[:, 1], marker=marker, label=label)
ax_regions.set_xlabel("x1")
ax_regions.set_ylabel("x2")
ax_regions.legend()
ax_regions.grid(True)
ax_regions.set_title("1-NN decision boundary (true regions)")
plt.show()

# Classify a few unseen points and show each one next to its predicted label.
new_pts = np.array([[0.5, 2.0], [3.0, 1.6], [4.8, 3.9]])
preds = knn.predict(new_pts)
[(tuple(point), predicted) for point, predicted in zip(new_pts, preds)]


In [None]:
# Question: 3
##1
# Evaluating h(x) on each (x, y) in D_TR and D_TE will give predictions
# that can be compared to the true labels to compute empirical errors.
#
# However, this evaluation is not strictly necessary to *know*
# that test error will likely be higher.
#
# Reasoning:
# - The Perceptron is trained specifically to minimize training mistakes.
# - Therefore, its performance (accuracy) on D_TR will almost always be
#   better than or equal to its performance on D_TE.
# - So even before testing, we know:
#        Expected(Test_Error) ≥ Expected(Training_Error)
#
# Still, computing h(x) for both sets is useful for *quantitative comparison*,
# i.e., measuring the actual error gap.
#
# Example (conceptual):
#    train_pred = sign(w @ x_train.T)
#    test_pred  = sign(w @ x_test.T)
#    train_error = np.mean(train_pred != y_train)
#    test_error  = np.mean(test_pred  != y_test)
#    print("Train error:", train_error, "Test error:", test_error)

##2
# The Perceptron algorithm updates weights only when a training sample
# is *misclassified*, i.e., when y_i * (w · x_i) ≤ 0.
#
# Pseudocode reminder:
#    for (x_i, y_i) in D_TR:
#        if y_i * (w · x_i) ≤ 0:
#            w ← w + y_i * x_i
#
# This means:
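# - Each update moves w toward classifying the offending example correctly:
#   it raises y_i * (w · x_i) by ||x_i||^2, though a single update is not
#   guaranteed to fix that example.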
# - The algorithm continues looping until it finds *no misclassified* examples.
# - If data is linearly separable → convergence → zero training error.
#
# Therefore, the final Perceptron satisfies:
#       y_i * (w · x_i) > 0   for all i in D_TR
#       → Training_Error = 0
#
# No need to explicitly compute it — the algorithm’s stopping condition
# already guarantees that the final model classifies all training examples
# correctly.
#
# (If the data is *not* linearly separable, the Perceptron never converges,
#  and training error does not reach zero — in that case, one monitors the
#  number of mistakes per epoch instead.)

In [None]:
# Question: 4
import numpy as np

# Each row: (x, y, count) -- feature vector, label in {+1, -1}, and the number
# of perceptron updates applied for that example.
updates = [
    (np.array([0, 0, 0, 0, 4]), +1, 2),
    (np.array([0, 0, 6, 5, 0]), +1, 1),
    (np.array([3, 0, 0, 0, 0]), -1, 1),
    (np.array([0, 9, 3, 6, 0]), -1, 1),
    (np.array([0, 1, 0, 2, 5]), -1, 1),
]

w = np.zeros(5, dtype=int)

# Accumulate all counted updates: starting from w = 0, every update adds
# y * x, so an example updated `count` times contributes count * y * x.
# Distinct loop names keep this cell from overwriting notebook-level `y`.
for x_i, y_i, count in updates:
    w += count * y_i * x_i

# Convert to built-in ints before printing: a tuple of NumPy scalars is shown
# as (np.int64(-3), ...) under NumPy >= 2 instead of plain numbers.
w_tuple = tuple(w.tolist())
print("Final weight vector w =", w_tuple)

In [None]:
# Question: 5
# Perceptron convergence demo with step-by-step boundary visualization.
# This cell generates:
# 1) A scatter plot of the dataset
# 2) An animated GIF of the decision boundary after each perceptron update
# 3) First and final boundary snapshots as static figures
#
# Notes for grading:
# - matplotlib only (no seaborn)
# - one chart per figure
# - no explicit color selections; we distinguish classes by markers
#
# Output files saved under /mnt/data/

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation, PillowWriter
from pathlib import Path

# Fixed seed so the dataset and the shuffle below are reproducible.
rng = np.random.default_rng(0)

# -----------------------
# 1) Create a tiny separable 2D dataset
# -----------------------
# Two Gaussian blobs of 12 points each (std 0.6): positives around (2, 2),
# negatives around (-2, -2). The positive blob is drawn first, then the
# negative one, so the random stream is consumed in that order.
pos = rng.normal([2.0, 2.0], 0.6, (12, 2))
neg = rng.normal([-2.0, -2.0], 0.6, (12, 2))

# Stack the features and build the matching label vector in {+1, -1}.
X = np.concatenate((pos, neg), axis=0)
y = np.concatenate((np.full(len(pos), 1.0), np.full(len(neg), -1.0)))

# Shuffle to simulate online learning (same permutation for X and y).
perm = rng.permutation(len(X))
X = X[perm]
y = y[perm]

# -----------------------
# 2) Perceptron training (online). Track states after every update.
# -----------------------
def perceptron_train(X, y, max_epochs=20):
    """
    Online perceptron with a bias term.

    Sweeps over the samples in order, at most ``max_epochs`` times. A sample
    counts as a mistake when ``y_i * (w·x_i + b) <= 0``; each mistake triggers
    the update ``w += y_i * x_i``, ``b += y_i``. Training stops early after the
    first sweep that makes no update.

    Parameters
    ----------
    X : 2-D array; ``X.shape[1]`` sets the length of the weight vector.
    y : labels paired with the rows of ``X`` (the callers use +1 / -1).
    max_epochs : maximum number of sweeps over the data.

    Returns
    -------
    (w, b, history) where ``history`` starts with the initial all-zero state
    and gains one ``(copy of w, b)`` entry after every update.
    """
    n_features = X.shape[1]
    w = np.zeros(n_features, dtype=float)
    b = 0.0
    # Snapshots store copies because w is modified in place below.
    history = [(w.copy(), b)]

    for _ in range(max_epochs):
        snapshots_before = len(history)
        for sample, target in zip(X, y):
            activation = np.dot(w, sample) + b
            if target * activation <= 0:
                w += target * sample
                b += target
                history.append((w.copy(), b))
        # One snapshot is appended per update, so an unchanged history
        # length means this sweep made no mistakes: converged.
        if len(history) == snapshots_before:
            break

    return w, b, history

w_final, b_final, hist = perceptron_train(X, y, max_epochs=50)

# -----------------------
# 3) Helper to plot data and boundary for a given (w, b)
# -----------------------
def plot_boundary(ax, w, b, xlim=(-4, 4), ylim=(-4, 4), title=None):
    """
    Draw the two classes and the line ``w[0]*x1 + w[1]*x2 + b = 0`` on ``ax``.

    The points come from the module-level arrays ``pos`` and ``neg``; classes
    are told apart by marker, not by an explicit color.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    w : weight vector; only ``w[0]`` and ``w[1]`` are read.
    b : bias term.
    xlim, ylim : axis limits; ``xlim`` is also the x-range of the drawn line.
    title : optional axes title (skipped when falsy).

    No line is drawn when both ``w[0]`` and ``w[1]`` are zero.
    """
    for points, marker, label in (
        (pos, 'o', 'Positive (+)'),
        (neg, 'x', 'Negative (-)'),
    ):
        ax.scatter(points[:, 0], points[:, 1], marker=marker, label=label)

    ax.set_xlim(*xlim)
    ax.set_ylim(*ylim)
    ax.set_xlabel("x1")
    ax.set_ylabel("x2")
    ax.grid(True)

    w_x1, w_x2 = w[0], w[1]
    if w_x2 != 0:
        # General case: solve the boundary equation for x2 along the x-range.
        xs = np.linspace(xlim[0], xlim[1], 200)
        ax.plot(xs, -(w_x1 * xs + b) / w_x2, linestyle='-')
    elif w_x1 != 0:
        # No x2 dependence: the boundary is the vertical line x1 = -b / w[0].
        ax.axvline(-b / w_x1, linestyle='-')

    ax.legend(loc='best')
    if title:
        ax.set_title(title)

# -----------------------
# 4) Scatter plot of raw data
# -----------------------
# New figure showing the data. With w = (1, 0) and b = 0, plot_boundary takes
# its vertical-line branch and draws x1 = 0 purely as a visual reference.
ax = plt.figure().gca()
reference_w = np.array([1.0, 0.0])
plot_boundary(ax, reference_w, 0.0, title="Dataset (reference line only)")
plt.show()

# -----------------------
# 5) Animation: boundary after each update
# -----------------------
# NOTE(review): nothing in the visible cells writes into frames_dir, but
# creating it (with parents) also creates /mnt/data, where the GIF is saved.
frames_dir = Path("/mnt/data/perceptron_frames")
frames_dir.mkdir(parents=True, exist_ok=True)

# Figure and axes shared by the animation callbacks below.
fig = plt.figure()
ax = fig.gca()

def init():
    """Animation init callback: redraw the axes with the initial (w, b) state."""
    ax.clear()
    initial_w, initial_b = hist[0]
    plot_boundary(ax, initial_w, initial_b, title="Perceptron boundary (update 0)")
    return (ax,)

def update(frame_idx):
    """Animation frame callback: redraw the axes with the state stored at hist[frame_idx]."""
    ax.clear()
    weights, bias = hist[frame_idx]
    plot_boundary(
        ax,
        weights,
        bias,
        title=f"Perceptron boundary (update {frame_idx})",
    )
    return (ax,)

# One animation frame per recorded (w, b) state in `hist`.
anim = FuncAnimation(fig, update, frames=len(hist), init_func=init, blit=False, interval=700, repeat=False)

gif_path = Path("/mnt/data/perceptron_convergence.gif")
# Create the output directory explicitly so saving does not depend on an
# earlier, unrelated mkdir having created /mnt/data as a side effect.
gif_path.parent.mkdir(parents=True, exist_ok=True)
anim.save(gif_path, writer=PillowWriter(fps=1))

plt.close(fig)  # close the animation figure to avoid extra blank output

# -----------------------
# 6) First and final boundary snapshots as static figures
# -----------------------
# One figure per snapshot: the initial state and the final trained separator.
snapshots = (
    (hist[0][0], hist[0][1], "Start (update 0)"),
    (w_final, b_final, "Converged separator (final)"),
)
for snap_w, snap_b, snap_title in snapshots:
    plt.figure()
    ax = plt.gca()
    plot_boundary(ax, snap_w, snap_b, title=snap_title)
    plt.show()

# Cell output: GIF location, number of recorded states, final parameters.
(gif_path.as_posix(), len(hist), (w_final, b_final))
Result
('/mnt/data/perceptron_convergence.gif',
 2,
 (array([1.9228792 , 2.81987808]), 1.0))