PROBLEM 5

In [None]:
import numpy as np

def E(u, v):
  """Evaluate the error surface E(u, v) = (u*e^v - 2*v*e^(-u))^2."""
  residual = u * np.exp(v) - 2 * v * np.exp(-u)
  return residual ** 2

def grad_E(u, v):
  """Return the partial derivatives (dE/du, dE/dv) of the error surface.

  With r = u*e^v - 2*v*e^(-u), the chain rule gives
  dE/du = 2*r*(e^v + 2*v*e^(-u)) and dE/dv = 2*r*(u*e^v - 2*e^(-u)).
  """
  exp_v = np.exp(v)
  exp_neg_u = np.exp(-u)
  residual = u * exp_v - 2 * v * exp_neg_u
  outer = 2 * residual  # common factor from differentiating the square
  wrt_u = outer * (exp_v + 2 * v * exp_neg_u)
  wrt_v = outer * (u * exp_v - 2 * exp_neg_u)
  return wrt_u, wrt_v

# Plain gradient descent on E, starting at (1, 1), until E drops below 1e-14.
u, v = 1.0, 1.0
eta = 0.1  # learning rate
target_error = 1e-14
iterations = 0

error = E(u, v)
while not (error < target_error):
  dE_du, dE_dv = grad_E(u, v)
  # Both coordinates move together, using the gradient at the current point.
  u -= eta * dE_du
  v -= eta * dE_dv
  iterations += 1
  error = E(u, v)

print(f'Iterations: {iterations}')

Iterations: 10


PROBLEM 6

In [None]:
# Report the point reached by the gradient descent run above.
print('(u, v) = {}, {}'.format(u, v))

(u, v) = 0.04473629039778207, 0.023958714099141746


PROBLEM 7

In [None]:
num_iterations = 15 #15 full iterations or 30 steps
u, v = 1.0, 1.0
eta = 0.1

# Coordinate descent: each full iteration moves along u first, then along v,
# re-evaluating the gradient at the updated point between the two steps.
for _ in range(num_iterations):
  dE_du, dE_dv = grad_E(u, v)
  u -= eta * dE_du #step 1, adjusting u only
  dE_du, dE_dv = grad_E(u, v) #gradient at the new u, needed for the v step
  v -= eta * dE_dv #step 2, adjusting v only

# Evaluated once after the loop so `error` is defined even when
# num_iterations == 0 (the in-loop assignment raised NameError in that case).
error = E(u, v)

print(f'Error after {num_iterations} iterations: {error}')

Error after 15 iterations: 0.13981379199615315


PROBLEM 8

In [None]:
import numpy as np

N = 100 #number of training points
nu = 0.01 #learning rate nu
epsilon = 0.01 #convergence threshold: sgd stops once the per-epoch weight change has norm < epsilon
space = (-1, 1)  #(low, high) bounds unpacked into np.random.uniform by generate_line

def generate_line():
    """Draw two random points and return (slope, intercept) of the line through them.

    If both points share the same x-coordinate the slope is set to np.inf,
    and the intercept derived from it is then not finite.
    """
    first = np.random.uniform(*space, 2)
    second = np.random.uniform(*space, 2)
    if second[0] != first[0]:
        slope = (second[1] - first[1]) / (second[0] - first[0])
    else:
        slope = np.inf
    intercept = first[1] - slope * first[0]
    return slope, intercept

def target_function(x, slope, b):
    """Label point x as +1 if it lies strictly above y = slope*x + b, otherwise -1."""
    boundary_y = slope * x[0] + b
    if x[1] > boundary_y:
        return 1
    return -1

def generate_training_data(N, slope, b):
    """Return a list of N (point, label) pairs.

    Each point is drawn uniformly from [-1, 1] x [-1, 1] and labelled by
    target_function against the line y = slope*x + b.
    """
    def labelled_point():
        point = np.random.uniform(-1, 1, 2)
        return (point, target_function(point, slope, b))

    return [labelled_point() for _ in range(N)]

#calculate the gradient: -(y_n * x_n)/(1 + e^{y_n * w^T * x_n})
def gradient(s, y, w):
    """Single-sample gradient of the logistic cross-entropy error.

    s: input point without the bias coordinate; y: its label; w: current
    weights with the bias weight first. Returns a list with one entry per
    weight, equal to -y*x / (1 + exp(y * w.x)) with x = [1, *s].
    """
    augmented = np.array([1] + list(s))  # prepend the constant bias input
    margin = y * np.dot(augmented, np.array(w))
    denominator = 1.0 + np.exp(margin)
    signed_inputs = [y * 1.0]
    signed_inputs.extend(y * coord * 1.0 for coord in s)
    return [-1.0 * term / denominator for term in signed_inputs]

def converged(v1, v2, epsilon):
    """Return whether the Euclidean distance between v1 and v2 is below epsilon."""
    step = np.array(v2) - np.array(v1)
    step_size = np.linalg.norm(step)
    return step_size < epsilon

#function to update weights: w(t+1) = w(t) - nu * gradient
def update_weights(w, g, nu):
    """Take one descent step: return a new list with w[i] - nu * g[i] for every weight."""
    stepped = []
    for position, weight in enumerate(w):
        stepped.append(weight - nu * g[position])
    return stepped

#function to perform stochastic gradient descent
def sgd(data, epsilon, nu):
    """Run stochastic gradient descent for logistic regression.

    data is a sequence of (point, label) pairs. Weights start at zero, with
    one extra entry for the bias. Every epoch visits all samples once in a
    freshly shuffled order, and training stops after the first epoch whose
    total weight change has norm below epsilon.

    Returns (number of epochs run, final weights).
    """
    w = [0] * (len(data[0][0]) + 1)  # +1 for the bias weight
    idx_order = list(range(len(data)))
    epochs = 0

    while True:
        w_before_epoch = np.copy(w)
        np.random.shuffle(idx_order)  # new visiting order each epoch
        for idx in idx_order:
            point, label = data[idx][0], data[idx][1]
            w = update_weights(w, gradient(point, label, w), nu)
        epochs += 1
        if converged(w, w_before_epoch, epsilon):
            return epochs, w

#calculate cross entropy error: log(1 + e^{-y_n * w^T * x_n})
def cross_entropy_error(sample, y, w):
    """Cross-entropy error of one sample: log(1 + exp(-y * w.x)) with x = [1, *sample]."""
    augmented = np.array([1] + list(sample))  # prepend the constant bias input
    signal = np.dot(augmented, np.array(w))
    return np.log(1.0 + np.exp(-y * signal))

#calculate average E_out: 1/N * sum cross entropy error
def compute_E_out(data, w):
    """Average the per-sample cross-entropy error of weights w over (point, label) pairs."""
    total = sum(cross_entropy_error(pair[0], pair[1], w) for pair in data)
    return total / len(data)

# Repeat the experiment and average both the epoch count and the out-of-sample error.
runs = 100 #avg over 100 runs
total_epochs, total_E_out = 0, 0

for _ in range(runs):
    # Each run uses a new target line and a new training set.
    slope, b = generate_line()
    training_data = generate_training_data(N, slope, b)
    epochs, w = sgd(training_data, epsilon, nu)
    # E_out is estimated on fresh points labelled by the same target line.
    out_sample_data = generate_training_data(10000, slope, b)
    E_out = compute_E_out(out_sample_data, w)
    total_epochs += epochs
    total_E_out += E_out

avg_epochs = total_epochs / runs #avg number of epochs (prob 9)
avg_E_out = total_E_out / runs #avg E_out over all runs (prob 8)
print(f'Avg epochs: {avg_epochs}')
print(f'Avg E_out: {avg_E_out}')


[(array([0.02349469, 0.4328599 ]), 1), (array([0.42950064, 0.78801388]), 1), (array([ 0.40329629, -0.99085697]), 1), (array([0.33826029, 0.08682359]), 1), (array([0.13131723, 0.90477883]), 1), (array([0.78433276, 0.95531496]), 1), (array([-0.68512118,  0.32896758]), -1), (array([-0.95845166,  0.70130455]), -1), (array([-0.05376457, -0.42150367]), -1), (array([0.20566899, 0.27994551]), 1), (array([ 0.04955063, -0.88381012]), -1), (array([-0.46991512,  0.08771095]), -1), (array([-0.98152041, -0.30138969]), -1), (array([0.4077043 , 0.51226894]), 1), (array([ 0.99744233, -0.39556814]), 1), (array([-0.92360277,  0.36401419]), -1), (array([-0.95079624,  0.771999  ]), -1), (array([0.84447429, 0.58260253]), 1), (array([ 0.97052497, -0.75314282]), 1), (array([-0.17419443,  0.55930534]), 1), (array([0.62538867, 0.11302967]), 1), (array([-0.24124706,  0.14970595]), -1), (array([-0.4594264 ,  0.51614698]), -1), (array([ 0.74572417, -0.49623519]), 1), (array([0.63729488, 0.19354368]), 1), (array([-

KeyboardInterrupt: 

In [None]:
from os import sched_getscheduler
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import log_loss

space = [-1, 1] #[low, high] bounds unpacked into np.random.uniform
N = 100 #number of training points
iterations = 100 #number of train/evaluate runs averaged at the bottom of the cell
nu = 0.01 #learning rate
test_size = 10000 #number of fresh points drawn per run to estimate E_out
num_epochs = 100 #number of partial_fit calls made per run


def generate_line():
  """Return a function x -> slope*x + b for the line through two random points.

  If both points share the same x-coordinate the slope is set to np.inf,
  and the intercept derived from it is then not finite.
  """
  first = np.random.uniform(*space, 2)
  second = np.random.uniform(*space, 2)
  if second[0] != first[0]:
    slope = (second[1] - first[1]) / (second[0] - first[0])
  else:
    slope = np.inf
  b = first[1] - slope * first[0]

  def line(x):
    return slope * x + b

  return line

def prepare_data(N, target_function):
  """Draw N random points and label them against the target line.

  target_function maps an x-coordinate to the line's y-value. A point gets
  the sign of (line height - point height), so points below the line are +1
  and points above it are -1.

  Returns (list of points, array of labels).
  """
  X = []
  differences = []
  for _ in range(N):
    point = np.random.uniform(*space, 2)
    X.append(point)
    differences.append(target_function(point[0]) - point[1])

  return X, np.sign(differences)

def sgd(X, y, nu, num_epochs):
  """Fit an SGDClassifier with logistic loss and a constant learning rate nu.

  partial_fit is called num_epochs times on the full training set; the
  fitted model is returned.
  """
  class_labels = np.array([-1, 1])
  model = SGDClassifier(
      loss="log_loss",
      learning_rate="constant",
      eta0=nu,
      max_iter=1,
      warm_start=True,
  )
  epochs_left = num_epochs
  while epochs_left > 0:
    model.partial_fit(X, y, classes=class_labels)
    epochs_left -= 1
  return model

def calc_e_out(model, test_size, target_function):
  """Estimate the out-of-sample cross-entropy error of a fitted classifier.

  Draws test_size fresh points labelled by target_function and returns the
  log loss of the model's predicted probabilities on them.
  """
  X_test, y_test = prepare_data(test_size, target_function)
  y_prob = model.predict_proba(X_test)
  # Pass the model's own class order explicitly: predict_proba columns follow
  # model.classes_, and without `labels` log_loss infers the classes from
  # y_test, which fails when the test sample contains a single class.
  return log_loss(y_test, y_prob, labels=model.classes_)


# Average E_out over independent runs.
e_out_total = 0
for run in range(iterations):
  # Draw a new target line for every run, so the average covers different
  # targets rather than repeated samples from a single one.
  target_function = generate_line()
  X_train, y_train = prepare_data(N, target_function)
  model = sgd(X_train, y_train, nu, num_epochs)
  e_out = calc_e_out(model, test_size, target_function)
  e_out_total += e_out

avg_e_out = e_out_total / iterations

print(f'Average E_out: {avg_e_out}')

Average E_out: 0.16323845994612346


In [None]:
import numpy as np

# Sampling interval [low, high]; unpacked into np.random.uniform wherever points are drawn
space = [-1, 1]

# Function to define a random boundary line
def generate_line():
    """Return a function x -> slope*x + b for the line through two random points.

    If both points share the same x-coordinate the slope is set to np.inf,
    and the intercept derived from it is then not finite.
    """
    start = np.random.uniform(*space, 2)
    end = np.random.uniform(*space, 2)
    same_x = not (end[0] != start[0])
    slope = np.inf if same_x else (end[1] - start[1]) / (end[0] - start[0])
    b = start[1] - slope * start[0]
    return lambda x: slope * x + b

# Prepare training data based on the generated line
def prepare_data(N, target_function):
    """Draw N random points and label them against the target line.

    A point gets the sign of (line height at its x - its own y), so points
    below the line are +1 and points above it are -1.

    Returns (array of points with one row per point, array of labels).
    """
    points = []
    gaps = []
    for _ in range(N):
        point = np.random.uniform(*space, 2)
        points.append(point)
        gaps.append(target_function(point[0]) - point[1])

    return np.array(points), np.sign(gaps)

# Logistic function
def logistic_func(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Accepts a scalar or an array. exp() is only applied to -|z|, which lies
    in (0, 1], so large negative inputs no longer trigger an overflow
    RuntimeWarning. Scalars come back as scalars, arrays as arrays.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1/(1+e^-z); for z < 0 use the equivalent e^z/(1+e^z).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

#train logistic regression using stochastic gradient descent
def train_logistic_regression(X, y, learning_rate=0.01, tol=0.01, max_iter=1000):
    """Train logistic regression with stochastic gradient descent.

    X is an (N, d) array and y holds labels in {-1, +1}. Each epoch visits
    every sample once in random order and steps along the negative gradient
    of ln(1 + exp(-y * w.x)), which is y * x / (1 + exp(y * w.x)). Training
    stops when an epoch changes the weights by less than tol (Euclidean
    norm) or after max_iter epochs.

    NOTE: no intercept is added here; the model is w.x with exactly
    X.shape[1] weights, so X must already contain a constant column if a
    bias term is wanted.

    Returns the weight vector.
    """
    weights = np.zeros(X.shape[1])
    N = len(y)

    # exp() may overflow to inf for very large margins; the step is then
    # exactly 0, which is the correct limit, so the warning is silenced.
    with np.errstate(over="ignore"):
        for epoch in range(max_iter):
            weights_prev = weights.copy()
            for i in np.random.permutation(N):
                margin = y[i] * np.dot(X[i], weights)
                # The previous `y - sigmoid(z)` update is only valid for 0/1
                # labels; with -1/+1 labels it never vanishes for y = -1.
                weights += learning_rate * y[i] * X[i] / (1.0 + np.exp(margin))

            # converged once a whole epoch barely moves the weights
            if np.linalg.norm(weights - weights_prev) < tol:
                break

    return weights

#calculate cross-entropy loss
def cross_entropy_loss(X, y, weights):
    """Mean cross-entropy error of a logistic model on labelled data.

    Computes mean(ln(1 + exp(-y * X.w))). Labels are mapped to -1/+1 by
    sign (anything > 0 is the positive class), so both the -1/+1 labels
    produced in this notebook and 0/1 labels are handled. The previous
    formula y*ln(p) + (1-y)*ln(1-p) is only valid for 0/1 labels.
    """
    signed = np.where(np.asarray(y) > 0, 1.0, -1.0)
    margins = signed * np.dot(X, weights)
    # logaddexp(0, -m) == ln(1 + exp(-m)) without overflow for large |m|
    return np.mean(np.logaddexp(0.0, -margins))

# Evaluate model performance on a separate test dataset
def evaluate_model(weights, num_test_points=1000, target_function=None):
    """Return the misclassification rate of `weights` on freshly drawn test points.

    target_function maps x-coordinates to the target line's y-values and is
    called directly, so the default of None cannot be used. A point is
    predicted +1 when the logistic output is at least 0.5, otherwise -1.
    """
    X_test = np.random.uniform(*space, (num_test_points, 2))
    # true labels: sign of (line height - point height)
    line_height = target_function(X_test[:, 0])
    Y_test = np.sign(line_height - X_test[:, 1])
    probabilities = logistic_func(np.dot(X_test, weights))
    predictions = np.where(probabilities >= 0.5, 1, -1)
    misclassified = predictions != Y_test
    return np.mean(misclassified)  #error rate


N = 100  #number of training points
num_runs = 100  #number of experiments
errors = []

for _ in range(num_runs):
    # Draw a new target line for every run, so the average covers different
    # targets rather than repeated samples from a single one.
    target_function = generate_line()
    X, Y = prepare_data(N, target_function)
    weights = train_logistic_regression(X, Y)  #train the model
    E_out = evaluate_model(weights, target_function=target_function)  #evaluate the model
    errors.append(E_out)

average_error = np.mean(errors)
print(f"Average E_out over {num_runs} runs: {average_error:.6f}")


Average E_out over 100 runs: 0.152280


In [None]:
import numpy as np

# Sampling interval [low, high]; unpacked into np.random.uniform wherever points are drawn
space = [-1, 1]

def generate_line():
    """Build a random target line and return it as a function of x.

    The line passes through two random points. If they share the same
    x-coordinate the slope is set to np.inf, and the intercept derived from
    it is then not finite.
    """
    anchor = np.random.uniform(*space, 2)
    other = np.random.uniform(*space, 2)
    if other[0] != anchor[0]:
        slope = (other[1] - anchor[1]) / (other[0] - anchor[0])
    else:
        slope = np.inf
    b = anchor[1] - slope * anchor[0]

    def line(x):
        return slope * x + b

    return line

def prepare_data(N, target_function):
    """Sample N random points and label each one against the target line.

    The label is the sign of (line height at the point's x - the point's y):
    +1 below the line, -1 above it.

    Returns (array of points with one row per point, array of labels).
    """
    sampled = []
    offsets = []
    for _ in range(N):
        candidate = np.random.uniform(*space, 2)
        line_y = target_function(candidate[0])
        sampled.append(candidate)
        offsets.append(line_y - candidate[1])

    labels = np.sign(offsets)
    return np.array(sampled), labels

def logistic_func(z):
    """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

    Accepts a scalar or an array. exp() is only applied to -|z|, which lies
    in (0, 1], so large negative inputs no longer trigger an overflow
    RuntimeWarning. Scalars come back as scalars, arrays as arrays.
    """
    z = np.asarray(z, dtype=float)
    decay = np.exp(-np.abs(z))
    # For z >= 0 use 1/(1+e^-z); for z < 0 use the equivalent e^z/(1+e^z).
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    return result[()]  # unwrap 0-d arrays so scalar input yields a scalar

def train_logistic_regression(X, y, learning_rate=0.01, tol=0.01, max_iter=10000):
    """Train logistic regression with SGD and return the number of epochs used.

    X is an (N, d) array and y holds labels in {-1, +1}. Each epoch visits
    every sample once in random order and steps along the negative gradient
    of ln(1 + exp(-y * w.x)), which is y * x / (1 + exp(y * w.x)). Training
    stops when an epoch changes the weights by less than tol (Euclidean
    norm) or after max_iter epochs.

    NOTE: no intercept is added here; the model is w.x with exactly
    X.shape[1] weights, so X must already contain a constant column if a
    bias term is wanted.
    """
    weights = np.zeros(X.shape[1])  # Initialize weights
    N = len(y)  # Number of samples
    epoch_count = 0  # To count the number of epochs

    # exp() may overflow to inf for very large margins; the step is then
    # exactly 0, which is the correct limit, so the warning is silenced.
    with np.errstate(over="ignore"):
        for epoch in range(max_iter):
            epoch_count += 1
            weights_prev = weights.copy()
            for i in np.random.permutation(N):
                margin = y[i] * np.dot(X[i], weights)
                # The previous `y - sigmoid(z)` update is only valid for 0/1
                # labels; with -1/+1 labels it never vanishes for y = -1.
                weights += learning_rate * y[i] * X[i] / (1.0 + np.exp(margin))

            # Converged when the epoch's weight CHANGE is small. The earlier
            # test on ||weights|| itself was never met once the weights
            # grew, so training always ran for max_iter epochs.
            if np.linalg.norm(weights - weights_prev) < tol:
                break

    return epoch_count

def main(num_runs=10, N=100):
    """Run num_runs experiments with N training points each.

    Prints the epoch count of every run, then the average over all runs.
    """
    def single_run():
        # new target line and new training set for each experiment
        line = generate_line()
        X, Y = prepare_data(N, line)
        return train_logistic_regression(X, Y)

    epoch_counts = []
    for _ in range(num_runs):
        epochs = single_run()
        print(epochs)
        epoch_counts.append(epochs)

    average_epochs = np.mean(epoch_counts)
    print(f"Average number of epochs for convergence: {average_epochs:.2f}")

# Run the experiment with the default settings
main()


  return 1 / (1 + np.exp(-z))


10000


KeyboardInterrupt: 