In [146]:
import random
import numpy as np
from numpy.linalg import inv

In [132]:
def generate_sine_point():
    """Draw one sample from the target function f(x) = sin(pi * x).

    Returns:
        A two-element list ``[x, y]`` where ``x`` is drawn with
        ``random.uniform(-1, 1)`` and ``y`` is ``np.sin(np.pi * x)``.
    """
    abscissa = random.uniform(-1, 1)
    return [abscissa, np.sin(np.pi * abscissa)]

# Problem 4

In [133]:
# Monte Carlo estimate of the average fitted slope for the one-parameter
# model h(x) = a*x, each fit using two points sampled from the sine target.
NUM_TRIALS = 1000000

slope_sum = 0

for i in range(NUM_TRIALS):
    # A fresh two-point data set for this trial
    p1, p2 = generate_sine_point(), generate_sine_point()
    (x1, y1), (x2, y2) = p1, p2

    # Least-squares slope for two points and a single parameter (no
    # intercept): a = sum(x*y) / sum(x^2).
    slope = (x1*y1 + x2*y2) / (x1**2 + x2**2)

    slope_sum += slope

# Mean slope over all trials; left as the last expression so the cell shows it.
slope_sum / NUM_TRIALS

1.4295550856546624

# Problem 6

In [138]:
# Variance calculation

NUM_TRIALS = 100000

# Slope of the average hypothesis g_bar(x) = 1.43x (the Problem 4 estimate,
# rounded to two decimals).
G_BAR_SLOPE = 1.43

# Integral of x^2 over [-1, 1] divided by the interval length 2.
MEAN_X_SQUARED = 1/3

div_integral_sum = 0

for i in range(NUM_TRIALS):
    # A fresh two-point data set for this trial
    p1, p2 = generate_sine_point(), generate_sine_point()
    (x1, y1), (x2, y2) = p1, p2

    # Least-squares slope for two points and a single parameter.
    slope = (x1*y1 + x2*y2) / (x1**2 + x2**2)

    # Integral over [-1, 1] of (slope*x - G_BAR_SLOPE*x)^2, divided by the
    # interval length: the x-dependence factors out as MEAN_X_SQUARED.
    div_integral = MEAN_X_SQUARED * (slope - G_BAR_SLOPE) ** 2

    div_integral_sum += div_integral


# Average over all trials; left as the last expression so the cell shows it.
div_integral_sum / NUM_TRIALS

0.23603895844768086

# Problem 7

## Choice d : h(x) = ax^2

In [237]:
# Monte Carlo estimate of the average fitted coefficient for h(x) = a*x^2.
NUM_TRIALS = 10000000

weights_sum = np.zeros(1)

for i in range(NUM_TRIALS):
    # A fresh two-point data set for this trial
    p1, p2 = generate_sine_point(), generate_sine_point()

    # Single feature column x^2 (shape 2x1); targets are the sampled y values.
    X = np.array([[p1[0]**2], [p2[0]**2]])
    Y = np.array([p1[1], p2[1]])

    # Least squares via the pseudo-inverse (X^T X)^-1 X^T
    pseudo_inv = inv(X.T.dot(X)).dot(X.T)
    weights = pseudo_inv.dot(Y)

    weights_sum += weights

# Mean coefficient over all trials; last expression so the cell shows it.
weights_sum / NUM_TRIALS

array([-0.00234729])

In [241]:
# Variance estimate for the model h(x) = a*x^2: the average over data sets of
# the mean squared gap between each fitted curve and the average hypothesis.
NUM_TRIALS = 1000000

# Coefficient of the average hypothesis g_bar(x) = A_BAR * x^2. This is the
# value printed by an earlier averaging run (array([-0.00234729])). It is
# hard-coded, and no random seed is set in the visible cells, so re-running
# the averaging loop will give a different mean -- update A_BAR to match.
A_BAR = -0.00234729

# Integral of x^4 over [-1, 1] divided by the interval length 2.
MEAN_X_FOURTH = 1/5

var_sum = 0

for i in range(NUM_TRIALS):
    # Generate two random points
    p1 = generate_sine_point()
    p2 = generate_sine_point()

    # Do linear regression using ax^2 model
    X = np.array([[p1[0]**2], [p2[0]**2]])
    Y = np.array([p1[1], p2[1]])
    pseudo_inv = np.dot(inv((np.dot(X.T, X))), X.T)
    weights = np.dot(pseudo_inv, Y)

    # Integral over [-1, 1] of (a*x^2 - A_BAR*x^2)^2 divided by the interval
    # length: the x-dependence factors out as MEAN_X_FOURTH.
    div_integral = MEAN_X_FOURTH * (weights[0] - A_BAR)**2
    var_sum += div_integral

# Average over all trials; left as the last expression so the cell shows it.
var_sum / NUM_TRIALS

21.500791584731559

In [226]:
import matplotlib.pyplot as plt

# Plot one fitted curve h(x) = a*x^2 together with the two sample points.
# NOTE: `weights`, `p1` and `p2` are not defined in this cell; it draws
# whatever values the kernel currently holds (e.g. the final trial of the
# most recently executed fitting loop), so run a fitting cell first.
x = np.linspace(-1, 1, 100)
y = weights[0] * x**2

fig, ax = plt.subplots()
ax.plot(x, y, label='fit: h(x) = a*x^2')
ax.plot([p1[0], p2[0]], [p1[1], p2[1]], 'ro', label='sample points')
# Fixed window: a large |a| sends the curve off-screen rather than rescaling.
ax.axis([-1, 1, -1, 1])
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('Fitted a*x^2 hypothesis and its two data points')
ax.legend()
plt.show()

## Choice e : h(x) = ax^2 + b

In [245]:
# Monte Carlo estimate of the average fitted weights for h(x) = a*x^2 + b.
NUM_TRIALS = 1000000

weights_sum = np.zeros(2)

for i in range(NUM_TRIALS):
    # Generate two random points
    p1 = generate_sine_point()
    p2 = generate_sine_point()

    # Do linear regression using ax^2 + b model. Each row of X is [1, x^2],
    # so the fitted weights come out ordered as [b, a].
    X = np.array([[1., p1[0]**2], [1., p2[0]**2]])
    Y = np.array([p1[1], p2[1]])

    # Use the SVD-based pseudo-inverse rather than inv(X^T X) X^T: forming
    # X^T X squares the condition number, which loses precision whenever
    # x1^2 is close to x2^2, and inv() raises LinAlgError when X is exactly
    # singular. pinv() returns the minimum-norm least-squares fit instead.
    pseudo_inv = np.linalg.pinv(X)
    weights = np.dot(pseudo_inv, Y)

    weights_sum += weights

# Mean weights over all trials; last expression so the cell shows it.
weights_sum / NUM_TRIALS

array([-0.0287591 , -2.17788409])

In [247]:
# Monte Carlo estimate of the average fitted weights for h(x) = a*x^2 + b.
# NOTE(review): this repeats the experiment of the cell above; presumably it
# was re-run to see how much the average moves between runs -- confirm, or
# drop one of the two cells.
NUM_TRIALS = 1000000

weights_sum = np.zeros(2)

for i in range(NUM_TRIALS):
    # Generate two random points
    p1 = generate_sine_point()
    p2 = generate_sine_point()

    # Do linear regression using ax^2 + b model. Each row of X is [1, x^2],
    # so the fitted weights come out ordered as [b, a].
    X = np.array([[1., p1[0]**2], [1., p2[0]**2]])
    Y = np.array([p1[1], p2[1]])

    # Use the SVD-based pseudo-inverse rather than inv(X^T X) X^T: forming
    # X^T X squares the condition number, which loses precision whenever
    # x1^2 is close to x2^2, and inv() raises LinAlgError when X is exactly
    # singular. pinv() returns the minimum-norm least-squares fit instead.
    pseudo_inv = np.linalg.pinv(X)
    weights = np.dot(pseudo_inv, Y)

    weights_sum += weights

# Mean weights over all trials; last expression so the cell shows it.
weights_sum / NUM_TRIALS

array([-0.24475387,  4.19007489])