In [40]:
import random
import numpy as np
import math
import pandas as pd
from sklearn.metrics import mean_squared_error
import numdifftools as nd
import inspect
from matplotlib import pyplot as plt
import statistics

# (A)

Assumptions:
- X distributed uniformly from (0,1) INCLUSIVE

In [41]:
def getData(N, var=1.0):
    
    if(N<1 or var<0):
        return "ERROR"
    
    
    xes = []
    yes = []
    zes = []
    
    for i in range(N):
        x = random.uniform(0, 1)
        z = np.random.normal(loc=0.0, scale=math.sqrt(var))
        y = math.cos(2*math.pi*x)+z
        
        xes.append(x)
        yes.append(y)
        #zes.append(z)
        
    #df = pd.DataFrame(list(zip(xes,yes,zes)), columns =['X', 'Y','Z'])
    df = pd.DataFrame(list(zip(xes,yes)), columns =['X', 'Y'])
    
    return df
        

In [42]:
# CHECK

myDf = getData(10,2.0)

myDf

Unnamed: 0,X,Y
0,0.349532,-1.283764
1,0.468292,-0.753571
2,0.018244,0.482211
3,0.937028,1.055965
4,0.498127,-0.745031
5,0.825555,0.842701
6,0.814513,0.675893
7,0.16883,1.641141
8,0.876287,0.634693
9,0.925371,3.069076


# (B)

In [43]:
def getPolynomialY(poly_coefs, x):
    
    y = 0
    
    for i in range(len(poly_coefs)):
        y += poly_coefs[i] * (x**i)
        
    return y

In [44]:
def getPrediction(coefs, xes):
    
    y_preds = []
    
    for x in xes:
        y_preds.append(getPolynomialY(coefs,x))
        
    return y_preds

In [45]:
# CHECK

# y = 2 + 3x + (-5)x^2 + 0x^3 + 6x^4
# Online Calculator: (2,84) , (-0.1, 1.6506)

print("x = 2, y =(true) 84 =(pred) ", getPolynomialY([2, 3, -5, 0, 6], 2))
print("x = (-0.1), y =(true) 1.6506 =(pred) ", getPolynomialY([2, 3, -5, 0, 6], -0.1))

x = 2, y =(true) 84 =(pred)  84
x = (-0.1), y =(true) 1.6506 =(pred)  1.6505999999999998


In [46]:
def getMSE(df, poly_coefs):
    
    N = len(df['X'])
    
    y_preds = getPrediction(poly_coefs, df['X'])
        
    y_residual = df['Y'] - y_preds
    
    return np.sum(np.dot(y_residual.T , y_residual)) / len(df['Y'] - y_residual)

In [47]:
# CHECK

Y_pred = []

for i in range(len(myDf['X'])):
    Y_pred.append( getPolynomialY([1,2,3], myDf['X'][i]) )

print(getMSE(myDf,[1,2,3]), "(pred)=(true)", mean_squared_error(myDf['Y'],Y_pred))

11.022662706627695 (pred)=(true) 11.022662706627695


# (C)

In [48]:
def gradientDescent(theta, df, d, learning_rate, epochs, details=False):
    
    N = len(df['X'])
    
    # UPDATE THETA EPOCHS TIMES ______________________________
    
    for k in range(epochs):
        
        y_preds = getPrediction(theta, df['X'])
        
        derivate = (np.dot( (y_preds - df['Y'] ), df['X'] ) * 2) / len(df['Y'])
        
        theta = theta - learning_rate * derivate
        
        if details: print("Epoch: "+str(k)+", Coefs: ", theta, "MSE: "+ str(round(getMSE(df, theta), 5)))
        
    return theta
    
#coefs = gradientDescent(df = myDf, d = 2, epochs = 500, details=True)

In [49]:
def stochasticGradientDescent(theta, df, d, learning_rate, epochs, details=False):
        
    N = len(df['X'])
    
    # UPDATE THETA EPOCHS TIMES ______________________________
    
    for k in range(epochs):
        
        randomindex = random.randint(0, N-1)
        X = df['X'][randomindex]
        Y = df['Y'][randomindex]
        
        y_preds = getPolynomialY(theta, X)
        
        derivate = (( (y_preds - Y ) * X ) * 2)
        
        theta = theta - learning_rate * derivate
        
        if details: print("Epoch: "+str(k)+", Coefs: ", theta, "MSE: "+ str(round(getMSE(df, theta), 5)))
        
    return theta
    
#coefs = stochasticGradientDescent(df = myDf, d = 2, epochs = 1500, details=True)

In [50]:
def miniBatchStochasticGradientDescent(theta, df, d, learning_rate, mini_batch_size, epochs, details=False):
    
    N = len(df['X'])
        
    # UPDATE THETA EPOCHS TIMES ______________________________
    
    for k in range(epochs):
        
        batchIndices = random.sample(range(0, N), mini_batch_size)
            
        X = df['X'][batchIndices]
        Y = df['Y'][batchIndices]
        
        y_preds = getPrediction(theta, X)
        
        derivate = (np.dot( (y_preds - Y ), X ) * 2) / len(Y)
        
        theta = theta - learning_rate * derivate
        
        if details: print("Epoch: "+str(k)+", Coefs: ", theta, "MSE: "+ str(round(getMSE(df, theta), 5)))
        
    return theta
    
# coefs = miniBatchStochasticGradientDescent(df = myDf, d = 2, mini_batch_size = 2, epochs = 500, details=True)

In [51]:
def Learn_Coefs(df, d, gd_type, learning_rate, mini_batch_size, epochs, details=False):
        
    theta = []
    
    # START FROM A RANDOM NON-ZERO THETA _____________________
    
    for i in range(d+1):
        num = 0
        while num==0:
            num = random.randint(-15.0,15)
        theta.append(num)
    
    if details: print("Random Coefs: ", theta, "MSE: "+ str(round(getMSE(df, theta), 5)))
        
    if (gd_type == 'GD'):
        
        estim_poly_coefs = gradientDescent(theta, df, d, learning_rate, epochs, details)
    
    elif (gd_type == 'SGD'):
        
        estim_poly_coefs = stochasticGradientDescent(theta, df, d, learning_rate, epochs, details)
    
    elif (gd_type == 'mini-batched-SGD'):
        
        estim_poly_coefs = miniBatchStochasticGradientDescent(theta, df, d, learning_rate, mini_batch_size, epochs, details)
    else:
        return "ERROR"
    
    return estim_poly_coefs
    

In [52]:
def fitData(df, d, var, gd_type='GD', learning_rate=0.01, mini_batch_size = 10, epochs = 1500, testing_size=2000, details=False):

    # (1)
    estim_poly_coefs = Learn_Coefs(df, d, gd_type, learning_rate, mini_batch_size, epochs, details)
    
    # (2)
    E_in = getMSE(df, estim_poly_coefs)
    
    #(3)
    E_out = getMSE(getData(testing_size, var), estim_poly_coefs)
        
    return (estim_poly_coefs, E_in, E_out)

In [53]:
# Check

# GD
fitData(df=myDf, d=2, gd_type='GD', learning_rate=0.01, testing_size=2000, epochs = 10, var=2.0, details=True)

Random Coefs:  [1, 12, -11] MSE: 10.04324
Epoch: 0, Coefs:  [  0.97117851  11.97117851 -11.02882149] MSE: 9.76038
Epoch: 1, Coefs:  [  0.94316474  11.94316474 -11.05683526] MSE: 9.49272
Epoch: 2, Coefs:  [  0.91593603  11.91593603 -11.08406397] MSE: 9.23942
Epoch: 3, Coefs:  [  0.8894704  11.8894704 -11.1105296] MSE: 8.9997
Epoch: 4, Coefs:  [  0.86374645  11.86374645 -11.13625355] MSE: 8.77282
Epoch: 5, Coefs:  [  0.8387434  11.8387434 -11.1612566] MSE: 8.55809
Epoch: 6, Coefs:  [  0.81444106  11.81444106 -11.18555894] MSE: 8.35484
Epoch: 7, Coefs:  [  0.79081977  11.79081977 -11.20918023] MSE: 8.16245
Epoch: 8, Coefs:  [  0.76786046  11.76786046 -11.23213954] MSE: 7.98032
Epoch: 9, Coefs:  [  0.74554457  11.74554457 -11.25445543] MSE: 7.80792


(array([  0.74554457,  11.74554457, -11.25445543]),
 7.807915191000899,
 12.731921818456648)

In [54]:
# Check

# SGD
fitData(df=myDf, d=2, gd_type='SGD', learning_rate=0.01, testing_size=2000, epochs = 10, var=2.0, details=True)

Random Coefs:  [-3, 14, 8] MSE: 112.2669
Epoch: 0, Coefs:  [-2.99882358 14.00117642  8.00117642] MSE: 112.31636
Epoch: 1, Coefs:  [-3.05563714 13.94436286  7.94436286] MSE: 109.9421
Epoch: 2, Coefs:  [-3.05444004 13.94555996  7.94555996] MSE: 109.99182
Epoch: 3, Coefs:  [-3.04729963 13.95270037  7.95270037] MSE: 110.28868
Epoch: 4, Coefs:  [-3.34627646 13.65372354  7.65372354] MSE: 98.25676
Epoch: 5, Coefs:  [-3.33795618 13.66204382  7.66204382] MSE: 98.58057
Epoch: 6, Coefs:  [-3.57478717 13.42521283  7.42521283] MSE: 89.61047
Epoch: 7, Coefs:  [-3.59791691 13.40208309  7.40208309] MSE: 88.76185
Epoch: 8, Coefs:  [-3.65430226 13.34569774  7.34569774] MSE: 86.71354
Epoch: 9, Coefs:  [-3.70075551 13.29924449  7.29924449] MSE: 85.04782


(array([-3.70075551, 13.29924449,  7.29924449]),
 85.04781686249831,
 66.45557822808877)

In [55]:
# Check

# Mini-Batch SGD
fitData(df=myDf, d=2, gd_type='mini-batched-SGD', learning_rate=0.01, mini_batch_size=2, epochs = 10, testing_size=2000, var=2.0, details=True)

Random Coefs:  [12, -3, -7] MSE: 58.9354
Epoch: 0, Coefs:  [11.96845449 -3.03154551 -7.03154551] MSE: 58.25615
Epoch: 1, Coefs:  [11.95013631 -3.04986369 -7.04986369] MSE: 57.86588
Epoch: 2, Coefs:  [11.87937032 -3.12062968 -7.12062968] MSE: 56.38696
Epoch: 3, Coefs:  [11.83727586 -3.16272414 -7.16272414] MSE: 55.52892
Epoch: 4, Coefs:  [11.80399222 -3.19600778 -7.19600778] MSE: 54.86191
Epoch: 5, Coefs:  [11.74368416 -3.25631584 -7.25631584] MSE: 53.67908
Epoch: 6, Coefs:  [11.69083449 -3.30916551 -7.30916551] MSE: 52.66981
Epoch: 7, Coefs:  [11.66292204 -3.33707796 -7.33707796] MSE: 52.14705
Epoch: 8, Coefs:  [11.59075967 -3.40924033 -7.40924033] MSE: 50.82849
Epoch: 9, Coefs:  [11.53145934 -3.46854066 -7.46854066] MSE: 49.7805


(array([11.53145934, -3.46854066, -7.46854066]),
 49.78050483672055,
 66.24530823266197)

# (D)

In [57]:
def experiment(N, d, var, M=50, details=False):
    
    training_data_df = getData(N,var)
    
    polynomials = []
    E_ins = []
    E_outs = []
    
    if details: print("_____________________________________________________________________ START TRIALS\n")
    
    for i in range(M):
        if details: print("______________________________________ M="+str(i))
        (p,ein,eout) = fitData(training_data_df, d, var, gd_type='GD', learning_rate=0.01, epochs=1500, testing_size=2000, details=False)
        polynomials.append(p)
        E_ins.append(ein)
        E_outs.append(eout)
        
        if details: print("FINAL MODEL: Coefs: ", p, " E_in: "+ str(ein)+" E_out: "+str(eout))
        
    if details: print("\n_____________________________________________________________________ AVERAGE POLYNOMIALS\n")
    
    E_in_bar = statistics.mean(E_ins)
    E_out_bar = statistics.mean(E_outs)
    
    average_polynomial = []
    transpose_coefs = np.array(polynomials).T.tolist()
    
    for i in range(d+1):
        average_polynomial.append(statistics.mean(transpose_coefs[i]))
        
    if details: print("AVERAGE MODEL: Coefs: ", average_polynomial, " E_in_bar: "+ str(round(E_in_bar,5))+" E_out_bar: "+str(round(E_out_bar,5)))
        
        
    if details: print("\n_____________________________________________________________________ TESTING\n")
        
    testing_data_df = getData(2000, var)
    
    E_bias = getMSE(testing_data_df, average_polynomial)
    
    if details: print("E_bias: "+str(E_bias))
    
    return (E_in_bar, E_out_bar, E_bias)

experiment(200, 2, 1.0, 50, True)

_____________________________________________________________________ START TRIALS

______________________________________ M=0
FINAL MODEL: Coefs:  [ 7.06557929 -4.93442071 -7.93442071]  E_in: 20.438940593213687 E_out: 20.631228281244933
______________________________________ M=1
FINAL MODEL: Coefs:  [-0.65029208  1.34970792 -0.65029208]  E_in: 1.4973059788885983 E_out: 1.5820899234987582
______________________________________ M=2
FINAL MODEL: Coefs:  [-0.7218937 -0.7218937  2.2781063]  E_in: 1.4180182714698844 E_out: 1.5711783512906805
______________________________________ M=3
FINAL MODEL: Coefs:  [ 3.40009509 -2.59990491 -3.59990491]  E_in: 6.156283381369592 E_out: 6.092923771694247
______________________________________ M=4
FINAL MODEL: Coefs:  [  9.45332247 -15.54667753   1.45332247]  E_in: 21.99488149916926 E_out: 21.87385832924212
______________________________________ M=5
FINAL MODEL: Coefs:  [-5.08558223  6.91441777  0.91441777]  E_in: 7.1585786577753865 E_out: 8.0813824873614

(11.679086341675387, 12.192989589483538, 1.4490109103204705)

# (E)

In [None]:
Nes = [2,5,10,20,50,100,200]
Des = [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20]
Vares = [0.01, 0.1, 1]