### 2. Linear Regression

### 3. Ridge Regression

In [2]:
import numpy as np

#### 3.1

In [90]:
class L2NormPenaltyNode(object):
    """Graph node whose output is l2_reg * ||w||^2.

    l2_reg is a fixed scalar (it is not a node and receives no gradient);
    w is the single predecessor node and holds a numpy vector in w.out.
    """

    def __init__(self, l2_reg, w, node_name):
        """
        Parameters:
        l2_reg: a scalar value >=0 (not a node)
        w: a node for which w.out is a numpy vector
        node_name: node's name (a string)
        """
        self.node_name = node_name
        self.l2_reg = np.array(l2_reg)
        self.w = w
        self.w_2n = None  # squared 2-norm of w, cached by forward()
        self.out = None
        self.d_out = None

    def forward(self):
        """Compute and return l2_reg * (w . w); reset d_out to zeros of the output's shape."""
        weights = self.w.out
        self.w_2n = np.dot(weights, weights)
        self.out = self.l2_reg * self.w_2n
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        """Accumulate d_out * 2 * l2_reg * w into w.d_out and return this node's d_out."""
        upstream_scale = 2 * self.d_out * self.l2_reg
        self.w.d_out += upstream_scale * self.w.out
        return self.d_out

    def get_predecessors(self):
        """Return the nodes this node reads from (only w; l2_reg is a constant)."""
        return [self.w]

#### 3.2

In [88]:
class SumNode(object):
    """Graph node whose output is the elementwise sum a + b of two numpy arrays."""

    def __init__(self, a, b, node_name):
        """
        Parameters:
        a: node for which a.out is a numpy array
        b: node for which b.out is a numpy array of the same shape as a
        node_name: node's name (a string)
        """
        self.node_name = node_name
        self.a, self.b = a, b
        self.out = None
        self.d_out = None

    def forward(self):
        """Compute and return a.out + b.out; reset d_out to zeros of the output's shape."""
        self.out = self.a.out + self.b.out
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        """Pass this node's d_out unchanged to both operands (d(a+b)/da = d(a+b)/db = 1)."""
        for operand in (self.a, self.b):
            operand.d_out += self.d_out
        return self.d_out

    def get_predecessors(self):
        """Return the two operand nodes, a first."""
        return [self.a, self.b]

#### 3.3

In [None]:
class RidgeRegression(BaseEstimator, RegressorMixin):
    """ Ridge regression with computation graph """

    def __init__(self, l2_reg=1, step_size=.005,  max_num_epochs = 5000):
        """
        Parameters:
        l2_reg: scalar weight handed to the L2 penalty node
        step_size: stored on the estimator as step_size
        max_num_epochs: stored on the estimator as max_num_epochs
        """
        # Hyperparameters
        self.l2_reg = l2_reg
        self.step_size = step_size
        self.max_num_epochs = max_num_epochs

        # Leaf nodes of the computation graph
        self.x = nodes.ValueNode(node_name="x")  # vector input
        self.y = nodes.ValueNode(node_name="y")  # scalar response
        self.w = nodes.ValueNode(node_name="w")  # parameter vector
        self.b = nodes.ValueNode(node_name="b")  # bias parameter (scalar)

        # Prediction from x, w and b
        self.prediction = nodes.VectorScalarAffineNode(
            x=self.x, w=self.w, b=self.b, node_name="prediction")

        # Objective = data-fit term + L2 penalty on w
        self.obj_reg = nodes.SquaredL2DistanceNode(
            a=self.prediction, b=self.y, node_name="square loss")
        self.obj_norm = nodes.L2NormPenaltyNode(
            l2_reg=self.l2_reg, w=self.w, node_name="l2 penalty")
        self.objective = nodes.SumNode(
            a=self.obj_reg, b=self.obj_norm, node_name="penalized sq loss")

        # Roles of the leaf nodes, as handed to the graph wrapper
        self.inputs = [self.x]
        self.outcomes = [self.y]
        self.parameters = [self.w, self.b]

        self.graph = graph.ComputationGraphFunction(
            self.inputs, self.outcomes, self.parameters,
            self.prediction, self.objective)


In [None]:
python ridge_regression.t.py

In [None]:
DEBUG: (Node l2 norm node) Max rel error for partial deriv w.r.t. w is 1.86168633954e-09.
.DEBUG: (Node sum node) Max rel error for partial deriv w.r.t. a is 5.83867107099e-10.
DEBUG: (Node sum node) Max rel error for partial deriv w.r.t. b is 5.83867107099e-10.
.DEBUG: (Parameter w) Max rel error for partial deriv 2.40473602971e-09.
DEBUG: (Parameter b) Max rel error for partial deriv 4.08892766281e-10.
.
----------------------------------------------------------------------
Ran 3 tests in 0.016s

OK

In [None]:
python ridge_regression.py

In [None]:
Epoch  0 : Ave objective= 1.2045169452292717  Ave training loss:  0.6444665010240027
Epoch  50 : Ave objective= 0.31970141440380045  Ave training loss:  0.21304486662737573
Epoch  100 : Ave objective= 0.3171781211595713  Ave training loss:  0.20234219039651524
Epoch  150 : Ave objective= 0.31792200647600677  Ave training loss:  0.1986326085417778
Epoch  200 : Ave objective= 0.31555364999794794  Ave training loss:  0.19733622523602157
Epoch  250 : Ave objective= 0.3141917354032541  Ave training loss:  0.1970300851709407
Epoch  300 : Ave objective= 0.3149599113865569  Ave training loss:  0.1972961300431405
Epoch  350 : Ave objective= 0.31539761927136983  Ave training loss:  0.1969424759746572
Epoch  400 : Ave objective= 0.3149355825113064  Ave training loss:  0.19695536368414363
Epoch  450 : Ave objective= 0.3149257723553543  Ave training loss:  0.19755819198555208
Epoch  500 : Ave objective= 0.3143748880174142  Ave training loss:  0.19825061129835508
Epoch  550 : Ave objective= 0.3144449852609848  Ave training loss:  0.19719584311125074
Epoch  600 : Ave objective= 0.31062507629508707  Ave training loss:  0.199299239418904
Epoch  650 : Ave objective= 0.3117255410064467  Ave training loss:  0.2005090397559372
Epoch  700 : Ave objective= 0.30942263153701055  Ave training loss:  0.19807146833640776
Epoch  750 : Ave objective= 0.3124756055212761  Ave training loss:  0.19810663127619815
Epoch  800 : Ave objective= 0.3095932965062739  Ave training loss:  0.19786532628508663
Epoch  850 : Ave objective= 0.3109500662818454  Ave training loss:  0.19914652632701726
Epoch  900 : Ave objective= 0.3088824887795819  Ave training loss:  0.19950281000881248
Epoch  950 : Ave objective= 0.3099324519403396  Ave training loss:  0.20001079238592748
Epoch  1000 : Ave objective= 0.31049802921951913  Ave training loss:  0.20021843414608909
Epoch  1050 : Ave objective= 0.30684866969137653  Ave training loss:  0.20302648176139956
Epoch  1100 : Ave objective= 0.3109847974070922  Ave training loss:  0.19809911357653875
Epoch  1150 : Ave objective= 0.3097733823270748  Ave training loss:  0.1982866757298536
Epoch  1200 : Ave objective= 0.3109282768552757  Ave training loss:  0.1991049843180615
Epoch  1250 : Ave objective= 0.3095459233898459  Ave training loss:  0.1994572176136299
Epoch  1300 : Ave objective= 0.30813565030902507  Ave training loss:  0.1983379068750207
Epoch  1350 : Ave objective= 0.3089614635741731  Ave training loss:  0.19885213130554774
Epoch  1400 : Ave objective= 0.3094836073944941  Ave training loss:  0.1994393284855249
Epoch  1450 : Ave objective= 0.30974514255077745  Ave training loss:  0.1985814980869038
Epoch  1500 : Ave objective= 0.30814208396790493  Ave training loss:  0.20292043273591498
Epoch  1550 : Ave objective= 0.30886846758298  Ave training loss:  0.1987748096584506
Epoch  1600 : Ave objective= 0.3087791955246176  Ave training loss:  0.19899396054691262
Epoch  1650 : Ave objective= 0.3084285432777255  Ave training loss:  0.19920623144507374
Epoch  1700 : Ave objective= 0.30527483110332737  Ave training loss:  0.20599363046845545
Epoch  1750 : Ave objective= 0.3063505827764839  Ave training loss:  0.19909656658496974
Epoch  1800 : Ave objective= 0.307740643727055  Ave training loss:  0.20010771479242043
Epoch  1850 : Ave objective= 0.3069013027168506  Ave training loss:  0.20006260929216207
Epoch  1900 : Ave objective= 0.306720099029395  Ave training loss:  0.19931586647963237
Epoch  1950 : Ave objective= 0.3074130285913045  Ave training loss:  0.19997364404047796
Epoch  2000 : Ave objective= 0.3069375911843509  Ave training loss:  0.20417287414101845
Epoch  0 : Ave objective= 0.6682955044844806  Ave training loss:  0.31977715108244087
Epoch  50 : Ave objective= 0.11150390042003569  Ave training loss:  0.12731521620285755
Epoch  100 : Ave objective= 0.09147737645588784  Ave training loss:  0.0689850604182719
Epoch  150 : Ave objective= 0.07631798774387988  Ave training loss:  0.05493596042933009
Epoch  200 : Ave objective= 0.06198226405821661  Ave training loss:  0.04477411610249885
Epoch  250 : Ave objective= 0.05783183246203141  Ave training loss:  0.051748462490947576
Epoch  300 : Ave objective= 0.05086408060533934  Ave training loss:  0.03584877582512448
Epoch  350 : Ave objective= 0.047121533896515855  Ave training loss:  0.032499869020959284
Epoch  400 : Ave objective= 0.03874982561360256  Ave training loss:  0.03047541608130463
Epoch  450 : Ave objective= 0.03534880889514072  Ave training loss:  0.027313390005803614
Epoch  500 : Ave objective= 0.03539027486531891  Ave training loss:  0.037540046563635784

Conclusion: \\
When l2_reg=1, the average training error is L=0.20417287414101845, with step size=0.00005 and 2000 epochs;\\
When l2_reg=0, the average training error is L=0.037540046563635784, with step size=0.0005 and 500 epochs.

![RR](Figure_3_3.png)

### 4. Multilayer Perceptron

#### 4.1.1
1.Solution:\\
Consider $y_r, \forall r \in (1,...,m)$:\\
$$y_r =\sum_{j=1}^{d} W_{rj}x_{j}+b_r$$
So $\frac{\partial{y_r}}{\partial{W_{kj}}}=x_{j}$ when $k=r$, and $\frac{\partial{y_r}}{\partial{W_{kj}}}=0$ otherwise.\\
So in $\frac{\partial{J}}{\partial{W_{ij}}}=\sum_{r=1}^{m}\frac{\partial{J}}{\partial{y_{r}}}\frac{\partial{y_r}}{\partial{W_{ij}}}$, only the term with $r=i$ can be nonzero. We get:\\
$$\frac{\partial{J}}{\partial{W_{ij}}}=\frac{\partial{J}}{\partial{y_{i}}}\frac{\partial{y_i}}{\partial{W_{ij}}}=\frac{\partial{J}}{\partial{y_{i}}}x_{j}$$

2.Solution:\\
By the convention given in the question, the entry in row $i$ and column $j$ is $\frac{\partial{J}}{\partial{y_{i}}}x_{j}$, which is the product of $\frac{\partial{J}}{\partial{y}}[i]$ and $x[j]$.\\
So the whole matrix $\frac{\partial{J}}{\partial{W}}$ can be expressed as:\\
$$\frac{\partial{J}}{\partial{W}}=\frac{\partial{J}}{\partial{y}}\otimes x$$
where $\otimes$ denotes the outer product, i.e. $\frac{\partial{J}}{\partial{W}}=\frac{\partial{J}}{\partial{y}}x^{T}$ when both are treated as column vectors.

3.Solution:\\
For $\forall i \in (1,...,d)$, \\
$$\frac{\partial{J}}{\partial{x_{i}}}=\sum_{k=1}^{m}\frac{\partial{J}}{\partial{y_{k}}}\frac{\partial{y_k}}{\partial{x_{i}}}=\sum_{k=1}^{m}\frac{\partial{J}}{\partial{y_{k}}}W_{ki}$$
So we can rewrite the expression as:\\
$$\frac{\partial{J}}{\partial{x_{i}}}=W_{\cdot,i}^{T}(\frac{\partial{J}}{\partial{y}})$$
where $W_{\cdot,i}$ is the $i$th column of $W$.\\
Thus we get:\\
$$\frac{\partial{J}}{\partial{x}}=W^{T}(\frac{\partial{J}}{\partial{y}})$$

4.Solution:\\
Since $b_i$ only appears in $y_i$, for $\forall{i} \in (1,...,m)$,\\
$$\frac{\partial{J}}{\partial{b_{i}}}=\frac{\partial{J}}{\partial{y_{i}}} \frac{\partial{y_{i}}}{\partial{b_{i}}}=\frac{\partial{J}}{\partial{y_{i}}}$$
So $\frac{\partial{J}}{\partial{b}}=\frac{\partial{J}}{\partial{y}}$

#### 4.1.2

1.Solution:\\
For every valid index $i$ of $A$, we have:\\
$$\frac{\partial{J}}{\partial{A_{i}}}=\frac{\partial{J}}{\partial{S_{i}}}\frac{d\sigma(A_{i})}{d{A_{i}}}$$
Since the $i$th entry of $\sigma^{\prime}(A)$ is $\frac{d\sigma(A_{i})}{d{A_{i}}}$ and the $i$th entry of $\frac{\partial{J}}{\partial{S}}$ is $\frac{\partial{J}}{\partial{S_{i}}}$, we get:\\
$$\frac{\partial{J}}{\partial{A}}=\frac{\partial{J}}{\partial{S}}\odot{}\sigma^{\prime}(A)$$

#### 4.2

#### 4.2.1

In [None]:
class AffineNode(object):
    """Node implementing affine transformation (W,x,b)-->Wx+b, where W is a matrix,
    and x and b are vectors
        Parameters:
        W: node for which W.out is a numpy array of shape (m,d)
        x: node for which x.out is a numpy array of shape (d)
        b: node for which b.out is a numpy array of shape (m) (i.e. vector of length m)
        node_name: node's name (a string)
    """
    def __init__(self, W, x, b, node_name):
        self.node_name = node_name
        self.W = W
        self.x = x
        self.b = b
        self.out = None
        self.d_out = None

    def forward(self):
        """Compute and return W.out @ x.out + b.out; reset d_out to zeros of the output's shape."""
        self.out = np.dot(self.W.out, self.x.out) + self.b.out
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        """Accumulate the gradients of this node's inputs and return this node's d_out.

        With g = d_out (shape (m)):
            dJ/dW = outer(g, x)   (shape (m,d))
            dJ/dx = W^T g         (shape (d))
            dJ/db = g             (shape (m))
        """
        # Read the operands through self: bare `x` / `W` are not defined in this
        # scope and would raise NameError (or silently use unrelated globals).
        d_W = np.outer(self.d_out, self.x.out)
        d_x = np.dot(self.W.out.T, self.d_out)
        d_b = self.d_out
        self.W.d_out += d_W
        self.x.d_out += d_x
        self.b.d_out += d_b
        return self.d_out

    def get_predecessors(self):
        """Return the input nodes in the order W, x, b."""
        return [self.W, self.x, self.b]

#### 4.2.2

In [1]:
class TanhNode(object):
    """Node tanh(a), where tanh is applied elementwise to the array a
        Parameters:
        a: node for which a.out is a numpy array
        node_name: node's name (a string)
    """
    def __init__(self, a, node_name):
        self.node_name = node_name
        self.a = a
        self.out = None
        self.d_out = None
        self.tanh_a = None  # tanh(a.out), cached by forward() for reuse in backward()

    def forward(self):
        """Compute and return tanh(a.out); reset d_out to zeros of the output's shape."""
        # Read the input through self: a bare `a` is not defined in this scope
        # and would raise NameError (or silently use an unrelated global).
        self.tanh_a = np.tanh(self.a.out)
        self.out = self.tanh_a
        self.d_out = np.zeros(self.out.shape)
        return self.out

    def backward(self):
        """Accumulate d_out * (1 - tanh(a)^2) into a.d_out and return this node's d_out."""
        # d/da tanh(a) = 1 - tanh(a)^2, applied elementwise.
        d_a = self.d_out * (1 - self.tanh_a ** 2)
        self.a.d_out += d_a
        return self.d_out

    def get_predecessors(self):
        """Return the single input node."""
        return [self.a]
        

#### 4.2.3

In [None]:
class MLPRegression(BaseEstimator, RegressorMixin):
    """ MLP regression with computation graph

    Graph: AffineNode(w1, x, b1) -> TanhNode -> VectorScalarAffineNode(w2, b2)
    gives the prediction; SquaredL2DistanceNode(prediction, y) is the objective.
    """
    def __init__(self, num_hidden_units=10, step_size=.005, init_param_scale=0.01, max_num_epochs = 5000):
        """
        Parameters:
        num_hidden_units: number of rows of w1 / length of b1 and w2
        step_size: stored on the estimator as step_size
        init_param_scale: s such that initial parameters are drawn from Uniform(-s, s) in fit()
        max_num_epochs: stored on the estimator as max_num_epochs
        """
        self.num_hidden_units = num_hidden_units
        # Store the caller's value. This was hard-coded to 0.01, which silently
        # ignored the argument and made get_params()/clone() report a wrong value.
        self.init_param_scale = init_param_scale
        self.max_num_epochs = max_num_epochs
        self.step_size = step_size

        # Build computation graph
        self.x = nodes.ValueNode(node_name="x") # to hold a vector input
        self.y = nodes.ValueNode(node_name="y") # to hold a scalar response
        self.w1= nodes.ValueNode(node_name="w1") # hidden-layer weight matrix
        self.w2= nodes.ValueNode(node_name="w2") # output-layer weight vector
        self.b1= nodes.ValueNode(node_name="b1") # hidden-layer bias vector
        self.b2= nodes.ValueNode(node_name="b2") # output-layer bias (scalar)

        # Hidden layer: affine map followed by elementwise tanh
        self.hid_1 = nodes.AffineNode(W=self.w1,x=self.x,b=self.b1,
                                      node_name="hidden1")
        self.hid_2 = nodes.TanhNode(a=self.hid_1,
                                    node_name="hidden2")

        self.prediction = nodes.VectorScalarAffineNode(x=self.hid_2, w=self.w2, b=self.b2,
                                                       node_name="prediction")

        self.objective = nodes.SquaredL2DistanceNode(a=self.prediction, b=self.y,
                                                     node_name="objective")

        self.inputs = [self.x]
        self.outcomes = [self.y]
        self.parameters = [self.w1, self.b1,self.w2,self.b2]

        self.graph = graph.ComputationGraphFunction(self.inputs, self.outcomes,
                                                          self.parameters, self.prediction,
                                                          self.objective)

    def fit(self, X, y):
        """Draw initial parameters from Uniform(-s, s), s = init_param_scale, and load them into the graph.

        Parameters:
        X: 2-D array; X.shape is unpacked as (num_instances, num_ftrs)
        y: array of responses, flattened to 1-D
        """
        num_instances, num_ftrs = X.shape
        y = y.reshape(-1)

        # Small random (not all-zero) initial values, to break symmetry between hidden units
        s = self.init_param_scale
        init_values = {"w1": np.random.uniform(-s,s,[self.num_hidden_units,num_ftrs]),
                       "b1": np.random.uniform(-s,s,self.num_hidden_units),
                       "w2": np.random.uniform(-s,s,[self.num_hidden_units]),
                       "b2": np.array(np.random.uniform(-s,s))}

        self.graph.set_parameters(init_values)

In [None]:
python mlp_regression.t.py

In [None]:
DEBUG: (Node affine) Max rel error for partial deriv w.r.t. W is 1.4262984833515724e-08.
DEBUG: (Node affine) Max rel error for partial deriv w.r.t. x is 8.223950139381322e-10.
DEBUG: (Node affine) Max rel error for partial deriv w.r.t. b is 5.838672051389452e-10.
.DEBUG: (Node tanh) Max rel error for partial deriv w.r.t. a is 3.080940169299588e-09.
.DEBUG: (Parameter w1) Max rel error for partial deriv 2.567900441046858e-08.
DEBUG: (Parameter b1) Max rel error for partial deriv 3.25437097957294e-09.
DEBUG: (Parameter w2) Max rel error for partial deriv 1.3454349429215027e-09.
DEBUG: (Parameter b2) Max rel error for partial deriv 2.675577833559226e-11.
.
----------------------------------------------------------------------
Ran 3 tests in 0.034s

OK

In [None]:
python mlp_regression.py

In [None]:
Epoch  0 : Ave objective= 3.1591014751860076  Ave training loss:  2.7362316589545106
Epoch  50 : Ave objective= 0.7677350306022097  Ave training loss:  0.7597321419094898
Epoch  100 : Ave objective= 0.7461508386700006  Ave training loss:  0.7383849331389437
Epoch  150 : Ave objective= 0.719311408427905  Ave training loss:  0.7128619465324326
Epoch  200 : Ave objective= 0.6980911075326794  Ave training loss:  0.6913386280159942
Epoch  250 : Ave objective= 0.6830844325382043  Ave training loss:  0.6762676687496626
Epoch  300 : Ave objective= 0.6714060188571502  Ave training loss:  0.6634604246237308
Epoch  350 : Ave objective= 0.6541378542243406  Ave training loss:  0.6450006018953023
Epoch  400 : Ave objective= 0.6200928004489193  Ave training loss:  0.6082772788821091
Epoch  450 : Ave objective= 0.5545834370722189  Ave training loss:  0.5412474669166243
Epoch  500 : Ave objective= 0.4580432769599757  Ave training loss:  0.45666634644796106
Epoch  550 : Ave objective= 0.38070944512720273  Ave training loss:  0.3658963600990613
Epoch  600 : Ave objective= 0.3378834629111795  Ave training loss:  0.32479274088199434
Epoch  650 : Ave objective= 0.3175627603487417  Ave training loss:  0.3051663258487649
Epoch  700 : Ave objective= 0.30666700002034897  Ave training loss:  0.29380197216678694
Epoch  750 : Ave objective= 0.29646349086808316  Ave training loss:  0.28612347707394126
Epoch  800 : Ave objective= 0.29103442246795913  Ave training loss:  0.27976206489276484
Epoch  850 : Ave objective= 0.2824046999110614  Ave training loss:  0.282547929657588
Epoch  900 : Ave objective= 0.2793105988587881  Ave training loss:  0.2715173228187371
Epoch  950 : Ave objective= 0.27608032367616686  Ave training loss:  0.26767653188257556
Epoch  1000 : Ave objective= 0.2751000909381586  Ave training loss:  0.26416295953010066
Epoch  1050 : Ave objective= 0.2681974433967226  Ave training loss:  0.26725609468174455
Epoch  1100 : Ave objective= 0.2696226075116502  Ave training loss:  0.2591101217527617
Epoch  1150 : Ave objective= 0.2607295925635616  Ave training loss:  0.2666777085632656
Epoch  1200 : Ave objective= 0.2656294144535176  Ave training loss:  0.2552224969126479
Epoch  1250 : Ave objective= 0.26235826647843574  Ave training loss:  0.2548450287733928
Epoch  1300 : Ave objective= 0.25976796325219437  Ave training loss:  0.25216323080752223
Epoch  1350 : Ave objective= 0.261221434512845  Ave training loss:  0.25027387653820304
Epoch  1400 : Ave objective= 0.2574181243331116  Ave training loss:  0.24914241014187624
Epoch  1450 : Ave objective= 0.2549377809310199  Ave training loss:  0.24872880997017105
Epoch  1500 : Ave objective= 0.2556442907263385  Ave training loss:  0.24600707423543533
Epoch  1550 : Ave objective= 0.25247666423693926  Ave training loss:  0.24514991456690893
Epoch  1600 : Ave objective= 0.25280092843280466  Ave training loss:  0.24446438491356356
Epoch  1650 : Ave objective= 0.25205544380484335  Ave training loss:  0.24244241849597511
Epoch  1700 : Ave objective= 0.2492364668785854  Ave training loss:  0.2414931271124457
Epoch  1750 : Ave objective= 0.24948454109422394  Ave training loss:  0.2403078583712665
Epoch  1800 : Ave objective= 0.24907863996663449  Ave training loss:  0.2395164444195047
Epoch  1850 : Ave objective= 0.24720406224290262  Ave training loss:  0.23833205163304017
Epoch  1900 : Ave objective= 0.2467927813499694  Ave training loss:  0.2374864407913448
Epoch  1950 : Ave objective= 0.24615715588218148  Ave training loss:  0.2375493489361685
Epoch  2000 : Ave objective= 0.24557643554055558  Ave training loss:  0.2361426301619667
Epoch  2050 : Ave objective= 0.24444023044003804  Ave training loss:  0.23517585750580308
Epoch  2100 : Ave objective= 0.234992340921919  Ave training loss:  0.256770928611745
Epoch  2150 : Ave objective= 0.24097537283035275  Ave training loss:  0.23957957896011148
Epoch  2200 : Ave objective= 0.2424078299557825  Ave training loss:  0.23278531929323765
Epoch  2250 : Ave objective= 0.24151120404933007  Ave training loss:  0.23273181967510606
Epoch  2300 : Ave objective= 0.24064912747130932  Ave training loss:  0.2314191431602656
Epoch  2350 : Ave objective= 0.24017443533055066  Ave training loss:  0.2311178773721087
Epoch  2400 : Ave objective= 0.23766120070278934  Ave training loss:  0.23610280426471905
Epoch  2450 : Ave objective= 0.23907538550925864  Ave training loss:  0.23029465255453144
Epoch  2500 : Ave objective= 0.23872708102801674  Ave training loss:  0.22939404702984154
Epoch  2550 : Ave objective= 0.23875667347935967  Ave training loss:  0.22903072133273322
Epoch  2600 : Ave objective= 0.23825894895085173  Ave training loss:  0.22891609779976763
Epoch  2650 : Ave objective= 0.23645260183189015  Ave training loss:  0.2278801900016521
Epoch  2700 : Ave objective= 0.23688191813881349  Ave training loss:  0.22872950884388285
Epoch  2750 : Ave objective= 0.2368524019353053  Ave training loss:  0.2267836969537594
Epoch  2800 : Ave objective= 0.23613543373853713  Ave training loss:  0.22639368249372263
Epoch  2850 : Ave objective= 0.23206251338006062  Ave training loss:  0.2293831843240886
Epoch  2900 : Ave objective= 0.2359771883386923  Ave training loss:  0.2277829995363427
Epoch  2950 : Ave objective= 0.23590991816118645  Ave training loss:  0.22493178307132158
Epoch  3000 : Ave objective= 0.23542061499215244  Ave training loss:  0.2260919038845546
Epoch  3050 : Ave objective= 0.2342423764505519  Ave training loss:  0.22727633857190604
Epoch  3100 : Ave objective= 0.2334156628172298  Ave training loss:  0.2243824089692325
Epoch  3150 : Ave objective= 0.23437685086322335  Ave training loss:  0.22398457898870341
Epoch  3200 : Ave objective= 0.23209204812936374  Ave training loss:  0.2235162857803013
Epoch  3250 : Ave objective= 0.22997572847212325  Ave training loss:  0.22415495816414613
Epoch  3300 : Ave objective= 0.22991050713614317  Ave training loss:  0.22442779815868424
Epoch  3350 : Ave objective= 0.23005510607554158  Ave training loss:  0.22224845621985825
Epoch  3400 : Ave objective= 0.23028696498496384  Ave training loss:  0.22140010145520012
Epoch  3450 : Ave objective= 0.22931852362344685  Ave training loss:  0.22902067015465444
Epoch  3500 : Ave objective= 0.22957896508995426  Ave training loss:  0.22267647851399947
Epoch  3550 : Ave objective= 0.22706335651349627  Ave training loss:  0.23117005125160314
Epoch  3600 : Ave objective= 0.23048217661074774  Ave training loss:  0.22046462503550546
Epoch  3650 : Ave objective= 0.22940338477432742  Ave training loss:  0.22017283439342297
Epoch  3700 : Ave objective= 0.22637328399642595  Ave training loss:  0.22040402012195418
Epoch  3750 : Ave objective= 0.2244326384162156  Ave training loss:  0.22710892098956006
Epoch  3800 : Ave objective= 0.22655219698981288  Ave training loss:  0.22084215594422518
Epoch  3850 : Ave objective= 0.22877055614824993  Ave training loss:  0.2182947968361353
Epoch  3900 : Ave objective= 0.2263906386312643  Ave training loss:  0.21947057783264134
Epoch  3950 : Ave objective= 0.22371801690593807  Ave training loss:  0.21935790183164802
Epoch  4000 : Ave objective= 0.22753503254247312  Ave training loss:  0.21863518284860783
Epoch  4050 : Ave objective= 0.22490957323261643  Ave training loss:  0.21723571613570591
Epoch  4100 : Ave objective= 0.2246669690937707  Ave training loss:  0.22011200907616793
Epoch  4150 : Ave objective= 0.22614676264411315  Ave training loss:  0.21587162046843894
Epoch  4200 : Ave objective= 0.22066982660672643  Ave training loss:  0.2324253937044383
Epoch  4250 : Ave objective= 0.2246824147153844  Ave training loss:  0.21780994388517072
Epoch  4300 : Ave objective= 0.22442736211454073  Ave training loss:  0.21492488122364378
Epoch  4350 : Ave objective= 0.22396937597380454  Ave training loss:  0.2146660699525994
Epoch  4400 : Ave objective= 0.220481878901154  Ave training loss:  0.21993178013200865
Epoch  4450 : Ave objective= 0.22353240666437516  Ave training loss:  0.21657812054073208
Epoch  4500 : Ave objective= 0.22350226396873993  Ave training loss:  0.21339918081826498
Epoch  4550 : Ave objective= 0.22054503158845887  Ave training loss:  0.21603987294123603
Epoch  4600 : Ave objective= 0.22247053435413858  Ave training loss:  0.2130819919645474
Epoch  4650 : Ave objective= 0.2171009682815295  Ave training loss:  0.21875913020778456
Epoch  4700 : Ave objective= 0.2204143408087334  Ave training loss:  0.2151291276816613
Epoch  4750 : Ave objective= 0.2193323869075738  Ave training loss:  0.2190749525253178
Epoch  4800 : Ave objective= 0.22149604139809578  Ave training loss:  0.211560455500563
Epoch  4850 : Ave objective= 0.22138332401187275  Ave training loss:  0.21138296555540767
Epoch  4900 : Ave objective= 0.22023782115347598  Ave training loss:  0.2125850615323971
Epoch  4950 : Ave objective= 0.21989305650334864  Ave training loss:  0.21275587960182216
Epoch  5000 : Ave objective= 0.2194938001229822  Ave training loss:  0.21088639477651444
Epoch  0 : Ave objective= 2.5391353049254115  Ave training loss:  1.185301968326381
Epoch  50 : Ave objective= 0.11578914352391281  Ave training loss:  0.1180625025132354
Epoch  100 : Ave objective= 0.09267584418118249  Ave training loss:  0.07897920418454332
Epoch  150 : Ave objective= 0.07791311716751997  Ave training loss:  0.060736271339982564
Epoch  200 : Ave objective= 0.06728620454979899  Ave training loss:  0.052556983728144095
Epoch  250 : Ave objective= 0.06105088153134284  Ave training loss:  0.05030195651642041
Epoch  300 : Ave objective= 0.05004994044825338  Ave training loss:  0.06764223540773093
Epoch  350 : Ave objective= 0.04745820564971187  Ave training loss:  0.03316839555456963
Epoch  400 : Ave objective= 0.046591797226093655  Ave training loss:  0.028595487466373817
Epoch  450 : Ave objective= 0.03590279129011667  Ave training loss:  0.052615892978353146
Epoch  500 : Ave objective= 0.03484356303899753  Ave training loss:  0.027027353055170858

Conclusion: \\
When the initial-parameter sampling threshold (init_param_scale) is 0.0005, the average training error is L=0.21088639477651444, with step size=0.001 and 5000 epochs;\\
When the initial-parameter sampling threshold (init_param_scale) is 0.01, the average training error is L=0.027027353055170858, with step size=0.0005 and 500 epochs.

![MLP fitting](Figure_4_2_3.png)

In [None]:
# Print the message once. The original `while True:` loop never terminated,
# so this cell hung any "Restart Kernel -> Run All" of the notebook.
print('Thank you all staff in DS-GA 1003 ML!!')

In [3]:
import numpy as np
# Scratch check: draw 5 samples uniformly from [-0.1, 0.1); no seed is set, so the values differ on every run.
np.random.uniform(-0.1,0.1,[5])

array([ 0.00329486, -0.03578585,  0.00206359, -0.05185712,  0.06492482])