In [7]:
'''
Test Case
---------
>>> prng = np.random.RandomState(0)
>>> N = 100

>>> true_w_F = np.asarray([1.1, -2.2, 3.3])
>>> true_b = 0.0
>>> x_NF = prng.randn(N, 3)
>>> y_N = true_b + np.dot(x_NF, true_w_F) + 0.03 * prng.randn(N)

>>> linear_regr = LeastSquaresLinearRegressor()
>>> linear_regr.fit(x_NF, y_N)

>>> yhat_N = linear_regr.predict(x_NF)
>>> np.set_printoptions(precision=3, formatter={'float':lambda x: '% .3f' % x})
>>> print(linear_regr.w_F)
[ 1.099 -2.202  3.301]
>>> print(np.asarray([linear_regr.b]))
[-0.005]
'''

import numpy as np
# No other imports allowed!

class LeastSquaresLinearRegressor(object):
    ''' A linear regression model with sklearn-like API

    Fit by solving the "least squares" optimization problem.

    Attributes
    ----------
    * self.w_F : 1D numpy array, size n_features (= F)
        vector of weights, one value for each feature
    * self.b : float
        scalar real-valued bias or "intercept"
    '''

    def __init__(self):
        ''' Constructor of an sklearn-like regressor

        Should do nothing. Attributes are only set after calling 'fit'.
        '''
        # Leave this alone
        pass

    def fit(self, x_NF, y_N):
        r''' Compute and store weights that solve least-squares problem.

        Args
        ----
        x_NF : 2D numpy array, shape (n_examples, n_features) = (N, F)
            Input measurements ("features") for all examples in train set.
            Each row is a feature vector for one example.
        y_N : 1D numpy array, shape (n_examples,) = (N,)
            Response measurements for all examples in train set.
            Each entry is the scalar response for one example.

        Returns
        -------
        Nothing.

        Raises
        ------
        AssertionError
            If there are no examples, no features, or if x_NF and y_N
            disagree on the number of examples.
        numpy.linalg.LinAlgError
            Propagated from np.linalg.solve when the normal-equation
            matrix is singular (e.g. perfectly collinear features).

        Post-Condition
        --------------
        Internal attributes updated:
        * self.w_F (vector of weights for each feature)
        * self.b (scalar real bias, if desired)

        Notes
        -----
        The least-squares optimization problem is:

        .. math::
            \min_{w \in \mathbb{R}^F, b \in \mathbb{R}}
                \sum_{n=1}^N (y_n - b - \sum_f x_{nf} w_f)^2
        '''
        N, F = x_NF.shape
        n_samples = y_N.shape[0]

        # Asserts are kept (rather than raising ValueError) so existing
        # callers that expect AssertionError keep working.
        assert N > 0, "Cannot fit model with no samples."
        assert F > 0, "Cannot fit model with no features."
        assert n_samples == N, "The number of samples must be equal for the data points and target vars."

        # Append a constant column of ones so the bias is learned as the
        # last entry of the augmented weight vector (G = F + 1).
        xtilde_NG = np.hstack([x_NF, np.ones((N, 1))])

        # Solve the normal equations (X^T X) theta = X^T y with
        # np.linalg.solve instead of forming an explicit inverse.
        xTx_GG = np.dot(xtilde_NG.T, xtilde_NG)
        xTy_G = np.dot(xtilde_NG.T, y_N)
        theta_G = np.linalg.solve(xTx_GG, xTy_G)

        # First F entries are the feature weights; the last is the bias.
        self.w_F = theta_G[:-1]
        self.b = theta_G[-1]

    def predict(self, x_MF):
        ''' Make predictions given input features for M examples

        Args
        ----
        x_MF : 2D numpy array, shape (n_examples, n_features) (M, F)
            Input measurements ("features") for all examples of interest.
            Each row is a feature vector for one example.

        Returns
        -------
        yhat_M : 1D array, size M
            Each value is the predicted scalar for one example
        '''
        # One matrix-vector product computes every row's prediction at once.
        yhat_M = np.dot(x_MF, self.w_F) + self.b
        return yhat_M




def test_on_toy_data(N=100):
    '''
    Demonstrate the regressor on a small synthetic problem.

    Draws N examples with 4 features from a seeded random generator,
    builds responses from a known linear model plus small Gaussian noise,
    fits a LeastSquaresLinearRegressor, and prints the true and estimated
    parameters followed by the true and predicted responses.

    Note: changes numpy's global print options as a side effect.
    '''
    rng = np.random.RandomState(1)

    # Ground-truth parameters of the generating model.
    true_w_F = np.asarray([1.1, -2.2, 3.3, -4.4])
    true_b = 2.0

    # Features are drawn first, then the noise, from the same generator.
    x_NF = rng.randn(N, 4)
    noise_N = 0.03 * rng.randn(N)
    y_N = true_b + np.dot(x_NF, true_w_F) + noise_N

    model = LeastSquaresLinearRegressor()
    model.fit(x_NF, y_N)
    yhat_N = model.predict(x_NF)

    np.set_printoptions(precision=3, formatter={'float':lambda x: '% .3f' % x})

    # Each report entry is a heading line followed by the value itself.
    report = (
        ("True weights", true_w_F),
        ("Estimated weights", model.w_F),
        ("True intercept", np.asarray([true_b])),
        ("Estimated intercept", np.asarray([model.b])),
        (f"True y - shape {y_N.shape}", y_N),
        (f"Estimated Y - shape {yhat_N.shape}", yhat_N),
    )
    for heading, value in report:
        print(heading)
        print(value)

# Run the toy-data demonstration only when this file is executed as a script.
if __name__ == '__main__':
    test_on_toy_data()


True weights
[ 1.100 -2.200  3.300 -4.400]
Estimated weights
[ 1.095 -2.207  3.305 -4.404]
True intercept
[ 2.000]
Estimated intercept
[ 2.002]
True y - shape (100,)
[ 8.072  17.125  16.800  11.108  1.304 -0.962  8.143 -0.023  0.958  2.501
 -6.179 -5.092  3.368 -1.761 -2.150  1.365 -0.273  4.964  10.197 -2.815
  1.002 -1.870  2.780  4.027 -3.530 -2.464  7.049 -4.272  11.753  7.401
 -0.737  1.154  0.867  4.792  7.566  1.583 -0.171  10.475  0.253  7.160
  5.764  5.194  2.446  6.509  7.738 -4.108  4.009  4.703  10.975 -0.138
 -10.727  2.913  1.316 -3.441  7.752  7.308 -5.681  1.611  3.794 -7.009
  8.265  3.529  6.773  0.488  7.054  9.741 -0.699 -0.062  1.837 -9.309
 -1.614  0.874  0.222  5.125 -5.011  8.630  1.165  6.326  11.370  4.236
  8.188 -7.031 -4.319  8.668  5.108 -4.131  2.107  4.431  0.094  4.041
 -8.647  14.101  4.052  7.432 -2.907 -0.272  3.490 -10.408 -6.475 -11.490]
Estimated Y - shape (100,)
[ 8.109  17.146  16.806  11.087  1.323 -0.962  8.211 -0.000  0.952  2.479
 -6.172 -5

  ''' Compute and store weights that solve least-squares problem.
