In [14]:
import numpy as np

In [15]:
def power_sum(l, r, p=1.0):
    """
        input: l, r - integers, p - float
        returns sum of p-powers of integers from [l, r]
        
        example: power_sum(2, 4, 2.0) == 2 ** 2.0 + 3 ** 2.0 + 4 ** 2.0 = 4.0 + 9.0 + 16.0 = 29.0
    """
    return sum([i**p for i in range(l, r+1)])

print(power_sum(2, 4, 2.0))

29.0


In [16]:
def solve_equation(a, b, c):
    """
        input: a, b, c - integers
        returns float solutions x of the following equation: a x ** 2 + b x + c == 0
            In case of two diffrent solution returns tuple / list (x1, x2)
            In case of one solution returns one float
            In case of no float solutions return None 
            In case of infinity number of solutions returns 'inf'
    """
    D = b**2 - 4.*a*c
    if a or b or c:
        if a:
            if D > 0:
                return ((-b-np.sqrt(D))/(2*a), (-b+np.sqrt(D))/(2*a))
            elif D == 0:
                return -b/(2*a)
            else:
                return None
        elif b:
            return -c / float(b)
        else:
            return None
    else:
        return np.inf

print(solve_equation(0, 0, -1))

None


In [17]:
def replace_outliers(x, std_mul=3.0):
    """
        input: x - numpy vector, std_mul - positive float
        returns copy of x with all outliers (elements, which are beyond std_mul * (standart deviation) from mean)
        replaced with mean  
    """
    mean, std = np.mean(x), np.std(x)
    left_boundary, right_boundary = mean - std_mul * std, mean + std_mul * std
    return np.array([ mean if element < left_boundary or element > right_boundary else element for element in x ])

x = np.random.normal(0, 2, 10000)
x.sort()
print(replace_outliers(x)[:10])

[ 0.03552231  0.03552231  0.03552231  0.03552231  0.03552231  0.03552231
  0.03552231  0.03552231  0.03552231  0.03552231]


In [18]:
def get_eigenvector(A, alpha):
    """
        input: A - square numpy matrix, alpha - float
        returns numpy vector - any eigenvector of A corresponding to eigenvalue alpha, 
                or None if alpha is not an eigenvalue.
    """
    eigenvalues, eigenmatrix = np.linalg.eig(A)
    if alpha in eigenvalues:
        return eigenmatrix[:,0][0]
    else:
        return None
matr = [[-1., -6.],
       [ 2.,  6.]],
print(get_eigenvector(matr, 2.))

[-0.89442719  0.83205029]


In [19]:
def discrete_sampler(p):
    """
        input: p - numpy vector of probability (non-negative, sums to 1)
        returns integer from 0 to len(p) - 1, each integer i is returned with probability p[i] 
    """
    return np.random.choice(a=range(len(p)), size=1, p=p)[0]

p = [0.2, 0.8]
pull = []
MAX = 10000
for i in range(MAX):
    pull.append(discrete_sampler(p))

print(pull.count(0)/MAX, pull.count(1)/MAX)

0.1996 0.8004


In [20]:
def gaussian_log_likelihood(x, mu=0.0, sigma=1.0):
    """
        input: x - numpy vector, mu - float, sigma - positive float
        returns log p(x| mu, sigma) - log-likelihood of x dataset 
        in univariate gaussian model with mean mu and standart deviation sigma
    """
    return -sum([ (xi-mu)**2 for xi in x ]) / (2*sigma**2) + 0.5*len(x)*np.log(2*np.pi*sigma**2)

x = np.random.normal(-0.1, 0.9, 1000)
print(gaussian_log_likelihood(x, mu=0.0, sigma=1.0))

485.702451274


In [21]:
def gradient_approx(f, x0, eps=1e-8):
    """
        input: f - callable, function of vector x. x0 - numpy vector, eps - float, represents step for x_i
        returns numpy vector - gradient of f in x0 calculated with finite difference method 
        (for reference use https://en.wikipedia.org/wiki/Numerical_differentiation, search for "first-order divided difference")
    """
    return (f(x0+eps) - f(x0))/eps

x0 = np.array([5, 5])
print(gradient_approx(np.sin, x0))

[ 0.28366218  0.28366218]


In [22]:
def gradient_method(f, x0, n_steps=1000, learning_rate=1e-4, eps=1e-8):
    """
        input: f - function of x. x0 - numpy vector, n_steps - integer, learning rate, eps - float.
        returns tuple (f^*, x^*), where x^* is local minimum point, found after n_steps of gradient descent, 
                                        f^* - resulting function value.
        Impletent gradient descent method, given in the lecture. 
        For gradient use finite difference approximation with eps step.
    """
    x = x0
    for epoch in range(n_steps):
        x = x - learning_rate * gradient_approx(f, x, eps)
        
    return (f(x), x)

def rosen(x):
    """The Rosenbrock function"""
    return sum(100.0*(x[1:]-x[:-1]**2)**2 + (1-x[:-1])**2)
               
x0 = np.array([1.5, 1.5])
print(gradient_method(rosen, x0))

(2.2756071493751382e-15, array([ 1.,  1.]))


In [23]:
def linear_regression_predict(w, b, X):
    """
        input: w - numpy vector of M weights, b - bias, X - numpy matrix N x M (object-feature matrix), 
        N - number of objects, M - number of features.
        returns numpy vector of predictions of linear regression model for X
        https://xkcd.com/1725/
    """
    return w.reshape(1, -1).dot(X) + b.reshape(1,-1)

w = np.array([1,2,3])
X = np.array([[1, 1, 1, 1],\
             [1, 1, 1, 1],\
             [1, 1, 1, 1]])
b = np.array([1,1,1,1])
             
print(linear_regression_predict(w, b, X))

[[7 7 7 7]]


In [24]:
def mean_squared_error(y_true, y_pred):
    """
        input: two numpy vectors of object targets and model predictions.
        return mse
    """
    return sum((y_true-y_pred)**2) / len(y_true)

y_true = np.array([1,2,3])
y_pred = np.array([4,5,6])
print(mean_squared_error(y_true, y_pred))

9.0


In [25]:
def linear_regression_mse_gradient(w, b, X, y_true):
    """
        input: w, b - weights and bias of a linear regression model,
                X - object-feature matrix, y_true - targets.
        returns gradient of linear regression model mean squared error w.r.t (with respect to) w and b
    """
    y_pred = linear_regression_predict(w, b, X)
    y_true = y_true.reshape(1,-1)
    n = y_true.shape[1]
    return np.array([ sum([ (y_pred[0,j] - y_true[0,j]) * X[i,j] for j in range(n) ]) / n for i in range(X.shape[0]) ])

w = np.array([1,2,3])
X = np.array([[1, 1, 1, 1],\
             [1, 1, 1, 1],\
             [1, 1, 1, 1]])
b = np.array([1,1,1,1])
y_true = np.array([1,1,1,1])
print(linear_regression_mse_gradient(w, b, X, y_true))

[ 6.  6.  6.]
