# Sixth homework for computational mathematics course

In [24]:
import numpy as np
import pandas as pd
import pylab as plt

from scipy.optimize import minimize
from sklearn.linear_model import Lasso, SGDRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import train_test_split, ShuffleSplit
from sklearn.preprocessing import StandardScaler

## Exercise 1

$$
\frac{\partial \log P}{\partial \sigma} = \sum_i \frac{\partial \log p_i}{\partial \sigma} = \sum_i \left(\frac{(x_i-\mu)^2}{\sigma^3} - \frac{1}{\sigma}\right) = 0
$$
$$
\frac{\partial \log P}{\partial \mu} = \sum_i \frac{\partial \log p_i}{\partial \mu} = \sum_i \frac{x_i-\mu}{\sigma^2} = 0
$$

$$
\mu = \frac{\sum_i x_i}{N}
$$

$$
\sigma = \sqrt{\frac{\sum_i (x_i - \mu)^2}{N}}
$$

## Exercise 2

$$
P(\lambda|m) = \frac{P_{\lambda}(m)\cdot P(\lambda)}{\int P_{\lambda'}(m)\cdot P(\lambda')d\lambda'} = \frac{\frac{\lambda^m}{m!}e^{-\lambda}}{\int_0^\infty\frac{{\lambda'}^{m}}{m!}e^{-\lambda'}d\lambda'} = \frac{\lambda^me^{-\lambda}}{\Gamma(m+1)} = \frac{\lambda^me^{-\lambda}}{m!}
$$
$$
P(\lambda|m, m') = \frac{P_{\lambda}(m)\cdot P_{\lambda}(m')\cdot P(\lambda)}{\int P_{\lambda'}(m)\cdot P_{\lambda'}(m')\cdot P(\lambda')d\lambda'} = \frac{\frac{\lambda^{(m+m')}}{m!\cdot m'!}e^{-2\lambda}}{\int_0^\infty\frac{{\lambda'}^{(m+m')}}{m!\cdot m'!}e^{-2\lambda'}d\lambda'} = 2^{(m+m'+1)}\frac{\lambda^{(m+m')}e^{-2\lambda}}{(m+m')!}
$$

## Exercise 3

A - Petya is sick; B - test is positive:
$$
P(A|B) = \frac{P(B|A)\cdot P(A)}{P(B|A)\cdot P(A) + P(B|\neg A)\cdot P(\neg A)} = \frac{0.99\cdot10^{-5}}{0.99\cdot10^{-5} + 0.01\cdot(1-10^{-5})} \approx 0.1\%
$$

## Exercise 4

$$
p(x) = \frac{1}{Z}e^{-\frac{x^TAx}{2}}
$$

## Exercise 5

In [45]:
# Ground-truth frequency of the synthetic signal y = sin(theta * x).
theta = 1.24

# 1000 evenly spaced inputs on [0, 3], kept as a single-column 2-D array.
X = np.linspace(0, 3, 1000)[:, np.newaxis]
y = np.sin(X * theta)

In [60]:
def cross_val(cv, alphas, steps, X, Y):
    """Grid-search ``alpha`` and ``step`` for the model ``y = sin(theta * x)``.

    For every ``(alpha, step)`` pair, ``theta`` is fitted on each shuffle-split
    training subset by minimising ``step * RMSE + step * alpha * |theta|`` with
    BFGS (started from ``theta = 1``) and scored by the RMSE on the matching
    held-out subset.

    Parameters
    ----------
    cv : int
        Number of shuffle-split iterations (``n_splits``).
    alphas : iterable of float
        Candidate weights of the L1 penalty on ``theta``.
    steps : iterable of float
        Candidate factors that scale the whole objective.
    X, Y : array-like
        Inputs and targets; both are indexed with integer index arrays
        (``X[idx]``, ``Y[idx]``), so they must support that kind of indexing.

    Returns
    -------
    pandas.Series
        The row with the smallest cross-validated error, with fields
        ``alpha``, ``step``, ``mse`` (held-out RMSE averaged over the folds)
        and ``theta`` (per-fold estimates averaged over the folds).
    """
    def rmse(y_true, y_pred):
        # mean_squared_error no longer accepts `squared=False` in recent
        # scikit-learn releases, so the root is taken explicitly.
        return np.sqrt(mse(y_true, y_pred))

    splitter = ShuffleSplit(n_splits=cv, random_state=45)

    # Collect one record per (alpha, step) pair and build the frame once,
    # instead of concatenating a new DataFrame on every iteration.
    records = []
    for alpha in alphas:
        for step in steps:
            fold_errors = []
            fold_thetas = []
            # An integer random_state makes split() yield the same folds for
            # every (alpha, step) pair, so the scores are comparable.
            for train_idx, test_idx in splitter.split(X):
                X_train, X_test = X[train_idx], X[test_idx]
                y_train, y_test = Y[train_idx], Y[test_idx]

                def objective(theta):
                    # `.sum()` collapses the 1-element parameter vector so
                    # that the optimiser receives a true scalar.
                    penalty = alpha * np.abs(theta).sum()
                    return step * rmse(y_train, np.sin(theta * X_train)) + step * penalty

                res = minimize(objective, 1, method='BFGS')
                fold_thetas.append(res.x)
                fold_errors.append(rmse(y_test, np.sin(res.x * X_test)))

            records.append({
                'alpha': alpha,
                'step': step,
                # Average over folds: taking the minimum would report the
                # luckiest split rather than the expected held-out error.
                'mse': np.mean(fold_errors),
                # Keep the (1,)-shaped array form returned by the optimiser.
                'theta': np.mean(fold_thetas, axis=0),
            })

    gd_mse = pd.DataFrame(records, columns=['alpha', 'step', 'mse', 'theta'])
    gd_mse = gd_mse.sort_values(by=['mse'], ascending=True, ignore_index=True)
    return gd_mse.iloc[0]

In [62]:
cv = 3
alphas = np.linspace(1e-5, 1e-3, 5)
steps = np.linspace(1e-3, 1e-1, 5)
# The Exercise 5 targets live in lowercase `y`; no `Y` exists at this point on
# a fresh kernel (it is only assigned later, for the steel data set).
cross_val(cv, alphas, steps, X, y)

alpha                0.000258
step                   0.0505
mse                       0.0
theta    [1.2399999981798788]
Name: 0, dtype: object

## Exercise 6

In [6]:
df = pd.read_csv('./Steels_kaggle.csv')

# Feature columns (names are used exactly as they appear in the CSV header)
# and the regression target.
X = df[[' C', ' Si', ' Mn', ' P', ' S', ' Ni', ' Cr', ' Mo',
       ' Cu', 'V', ' Al', ' N', ' Temperature (°C)']]
Y = df[' 0.2% Proof Stress (MPa)']

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the scaler on the training split only and reuse its mean/variance for
# the test split; re-fitting on the test data would standardise the two
# splits with different statistics and leak test information.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
# Sweep the L1 penalty from weak to strong and print the fitted coefficients
# for each value.
alphas = np.linspace(1e-1, 100, 30)

for penalty in alphas:
    model = Lasso(alpha=penalty).fit(X_train, y_train)
    print("For alpha =", round(penalty, 3), ":", model.coef_)

For alpha = 0.1 : [ -0.39858646   8.60751172  43.42987316 -10.48910529  -0.
  16.74422098  11.92172192  16.70484622  -6.58473399  65.82482567
  15.20034942  -6.52048216 -64.58762278]
For alpha = 3.545 : [ -0.           7.34774404  34.48156118  -5.06301505  -0.
  13.16247989   0.43034795  17.76702573  -0.          67.39796577
  18.41505653  -0.         -60.38261788]
For alpha = 6.99 : [ -0.           5.63528444  28.99163353  -1.77646184  -0.
  15.39085719   0.           8.72437093  -0.          69.84062603
  14.15260356  -0.         -56.52826551]
For alpha = 10.434 : [ -0.           3.83078037  24.39144352  -0.          -0.
  16.89800088   0.           0.          -0.          71.94900224
  10.08680775  -0.         -52.72446168]
For alpha = 13.879 : [ -0.           1.43583532  24.56101659  -0.          -0.
  14.34535678   0.           0.          -0.          69.29747351
   7.62623997  -0.         -49.0470367 ]
For alpha = 17.324 : [ -0.           0.          24.67224048  -0.          -

## Exercise 7

$$
\|y - Xw\|_2 \rightarrow \min_{w}
$$
$$
\text{s.t. } \|w\|_1 \leq C
$$
Lagrange multipliers method:
$$
L(w, \lambda) = ||y - Xw||_2 - \lambda(C - ||w||_1)
$$
Minimising the Lagrangian over $w$ gives the dual function
$$
g(\lambda) = \min_{w}\left(||y - Xw||_2 + \lambda ||w||_1\right) - \lambda C
$$
and the dual problem is $\max_{\lambda \geq 0} g(\lambda)$, i.e.
$$
\max_{\lambda \geq 0}\min_{w}\left(||y - Xw||_2 + \lambda ||w||_1 - \lambda C\right)
$$
The original problem is convex and strictly feasible for $C > 0$ (e.g. $w = 0$), so strong duality holds and this is equivalent to
$$
\min_{w}\max_{\lambda \geq 0}\left(||y - Xw||_2 + \lambda ||w||_1 - \lambda C\right)
$$
Finally, for the optimal value of $\lambda$, the problem becomes
$$
\min_{w}\left(||y - Xw||_2 + \lambda ||w||_1 - \lambda C\right)
$$
which, since $\lambda C$ does not depend on $w$, is equivalent to
$$
\min_{w}\left(||y - Xw||_2 + \lambda ||w||_1\right)
$$
$$