# ML HW1

In [66]:
import numpy as np
import pandas as pd

data_x_df = pd.read_csv('X.csv')
data_t_df = pd.read_csv('T.csv')
data_df = pd.concat([data_x_df, data_t_df], axis=1)
data_df

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
1,7.8,0.880,0.00,2.6,0.098,25.0,67.0,0.99680,3.20,0.68,9.8,5
2,7.8,0.760,0.04,2.3,0.092,15.0,54.0,0.99700,3.26,0.65,9.8,5
3,11.2,0.280,0.56,1.9,0.075,17.0,60.0,0.99800,3.16,0.58,9.8,6
4,7.4,0.700,0.00,1.9,0.076,11.0,34.0,0.99780,3.51,0.56,9.4,5
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,6.2,0.600,0.08,2.0,0.090,32.0,44.0,0.99490,3.45,0.58,10.5,5
1595,5.9,0.550,0.10,2.2,0.062,39.0,51.0,0.99512,3.52,0.76,11.2,6
1596,6.3,0.510,0.13,2.3,0.076,29.0,40.0,0.99574,3.42,0.75,11.0,6
1597,5.9,0.645,0.12,2.0,0.075,32.0,44.0,0.99547,3.57,0.71,10.2,5


In [67]:
dataset = np.array(data_df)
np.random.shuffle(dataset)

train_ratio = 0.9
train_size = int(len(dataset) * train_ratio)

train_feature = dataset[:train_size, :-1]
test_feature = dataset[train_size:, :-1]
train_target = dataset[:train_size, -1]
test_target = dataset[train_size:, -1]

## 1. Feature Selection

In [68]:
def optimal_weight(phi, target):
    return np.linalg.pinv(phi) @ target

### M = 1

$$ y(\textbf{x}, \textbf{w}) = \textit{w}_{0} + \sum_{\textit{i} = 1}^{D} \textit{w}_{i}\textit{x}_{i}  = \textbf{w}^{T} \phi(x) $$
where
$$ \textbf{w} = \begin{pmatrix} \textit{w}_{0} & \textit{w}_{1} & ... & \textit{w}_{D} \end{pmatrix}^{T} $$
$$ \phi(x) = \begin{pmatrix} 1 & \textit{x}_{1} & ... & \textit{x}_{D}  \end{pmatrix}^{T} $$

In [69]:
def phi_m1(features):
    """Computes phi matrix with m = 1.
    
    Args:
        features: np.ndarray with shape (n_samples, n_features)
        
    Returns:
        phi: np.ndarray with shape (n_samples, 1 + n_features)
    """
    m_0 = np.ones((features.shape[0], 1))  # 1
    m_1 = features  # x_1, x_2, ..., x_D
    return np.concatenate((m_0, m_1), axis=-1)

In [70]:
train_phi_m1 = phi_m1(train_feature)
test_phi_m1 = phi_m1(test_feature)

w_m1 = optimal_weight(train_phi_m1, train_target)


### M = 2

$$ y(\textbf{x}, \textbf{w}) = \textit{w}_{0} + \sum_{\textit{i} = 1}^{D} \textit{w}_{i}\textit{x}_{i} + \sum_{\textit{i} = 1}^{D} \sum_{\textit{j} = 1}^{D} \textit{w}_{ij}\textit{x}_{i}\textit{x}_{j} = \textbf{w}^{T} \phi(x) $$
where
$$ \textbf{w} = \begin{pmatrix} \textit{w}_{0} & \textit{w}_{1} & ... & \textit{w}_{D} & \textit{w}_{11} & \textit{w}_{12} & ... & \textit{w}_{DD} \end{pmatrix}^{T} $$
$$ \phi(x) = \begin{pmatrix} 1 & \textit{x}_{1} & ... & \textit{x}_{D} & \textit{x}_{1}\textit{x}_{1} & \textit{x}_{1}\textit{x}_{2} & ... & \textit{x}_{D}\textit{x}_{D} \end{pmatrix}^{T} $$

In [71]:
def phi_m2(features):
    """Computes phi matrix with m = 2.
    
    Args:
        features: np.ndarray with shape (n_samples, n_features)
        
    Returns:
        phi: np.ndarray with shape (n_samples, 1 + n_features + n_features ** 2)
    """
    m_0 = np.ones((features.shape[0], 1))  # 1
    m_1 = features  # x_1, x_2, ..., x_D

    # x_1^2, x_1x_2, ..., x_1x_D, x_2^2, x_2x_3, ..., x_2x_D, ..., x_D^2
    m_2 = np.expand_dims(features, axis=-1) @ np.expand_dims(features, axis=-2)
    m_2 = m_2.reshape(m_2.shape[0], -1)
    return np.concatenate((m_0, m_1, m_2), axis=-1)

In [72]:
train_phi_m2 = phi_m2(train_feature)
test_phi_m2 = phi_m2(test_feature)

w_m2 = optimal_weight(train_phi_m2, train_target)

### Root Mean Square Error

In [73]:
def root_mean_square_error(y, t):
    return np.sqrt(np.mean((y - t)**2))


train_rms_m1 = root_mean_square_error(train_phi_m1 @ w_m1, train_target)
test_rms_m1 = root_mean_square_error(test_phi_m1 @ w_m1, test_target)

train_rms_m2 = root_mean_square_error(train_phi_m2 @ w_m2, train_target)
test_rms_m2 = root_mean_square_error(test_phi_m2 @ w_m2, test_target)

In [74]:
print(f'train rms m1: {train_rms_m1}')
print(f'test rms m1: {test_rms_m1}')
print(f'train rms m2: {train_rms_m2}')
print(f'test rms m2: {test_rms_m2}')

train rms m1: 0.6504869669728687
test rms m1: 0.6052192313519279
train rms m2: 0.6127405219494931
test rms m2: 0.5713169976163589


### Weight Analysis of M = 1

I used the weight of the model to analyze the importance of each feature. The weight of the model is the coefficient of each feature. The larger the weight is, the more important the feature is.

> The weight with index 0 is not considered as a feature because it is the bias of the model.

In [75]:
print(f'w_m1: {w_m1}')
abs_importance = np.abs(w_m1[1:])
importance_rank = np.argsort(abs_importance)[::-1]
print(f'Rank of features\' contribution: {importance_rank}')
print(f'The most important feature: {data_df.columns[importance_rank[0]]}')

w_m1: [ 1.99516583e+01  1.38867580e-02 -1.05943997e+00 -1.33469037e-01
  6.26850093e-03 -2.02141901e+00  5.21098146e-03 -3.06596138e-03
 -1.56233043e+01 -4.82241150e-01  8.46718435e-01  2.87947663e-01]
Rank of features' contribution: [ 7  4  1  9  8 10  2  0  3  5  6]
The most important feature: density


## 2. Maximum Likelihood Approach

In [76]:
def get_mean_and_std(features):
    """Computes the mean and standard deviation of each feature.
    
    Args:
        features: np.ndarray with shape (n_samples, n_features)
        
    Returns:
        mean: np.ndarray with shape (1, n_features)
        std: np.ndarray with shape (1, n_features)
    """
    mean = np.mean(features, axis=0, keepdims=True)
    std = np.std(features, axis=0, keepdims=True)
    return mean, std


def phi_gaussian(features, mean, std):
    """Computes the Gaussian phi.
    
    Args:
        features: np.ndarray with shape (n_samples, n_features)
        mean: np.ndarray with shape (1, n_features)
        std: np.ndarray with shape (1, n_features)
        
    Returns:
        phi: np.ndarray with shape (n_samples, 1 + n_features)
    """
    gaussians = np.exp(-((features - mean) / std)**2 / 2)
    return np.concatenate((np.ones((features.shape[0], 1)), gaussians), axis=-1)

In [77]:
train_mean, train_std = get_mean_and_std(train_feature)
train_phi_gaussian = phi_gaussian(train_feature, train_mean, train_std)
test_phi_gaussian = phi_gaussian(test_feature, train_mean, train_std)

w_gaussian = optimal_weight(train_phi_gaussian, train_target)

In [78]:
train_rms_gaussian = root_mean_square_error(train_phi_gaussian @ w_gaussian, train_target)
test_rms_gaussian = root_mean_square_error(test_phi_gaussian @ w_gaussian, test_target)

print(f'train rms gaussian: {train_rms_gaussian}')
print(f'test rms gaussian: {test_rms_gaussian}')

train rms gaussian: 0.7740550371102947
test rms gaussian: 0.720759769359166


## 3. Maximum A Posteriori Approach