# Lasso with High Missing Rate

In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

%matplotlib inline
%load_ext autoreload
%autoreload 1

matplotlib.rcParams['font.family'] = 'IPAexGothic'

%config InlineBackend.figure_formats = {'png', 'retina'}


Bad key ""backend" on line 1 in
/Users/masui/.matplotlib/matplotlibrc.
You probably need to get an updated matplotlibrc file from
https://github.com/matplotlib/matplotlib/blob/v3.2.1/matplotlibrc.template
or from the matplotlib source distribution


In [2]:
import sys

# Put the parent directory on the module search path so that the local
# `spmimage` package can be imported from this notebook's location.
sys.path.append('..')

from spmimage.linear_model.hmlasso import HMLasso



ModuleNotFoundError: No module named 'sklearn.externals.joblib'

## Boston House-Price Dataset

In [None]:
from sklearn.datasets import load_boston

# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# this cell needs an older scikit-learn (or a replacement dataset) — confirm the target version.
boston = load_boston()
X, y = boston.data, boston.target

# Preview the first ten samples with their feature names as column headers.
pd.DataFrame(X, columns=boston.feature_names).head(10)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split reproducible.
test_size = 0.2
random_state = 0
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
)

# Two-column target built from the *unscaled* y_train (second column is y_train + 1).
# It is not used by any cell visible in this notebook section.
y_train_dual = np.array([y_train, y_train + 1]).T

# Standardize the features with statistics estimated on the training split only,
# then apply the same transform to the test split.
X_scaler = StandardScaler()
X_scaler.fit(X_train)
X_train = X_scaler.transform(X_train)
X_test = X_scaler.transform(X_test)

# Standardize the target the same way. StandardScaler expects 2-D input, so the
# 1-D target is reshaped to a single column and flattened back afterwards.
y_scaler = StandardScaler()
y_scaler.fit(y_train.reshape(-1, 1))
y_train = y_scaler.transform(y_train.reshape(-1, 1)).flatten()
y_test = y_scaler.transform(y_test.reshape(-1, 1)).flatten()

## Lasso Regression

### sklearn Lasso

In [None]:
from sklearn.linear_model import Lasso

# Number of leading test samples shown (and scored) in the prediction plots.
num_test = 30
# Regularization strength; the same value is passed to HMLasso later in the notebook.
alpha = 0.1

# Baseline estimator from scikit-learn, with an intercept term.
lasso = Lasso(fit_intercept=True, alpha=alpha)

In [None]:
%%time
# Fit the scikit-learn Lasso baseline on the standardized training split;
# the %%time cell magic reports how long the cell takes to run.
lasso.fit(X_train, y_train)

In [None]:
# Left panel: fitted Lasso coefficients by feature index.
# Right panel: the first `num_test` held-out targets against the model's predictions.
fig, (ax_coef, ax_pred) = plt.subplots(1, 2, figsize=(10, 3))

ax_coef.plot(lasso.coef_, '.-')
ax_coef.grid()
ax_coef.set_title('Coefficient')
ax_coef.set_xlabel('feature index')

ax_pred.plot(y_test[:num_test], label='data')
ax_pred.plot(lasso.predict(X_test[:num_test]), label='prediction')
ax_pred.legend()
ax_pred.grid()
# Lasso.score returns the coefficient of determination R^2 (higher is better),
# so the value is labelled as a score rather than as a prediction error.
ax_pred.set_title(f'Test score: {lasso.score(X_test[:num_test], y_test[:num_test])}')
ax_pred.set_xlabel('test sample index')

# Keep the x-axis labels from being clipped in the short (3 inch) figure.
fig.tight_layout()
plt.show()

### HMLasso with no missing data

When no data is missing, HMLasso should perform like ordinary Lasso.

In [None]:
# Build the HMLasso estimator with the same `alpha` that the Lasso baseline uses.
# NOTE(review): the meaning of mu_coef / mu_cov is defined by spmimage's HMLasso
# and is not visible in this notebook — confirm before tuning these values.
hmlasso = HMLasso(alpha=alpha, mu_coef=1.0, mu_cov=0.1)

In [None]:
%%time
# Fit HMLasso on the complete standardized training split (no NaN entries yet);
# the %%time cell magic reports how long the cell takes to run.
hmlasso.fit(X_train, y_train)

In [None]:
# Left panel: HMLasso coefficients (from the most recent `hmlasso.fit`) by feature index.
# Right panel: the first `num_test` held-out targets against the model's predictions.
fig, (ax_coef, ax_pred) = plt.subplots(1, 2, figsize=(10, 3))

ax_coef.plot(hmlasso.coef_, '.-')
ax_coef.grid()
ax_coef.set_title('Coefficient')
ax_coef.set_xlabel('feature index')

ax_pred.plot(y_test[:num_test], label='data')
ax_pred.plot(hmlasso.predict(X_test[:num_test]), label='prediction')
ax_pred.legend()
ax_pred.grid()
# The value comes from `.score()`, so it is labelled as a score rather than an error.
# NOTE(review): assumes HMLasso.score is a higher-is-better score like
# scikit-learn regressors (R^2) — confirm against spmimage.
ax_pred.set_title(f'Test score: {hmlasso.score(X_test[:num_test], y_test[:num_test])}')
ax_pred.set_xlabel('test sample index')

# Keep the x-axis labels from being clipped in the short (3 inch) figure.
fig.tight_layout()
plt.show()

### HMLasso with randomly missing data

In [None]:
# Draw one uniform [0, 1) number per entry of X_train; the fixed seed makes
# the missing pattern reproducible.
np.random.seed(0)
rand = np.random.rand(*X_train.shape)

# Entries whose draw exceeds 0.75 (about 25% of them) become NaN.
# np.where builds a new array, so X_train itself is left untouched.
X_randomly_missed = np.where(rand > 0.75, np.nan, X_train)

In [None]:
%%time
# Re-fit the same `hmlasso` object, this time on the training features with
# randomly placed NaN entries; the targets are unchanged.
hmlasso.fit(X_randomly_missed, y_train)

In [None]:
# Left panel: HMLasso coefficients (from the most recent `hmlasso.fit`) by feature index.
# Right panel: the first `num_test` held-out targets against the model's predictions.
fig, (ax_coef, ax_pred) = plt.subplots(1, 2, figsize=(10, 3))

ax_coef.plot(hmlasso.coef_, '.-')
ax_coef.grid()
ax_coef.set_title('Coefficient')
ax_coef.set_xlabel('feature index')

ax_pred.plot(y_test[:num_test], label='data')
ax_pred.plot(hmlasso.predict(X_test[:num_test]), label='prediction')
ax_pred.legend()
ax_pred.grid()
# The value comes from `.score()`, so it is labelled as a score rather than an error.
# NOTE(review): assumes HMLasso.score is a higher-is-better score like
# scikit-learn regressors (R^2) — confirm against spmimage.
ax_pred.set_title(f'Test score: {hmlasso.score(X_test[:num_test], y_test[:num_test])}')
ax_pred.set_xlabel('test sample index')

# Keep the x-axis labels from being clipped in the short (3 inch) figure.
fig.tight_layout()
plt.show()

### HMLasso with a block of missing data in the center

In [None]:
n_samples, n_features = X_train.shape

# Bounds of the middle half of the rows and the middle half of the columns.
row_start, row_stop = int(0.25 * n_samples), int(0.75 * n_samples)
col_start, col_stop = int(0.25 * n_features), int(0.75 * n_features)

# Blank out that central block on a copy (half the rows x half the columns,
# i.e. roughly 25% of all entries); X_train itself is left untouched.
X_missed = np.copy(X_train)
X_missed[row_start:row_stop, col_start:col_stop] = np.nan

In [None]:
%%time
# Re-fit the same `hmlasso` object on the training features whose central
# block of entries was replaced with NaN; the targets are unchanged.
hmlasso.fit(X_missed, y_train)

In [None]:
# Left panel: HMLasso coefficients (from the most recent `hmlasso.fit`) by feature index.
# Right panel: the first `num_test` held-out targets against the model's predictions.
fig, (ax_coef, ax_pred) = plt.subplots(1, 2, figsize=(10, 3))

ax_coef.plot(hmlasso.coef_, '.-')
ax_coef.grid()
ax_coef.set_title('Coefficient')
ax_coef.set_xlabel('feature index')

ax_pred.plot(y_test[:num_test], label='data')
ax_pred.plot(hmlasso.predict(X_test[:num_test]), label='prediction')
ax_pred.legend()
ax_pred.grid()
# The value comes from `.score()`, so it is labelled as a score rather than an error.
# NOTE(review): assumes HMLasso.score is a higher-is-better score like
# scikit-learn regressors (R^2) — confirm against spmimage.
ax_pred.set_title(f'Test score: {hmlasso.score(X_test[:num_test], y_test[:num_test])}')
ax_pred.set_xlabel('test sample index')

# Keep the x-axis labels from being clipped in the short (3 inch) figure.
fig.tight_layout()
plt.show()