# Robustness

## 1.1 강건성이 필요한 이유

In [None]:
from random import random
from random import randint
from random import seed
from numpy import arange
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import HuberRegressor
from matplotlib import pyplot as plt

def get_dataset():
    """Create a one-feature regression dataset with outliers in the inputs.

    The base data comes from ``make_regression`` (100 samples, fixed
    ``random_state``). The first 10 rows of ``X`` are then shifted up or
    down by 2-4 standard deviations of ``X`` so that they act as outliers.

    Returns:
        The ``(X, y)`` pair; ``X`` has been modified in place, ``y`` is
        returned as generated.
    """
    X, y = make_regression(
        n_samples=100,
        n_features=1,
        tail_strength=0.9,
        effective_rank=1,
        n_informative=1,
        noise=3,
        bias=50,
        random_state=1,
    )
    # Fixed seed so the same outliers are injected on every call.
    seed(1)
    for row in range(10):
        # Draw order is part of the reproducible sequence:
        # magnitude first, then direction.
        magnitude = randint(2, 4)
        direction = 1 if random() > 0.5 else -1
        # X.std() is re-evaluated on every pass, so it already reflects the
        # outliers injected by earlier iterations.
        X[row] += direction * magnitude * X.std()
    return X, y

def get_models():
    """Return fresh, unfitted estimators to compare: OLS first, then Huber."""
    return [LinearRegression(), HuberRegressor()]

def plot_best_fit(X, y, xaxis, model):
    """Fit ``model`` on ``(X, y)`` and draw its prediction line over ``xaxis``.

    The line is added to the current pyplot axes and labelled with the
    model's class name so it shows up in the legend. ``model`` is fitted
    in place; nothing is returned.
    """
    model.fit(X, y)
    # predict() is given a single-column 2-D input, one row per grid point.
    grid = xaxis.reshape((len(xaxis), 1))
    predicted = model.predict(grid)
    line_label = type(model).__name__
    plt.plot(xaxis, predicted, label=line_label)

In [None]:
# Build the outlier-contaminated dataset and compare the fitted lines of
# every model returned by get_models() on a single figure.
X, y = get_dataset()
# Evenly spaced grid (step 0.01) spanning the observed X range, used only
# for drawing the fitted lines.
xaxis = arange(X.min(), X.max(), 0.01)
for model in get_models():
    plot_best_fit(X, y, xaxis, model)
# Raw observations go on the same axes as the fitted lines.
plt.scatter(X, y)
plt.title('Robust Regression')
plt.legend()
plt.show()

In [None]:
import time
from IPython.display import clear_output


# Animation: add hand-picked points one at a time and redraw both fits
# after each addition.
# Colour lookup indexed by category: 0 -> 'b' (existing points), 1 -> 'r'
# (points added by the loop below).
colormap = np.array(['b', 'r'])
# Every point currently in X starts in category 0.
cate=np.array([0 for _ in range(len(X))])

# Coordinates of the extra points, paired positionally.
new_x,new_y=[0.5,0.51,0.5137,0.535,0.57,0.49],[0,0.5,-1,-1.9,0.05,0.03]

for nx,ny in zip(new_x,new_y):
    # Tag the new point as category 1 so it is drawn in red.
    cate=np.append(cate,1)
    # NOTE: the module-level X and y are rebound here, so the added points
    # stay in the dataset for every later cell.
    X,y=np.append(X,[[nx]], axis=0),np.append(y,[ny], axis=0)
    # Recompute the plotting grid because the new point may widen the X range.
    xaxis = arange(X.min(), X.max(), 0.01)
    for model in get_models():
        plot_best_fit(X, y, xaxis, model)
    plt.scatter(X, y, c=colormap[cate])
    plt.title('Robust Regression')
    plt.legend()
    plt.show()
    # Keep the frame visible for 2 seconds, then clear it; wait=True delays
    # the clear until the next output is ready.
    time.sleep(2)
    clear_output(wait=True)

## 1.2 Least Quantile of Squares

In [None]:
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [None]:
# Flatten X to one dimension of length X.shape[0]. resize() works in place,
# so the existing array object itself changes shape (no new array is bound).
X.resize(X.shape[0])
# Scatter plot of the data, including any points appended by earlier cells.
sns.scatterplot(x=X,y=y)

In [None]:
# Plot the distribution of the target values.
# NOTE(review): seaborn has deprecated `distplot` in favour of
# `histplot`/`displot` -- confirm it still exists in the installed version.
sns.distplot(y)

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares line with scikit-learn and plot it over the data.
# Reshape X to a single column, as passed to fit()/predict() below.
X=X.reshape(-1,1)
lm_model = LinearRegression()
lm_model.fit(X,y)
# In-sample predictions, used only to draw the fitted line.
y_pred = lm_model.predict(X)

plt.scatter(X, y, color ='gray')
plt.plot(X, y_pred, color ='red', linewidth=2)
plt.show()

In [None]:
import pandas as pd

# Prepare the inputs for statsmodels' formula-based quantile regression.
X=X.reshape(-1,1)
# Two rows (X flattened, y) labelled 'X' and 'y', transposed so they become
# the columns referenced by the formula below.
df=pd.DataFrame([X.reshape(-1),y],index=['X','y']).transpose()
# Model of y on X (with intercept); it is fitted per quantile later.
mod = smf.quantreg('y ~ X', df)
# Quantiles to fit: start at 0.05, step 0.1, stop before 0.96.
quantiles = np.arange(.05,.96,.1)

# NOTE(review): these two containers are not used anywhere in the visible
# source -- confirm whether they are still needed.
model_list = []
pred_dict = {}

In [None]:
def fit_model(q):
    """Fit the module-level quantile regression ``mod`` at quantile ``q``.

    Returns:
        A flat list ``[q, intercept, slope_of_X, *bounds]`` where ``bounds``
        are the values of the ``'X'`` row of the fit's ``conf_int()`` table.
    """
    fitted = mod.fit(q=q)
    row = [q, fitted.params['Intercept'], fitted.params['X']]
    # Append the confidence-interval bounds reported for the X coefficient.
    row.extend(fitted.conf_int().loc['X'].tolist())
    return row

# Fit one quantile regression per entry of `quantiles` and collect the rows.
models = [fit_model(x) for x in quantiles]
# Columns: q = quantile, a = intercept, b = slope of X,
# lb / ub = confidence-interval bounds of the slope (in conf_int() order).
models = pd.DataFrame(models, columns=['q', 'a', 'b', 'lb', 'ub'])

In [None]:
# Fit OLS on the same formula/data so it can be compared with the
# quantile regressions.
ols = smf.ols('y ~ X', df).fit()
# Confidence-interval bounds for the X coefficient, as a two-element list.
ols_ci = ols.conf_int().loc['X'].tolist()
# NOTE: `ols` is rebound here from the fitted results object to a plain dict
# that uses the same keys (a, b, lb, ub) as the columns of `models`.
ols = dict(a = ols.params['Intercept'],
           b = ols.params['X'],
           lb = ols_ci[0],
           ub = ols_ci[1])

print(models)
print(ols)

In [None]:
# Overlay every fitted quantile-regression line and the OLS line on the
# observed data.
# Plotting grid (step 0.01) spanning the observed X range.
x = np.arange(X.min(), X.max(), 0.01)


def get_y(a, b):
    """Evaluate the line ``a + b * x`` on the plotting grid ``x``."""
    return a + b * x


fig, ax = plt.subplots(figsize=(8, 6))

# One dotted line per fitted quantile, labelled with the quantile stored in
# the table rather than a value recomputed from the row position.
for q, a, b in zip(models.q, models.a, models.b):
    ax.plot(x, get_y(a, b), linestyle='dotted', label=str(round(float(q), 2)))

# Fitted values are passed straight to plot() so the module-level data
# vector `y` is no longer overwritten by line values.
ax.plot(x, get_y(ols['a'], ols['b']), color='red', label='OLS')
# Scatter the observed data the models were fitted on. The previous version
# scattered the grid against the OLS line, i.e. it re-drew the line as dots
# and never showed the observations.
ax.scatter(df['X'], df['y'], alpha=.2, color='grey')
legend = ax.legend()
ax.set_xlabel('X', fontsize=16)
ax.set_ylabel('y', fontsize=16)

In [None]:
# Plot how the estimated slope of X varies across quantiles, with the OLS
# slope as a constant reference.
n = models.shape[0]
# Quantile-regression slope (solid) and its confidence bounds (dotted).
p1 = plt.plot(models.q, models.b, color='black', label='Quantile Reg.')
p2 = plt.plot(models.q, models.ub, linestyle='dotted', color='black')
p3 = plt.plot(models.q, models.lb, linestyle='dotted', color='black')
# The OLS values are single numbers, so each is repeated n times to draw a
# horizontal line over the same quantile axis.
p4 = plt.plot(models.q, [ols['b']] * n, color='red', label='OLS')
p5 = plt.plot(models.q, [ols['lb']] * n, linestyle='dotted', color='red')
p6 = plt.plot(models.q, [ols['ub']] * n, linestyle='dotted', color='red')
plt.ylabel(r'$\beta_{X}$')
plt.xlabel('Quantiles distribution')
plt.legend()
plt.show()

## 1.3 강건성 시뮬레이션

In [None]:
import numpy as np

# Draw a 999 x 100 matrix of standard-normal samples. The loop further down
# iterates over its rows and applies each as a percentage change
# (1 + i/100) -- presumably per-period returns; confirm the intended axis meaning.
# NOTE(review): no random seed is set here, so every run gives different values.
ret=np.random.normal(size=(999,100))
# Empty placeholders, one per portfolio variant named on the left-hand side.
pf_ls,pf_rank,pf_trim,pf_winz=np.array([]),np.array([]),np.array([]),np.array([])

In [None]:
def long_short(i, pf_ls):
    """Return ``pf_ls`` with one extra column appended on the right.

    The new column is the current last column scaled by ``1 + i / 100``,
    i.e. ``i`` is treated as a percentage change. ``pf_ls`` must be 2-D and
    is not modified; a new array is returned.
    """
    latest = pf_ls[:, -1]
    grown = latest * (1 + i / 100)
    # Concatenating along axis 1 needs a 2-D column, hence the reshape.
    next_column = grown.reshape(-1, 1)
    return np.concatenate((pf_ls, next_column), axis=1)

def ranking(i, pf_rank):
    """Placeholder for the ranking-based portfolio update (not implemented).

    The original stub had no trailing colon and no body, which is a syntax
    error that prevented the whole cell from being parsed, so the sibling
    ``long_short`` was never defined either. The signature is kept as
    written; presumably this should mirror ``long_short`` -- TODO confirm
    the intended logic.

    Raises:
        NotImplementedError: always, until the real logic is written.
    """
    raise NotImplementedError("ranking() has not been implemented yet")

In [None]:
# For each row of `ret`, append a new column to `price_nd`: the current last
# column scaled element-wise by (1 + i/100).
# NOTE(review): `price_nd` is not defined anywhere in the visible source --
# confirm it is initialised (as a 2-D array) before this loop runs.
for i in ret:

    price_nd=np.append(price_nd,(price_nd[:,-1]*(1+i/100)).reshape(-1,1),axis=1)