In [213]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d, UnivariateSpline, Rbf, make_interp_spline
from sklearn.linear_model import LinearRegression

%matplotlib inline

In [214]:
# Load the stock table; the first CSV column becomes the row index and the
# line at position 1 (0-based) of the file is skipped.
try:
    SD = pd.read_csv("../data/Stock/stock.csv", skiprows=[1], index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Only a missing/empty file gets the regeneration hint. Re-raise so the
    # notebook stops here instead of failing later with an undefined `SD`.
    print('Error: stock.csv was empty. Please Run tools/updata_stock.py')
    raise

In [215]:
# SDM: stock data, masked. A random subset of entries is hidden so the
# imputation methods can be scored against the known values kept in SD.
mask_percent = 0.3
MASK_SEED = 0  # fixed seed so the mask (and every loss derived from it) is reproducible

# Convert to NumPy only while SD is still a DataFrame, so re-running this cell
# does not fail on `SD.columns` after SD has already been rebound to an ndarray.
if isinstance(SD, pd.DataFrame):
    title = SD.columns
    SD = SD.to_numpy()
SDM = SD.copy()

# Local generator: seeding here does not touch NumPy's global random state.
rng = np.random.default_rng(MASK_SEED)
MASK = rng.choice([True, False], size=SDM.shape, p=[mask_percent, 1 - mask_percent])
# The first and last rows are never hidden, so every hidden index lies between
# two known points (presumably because interp1d does not extrapolate by default).
MASK[0, :] = MASK[-1, :] = False
SDM = np.ma.masked_array(SDM, mask=MASK)

model = LinearRegression()
# Method name -> interpolator factory (or, for "LinearRegression", a model instance).
inter_func = {"interp1d": interp1d,
              "UnivariateSpline": UnivariateSpline,
              "Rbf": Rbf,
              "make_interp_spline": make_interp_spline,
              "LinearRegression": model}
# Per-column mean of the unmasked data.
stock_avg = np.mean(SD, axis=0)

In [216]:
def find_mask(array):
    """Return the first-axis indices at which `array` is non-zero (True)."""
    return np.nonzero(array)[0]


def DIS(ground, inter):
    """Total absolute difference between `ground` and `inter`, rounded to 2 decimals."""
    abs_error = np.abs(ground - inter)
    return np.around(np.sum(abs_error), decimals=2)

def INTER(sdm, mask, func = interp1d):
    """Fill the hidden entries of a 1-D series by interpolating over the known ones.

    Parameters
    ----------
    sdm : 1-D array (plain ndarray or ``np.ma`` masked array)
        Series to complete. Modified in place: the hidden positions receive
        the (unrounded) interpolated values.
    mask : 1-D bool array, same length as ``sdm``
        True marks the entries to estimate.
    func : callable, default ``interp1d``
        Called as ``func(x_known, y_known)``; the object it returns is then
        called with the hidden indices to produce the estimates.

    Returns
    -------
    ``sdm`` rounded to 2 decimals.
    """
    mask = np.asarray(mask, dtype=bool)
    hidden_idx = np.nonzero(mask)[0]
    if hidden_idx.size == 0:
        # Nothing to estimate; avoid evaluating the interpolator on empty input.
        return np.around(sdm, decimals=2)
    known_idx = np.nonzero(~mask)[0]
    # Index with the known positions instead of np.delete(sdm, mask): older
    # NumPy releases cast a boolean `obj` to the integer indices 0/1 and would
    # drop the wrong elements. np.asarray also strips any masked-array wrapper.
    y_axis = np.asarray(sdm)[known_idx]
    interp_func = func(known_idx, y_axis)
    sdm[hidden_idx] = interp_func(hidden_idx)
    return np.around(sdm, decimals=2)

def LR(sdm, mask, func=None):
    """Fill the hidden entries of a 1-D series with a straight-line fit.

    A fresh ``LinearRegression`` is fitted on (index, value) pairs of the known
    entries and evaluated at the hidden indices.

    Parameters
    ----------
    sdm : 1-D array (plain ndarray or ``np.ma`` masked array)
        Series to complete. Modified in place: the hidden positions receive
        the (unrounded) predictions.
    mask : 1-D bool array, same length as ``sdm``
        True marks the entries to estimate.
    func : ignored
        Accepted only so LR can be called with the same arguments as INTER;
        a new model is always created internally.

    Returns
    -------
    ``sdm`` rounded to 2 decimals.
    """
    mask = np.asarray(mask, dtype=bool)
    hidden_idx = np.nonzero(mask)[0]
    if hidden_idx.size == 0:
        # Nothing to estimate; predicting on zero samples would raise.
        return np.around(sdm, decimals=2)
    known_idx = np.nonzero(~mask)[0]
    # Index with the known positions instead of np.delete(sdm, mask): older
    # NumPy releases cast a boolean `obj` to the integer indices 0/1 and would
    # drop the wrong elements. np.asarray also strips any masked-array wrapper.
    y_axis = np.asarray(sdm)[known_idx]
    model = LinearRegression()
    # The estimator expects a 2-D feature matrix, hence the (n, 1) reshape.
    model.fit(known_idx.reshape(-1, 1), y_axis)
    sdm[hidden_idx] = model.predict(hidden_idx.reshape(-1, 1))
    return np.around(sdm, decimals=2)

In [217]:
# Score every imputation method on every column: re-estimate the hidden
# entries, then measure the total absolute error against the full data in SD.
loss_rows = {}
for inter_name, func in inter_func.items():
    # The regression entry holds a model instance rather than an interpolator
    # factory, so it goes through LR; everything else goes through INTER.
    impute = LR if inter_name == "LinearRegression" else INTER
    # Copies are passed because the imputers write into their first argument.
    loss_rows[inter_name] = [
        DIS(SD[:, col], impute(SDM[:, col].copy(), MASK[:, col].copy(), func))
        for col in range(len(title))
    ]
# Build the table once (one row per method) instead of growing it with
# pd.concat inside the loop.
loss_table = pd.DataFrame.from_dict(loss_rows, orient="index", columns=title)
# Optional normalisation by the per-column mean, left disabled:
# loss_table = loss_table.div(stock_avg, axis=1)
print(loss_table)

                        meta     goog      amzn      nflx      aapl
interp1d             1400.98   449.24    676.23   3183.65    531.46
UnivariateSpline     1656.38   676.84    842.62   3646.56    752.65
Rbf                  1681.88   528.68    775.90   3688.64    655.26
make_interp_spline   1650.13   525.88    761.38   3670.08    642.72
LinearRegression    17097.71  9640.22  12840.02  51523.70  16774.53


In [211]:
# Per-column mean of SD divided by 1000; this rebinds `stock_avg`.
stock_avg = SD.mean(axis=0) / 1000
print(stock_avg)

meta    0.137486
goog    0.099762
amzn    0.116815
nflx    0.235006
aapl    0.145352
dtype: float64
