In [159]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d, UnivariateSpline, Rbf, make_interp_spline
from sklearn.linear_model import LinearRegression

%matplotlib inline

In [202]:
# Load the stock price table. skiprows=[1] skips the file's second line (the
# row directly after the header) and the first column is used as the index.
try:
    SD = pd.read_csv("../data/Stock/stock.csv", skiprows=[1], index_col=0)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # Only a missing or empty CSV is reported here; any other failure
    # (parse errors, interrupts, ...) propagates instead of being hidden.
    # NOTE: SD stays undefined on this path, so later cells cannot run until
    # the data file has been regenerated.
    print('Error: stock.csv was empty. Please Run tools/updata_stock.py')

              meta      goog      amzn      nflx      aapl
Date                                                      
2012-05-21   34.03   15.2954   10.9055   10.2486   17.1157
2012-05-22   31.00   14.9639   10.7665    9.6729   16.9842
2012-05-23   32.00   15.1796   10.8640   10.2729   17.3987
2012-05-24   33.03   15.0351   10.7620   10.0386   17.2389
2012-05-25   31.91   14.7330   10.6445   10.0314   17.1465
...            ...       ...       ...       ...       ...
2022-10-03  138.61   99.3000  115.8800  239.0400  142.4500
2022-10-04  140.28  102.4100  121.0900  240.7400  146.1000
2022-10-05  138.98  102.2200  120.9500  236.7300  146.4000
2022-10-06  139.07  102.2400  120.3000  240.0200  145.4300
2022-10-07  133.45   99.5700  114.5600  224.7500  140.0900

[2614 rows x 5 columns]


In [180]:
# SDM stock_data_masked
# Builds the inputs of the imputation benchmark:
#   SD    - ground-truth values as a plain ndarray
#   SDM   - the same values wrapped in a numpy masked array
#   MASK  - boolean array, True where an entry is hidden
#   title - column labels captured from the original DataFrame
MASK_SEED = 0        # fixed seed so the mask (and the resulting losses) are reproducible
mask_percent = 0.3   # probability that any single entry is hidden

# Idempotent conversion: after the first run SD is already an ndarray and has
# no `.columns`, so only convert (and capture the labels) while it is a DataFrame.
if isinstance(SD, pd.DataFrame):
    title = SD.columns
    SD = SD.to_numpy()
SDM = SD.copy()

rng = np.random.default_rng(MASK_SEED)
MASK = rng.random(SDM.shape) < mask_percent
# Never hide the first or last row, so every hidden index lies between two
# known indices and the interpolators do not have to extrapolate.
MASK[0, :] = MASK[-1, :] = False
SDM = np.ma.masked_array(SDM, mask=MASK)

# `model` is only the value stored under the "LinearRegression" key; LR() as
# defined in this notebook builds its own estimator.
model = LinearRegression()
inter_func = {"interp1d": interp1d,
              "UnivariateSpline" : UnivariateSpline, 
              "Rbf" : Rbf, 
              "make_interp_spline" : make_interp_spline,
              "LinearRegression": model}
# Per-column mean of the ground-truth values.
stock_avg = np.mean(SD, axis=0)

In [197]:
def find_mask(array):
    """Return the indices (along the first axis) of the non-zero / True entries of `array`."""
    positions = np.nonzero(array)
    return positions[0]


def DIS(ground, inter):
    """Return the sum of absolute differences between `ground` and `inter`, rounded to 2 decimals."""
    abs_error = np.abs(ground - inter)
    total = np.sum(abs_error)
    return np.around(total, decimals=2)

def INTER(sdm, mask, func = interp1d):
    """Fill the hidden entries of a 1-D series with an interpolator fitted on the known entries.

    Parameters
    ----------
    sdm : 1-D numpy array (plain or masked)
        Series values. Entries where `mask` is True are overwritten in place.
    mask : 1-D boolean array-like, same length as `sdm`
        True marks the entries to fill.
    func : callable, default interp1d
        Invoked as ``func(known_positions, known_values)``; must return a
        callable mapping an array of positions to interpolated values.

    Returns
    -------
    array
        The filled series rounded to 2 decimals (a new array).
    """
    mask = np.asarray(mask, dtype=bool)
    if not mask.any():
        # Nothing is hidden, so there is nothing to interpolate.
        return np.around(sdm, decimals=2)

    known_idx = np.nonzero(~mask)[0]
    missing_idx = np.nonzero(mask)[0]
    # Boolean indexing instead of np.delete(sdm, mask): NumPy releases before
    # 1.19 cast a boolean `obj` to the integer indices 0/1 and would drop the
    # wrong elements. np.asarray also yields the plain data of a masked array.
    known_vals = np.asarray(sdm)[~mask]

    interp_func = func(known_idx, known_vals)
    sdm[missing_idx] = interp_func(missing_idx)
    return np.around(sdm, decimals=2)

def LR(sdm, mask, func=None):
    """Fill the hidden entries of a 1-D series with a straight line fitted on the known entries.

    Parameters
    ----------
    sdm : 1-D numpy array (plain or masked)
        Series values. Entries where `mask` is True are overwritten in place.
    mask : 1-D boolean array-like, same length as `sdm`
        True marks the entries to fill.
    func : any, optional
        Unused; accepted so LR can be called with the same arguments as INTER.
        A fresh LinearRegression is always fitted here.

    Returns
    -------
    array
        The filled series rounded to 2 decimals (a new array).
    """
    mask = np.asarray(mask, dtype=bool)
    if not mask.any():
        # Nothing to impute. Returning early also avoids calling predict() on
        # a zero-row input, which scikit-learn rejects.
        return np.around(sdm, decimals=2)

    known_idx = np.nonzero(~mask)[0]
    missing_idx = np.nonzero(mask)[0]
    # Boolean indexing instead of np.delete(sdm, mask): NumPy releases before
    # 1.19 cast a boolean `obj` to the integer indices 0/1 and would drop the
    # wrong elements. np.asarray also yields the plain data of a masked array.
    known_vals = np.asarray(sdm)[~mask]

    model = LinearRegression()
    # The regressor is the row position, reshaped to the (n_samples, 1) layout
    # that fit/predict expect.
    model.fit(known_idx.reshape(-1, 1), known_vals)
    sdm[missing_idx] = model.predict(missing_idx.reshape(-1, 1))
    return np.around(sdm, decimals=2)

In [198]:
# Benchmark every imputation method on every column.
# One row per method, one column per stock; each cell is the DIS() between the
# ground truth and the imputed series.
loss_rows = {}
for inter_name, func in inter_func.items():
    # The "LinearRegression" entry is handled by LR(); everything else is a
    # scipy-style interpolator factory handled by INTER().
    impute = LR if inter_name == "LinearRegression" else INTER
    loss_rows[inter_name] = [
        # Pass copies: both helpers write into the series they receive.
        DIS(SD[:, col], impute(SDM[:, col].copy(), MASK[:, col].copy(), func))
        for col in range(len(title))
    ]

# Build the frame once instead of concatenating onto an empty frame per method.
loss_table = pd.DataFrame(list(loss_rows.values()),
                          index=list(loss_rows),
                          columns=title)
# Scale each column by that column's mean value (stock_avg).
loss_table = loss_table.div(stock_avg, axis=1)
print(loss_table)

[[ 0]
 [ 3]
 [ 5]
 [ 6]
 [ 7]
 [ 9]
 [10]
 [12]
 [15]
 [16]
 [17]
 [19]
 [20]
 [21]
 [22]
 [23]
 [24]
 [25]
 [26]
 [27]]
[10.5829412  10.90253157 11.11559183 11.22212195 11.32865208 11.54171233
 11.64824246 11.86130271 12.18089309 12.28742321 12.39395334 12.60701359
 12.71354372 12.82007384 12.92660397 13.0331341  13.13966422 13.24619435
 13.35272447 13.4592546 ]
[[ 0]
 [ 3]
 [ 6]
 [ 7]
 [ 8]
 [ 9]
 [10]
 [12]
 [13]
 [14]
 [15]
 [16]
 [17]
 [18]
 [21]
 [22]
 [26]
 [27]
 [28]
 [29]]
[0.4819256  0.60972716 0.73752871 0.78012923 0.82272975 0.86533027
 0.90793079 0.99313182 1.03573234 1.07833286 1.12093338 1.1635339
 1.20613442 1.24873493 1.37653649 1.41913701 1.58953908 1.6321396
 1.67474012 1.71734064]
[[ 0]
 [ 1]
 [ 3]
 [ 4]
 [ 8]
 [ 9]
 [10]
 [11]
 [12]
 [14]
 [17]
 [18]
 [19]
 [21]
 [22]
 [23]
 [24]
 [25]
 [26]
 [27]]
[-17.33463893 -17.26813113 -17.13511555 -17.06860775 -16.80257658
 -16.73606878 -16.66956099 -16.6030532  -16.5365454  -16.40352982
 -16.20400643 -16.13749864 -16.070990