In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d, UnivariateSpline, Rbf, make_interp_spline
from sklearn.linear_model import LinearRegression
from tqdm import tqdm
import ffn
%matplotlib inline

In [2]:
find_mask = lambda array : np.nonzero(array)[0]
DIS = lambda ground, inter : np.around(np.sum(np.abs(ground-inter)), decimals = 2)

def INTER(sdm, mask, func = interp1d):
    y_axis = np.delete(sdm, mask)
    interp_func = func(find_mask(~mask), y_axis)
    mask_val = interp_func(find_mask(mask))
    sdm[find_mask(mask)] = mask_val
    return np.around(sdm, decimals=2)

def LR(sdm, mask):
    y_axis = np.delete(sdm, mask)
    model = LinearRegression()
    model.fit(find_mask(~mask).reshape(-1, 1), y_axis)
    mask_val = model.predict(find_mask(mask).reshape(-1, 1))
    sdm[find_mask(mask)] = mask_val
    return np.around(sdm, decimals=2)

inter_func = {"interp1d": interp1d,
              "UnivariateSpline" : UnivariateSpline, 
              "Rbf" : Rbf, 
              "make_interp_spline" : make_interp_spline,
              "LinearRegression": None}


In [3]:
def get_res(day = 500, mask_percent = 0.2, inter_name=None, func = None):
    SD = pd.read_csv("../data/stock.csv", skiprows=[1],index_col=0)
    SD = SD[-day:]
    title = SD.columns
    SD = SD.to_numpy()
    SDM = SD.copy()

    MASK = np.random.choice([True, False], size=SDM.shape, p=[mask_percent, 1-mask_percent])
    MASK[0, :] = MASK[-1, :] = False
    SDM = np.ma.masked_array(SDM, mask=MASK)


    loss_queue = dict()
    for col, col_name in enumerate(title):
        if inter_name == "LinearRegression":
            inter_res = LR(SDM[:, col].copy(), MASK[:, col].copy())
        else:
            inter_res = INTER(SDM[:, col].copy(), MASK[:, col].copy(), func)
        inter_dis = DIS(SD[:, col], inter_res)
        return inter_dis/(day*mask_percent*np.mean(SD[:, col])/100)


In [4]:
output = dict()
def AVG(lst):
    return sum(lst)/len(lst)
for k1 in inter_func.keys():
    output[k1] = dict()
for day in tqdm(range(20, 100, 10)):

    avg = dict()
    for name in inter_func.keys():
        avg[name] = list()
    for _ in range(10):
        for k1, val1 in inter_func.items():
            ret = get_res(day=day,inter_name=k1, func=val1)
            avg[k1].append(ret)
    
    for name in inter_func.keys():
        output[name][day] = AVG(avg[name])
print(output)

  0%|          | 0/8 [00:00<?, ?it/s]


ValueError: Found array with 0 sample(s) (shape=(0, 1)) while a minimum of 1 is required by LinearRegression.

In [None]:
df = pd.DataFrame(output)
# df = df.drop(columns=['LinearRegression'])
print(df)

In [None]:
print(inter_func.keys())

In [None]:
df.index.name = 'date'
df.to_csv("./mask.csv")
sns.lineplot(data=df[-200:],dashes=False)
# sns.lineplot(data=Risk_Table[-200:],dashes=False)
plt.show()

In [None]:
prices = pd.read_csv("../data/Stock/stock.csv", index_col=0)
# length = [100, 200, 500, 1000, 2000]
Risk_Table = pd.DataFrame(columns=prices.columns)
import ffn
for l in tqdm(range(10, 1000, 5)):
    test = prices.iloc[-l:]
    Dnext = test[ 1:].reset_index(drop=True)
    Dthis = test[:-1].reset_index(drop=True)
    Risk_each_day = (Dnext - Dthis)**2
    Risk_square = Risk_each_day.sum(axis=0)/test.shape[0]
    Risk = Risk_square**(1/2)
    Risk.name = f"{l}"
    Risk_Table = pd.concat([Risk_Table, Risk.to_frame().T])
print(Risk_Table)

In [None]:
# Risk_Table.index.name = 'date'
Risk_Table = Risk_Table.rolling(5).mean()
print(Risk_Table)
sns.lineplot(data=Risk_Table,dashes=False)
plt.gca().invert_xaxis()

In [None]:

Risk_Table.to_csv("./risk.csv")

In [None]:
print(output)

In [None]:
# SDM stock_data_masked
mask_percent = 0.3
title = SD.columns
SD = SD.to_numpy()
SDM = SD.copy()

MASK = np.random.choice([True, False], size=SDM.shape, p=[mask_percent, 1-mask_percent])
MASK[0, :] = MASK[-1, :] = False
SDM = np.ma.masked_array(SDM, mask=MASK)

model = LinearRegression()
inter_func = {"interp1d": interp1d,
              "UnivariateSpline" : UnivariateSpline, 
              "Rbf" : Rbf, 
              "make_interp_spline" : make_interp_spline,
              "LinearRegression": model}
stock_avg = np.mean(SD, axis=0)