In [1]:
import os, sys

# Make the repository root (one level above the notebook's working directory)
# importable so that `helper_funcs` can be found.
parent = os.path.abspath('..')
# Guard the insert: without it every re-run of this cell would add another
# duplicate entry to sys.path.
if parent not in sys.path:
    sys.path.insert(1, parent)

In [2]:
from helper_funcs.preprocessing import total_timeseries, get_covariates, embeddings_init

from darts.dataprocessing.transformers import StaticCovariatesTransformer
from darts.utils.model_selection import train_test_split
from darts.models import BlockRNNModel

from sklearn.model_selection import cross_val_score
from sklearn import preprocessing
from sklearn import linear_model

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

In [3]:
# First positional argument handed to total_timeseries() below.
# NOTE(review): presumably the exponential-moving-average window applied during
# preprocessing -- confirm against helper_funcs.preprocessing.
EXP_MA = 15
# Passed as `test_size` to darts' train_test_split() below, i.e. the share of
# the data that ends up in the validation split.
TEST_SIZE = 0.2

In [4]:
# Load the series through the project helper: market flag off; sentiment,
# embeddings and large flags on (flag semantics live in helper_funcs.preprocessing).
timeseries = total_timeseries(
    EXP_MA,
    market=False,
    sentiment=True,
    embeddings=True,
    large=True,
)

In [5]:
# axis=1 asks darts to split every series along its time axis (instead of
# splitting the collection of series); TEST_SIZE is the share given to `val`.
train, val = train_test_split(timeseries, axis=1, test_size=TEST_SIZE)

data = [train, val]

# get_covariates() hands back nine objects: target / past / future covariates
# for each of the train, val and test splits, in that order.
(
    target_train, past_train, future_train,
    target_val, past_val, future_val,
    target_test, past_test, future_test,
) = get_covariates(
    data,
    target='sentiment_score_2',
    past_covariates=[],
    embeddings=True,
)

In [7]:
# Sanity check: for every company, fit an ordinary least-squares model on the
# real past covariates and another one on Gaussian noise of the same shape,
# then compare their R^2.
# NOTE: both scores are in-sample (the model is scored on the data it was
# fitted on), so a high "artificial" R^2 shows how much the model can fit from
# noise alone, not real predictive signal.
companies = ['Apple', 'Amazon', 'Google_1', 'Google_2', 'Microsoft', 'Tesla']
true = []        # in-sample R^2 using the real covariates, one entry per company
artificial = []  # in-sample R^2 using the random covariates, one entry per company

# Fixed seed so the noise baseline -- and the table exported from it -- is
# reproducible across kernel restarts.
rng = np.random.default_rng(0)

for i, company in enumerate(companies):
    X = past_train[i].pd_dataframe().reset_index(drop=True).to_numpy()
    # Standard-normal noise with exactly the same shape as the real features.
    X_artificial = rng.standard_normal(X.shape)
    y = target_train[i].pd_dataframe().reset_index(drop=True).to_numpy()

    # Validation arrays are not used by the fits below; they are still built so
    # that the notebook namespace exposes them as before.
    X_val = past_val[i].pd_dataframe().reset_index(drop=True).to_numpy()
    y_val = target_val[i].pd_dataframe().reset_index(drop=True).to_numpy()

    model = linear_model.LinearRegression().fit(X, y)
    true_score = model.score(X, y)  # computed once, reused for print + storage
    print(f'True data for {company}: {true_score}')
    true.append(true_score)

    model = linear_model.LinearRegression().fit(X_artificial, y)
    artificial_score = model.score(X_artificial, y)
    print('Artificial data :', artificial_score)
    artificial.append(artificial_score)

    print("------------------------------------")

True data for Apple: 0.9313945900206475
Artificial data : 0.7207027661160836
------------------------------------
True data for Amazon: 0.9310057918357457
Artificial data : 0.7063353494010232
------------------------------------
True data for Google_1: 0.9133097275013186
Artificial data : 0.7122631474516651
------------------------------------
True data for Google_2: 0.9146352157316393
Artificial data : 0.7569013702875348
------------------------------------
True data for Microsoft: 0.9228723506769817
Artificial data : 0.7330599691191713
------------------------------------
True data for Tesla: 0.9635194158656059
Artificial data : 0.7389078215619178
------------------------------------


In [None]:
# Drop the entry at index 3 ('Google_2' in `companies`) so that five scores
# remain. The length guard keeps the cell idempotent: a second run would
# otherwise silently delete another company's score.
if len(true) == len(companies):
    del true[3]

In [None]:
# Drop the entry at index 3 ('Google_2' in `companies`) so that five scores
# remain. The length guard keeps the cell idempotent: a second run would
# otherwise silently delete another company's score.
if len(artificial) == len(companies):
    del artificial[3]

In [None]:
# One row per company, pairing the real-covariate R^2 with the noise-covariate R^2.
score_rows = list(zip(true, artificial))
score_columns = ['True', 'Artificial']
company_labels = ['Apple', 'Amazon', 'Google', 'Microsoft', 'Tesla']

df = pd.DataFrame(score_rows, columns=score_columns, index=company_labels)

In [None]:
# Render the score table as LaTeX source and emit it for copy-pasting.
latex_table = df.to_latex()
print(latex_table)