# Original DataSet

In [25]:
from spflows.configs_classes.forecasting_configs import ForecastingModelConfig
from spflows.data.datamodules import ForecastingDataModule

from gluonts.dataset.field_names import FieldName
from gluonts.transform import (
    Transformation,
    Chain,
    InstanceSplitter,
    ExpectedNumInstanceSampler,
    ValidationSplitSampler,
    TestSplitSampler,
    RenameFields,
    AsNumpyArray,
    ExpandDimArray,
    AddObservedValuesIndicator,
    AddTimeFeatures,
    VstackFeatures,
    SetFieldIfNotPresent,
    TargetDimIndicator,
)
from gluonts.dataset.multivariate_grouper import MultivariateGrouper
from gluonts.dataset.repository.datasets import get_dataset
from itertools import islice

  from .autonotebook import tqdm as notebook_tqdm


In [26]:
# Build the model/data configuration and let the datamodule prepare itself.
config = ForecastingModelConfig(
    prefetch_factor=None,
    batch_size=19,
)
datamodule = ForecastingDataModule(config)
datamodule.setup()

# The helper hands back the config together with the dataset splits,
# which are unpacked here as (training, test, validation).
config, all_datasets = ForecastingDataModule.get_data_and_update_config(config)
training_data, test_data, validation_data = all_datasets

# Fetch the dataset named in the config from the GluonTS repository helper,
# without regenerating it.
dataset = get_dataset(config.dataset_str_name, regenerate=False)

In [42]:
# Peek at the first few raw training entries; islice stops after 9 items
# so the whole dataset is never iterated. The enumerate index was unused,
# so the entries are iterated directly.
for value in islice(dataset.train, 9):
    print(value)

{'target': array([84.10138 , 93.31797 , 92.1659  , ..., 71.42857 , 52.99539 ,
       52.419353], dtype=float32), 'start': Period('2014-01-14 00:00', 'H'), 'feat_static_cat': array([0]), 'item_id': 0}
{'target': array([175.53192 , 164.89362 , 154.78723 , ..., 128.7234  , 129.78723 ,
       116.489365], dtype=float32), 'start': Period('2014-01-01 00:00', 'H'), 'feat_static_cat': array([1]), 'item_id': 1}
{'target': array([31.993204, 31.28539 , 31.568516, ..., 63.703285, 45.58324 ,
       41.19479 ], dtype=float32), 'start': Period('2014-01-01 00:00', 'H'), 'feat_static_cat': array([2]), 'item_id': 2}
{'target': array([56.265984, 55.20034 , 55.41347 , ..., 60.102303, 57.757885,
       55.839725], dtype=float32), 'start': Period('2014-01-01 00:00', 'H'), 'feat_static_cat': array([3]), 'item_id': 3}
{'target': array([47.902317, 49.624294, 44.45836 , ..., 43.5191  , 45.241077,
       41.0144  ], dtype=float32), 'start': Period('2014-01-01 00:00', 'H'), 'feat_static_cat': array([4]), 'item_id

# Raw From Crypto

In [8]:
from spflows.data.gecko.gecko_requests import (
    get_key
)
from spflows.data.gecko.gecko_metadata import AllCoinsMetadata,CoinMetadata
from spflows.data.gecko.gecko_utils import get_dataframe_with_freq_bitcoin,get_dataframe_with_freq_from_bitcoin

In [9]:
# Credentials come from the project helper rather than being hard-coded here.
coingecko_key = get_key()
# Date string passed to AllCoinsMetadata below.
date_string = "2024-12-18"

all_coins_metadata = AllCoinsMetadata(
    date_string=date_string,
    coingecko_key=coingecko_key,
)
# Fills all_coins_metadata.df_time_series (read by the following cells).
all_coins_metadata.download_df_timeseries()


100%|██████████| 135/135 [00:01<00:00, 70.80it/s]

Obtained 135 time series. Missing: 0





In [12]:
# Every downloaded coin id except bitcoin, which is used as the reference series.
coins_not_bitcoin = [
    coin
    for coin in all_coins_metadata.df_time_series.keys()
    if coin != "bitcoin"
]
# Work with the first altcoin as the example series, next to the bitcoin one.
df = all_coins_metadata.df_time_series[coins_not_bitcoin[0]]
df_bitcoin = all_coins_metadata.df_time_series["bitcoin"]

In [14]:
# Put the bitcoin frame on a regular frequency first, then derive the altcoin
# frame from it (the second helper takes the bitcoin result as its reference).
df_bitcoin_freq = get_dataframe_with_freq_bitcoin(df_bitcoin)
df_freq = get_dataframe_with_freq_from_bitcoin(df, df_bitcoin_freq)

In [40]:
import math
import random
from abc import abstractmethod
from typing import (
    Callable,
    Dict,
    List,
    NamedTuple,
    Optional,
    Tuple,
    Union,
    cast,
)
import pandas as pd
import numpy as np
from gluonts.dataset.common import (
    BasicFeatureInfo,
    ArtificialDataset,
    CategoricalFeatureInfo,
    DataEntry,
    Dataset,
    ListDataset,
    MetaData,
    TrainDatasets,
)

def metadata(self) -> MetaData:
    """
    Build a ``MetaData`` record from an object exposing ``freq`` and
    ``prediction_length``.

    NOTE(review): this is a module-level function that takes ``self``; it is
    not attached to any class in this cell and looks like a leftover snippet.

    Parameters
    ----------
    self
        Any object with a ``freq`` attribute that has ``freqstr`` and a
        ``prediction_length`` attribute.

    Returns
    -------
    MetaData
        Metadata carrying the frequency string and the prediction length.
    """
    freq_alias = self.freq.freqstr
    horizon = self.prediction_length
    return MetaData(freq=freq_alias, prediction_length=horizon)

class ArtificialDataset:
    """
    Parent class of a dataset that can be generated from code.

    Subclasses are expected to expose ``metadata``, ``train`` and ``test`` as
    properties; ``generate`` reads them as attributes and packs them into a
    ``TrainDatasets``.

    Return annotations are written as strings so that defining the class does
    not require the annotated names to be resolvable at definition time.
    """

    def __init__(self, freq) -> None:
        """
        Parameters
        ----------
        freq
            Frequency specification accepted by ``pandas`` ``to_offset``
            (for example a frequency alias string).
        """
        # Imported locally: the notebook's import cells never bring these two
        # names into scope, so the original raised NameError on instantiation.
        from pandas.tseries.frequencies import to_offset
        from pandas.tseries.offsets import BaseOffset

        self.freq: BaseOffset = to_offset(freq)

    @property
    @abstractmethod
    def metadata(self) -> "MetaData":
        """Metadata describing the dataset; provided by subclasses."""
        pass

    @property
    @abstractmethod
    def train(self) -> "List[DataEntry]":
        """Training entries; provided by subclasses."""
        pass

    @property
    @abstractmethod
    def test(self) -> "List[DataEntry]":
        """Test entries; provided by subclasses."""
        pass

    # todo return the same type as dataset repo for better usability
    def generate(self) -> "TrainDatasets":
        """
        Wrap ``train`` and ``test`` in ``ListDataset`` objects and return them
        together with ``metadata`` as a ``TrainDatasets``.
        """
        return TrainDatasets(
            metadata=self.metadata,
            train=ListDataset(self.train, self.freq),
            test=ListDataset(self.test, self.freq),
        )

class CoinDataset(ArtificialDataset):
    """
    Dataset of bitcoin and altcoin series laid out like the datasets obtained
    from ``get_dataset`` in gluon_ts: one univariate entry per series, each
    with ``target``, ``start``, ``feat_static_cat`` and ``item_id``.

    With the default flags it generates 6 dimensions:
    index 0-2 bitcoin prices, market_caps, total_volumes
    index 3-5 altcoin prices, market_caps, total_volumes

    When ``include_market_cap`` or ``include_volumes`` is False the matching
    series are left out for both coins and the remaining entries are numbered
    consecutively (bitcoin first, then the altcoin).

    For portfolio creation the prediction length defaults to 96 hours (4 days)
    and the frequency to hours ("H").

    ``metadata``, ``train`` and ``test`` are properties, as declared by
    ``ArtificialDataset`` and as required by ``generate()``.
    """
    def __init__(self,
                 coin_id,
                 df_freq,
                 df_bitcoin_freq,
                 prediction_length: int = 96,  # change for days (portfolio sensitivity)
                 freq_str: str = "H",
                 include_market_cap: bool = True,
                 include_volumes: bool = True):
        """
        Parameters
        ----------
        coin_id
            Identifier of the altcoin; stored on the instance.
        df_freq: pd.DataFrame
            Altcoin frame with "prices", "market_caps" and "total_volumes"
            columns.
        df_bitcoin_freq: pd.DataFrame
            Bitcoin frame with the same columns; its first index value is
            used as the start of every series.
        prediction_length: int
            Forecast horizon reported in the metadata.
        freq_str: str
            Frequency alias used for the metadata and for the start period.
        include_market_cap: bool
            Whether to emit the "market_caps" series.
        include_volumes: bool
            Whether to emit the "total_volumes" series.
        """
        super().__init__(freq_str)
        self.coin_id = coin_id
        # Kept as a plain string: `metadata` and the start period need the alias.
        self.freq_str = freq_str
        self.df_freq = df_freq
        self.df_bitcoin_freq = df_bitcoin_freq
        self.prediction_length = prediction_length
        self.include_market_cap = include_market_cap
        self.include_volumes = include_volumes
        self._set_data_entries()

    @property
    def metadata(self):
        """``MetaData`` with the frequency, prediction length and one static
        categorical feature whose cardinality is the number of series."""
        n_series = len(self.data_list)
        return MetaData(freq=self.freq_str,
                        feat_static_cat=[CategoricalFeatureInfo(name='feat_static_cat_0', cardinality=str(n_series))],
                        prediction_length=self.prediction_length)

    def _selected_columns(self):
        """Column names to emit per coin, honouring the include_* flags."""
        columns = ["prices"]
        if self.include_market_cap:
            columns.append("market_caps")
        if self.include_volumes:
            columns.append("total_volumes")
        return columns

    def _set_data_entries(self):
        """
        Set data entries for the dataset.

        Populates ``self.data_list`` with one entry per selected series,
        bitcoin series first and altcoin series after them.
        """
        columns = self._selected_columns()

        # Bitcoin first, then the altcoin, so the default layout is
        # 0-2 bitcoin / 3-5 altcoin.
        targets = [self.df_bitcoin_freq[column].values for column in columns]
        targets += [self.df_freq[column].values for column in columns]

        # Every series starts at the first bitcoin timestamp.
        # NOTE(review): assumes df_freq is aligned to df_bitcoin_freq - confirm.
        timestamp = self.df_bitcoin_freq.index[0]
        period = timestamp.to_period(freq=self.freq_str)

        # feat_static_cat mirrors item_id (one category per series), which is
        # what the declared cardinality describes.
        self.data_list = [
            {'target': target, 'start': period, 'feat_static_cat': np.array([index]), 'item_id': index}
            for index, target in enumerate(targets)
        ]

    @property
    def train(self):
        """Entries used for training."""
        # NOTE(review): train and test return the same full-length series; no
        # prediction_length window is held out of the training data - confirm
        # this is intended.
        return self.data_list

    @property
    def test(self):
        """Entries used for testing (same list as ``train``)."""
        return self.data_list