In [43]:
# -*- coding: utf-8 -*-

# Advanced AQUA
# Amitava Chakraborty 04 Feb, 2020

from abc import ABC, abstractmethod
import logging
import copy
from enum import Enum

import numpy as np
import fastdtw
import datetime
import random
import pandas as pd



from qiskit.aqua import AquaError
from qiskit.aqua.parser import JSONSchema

class AMCData(Enum):
    """ AMC Data enum """
    NASDAQ = 'NASDAQ'
    NYSE = 'NYSE'
    LONDON = 'XLON'
    EURONEXT = 'XPAR'
    SINGAPORE = 'XSES'
    RANDOM = 'RANDOM'

class DataType(Enum):
    """ Data Type Enum """
    DAILYADJUSTED = 'Daily (adj)'
    DAILY = 'Daily'
    BID = 'Bid'
    ASK = 'Ask'

class BaseDataProvider(ABC):

    CONFIGURATION = None

    def __init__(self):
        self._configuration = copy.deepcopy(self.CONFIGURATION)
        self._data = None
        self._n = 0  # pylint: disable=invalid-name
        self.period_return_mean = None
        self.cov = None
        self.period_return_cov = None
        self.rho = None
        self.mean = None

    @property
    def configuration(self):
        """Return driver configuration."""
        return self._configuration

    def get_mean_vector(self):
        """ Returns a vector containing the mean value of each asset.

        Returns:
            numpy.ndarray: a per-asset mean vector.
        Raises:
            QiskitFinanceError: no data loaded
        """
        try:
            if not self._data:
                raise QiskitFinanceError(
                    'No data loaded, yet. Please run the method run() first to load the data.'
                )
        except AttributeError:
            raise QiskitFinanceError(
                'No data loaded, yet. Please run the method run() first to load the data.'
            )
        self.mean = np.mean(self._data, axis=1)
        return self.mean


    def get_covariance_matrix(self):
        """
        Returns the covariance matrix.

        Returns:
            numpy.ndarray: an asset-to-asset covariance matrix.
        Raises:
            QiskitFinanceError: no data loaded
        """
        try:
            if not self._data:
                raise QiskitFinanceError(
                    'No data loaded, yet. Please run the method run() first to load the data.'
                )
        except AttributeError:
            raise QiskitFinanceError(
                'No data loaded, yet. Please run the method run() first to load the data.'
            )
        self.cov = np.cov(self._data, rowvar=True)
        return self.cov


    def get_similarity_matrix(self):
        """
        Returns time-series similarity matrix computed using dynamic time warping.

        Returns:
            numpy.ndarray: an asset-to-asset similarity matrix.
        Raises:
            QiskitFinanceError: no data loaded
        """
        try:
            if not self._data:
                raise QiskitFinanceError(
                    'No data loaded, yet. Please run the method run() first to load the data.'
                )
        except AttributeError:
            raise QiskitFinanceError(
                'No data loaded, yet. Please run the method run() first to load the data.'
            )
        self.rho = np.zeros((self._n, self._n))
        for i_i in range(0, self._n):
            self.rho[i_i, i_i] = 1.
            for j_j in range(i_i + 1, self._n):
                this_rho, _ = fastdtw.fastdtw(self._data[i_i], self._data[j_j])
                this_rho = 1.0 / this_rho
                self.rho[i_i, j_j] = this_rho
                self.rho[j_j, i_i] = this_rho
        return self.rho


    # gets coordinates suitable for plotting
    def get_coordinates(self):
        """ Returns random coordinates for visualisation purposes. """
        # Coordinates for visualisation purposes
        x_c = np.zeros([self._n, 1])
        y_c = np.zeros([self._n, 1])
        x_c = (np.random.rand(self._n) - 0.5) * 1
        y_c = (np.random.rand(self._n) - 0.5) * 1
        # for (cnt, s) in enumerate(self.tickers):
        # x_c[cnt, 1] = self.data[cnt][0]
        # y_c[cnt, 0] = self.data[cnt][-1]
        return x_c, y_c

In [48]:
"""
Python implementation of provider of mock stock-market data, which are generated pseudo-randomly.
"""
class RandomDataProvider(BaseDataProvider):

    CONFIGURATION = {
        "name": "RND",
        "description": "Pseudo-Random Data Provider",
        "input_schema": {
            "$schema": "http://json-schema.org/draft-07/schema#",
            "id": "rnd_schema",
            "type": "object",
            "properties": {
                "stockmarket": {
                    "type": "string",
                    "default": "RANDOM"
                },
                "datatype": {
                    "type": "string",
                    "default": DataType.DAILYADJUSTED.value,
                    "enum": [DataType.DAILYADJUSTED.value]
                },
            },
        }
    }

    def __init__(self,
                 tickers=None,
                 stockmarket=AMCData.RANDOM,
                 start=datetime.datetime(2016, 1, 1),
                 end=datetime.datetime(2016, 1, 30),
                 seed=None):
        """
        Initializer
        Args:
            tickers (str or list): tickers
            stockmarket (StockMarket): RANDOM
            start (datetime): first data point
            end (datetime): last data point precedes this date
            seed (None or int): shall a seed be used?
        Raises:
            QiskitFinanceError: provider doesn't support stock market value
        """
        super().__init__()
        tickers = tickers if tickers is not None else ["TICKER1", "TICKER2"]
        if isinstance(tickers, list):
            self._tickers = tickers
        else:
            self._tickers = tickers.replace('\n', ';').split(";")
        self._n = len(self._tickers)

        if stockmarket not in [AMCData.RANDOM]:
            msg = "RandomDataProvider does not support "
            msg += stockmarket.value
            msg += " as a stock market. Please use Stockmarket.RANDOM."
            raise QiskitFinanceError(msg)

        # This is to aid serialization; string is ok to serialize
        self._stockmarket = str(stockmarket.value)

        self._start = start
        self._end = end
        self._seed = seed

        # self.validate(locals())

    @staticmethod
    def check_provider_valid():
        """ check provider valid """
        return


    @classmethod
    def init_from_input(cls, section):
        """
        Initialize via section dictionary.

        Args:
            section (dict): section dictionary

        Returns:
            RandomDataProvider: Driver object
        Raises:
            QiskitFinanceError: invalid section
        """
        if section is None or not isinstance(section, dict):
            raise QiskitFinanceError(
                'Invalid or missing section {}'.format(section))

        # params = section
        kwargs = {}
        # for k, v in params.items():
        #    if k == ExchangeDataDriver. ...: v = UnitsType(v)
        #    kwargs[k] = v
        logger.debug('init_from_input: %s', kwargs)
        return cls(**kwargs)


    def run(self):
        """
        Generates data pseudo-randomly, thus enabling get_similarity_matrix
        and get_covariance_matrix methods in the base class.
        """
        self.check_provider_valid()

        length = (self._end - self._start).days
        if self._seed:
            random.seed(self._seed)
            np.random.seed(self._seed)

        self._data = []
        for _ in self._tickers:
            d_f = pd.DataFrame(np.random.randn(length)).cumsum() + random.randint(1, 101)
            #trimmed = np.maximum(d_f[0].values, np.zeros(len(d_f[0].values)))
            # pylint: disable=no-member
            self._data.append(d_f[0].values.tolist())

In [49]:
num_assets = 4

# Generate expected return and covariance matrix from (random) time-series
stocks = [("TICKER%s" % i) for i in range(num_assets)]
print(stocks)

data = RandomDataProvider(tickers=stocks,
                 start=datetime.datetime(2016,1,1),
                 end=datetime.datetime(2016,1,30))
data.run()

print(data._data)
mu = data.get_mean_vector()
sigma = data.get_covariance_matrix()
print(mu)
print(sigma)

['TICKER0', 'TICKER1', 'TICKER2', 'TICKER3']
[[40.676914777977615, 40.81360410047081, 43.59764532203081, 44.19992280148693, 44.83579107235393, 45.472445742762886, 45.16075939175839, 45.42441103701594, 45.892911663075296, 45.98149516225794, 45.938808818577364, 46.28405377305013, 46.01941669920792, 45.43451984647188, 45.96423631189036, 44.81997070477337, 44.09323862307913, 44.66783084823986, 46.39524643544894, 47.12946764952326, 46.303747772858536, 47.26301551119812, 47.4538664448419, 46.50454530852074, 45.1621903103311, 44.28883369959056, 43.67963979347095, 44.26265342002197, 44.10355803928345], [47.43767137070511, 47.826928004381486, 48.21023359549811, 47.32985233076143, 46.35751160570506, 47.57247675949396, 49.11847300216837, 49.37586402467523, 50.99982514640744, 51.53593277978653, 52.12474439345043, 51.75687404855906, 50.71522213550291, 50.90926846803761, 52.92493847280018, 53.24159786645905, 52.79841544675899, 52.06203917789831, 53.92933870530793, 56.17234710788445, 57.4894324114881

In [17]:
d_f = pd.DataFrame(np.random.randn(30)).cumsum() + random.randint(0, 100)
d_f

Unnamed: 0,0
0,42.789062
1,42.171625
2,43.090295
3,42.878029
4,44.053644
5,45.612543
6,44.656493
7,44.605337
8,42.192922
9,42.158399


In [22]:
print(d_f[0].values)
print(len(d_f[0].values))
print(np.zeros(len(d_f[0].values)))
trimmed = np.maximum(d_f[0].values, np.zeros(len(d_f[0].values)))
print(trimmed)

[42.78906241 42.1716249  43.09029512 42.87802862 44.05364446 45.6125433
 44.65649302 44.60533703 42.19292197 42.15839903 42.09437091 43.50787143
 42.52434234 42.01061136 40.98707932 41.99703942 41.72445846 43.35268969
 43.21019275 44.83520363 45.33604214 45.81039041 46.334724   46.14125478
 45.65377513 45.90745524 45.07606418 44.32239509 45.68123765 46.82310258]
30
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0.]
[42.78906241 42.1716249  43.09029512 42.87802862 44.05364446 45.6125433
 44.65649302 44.60533703 42.19292197 42.15839903 42.09437091 43.50787143
 42.52434234 42.01061136 40.98707932 41.99703942 41.72445846 43.35268969
 43.21019275 44.83520363 45.33604214 45.81039041 46.334724   46.14125478
 45.65377513 45.90745524 45.07606418 44.32239509 45.68123765 46.82310258]


In [29]:
data = []
for _ in ['A','B','C','D']:
    d_f = pd.DataFrame(np.random.randn(20)).cumsum() + random.randint(1, 101)
    print(d_f)
    trimmed = np.maximum(d_f[0].values, np.zeros(len(d_f[0].values)))
    # pylint: disable=no-member
    #data.append(trimmed.tolist())
    data.append(d_f[0].values.tolist())
print(data)

           0
0   5.966168
1   8.028574
2   8.299015
3   7.532092
4   7.034338
5   5.741193
6   6.398797
7   4.032687
8   2.477397
9   2.035517
10  2.049325
11  2.294990
12  1.504101
13  1.937832
14  4.305357
15  4.466205
16  4.457708
17  4.277749
18  3.720283
19  2.784706
            0
0   27.839740
1   28.002015
2   27.985362
3   28.799187
4   28.350199
5   28.451351
6   27.488934
7   27.868486
8   29.999258
9   31.495374
10  32.856157
11  33.392479
12  31.853113
13  32.070191
14  32.188135
15  34.339776
16  33.528232
17  32.721810
18  32.134886
19  32.816522
           0
0   3.330211
1   2.304687
2   1.309525
3   1.593267
4   1.344444
5   0.759439
6   1.196154
7   1.663917
8   1.430703
9   1.010988
10  0.921180
11  0.633229
12  0.550572
13 -0.119307
14 -0.789219
15 -1.859147
16 -3.548846
17 -2.814746
18 -1.440977
19 -1.376008
            0
0   30.759410
1   29.963673
2   30.952481
3   33.620765
4   33.736816
5   34.205434
6   34.398962
7   34.332053
8   36.597096
9   36.592442
10  36

In [40]:
period_returns = np.array(data)[:, 1:] / np.array(data)[:, :-1] - 1
period_return_mean = np.mean(np.array(data), axis=1)
print(period_return_mean)
print(np.mean(data[0]))

[ 4.46720165 30.70906043  0.30500318 34.93924794]
4.4672016454260675
