In [2]:
import datetime
import numpy as np
import pandas as pd
import pandas_datareader as pdr
from tqdm.auto import tqdm
from sklearn.preprocessing import MinMaxScaler
from utils.leadlag import leadlag #USE THIS LINE FOR JUPYTER NOTEBOOK
#from leadlag import leadlag #USE THIS LINE FOR GOOGLE COLAB

import import_ipynb
from cvae_novol import CVAE


class MarketGenerator:
    """Build a windowed log-return dataset from a price series and fit a CVAE.

    The price series comes from one of three places, checked in this order:
    an object passed as ``df``, the ``Close`` column of a CSV file, or a
    download through ``pandas_datareader`` for ``ticker`` between ``start``
    and ``end``. The series is resampled at ``freq``; each resampled window
    is passed through ``leadlag`` and then reduced to per-window
    log-returns, which are min-max scaled. Each scaled window is paired with
    the scaled window that precedes it, which serves as the condition.
    """

    def __init__(self, ticker=None, start=datetime.date(1970, 1, 1),
                 end=datetime.date(2021, 5, 14), df = None, filename=None, freq="M"):
        """Load the data, build the training set and create the generator.

        Args:
            ticker: Symbol to download; only used when neither ``df`` nor
                ``filename`` is given.
            start: First date of the download range.
            end: Last date of the download range.
            df: Pre-loaded pandas object holding the prices. Takes priority
                over ``filename`` and ``ticker``.
            filename: Path of a CSV file with a ``Close`` column.
            freq: Resampling rule used to cut the series into windows.
        """
        self.ticker = ticker
        self.start = start
        self.end = end
        self.freq = freq

        self._load_data(df, filename)

        self._build_dataset()
        self.generator = CVAE(n_latent=8, alpha=0.003)

    def _load_data(self, df, filename):
        """Populate ``self.data`` and cut it into ``self.windows``.

        Args:
            df: Pre-loaded pandas object, or ``None``.
            filename: CSV path, consulted only when ``df`` is ``None``.

        Raises:
            RuntimeError: If the download fallback fails. The underlying
                error is chained as ``__cause__``.
        """
        if df is not None:
            # Work on a copy: the index is overwritten just below and that
            # must not leak into the object owned by the caller.
            self.data = df.copy()
            self.data.index = pd.to_datetime(self.data.index, unit='D')

        elif filename:
            self.data = pd.read_csv(filename)['Close']
            # NOTE(review): read_csv is called without index_col, so the
            # index converted here is presumably the default integer range,
            # interpreted as a day count - confirm this is intended.
            self.data.index = pd.to_datetime(self.data.index, unit='D')

        else:
            try:
                self.data = pdr.get_data_yahoo(self.ticker, self.start, self.end)["Close"]
            except Exception as err:
                # Catch Exception rather than everything so Ctrl-C and
                # interpreter shutdown still propagate; chain the original
                # error so the real reason for the failure stays visible.
                raise RuntimeError(f"Could not download data for {self.ticker} from {self.start} to {self.end}.") from err

        # One leadlag path per resampling period.
        self.windows = [
            leadlag(window.values)
            for _, window in self.data.resample(self.freq)
        ]

    def _build_dataset(self):
        """Turn ``self.windows`` into scaled samples and their conditions.

        Sets ``self.orig_logsig`` (unscaled log-returns per window),
        ``self.scaler``, ``self.logsigs`` and ``self.conditions``. Despite
        the attribute names, no signatures are computed: the values are
        plain log-returns.
        """
        # Log-returns of column 1 of every second row of each path.
        log_returns = np.array([np.diff(np.log(path[::2, 1])) for path in self.windows])

        # The final window is left out of the dataset.
        # NOTE(review): presumably because the last period can be
        # incomplete - confirm.
        # NOTE(review): an earlier revision dropped windows shorter than 4
        # steps and truncated all windows to a common length; that code is
        # disabled, so the windows are assumed to already share one length.
        self.orig_logsig = [row.tolist() for row in log_returns[:-1]]

        # Scale into a range strictly inside (0, 1).
        self.scaler = MinMaxScaler(feature_range=(0.00001, 0.99999))
        logsig = self.scaler.fit_transform(self.orig_logsig)

        # Sample i is conditioned on the window immediately before it.
        self.logsigs = logsig[1:]
        self.conditions = logsig[:-1]

    def train(self, n_epochs=10000):
        """Train the generator on the scaled samples and their conditions.

        Args:
            n_epochs: Number of epochs forwarded to the generator.
        """
        self.generator.train(self.logsigs, self.conditions, n_epochs=n_epochs)

    def generate(self, logsig, n_samples=None, normalised=False, mean=0, var=1):
        """Generate samples conditioned on ``logsig``.

        Args:
            logsig: Condition forwarded to the generator.
            n_samples: Number of samples to request. With ``None`` the
                generator output is treated as one sample.
            normalised: If true, return the generator output as is, without
                undoing the min-max scaling.
            mean: Forwarded to the generator.
            var: Forwarded to the generator.

        Returns:
            The raw generator output when ``normalised`` is true. Otherwise
            the inverse-scaled output: a single row when ``n_samples`` is
            ``None``, else the full inverse-transformed array.
        """
        generated = self.generator.generate(logsig, n_samples=n_samples, mean=mean, var=var)

        if normalised:
            return generated

        if n_samples is None:
            # The scaler works on 2-D input: wrap the single sample as one
            # row and unwrap it again afterwards.
            return self.scaler.inverse_transform(generated.reshape(1, -1))[0]

        return self.scaler.inverse_transform(generated)
    
    

importing Jupyter notebook from cvae.ipynb


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
