<a href="https://colab.research.google.com/github/brownian-explorer/securities-master/blob/main/access.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [50]:
import pandas as pd
import os

from google.colab import drive
from datetime import datetime
# drive.mount('/content/drive')

In [51]:
class YahooRetriever:
    """Read per-ticker, per-year OHLC CSV files stored under a common root folder.

    Files are looked up at ``<file_path>/<FIRST LETTER>/<TICKER>/<year>.csv`` and
    must contain a ``Date`` column.
    """

    # Format of the ``start_date`` / ``end_date`` strings accepted by the methods.
    DATE_FORMAT = "%d/%m/%Y"

    def __init__(self):
        self.file_path = "/content/drive/MyDrive/findata/OHLC_yahoo"

    def retrieve_single_ohlc(self, ticker, start_date, end_date):
        """Load all rows of one ticker whose ``Date`` lies in ``[start_date, end_date]``.

        Args:
            ticker: Ticker symbol; case-insensitive.
            start_date: Inclusive lower bound as a ``dd/mm/YYYY`` string.
            end_date: Inclusive upper bound as a ``dd/mm/YYYY`` string.

        Returns:
            A DataFrame with the yearly files concatenated in year order (fresh
            RangeIndex), or ``None`` when no yearly file could be found at all.

        Raises:
            ValueError: If a date string does not match ``dd/mm/YYYY``.
        """
        # Parsed with pandas rather than ``datetime.strptime``: a later cell of this
        # notebook runs ``import datetime``, which rebinds the name to the module and
        # would make ``datetime.strptime`` fail on any subsequent call.
        start = pd.to_datetime(start_date, format=self.DATE_FORMAT)
        end = pd.to_datetime(end_date, format=self.DATE_FORMAT)
        symbol = ticker.upper()
        frames = []

        for year in range(start.year, end.year + 1):
            csv_path = f"{self.file_path}/{symbol[0]}/{symbol}/{year}.csv"
            try:
                yearly = pd.read_csv(csv_path)
            except FileNotFoundError:
                # A missing year is not fatal; the remaining years are still loaded.
                print(f"File not found for the year {year}")
                continue
            yearly['Date'] = pd.to_datetime(yearly['Date'])
            in_range = (yearly['Date'] >= start) & (yearly['Date'] <= end)
            frames.append(yearly.loc[in_range])

        if not frames:
            print("No data found within the specified date range.")
            return None

        return pd.concat(frames, ignore_index=True)

    def retrieve_multi_ohlc(self, tickers, start_date, end_date, ohlc):
        """Build a wide table holding one price field for several tickers.

        Args:
            tickers: Iterable of ticker symbols; case-insensitive.
            start_date: Inclusive lower bound as a ``dd/mm/YYYY`` string.
            end_date: Inclusive upper bound as a ``dd/mm/YYYY`` string.
            ohlc: Name of the CSV column to extract (e.g. ``"close"``, ``"open"``);
                matched case-insensitively against the file's columns.

        Returns:
            A DataFrame with a ``Date`` column followed by one column per
            upper-cased ticker that had data. Rows are aligned by date (union of
            all dates, ascending); dates missing for a ticker hold NaN. An empty
            DataFrame is returned when no ticker had data.

        Raises:
            KeyError: If a ticker's data has no column matching ``ohlc``.
        """
        wanted = ohlc.strip().lower()
        per_ticker = {}

        for ticker in tickers:
            symbol = ticker.upper()
            single = self.retrieve_single_ohlc(symbol, start_date, end_date)
            # retrieve_single_ohlc returns None when no file exists, so test for
            # None before touching ``.empty``.
            if single is None or single.empty:
                print(f"No data found for {symbol}")
                continue

            matches = [col for col in single.columns if str(col).strip().lower() == wanted]
            if not matches:
                raise KeyError(
                    f"Column {ohlc!r} not found for {symbol}; available: {list(single.columns)}"
                )

            series = single.set_index("Date")[matches[0]].rename(symbol)
            # Column-wise concat needs unique labels; keep the last row per date.
            series = series[~series.index.duplicated(keep="last")]
            per_ticker[symbol] = series

        if not per_ticker:
            return pd.DataFrame()

        # Align on the date index instead of on row position so that tickers with
        # different trading calendars are never paired with the wrong date.
        combined = pd.concat(list(per_ticker.values()), axis=1)
        return combined.sort_index().rename_axis("Date").reset_index()


In [52]:
# Generic retriever instance. Despite the variable name it is not bound to AAPL:
# the ticker is passed on every retrieve_* call.
aapl = YahooRetriever()

In [53]:
# Load AMD and NVDA for 01/01/2017 - 01/11/2023 (dates are dd/mm/YYYY strings).
# The result has a "Date" column plus one column per upper-cased ticker.
df = aapl.retrieve_multi_ohlc(["amd", "nvda"], "01/01/2017", "01/11/2023", "close")

In [54]:
import numpy as np
import pandas as pd
import datetime
import collections
import math
import pytz
import scipy.stats as st

from numba import njit

In [78]:
# Backtest window, timezone-aware (UTC).
# NOTE(review): neither bound is referenced by the cells visible here - confirm they are still needed.
FROMDATE = datetime.datetime(year=2017, month=1, day=1, tzinfo=pytz.utc)
TODATE = datetime.datetime(year=2023, month=11, day=1, tzinfo=pytz.utc)

# Rolling window length, in bars, passed to the z-score routines.
PERIOD = 100

# Portfolio settings: starting cash and proportional fee per order.
CASH = 100_000
COMMPERC = 0.005  # 0.5%

# Target-percent order size for the AMD leg (1) and the NVDA leg (2).
ORDER_PCT1 = 0.1
ORDER_PCT2 = 0.1

# Two-sided 5% z-score thresholds from the standard normal; LOWER mirrors UPPER.
UPPER = st.norm.ppf(1 - 0.05 / 2)
LOWER = -UPPER

# Spread definition used downstream: 'OLS' or 'log_return'.
MODE = 'log_return'

In [79]:
@njit
def rolling_logret_zscore_nb(a, b, period):
    """Compute the log-return spread of two price arrays and its rolling z-score.

    Args:
        a: 1-D array of prices of the first asset.
        b: 1-D array of prices of the second asset, same length as ``a``.
        period: Number of bars in the rolling window.

    Returns:
        Tuple ``(spread, zscore)`` of float64 arrays shaped like ``a``.
        ``spread[i]`` is ``log(a[i] / a[i-1]) - log(b[i] / b[i-1])``; ``spread[0]``
        is NaN because there is no previous bar. ``zscore[i]`` standardises
        ``spread[i]`` by the mean and (population) standard deviation of the last
        ``period`` spread values. It is NaN while the window is incomplete, while
        the window still contains the NaN at index 0, and when the window's
        standard deviation is zero.
    """
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0.
    spread = np.full_like(a, np.nan, dtype=np.float64)
    spread[1:] = np.log(a[1:] / a[:-1]) - np.log(b[1:] / b[:-1])
    zscore = np.full_like(a, np.nan, dtype=np.float64)

    # Windows ending before index period - 1 are incomplete and stay NaN.
    for i in range(max(period - 1, 0), a.shape[0]):
        from_i = max(0, i + 1 - period)
        window = spread[from_i:i + 1]
        spread_std = np.std(window)
        # A zero spread_std would divide by zero (an exception under numba's default
        # error model); a NaN spread_std fails the comparison too, so the slot stays NaN.
        if spread_std > 0.0:
            zscore[i] = (spread[i] - np.mean(window)) / spread_std
    return spread, zscore

@njit
def ols_spread_nb(a, b):
    """Return the last residual of an OLS fit of ``log(a)`` on ``log(b)`` plus a constant.

    The coefficients are obtained from the normal equations
    ``inv(X'X) @ X'y`` with ``X = [log(b), 1]`` and ``y = log(a)``.
    """
    log_a = np.log(a)
    log_b = np.log(b)

    # Design matrix: one column of log(b), one column of ones for the intercept.
    design = np.vstack((log_b, np.ones(len(log_b)))).T
    gram = np.dot(design.T, design)
    moment = np.dot(design.T, log_a)
    coef = np.dot(np.linalg.inv(gram), moment)
    slope = coef[0]
    intercept = coef[1]

    residuals = log_a - (slope * log_b + intercept)
    return residuals[-1]

@njit
def rolling_ols_zscore_nb(a, b, period):
    """Compute the rolling OLS spread of two price arrays and its rolling z-score.

    Args:
        a: 1-D array of prices of the first asset.
        b: 1-D array of prices of the second asset, same length as ``a``.
        period: Number of bars in the rolling window (used both for the OLS fit
            and for standardising the spread).

    Returns:
        Tuple ``(spread, zscore)`` of float64 arrays shaped like ``a``.
        ``spread[i]`` is the value ``ols_spread_nb`` returns for the last
        ``period`` bars and is NaN for ``i < period - 1``. ``zscore[i]``
        standardises ``spread[i]`` by the mean and (population) standard deviation
        of the last ``period`` spread values. Because that window must itself be
        free of NaN, the first finite z-score appears at index ``2 * period - 2``.
        A window with zero standard deviation also leaves NaN.
    """
    # np.float64 instead of np.float_: the alias was removed in NumPy 2.0.
    spread = np.full_like(a, np.nan, dtype=np.float64)
    zscore = np.full_like(a, np.nan, dtype=np.float64)

    # Windows ending before index period - 1 are incomplete and stay NaN.
    for i in range(max(period - 1, 0), a.shape[0]):
        from_i = max(0, i + 1 - period)
        to_i = i + 1
        spread[i] = ols_spread_nb(a[from_i:to_i], b[from_i:to_i])
        window = spread[from_i:to_i]
        spread_std = np.std(window)
        # A zero spread_std would divide by zero (an exception under numba's default
        # error model); a NaN spread_std fails the comparison too, so the slot stays NaN.
        if spread_std > 0.0:
            zscore[i] = (spread[i] - np.mean(window)) / spread_std
    return spread, zscore

In [80]:
# Select the z-score routine that matches MODE, then run it once on both price arrays.
if MODE == 'OLS':
    _zscore_func = rolling_ols_zscore_nb
elif MODE == 'log_return':
    _zscore_func = rolling_logret_zscore_nb
else:
    raise ValueError("Unknown mode")

_spread_values, _zscore_values = _zscore_func(
    df['AMD'].values,
    df['NVDA'].values,
    PERIOD
)

# Wrap the raw arrays as Series indexed by date.
vbt_spread = pd.Series(_spread_values, index=df['Date'], name='spread')
vbt_zscore = pd.Series(_zscore_values, index=df['Date'], name='zscore')

In [81]:
# Boolean masks per bar: z-score above UPPER flags a short signal, below LOWER a long signal.
# NaN z-scores compare False, so the warm-up bars carry no signal.
vbt_short_signals = vbt_zscore.gt(UPPER).rename('short_signals')
vbt_long_signals = vbt_zscore.lt(LOWER).rename('long_signals')

In [82]:
# Post-process both signal masks together through vectorbt's signals accessor.
# NOTE(review): presumably this drops repeated signals so that short and long signals
# alternate - confirm against the vectorbt documentation for `signals.clean`.
# NOTE(review): the `.vbt` accessor only exists after vectorbt has been imported; no such
# import is visible in the cells above - confirm it is imported as `vbt` elsewhere.
vbt_short_signals, vbt_long_signals = pd.Series.vbt.signals.clean(
    vbt_short_signals, vbt_long_signals, entry_first=False, broadcast_kwargs=dict(columns_from='keep'))

def plot_spread_and_zscore(spread, zscore, short_signals=None, long_signals=None,
                           upper=None, lower=None):
    """Plot the spread (top panel) and its z-score with signals and threshold band (bottom panel).

    Args:
        spread: Series drawn in the first row.
        zscore: Series drawn in the second row; also the y-values for the signal markers.
        short_signals: Boolean Series drawn as exit markers. Defaults to the
            notebook-level ``vbt_short_signals``.
        long_signals: Boolean Series drawn as entry markers. Defaults to the
            notebook-level ``vbt_long_signals``.
        upper: Upper edge of the shaded band. Defaults to the notebook-level ``UPPER``.
        lower: Lower edge of the shaded band. Defaults to the notebook-level ``LOWER``.

    Returns:
        The figure created by ``vbt.make_subplots`` with all traces and the band added.
    """
    # Fall back to the notebook globals so existing two-argument calls keep working,
    # while callers can now plot other signal sets or thresholds explicitly.
    if short_signals is None:
        short_signals = vbt_short_signals
    if long_signals is None:
        long_signals = vbt_long_signals
    if upper is None:
        upper = UPPER
    if lower is None:
        lower = LOWER

    fig = vbt.make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.05)
    top_panel = dict(row=1, col=1)
    bottom_panel = dict(row=2, col=1)

    spread.vbt.plot(add_trace_kwargs=top_panel, fig=fig)
    zscore.vbt.plot(add_trace_kwargs=bottom_panel, fig=fig)
    short_signals.vbt.signals.plot_as_exit_markers(zscore, add_trace_kwargs=bottom_panel, fig=fig)
    long_signals.vbt.signals.plot_as_entry_markers(zscore, add_trace_kwargs=bottom_panel, fig=fig)
    fig.update_layout(height=500)

    # Grey band between the two thresholds, spanning the full width of the z-score panel.
    fig.add_shape(
        type="rect",
        xref='paper',
        yref='y2',
        x0=0,
        y0=upper,
        x1=1,
        y1=lower,
        fillcolor="gray",
        opacity=0.2,
        layer="below",
        line_width=0,
    )
    return fig

# Render the two-panel spread / z-score figure for the series computed above.
plot_spread_and_zscore(vbt_spread, vbt_zscore).show()

In [83]:
# Build percentage order size: one column per symbol, NaN where no order is placed.
symbol_cols = pd.Index(["AMD", "NVDA"], name='symbol')
vbt_order_size = pd.DataFrame(np.nan, index=df["Date"], columns=symbol_cols)

# The two legs always take opposite sides: a short signal sells AMD and buys NVDA,
# a long signal does the reverse. `_long_pct` is the size written on a long signal.
for _symbol, _long_pct in (("AMD", ORDER_PCT1), ("NVDA", -ORDER_PCT2)):
    vbt_order_size.loc[vbt_short_signals, _symbol] = -_long_pct
    vbt_order_size.loc[vbt_long_signals, _symbol] = _long_pct

# Execute at the next bar
vbt_order_size = vbt_order_size.vbt.fshift(1)

# Show only the bars on which both legs carry an order.
print(vbt_order_size.dropna())

symbol      AMD  NVDA
Date                 
2017-06-22 -0.1   0.1
2017-10-26  0.1  -0.1
2017-11-07 -0.1   0.1
2018-02-12  0.1  -0.1
2018-02-13 -0.1   0.1
2018-03-21  0.1  -0.1
2018-04-06 -0.1   0.1
2018-08-01  0.1  -0.1
2018-08-20 -0.1   0.1
2018-09-14  0.1  -0.1
2018-11-19 -0.1   0.1
2019-07-12  0.1  -0.1
2019-08-09 -0.1   0.1
2019-08-20  0.1  -0.1
2019-11-20 -0.1   0.1
2020-01-30  0.1  -0.1
2020-03-12 -0.1   0.1
2020-03-25  0.1  -0.1
2020-07-23 -0.1   0.1
2020-09-03  0.1  -0.1
2021-05-05 -0.1   0.1
2021-08-06  0.1  -0.1
2021-09-28 -0.1   0.1
2021-10-27  0.1  -0.1
2021-11-09 -0.1   0.1
2021-11-19  0.1  -0.1
2022-02-22 -0.1   0.1
2022-03-21  0.1  -0.1
2022-05-05 -0.1   0.1
2022-10-10  0.1  -0.1
2022-12-28 -0.1   0.1
2023-01-18  0.1  -0.1
2023-02-02 -0.1   0.1
2023-02-24  0.1  -0.1
2023-05-05 -0.1   0.1
2023-05-26  0.1  -0.1
2023-06-07 -0.1   0.1
2023-06-14  0.1  -0.1
2023-09-05 -0.1   0.1
2023-10-24  0.1  -0.1


In [84]:
# Simulate the portfolio
# Price tables with one column per symbol, keyed by `symbol_cols`.
# NOTE(review): `vbt_open_price` is built from exactly the same `df` columns as
# `vbt_close_price`, so orders are executed at the same price series that is used as the
# reference price - confirm whether real open prices were intended here.
# NOTE(review): both frames keep `df`'s own row index, while `vbt_order_size` is indexed
# by `df["Date"]` - confirm the indexes line up as the simulation expects.
vbt_close_price = pd.concat((df['AMD'], df['NVDA']), axis=1, keys=symbol_cols)
vbt_open_price = pd.concat((df['AMD'], df['NVDA']), axis=1, keys=symbol_cols)

def simulate_from_orders():
    """Run the backtest with ``vbt.Portfolio.from_orders`` on the notebook-level inputs.

    Reads ``vbt_close_price``, ``vbt_open_price``, ``vbt_order_size``, ``CASH`` and
    ``COMMPERC`` from the enclosing notebook namespace and returns the resulting portfolio.
    """
    order_kwargs = dict(
        size=vbt_order_size,                        # target-percent size per bar and symbol
        price=vbt_open_price,                       # execution price
        size_type='targetpercent',
        val_price=vbt_close_price.vbt.fshift(1),    # previous bar's price for group valuation
        init_cash=CASH,
        fees=COMMPERC,
        cash_sharing=True,                          # both symbols draw on the same cash
        group_by=True,                              # all columns form a single group
        call_seq='auto',                            # sell before buying
        freq='d',                                   # index frequency for annualization
    )
    # The first positional argument is the reference (close) price.
    return vbt.Portfolio.from_orders(vbt_close_price, **order_kwargs)

# Run the simulation once and keep the resulting portfolio for the reporting cells below.
vbt_pf = simulate_from_orders()

In [85]:
# Print the order log of the simulated portfolio (one row per order: column, timestamp,
# size, price, fees, side).
print(vbt_pf.orders.records_readable)

    Order Id Column  Timestamp         Size       Price        Fees  Side
0          0    AMD 2017-06-22   715.307606   14.380000   51.430617  Sell
1          1   NVDA 2017-06-22   250.830875   39.592499   49.655106   Buy
2          2   NVDA 2017-10-26   464.769219   48.922501  113.688362  Sell
3          3    AMD 2017-10-26  1555.358308   12.010000   93.399268   Buy
4          4    AMD 2017-11-07  1702.704317   12.050000  102.587937  Sell
..       ...    ...        ...          ...         ...         ...   ...
75        75    AMD 2023-06-14   130.647298  127.330002   83.176603   Buy
76        76    AMD 2023-09-05   137.444231  110.779999   76.130358  Sell
77        77   NVDA 2023-09-05    36.095506  485.480011   87.618233   Buy
78        78   NVDA 2023-10-24    34.634390  436.630005   75.612070  Sell
79        79    AMD 2023-10-24   151.025972  101.669998   76.774052   Buy

[80 rows x 7 columns]


In [86]:
# Print the summary statistics of the simulated portfolio (returns, fees, drawdown, trades).
print(vbt_pf.stats())

Start                                2017-01-03 00:00:00
End                                  2023-11-01 00:00:00
Period                                1720 days 00:00:00
Start Value                                     100000.0
End Value                                   79408.747315
Total Return [%]                              -20.591253
Benchmark Return [%]                         1202.436503
Max Gross Exposure [%]                          4.004812
Total Fees Paid                               7163.70382
Max Drawdown [%]                               25.021311
Max Drawdown Duration                 1415 days 00:00:00
Total Trades                                          80
Total Closed Trades                                   78
Total Open Trades                                      2
Open Trade PnL                                667.341975
Win Rate [%]                                   51.282051
Best Trade [%]                                 46.892287
Worst Trade [%]                

In [87]:
# Plot portfolio
from functools import partial

def plot_orders(portfolio, column=None, add_trace_kwargs=None, fig=None):
    """Draw the orders of ``portfolio`` for one column onto ``fig``.

    Thin adapter around ``portfolio.orders.plot`` that forwards all three
    options unchanged. Returns nothing; the figure is modified through ``fig``.
    """
    orders = portfolio.orders
    orders.plot(
        column=column,
        add_trace_kwargs=add_trace_kwargs,
        fig=fig,
    )

# One "orders" subplot per symbol; each binds its column into plot_orders up front
# because the portfolio is grouped and no column is passed in by the plotter.
_order_subplots = [
    (
        f"symbol{_position}_orders",
        dict(
            title=f"Orders ({_symbol_name})",
            check_is_not_grouped=False,
            plot_func=partial(plot_orders, column=_symbol_name),
            pass_column=False,
        ),
    )
    for _position, _symbol_name in enumerate(("AMD", "NVDA"), start=1)
]

vbt_pf.plot(subplots=_order_subplots).show()