In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

%load_ext autoreload
%autoreload 2
from strategies import PortfolioType, Universe, Portfolio, Strategy  # noqa: F401


In [2]:
# Directory holding the CSV inputs that this notebook loads.
# NOTE(review): the value ends in "/" while the readers in this notebook build
# paths as f"{DATA_PATH}/<file>.csv", so the resulting paths contain "//".
# Consider dropping the trailing slash once all users of DATA_PATH are checked.
DATA_PATH = "./data/aligned_data/"

In [3]:
# Ticker symbols used to cut every loaded dataset down to a small subset; the
# subset frames are later written to ./tests/test_files/ and passed to
# Universe.verify_candidates as the candidate list.
TESTY_TICKERS = ["ADBE", "PDCE", "KBAL", "F", "LPL", "SCX", "GOOG"]

In [4]:
def _load_aligned(name: str) -> pd.DataFrame:
    """
    Load one CSV from DATA_PATH and keep only the TESTY_TICKERS columns.

    Args:
        name: File stem of the dataset, e.g. "prices" for "<DATA_PATH>/prices.csv".

    Returns: A DataFrame indexed by the file's first column (parsed as dates
    where possible) whose columns are exactly TESTY_TICKERS, in that order.

    Raises: KeyError if any ticker in TESTY_TICKERS is missing from the file.
    """
    frame = pd.read_csv(f"{DATA_PATH}/{name}.csv", parse_dates=True, index_col=0)
    return frame.loc[:, TESTY_TICKERS]


# Every per-ticker dataset is read the same way, so they all go through the
# single helper above instead of five hand-copied read_csv chains.
prices = _load_aligned("prices")
returns = _load_aligned("returns")
volume = _load_aligned("volume")
# The stored values are divided by 100 — presumably the file holds percentages;
# TODO confirm against the data source.
ychopmdf = _load_aligned("ychopmdf") / 100
asset_turnover = _load_aligned("asset_turnover")

# company_info is keyed by ticker on its rows (not its columns), so it is
# filtered with a row selection and without date parsing.
company_info = pd.read_csv("./data/yf/company_info.csv", index_col=0).loc[TESTY_TICKERS]

In [5]:
def min_max_scale(df: pd.DataFrame) -> pd.DataFrame:
    """
    Linearly rescale each column of a DataFrame onto the interval [0, 1].

    For every column the smallest value maps to 0 and the largest to 1; all
    other values are placed proportionally in between. The input frame is
    left unmodified.

    Args:
        df: Frame of numeric values; scaling is done column by column.

    Returns: A new DataFrame with the same index and columns as the input,
    holding the scaled values. A column whose maximum equals its minimum
    comes back as NaN, because its range is zero.
    """
    column_floor = df.min()
    column_span = df.max() - column_floor
    # Series operands are aligned on the column labels, so each column is
    # shifted and divided by its own statistics.
    rescaled = df.sub(column_floor).div(column_span)
    return rescaled.copy()

In [6]:
# min_max_scale works column-wise, so each frame is transposed first: the
# scaling then runs over the tickers within each index row of the original
# frame. The difference is transposed back to the original orientation.
asset_turnover_scaled = min_max_scale(asset_turnover.T)
ychopmdf_scaled = min_max_scale(ychopmdf.T)
diff_vector = (asset_turnover_scaled - ychopmdf_scaled).T
diff_vector

Unnamed: 0_level_0,ADBE,PDCE,KBAL,F,LPL,SCX,GOOG
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-04-01,-0.754744,,0.992637,0.000114,0.517767,0.427518,-0.918095
2012-04-01,-0.883726,,1.0,0.041523,0.169552,0.368196,-0.936986
2013-04-01,-0.697843,-0.326824,1.0,0.277825,,0.473411,-0.767611
2014-04-01,-0.387075,-0.466771,1.0,0.372708,0.738521,,-0.71049
2015-04-01,-0.125283,-0.6905,,0.333698,1.0,0.840059,-0.687525
2016-04-01,-0.335784,-0.36218,0.946685,0.274426,0.844008,0.842522,-0.687112
2017-04-01,-0.863947,,0.831483,-0.205375,0.344927,0.325151,-0.949963
2018-04-01,-0.846349,0.0,0.869976,0.051182,0.164022,,-0.726337
2019-04-01,-0.876161,-0.764628,0.815961,-0.124305,0.248311,0.385436,-0.597855
2020-04-01,-0.85675,-0.852911,0.496998,-0.345679,0.117511,0.026037,-0.680529


In [7]:
# Persist the ticker-restricted frames as CSV fixtures. The mapping keeps the
# destination of every frame in one place; files are written in listed order.
fixture_frames = {
    "./tests/test_files/test_prices.csv": prices,
    "./tests/test_files/test_returns.csv": returns,
    "./tests/test_files/test_volume.csv": volume,
    "./tests/test_files/test_ychopmdf.csv": ychopmdf,
    "./tests/test_files/test_asset_turnover.csv": asset_turnover,
    "./tests/test_files/test_diff_vector.csv": diff_vector,
    "./tests/test_files/test_company_info.csv": company_info,
}
for fixture_path, fixture_frame in fixture_frames.items():
    fixture_frame.to_csv(fixture_path)

In [8]:
# Disabled debugging aid: selects the price columns whose value on 2014-04-01 is NaN.
# prices.loc[:, prices.loc["2014-04-01"].isna().values]

In [9]:
# Build the Universe (imported from `strategies`) from the frames prepared in
# this notebook. The arguments are passed positionally, so their order has to
# match the Universe constructor, whose signature is not visible here —
# check it before reordering or adding inputs.
universe = Universe(diff_vector, ychopmdf, asset_turnover, returns, volume, company_info)
# Show the symbols the Universe exposes.
universe.symbols

['ADBE', 'PDCE', 'KBAL', 'F', 'LPL', 'SCX', 'GOOG']

In [25]:
# Collect every symbol that verify_candidates returns for 1 April of any year
# from 2011 through 2023, merged into a single set.
res = {
    symbol
    for year in range(2011, 2024)
    for symbol in universe.verify_candidates(TESTY_TICKERS, pd.Timestamp(year=year, month=4, day=1))
}
res

{'KBAL', 'LPL', 'PDCE', 'SCX'}

In [27]:
# Compare the collected symbols with the set recorded from an earlier run.
expected_symbols = {"KBAL", "LPL", "PDCE", "SCX"}
res == expected_symbols

True

In [13]:
# Spot-check a single date: which of the test tickers verify_candidates
# returns for 1 April 2015.
spot_check_date = pd.Timestamp(year=2015, month=4, day=1)
universe.verify_candidates(TESTY_TICKERS, spot_check_date)

['KBAL']