# TB-HRP

Start by installing the dependencies.

In [1]:
!git clone https://github.com/bkrayfield/TBHRP.git
!pip install -q sec_api

Cloning into 'TBHRP'...
remote: Enumerating objects: 212, done.[K
remote: Counting objects: 100% (23/23), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 212 (delta 8), reused 0 (delta 0), pack-reused 189 (from 1)[K
Receiving objects: 100% (212/212), 3.31 MiB | 6.54 MiB/s, done.
Resolving deltas: 100% (84/84), done.
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.5/66.5 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
# Work from inside the cloned repository so that the relative paths used in
# later cells (e.g. "Data/..." and "Results/") are resolved against it.
%cd TBHRP

/content/TBHRP


## Preconfiguration here.
Define your stock price file, the location for your results, a "Run Identifier" (a label that helps identify a run, e.g. when bootstrapping), and the years you want to analyze.

In [3]:
from google.colab import userdata
### Configuration

# Replace the API key with your own SEC API key here.
# You can purchase or acquire a free API key from https://sec-api.io/
# The key is read from the Colab secret named 'SEC_API_KEY' rather than being
# hardcoded in the notebook.
SEC_API_KEY = userdata.get('SEC_API_KEY')

# By default the price data shipped with the repo is used; point INPUT_DATA at
# your own file to customize.
INPUT_DATA = "Data/top500Total.csv"
OUTPUT_LOCATION = "Results/"  # passed as output_dir to the analysis; the results cell reads from "Results/"
RUN_IDENTIFIER = "1"  # label used to tell runs apart (e.g. when bootstrapping)
ANALYSIS_YEARS = [2018, 2022]  # NOTE(review): presumably the start/end years of the analysis - confirm against doHRP

## The TB-HRP code can be run in two ways

1. Using the SEC-API, which allows the most up-to-date SEC filings to be included in the analysis.
2. Using other external text data formatted in the same style as the SEC-API data. This allows you to employ data outside of the stock market, including startups, housing, etc.

## Using SEC-API

In [4]:
from TBHRP import doHRP

# Execute the analysis with the SEC API key and settings from the configuration cell.
# NOTE(review): no seed is passed here and the recorded runs select different
# tickers, so the sampling looks random - confirm whether doHRP accepts a seed.
# NOTE(review): the recorded output of this cell reads "Running using preloaded
# data mode." although an API key and no preloaded paths are passed - verify
# the mode message printed by doHRP.
doHRP.run_hrp_analysis(
    api_key=SEC_API_KEY,
    input_csv_path=INPUT_DATA,
    output_dir=OUTPUT_LOCATION,
    run_id=RUN_IDENTIFIER,
    years=ANALYSIS_YEARS,
    sample_size=10,  # the recorded run log reports sampling 10 tickers
    window_size=60,  # NOTE(review): presumably the rolling-window length; units are not shown here - confirm
)

Running using preloaded data mode.
Step 1: Loading and preparing price data...
Step 2: Identifying tickers with available 10-K filings...
Step 3: Calculating returns and sampling 10 tickers...
Selected tickers: ['AMD', 'IDXX', 'VLO', 'FDX', 'IP', 'WY', 'AAP', 'HON', 'WFC', 'SPG']
Step 4: Generating text-based similarity matrices...
Step 5: Aligning data and creating rolling windows...
Step 6: Running optimization over all windows...
Step 7: Saving results to disk...
Analysis complete.


## Using Your Own Data (Example files can be found in the "Sample_Input" directory)

In [5]:
from TBHRP import doHRP
import numpy as np # NOTE(review): numpy is not used in this cell

# Execute the analysis from preloaded text data instead of the SEC API:
# the API key is left empty and the two preloaded_* paths point at the
# example files in the "Sample_Input" directory.
# NOTE(review): the recorded output of this cell reads "Running using SEC-API."
# even though the preloaded files are reported as used - verify the mode
# message printed by doHRP.
doHRP.run_hrp_analysis(
    api_key="",
    input_csv_path=INPUT_DATA,
    output_dir=OUTPUT_LOCATION,
    run_id=RUN_IDENTIFIER,
    years=ANALYSIS_YEARS,
    sample_size=10,
    window_size=60,
    preloaded_urls_path = r"Sample_Input/sample_index.csv",
    preloaded_texts_path = r"Sample_Input/sample_input.pk",
)

Running using SEC-API.
Step 1: Loading and preparing price data...
Step 2: Identifying tickers with available 10-K filings...
Step 3: Calculating returns and sampling 10 tickers...
Selected tickers: ['BXP', 'BDX', 'BAC', 'WAB', 'ATO', 'MU', 'ITW', 'NOC', 'LLY', 'DHI']
Step 4: Generating text-based similarity matrices...
 Using preloaded filing texts.
  Using preloaded filing URLs.
Step 5: Aligning data and creating rolling windows...
Step 6: Running optimization over all windows...
Step 7: Saving results to disk...
Analysis complete.


## Compute the results from the stored weighted returns.



This section calculates and presents the results from the stored weighted returns.

The calculate_results function takes the calculated weights and pandas DataFrames as input. It then iterates through different result categories ('HRP', 'TBHRP', 'IV', 'EQ', 'MV') and computes the final return for each category over the specified time windows.

The code then loads the saved pandas DataFrames and weights from the "Results/" directory and calls the calculate_results function. Finally, it prints the mean, standard deviation, and Sharpe ratio (mean / standard deviation) for each result category.


In [7]:
import os
import pickle
import numpy as np
import pandas as pd

def calculate_results(weights, pd_frames):
    """Compute the per-window portfolio return for every weighting strategy.

    For window ``i`` the weights ``weights[i]`` are applied to the return
    frame ``pd_frames[i + 1]`` (the first frame is never evaluated). The
    window return is obtained by adding 1 to every return, scaling the first
    row by the strategy weights, compounding down the rows and summing the
    last row, minus 1.

    Parameters
    ----------
    weights : sequence of mappings
        One mapping per window with the entries 'HRP', 'TBHRP', 'IV' and
        'MV', each a ``pd.Series`` of weights indexed by asset label. The
        equal-weight ('EQ') allocation is derived here from the 'HRP' index
        of the same window. The mappings are not modified.
    pd_frames : sequence of pd.DataFrame
        Return frames whose columns include the asset labels of the weights
        they are evaluated with. The frames are not modified.

    Returns
    -------
    dict
        Maps each of 'HRP', 'TBHRP', 'IV', 'EQ', 'MV' to a list with one
        return per evaluated window (``len(pd_frames) - 1`` entries).
    """
    result_categories = ['HRP', 'TBHRP', 'IV', 'EQ', 'MV']
    results = {category: [] for category in result_categories}

    for category in result_categories:
        for index in range(len(pd_frames) - 1):
            window_weights = weights[index]

            if category == 'EQ':
                # Size the equal weights from this window's own assets so that
                # windows with differing asset counts are handled, and keep the
                # series local instead of writing it into the caller's mapping.
                labels = window_weights['HRP'].index
                asset_count = len(labels)
                allocation = pd.Series([1 / asset_count for _ in labels], index=labels)
            else:
                allocation = window_weights[category]

            # Put the columns in the weights' order and turn returns into
            # growth factors; the arithmetic yields a new frame, so the input
            # frame is left untouched.
            growth = pd_frames[index + 1][allocation.index] + 1

            # Weighting only the first row is enough: the cumulative product
            # carries the weights through to the last row.
            growth.iloc[0] *= allocation

            # Final return of the window
            results[category].append(growth.cumprod(axis=0).iloc[-1].sum() - 1)

    return results

# Directories holding the pickled return frames and the pickled weights.
home_dir_frames = "Results/"
home_dir_weights = "Results/"

# os.listdir returns names in arbitrary order, so both listings are sorted
# before being paired with zip() below; otherwise a frames file could be
# combined with the weights file of a different run.
# NOTE(review): the pairing still relies on the frames and weights file names
# sorting into the same run order - confirm against the names doHRP writes.
frames_files = sorted(
    file for file in os.listdir(home_dir_frames)
    if os.path.isfile(os.path.join(home_dir_frames, file))
)
weights_files = sorted(
    file for file in os.listdir(home_dir_weights)
    if os.path.isfile(os.path.join(home_dir_weights, file))
)

# Keep only names that contain the marker somewhere after their first character.
frames_files = [x for x in frames_files if x.find("frames.pk") > 0]
weights_files = [x for x in weights_files if x.find("weights.pk") > 0]

for frame_file, weight_file in zip(frames_files, weights_files):
    # NOTE: pickle.load can execute arbitrary code - only load result files
    # that you produced yourself.
    with open(os.path.join(home_dir_frames, frame_file), 'rb') as file:
        frame = pickle.load(file)
    with open(os.path.join(home_dir_weights, weight_file), 'rb') as file:
        weights = pickle.load(file)

    results = calculate_results(weights, frame)

    # Create a list to hold the results data (one row per strategy)
    table_data = []
    for key, values in results.items():
        arr = np.array(values)
        mean_val = np.mean(arr)
        # np.std defaults to the population standard deviation (ddof=0)
        std_val = np.std(arr)

        # Ratio of mean to standard deviation; no risk-free rate is subtracted.
        # Avoid division by zero if standard deviation is 0
        sharpe = mean_val / std_val if std_val != 0 else 0.0

        table_data.append({
            "Strategy": key,
            "Mean Return": mean_val,
            "Std Dev": std_val,
            "Sharpe Ratio": sharpe
        })

    # Create and print the DataFrame
    results_df = pd.DataFrame(table_data)
    print(results_df.round(4))


  Strategy  Mean Return  Std Dev  Sharpe Ratio
0      HRP       0.0367   0.0579        0.6328
1    TBHRP       0.0364   0.0564        0.6460
2       IV       0.0378   0.0602        0.6276
3       EQ       0.0397   0.0679        0.5847
4       MV       0.0332   0.0533        0.6224
