In [6]:
import warnings
import numpy as np
import pandas as pd
from pathlib import Path
import os
import vectorbt as vbt
import io
import sys
from contextlib import redirect_stdout
from datetime import datetime
import math
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import warnings
warnings.filterwarnings('ignore')

# Add project root to path
project_root = Path().absolute().parent
sys.path.append(str(project_root))

# Import utils with different aliases
from src.utils import csv_exporter as csv_utils
from src.utils import transformations as trans_utils
from src.utils import data_merger as merge_utils
from src.utils import metrics as metric_utils
from src.core.bloomberg_fetcher import fetch_bloomberg_data
from src.utils.transformations import get_ohlc



In [7]:
# Getting all the data 
mapping = {
    ('I05510CA Index', 'INDEX_OAS_TSY_BP'): 'cad_oas',
    ('LF98TRUU Index', 'INDEX_OAS_TSY_BP'): 'us_hy_oas',
    ('LUACTRUU Index', 'INDEX_OAS_TSY_BP'): 'us_ig_oas',
    ('SPTSX Index', 'PX_LAST'): 'tsx',
    ('VIX Index', 'PX_LAST'): 'vix',
}

# Calculate dates
end_date = datetime.now().strftime('%Y-%m-%d')
start_date ='2002-01-01'

# Fetch the data
df = fetch_bloomberg_data(
    mapping=mapping,
    start_date=start_date,
    end_date=end_date,
    periodicity='D',
    align_start=True
).dropna()

# Getting all the er_ytd data 
mapping1 = {
    ('I05510CA Index', 'INDEX_EXCESS_RETURN_YTD'): 'cad_ig_er',
    ('LF98TRUU Index', 'INDEX_EXCESS_RETURN_YTD'): 'us_hy_er',
    ('LUACTRUU Index', 'INDEX_EXCESS_RETURN_YTD'): 'us_ig_er',
}

# Fetch the er_ytd_data
df1 = fetch_bloomberg_data(
    mapping=mapping1,
    start_date=start_date,
    end_date=end_date,
    periodicity='D',
    align_start=True
).dropna()

# Convert er_ytd data to an index
df2 = trans_utils.convert_er_ytd_to_index(df1[['cad_ig_er','us_hy_er','us_ig_er']])
final_df = merge_utils.merge_dfs(df, df2, fill='ffill', start_date_align='yes')

# Handle bad data point for cad_oas on Nov 15 2005
bad_date = '2005-11-15'
if bad_date in final_df.index:
    final_df.loc[bad_date, 'cad_oas'] = final_df.loc[final_df.index < bad_date, 'cad_oas'].iloc[-1]

def calculate_qtd_changes(df):
    df_copy = df.copy()
    df_copy.index = pd.to_datetime(df_copy.index)
    
    qtd_changes = pd.DataFrame(index=df_copy.index)
    for col in df_copy.columns:
        changes = []
        for date in df_copy.index:
            # Get the start of the current quarter
            current_quarter_start = date.to_period('Q').start_time
            
            # Get the end of previous quarter (last day of previous quarter)
            prev_quarter_end = current_quarter_start - pd.Timedelta(days=1)
            
            # Get the value at the end of previous quarter
            mask = df_copy.index <= prev_quarter_end
            if not mask.any():
                changes.append(np.nan)
                continue
            
            prev_quarter_value = df_copy[col][mask].iloc[-1]
            current_value = df_copy[col][date]
            
            # Calculate change
            pct_change = (current_value / prev_quarter_value - 1) * 100
            changes.append(pct_change)
            
        qtd_changes[f'{col}_qtd'] = changes
    return qtd_changes

def calculate_ytd_changes(df):
    df_copy = df.copy()
    df_copy.index = pd.to_datetime(df_copy.index)
    
    ytd_changes = pd.DataFrame(index=df_copy.index)
    for col in df_copy.columns:
        changes = []
        for date in df_copy.index:
            # Get the start of the current year
            current_year_start = date.to_period('Y').start_time
            
            # Get the end of previous year (last day of previous year)
            prev_year_end = current_year_start - pd.Timedelta(days=1)
            
            # Get the value at the end of previous year
            mask = df_copy.index <= prev_year_end
            if not mask.any():
                changes.append(np.nan)
                continue
            
            prev_year_value = df_copy[col][mask].iloc[-1]
            current_value = df_copy[col][date]
            
            # Calculate change
            pct_change = (current_value / prev_year_value - 1) * 100
            changes.append(pct_change)
            
        ytd_changes[f'{col}_ytd'] = changes
    return ytd_changes

def add_all_changes(df):
    # Make a copy to avoid modifying original
    result_df = df.copy()
    result_df.index = pd.to_datetime(result_df.index)
    
    # Calculate regular percentage changes (3M and 1YR)
    for col in df.columns:
        # 3-month change (63 trading days approximation)
        result_df[f'{col}_3m'] = df[col].pct_change(periods=63) * 100
        # 1-year change (252 trading days approximation)
        result_df[f'{col}_1yr'] = df[col].pct_change(periods=252) * 100
    
    # Add QTD and YTD changes
    qtd_df = calculate_qtd_changes(df)
    ytd_df = calculate_ytd_changes(df)
    
    # Combine all changes
    for col in qtd_df.columns:
        result_df[col] = qtd_df[col]
    for col in ytd_df.columns:
        result_df[col] = ytd_df[col]
    
    # Drop any rows with missing values
    result_df = result_df.dropna()
    
    return result_df

def verify_calculations(df, n_random_samples=5):
    """Comprehensive verification of calculations with better edge case handling"""
    print("Running verification checks...")
    print("\n1. Random Date Samples:")
    print("=" * 80)
    
    # Get random dates, excluding the first year of data to ensure we have historical data
    valid_dates = df.index[252:]  # Skip first year
    if len(valid_dates) < n_random_samples:
        n_random_samples = len(valid_dates)
    
    if n_random_samples == 0:
        print("Not enough data for verification")
        return
        
    random_dates = pd.Index(valid_dates).to_series().sample(n=n_random_samples).sort_index().index
    
    for date in random_dates:
        date = pd.to_datetime(date)
        
        # Get period boundaries
        current_quarter_start = date.to_period('Q').start_time
        current_year_start = date.to_period('Y').start_time
        prev_quarter_end = current_quarter_start - pd.Timedelta(days=1)
        prev_year_end = current_year_start - pd.Timedelta(days=1)
        
        print(f"\nDate: {date.strftime('%Y-%m-%d')}")
        print(f"Previous Quarter End: {prev_quarter_end.strftime('%Y-%m-%d')}")
        print(f"Previous Year End: {prev_year_end.strftime('%Y-%m-%d')}")
        
        # Check calculations for TSX
        col = 'tsx'
        
        # Safely get historical values
        try:
            current_value = df[col][date]
            prev_quarter_mask = df.index <= prev_quarter_end
            prev_year_mask = df.index <= prev_year_end
            
            if not prev_quarter_mask.any() or not prev_year_mask.any():
                print(f"Insufficient historical data for date: {date}")
                continue
                
            prev_quarter_value = df[prev_quarter_mask][col].iloc[-1]
            prev_year_value = df[prev_year_mask][col].iloc[-1]
            
            manual_qtd = (current_value / prev_quarter_value - 1) * 100
            manual_ytd = (current_value / prev_year_value - 1) * 100
            
            print(f"\nTSX Values:")
            print(f"Current: {current_value:.2f}")
            print(f"Prev Quarter End: {prev_quarter_value:.2f}")
            print(f"Prev Year End: {prev_year_value:.2f}")
            print(f"QTD% (Manual): {manual_qtd:.2f}%")
            print(f"QTD% (Calculated): {df[f'{col}_qtd'][date]:.2f}%")
            print(f"YTD% (Manual): {manual_ytd:.2f}%")
            print(f"YTD% (Calculated): {df[f'{col}_ytd'][date]:.2f}%")
            
            # Check for significant differences
            qtd_diff = abs(manual_qtd - df[f'{col}_qtd'][date])
            ytd_diff = abs(manual_ytd - df[f'{col}_ytd'][date])
            
            if qtd_diff > 0.01:
                print(f"WARNING: Large QTD difference ({qtd_diff:.4f}%)")
            if ytd_diff > 0.01:
                print(f"WARNING: Large YTD difference ({ytd_diff:.4f}%)")
        except Exception as e:
            print(f"Error processing date {date}: {str(e)}")
            continue
            
    print("\n2. Quarter/Year Boundary Checks:")
    print("=" * 80)
    
    # Find dates near quarter/year boundaries
    quarter_starts = pd.Index(df.index[pd.to_datetime(df.index).is_quarter_start])
    year_starts = pd.Index(df.index[pd.to_datetime(df.index).is_year_start])
    
    if len(quarter_starts) > 0:
        print("\nFirst day of quarter example:")
        first_quarter_date = quarter_starts[len(quarter_starts)//2]  # Take a middle example
        print(f"Date: {first_quarter_date}")
        if f'tsx_qtd' in df.columns:
            print(f"TSX QTD%: {df.loc[first_quarter_date, 'tsx_qtd']:.2f}%")
        else:
            print("TSX QTD% not available")
        
    if len(year_starts) > 0:
        print("\nFirst day of year example:")
        first_year_date = year_starts[len(year_starts)//2]  # Take a middle example
        print(f"Date: {first_year_date}")
        if f'tsx_ytd' in df.columns:
            print(f"TSX YTD%: {df.loc[first_year_date, 'tsx_ytd']:.2f}%")
        else:
            print("TSX YTD% not available")

# Calculate all changes
print("Calculating changes...")
final_df = add_all_changes(final_df)

# Run comprehensive verification
print("\nRunning verification...")
verify_calculations(final_df)

# Show basic info about the final dataset
print("\nFinal Dataset Info:")
print("=" * 80)
final_df.info()

Calculating changes...

Running verification...
Running verification checks...

1. Random Date Samples:

Date: 2006-01-04
Previous Quarter End: 2005-12-31
Previous Year End: 2005-12-31

TSX Values:
Current: 11501.48
Prev Quarter End: 11272.26
Prev Year End: 11272.26
QTD% (Manual): 2.03%
QTD% (Calculated): 2.03%
YTD% (Manual): 2.03%
YTD% (Calculated): 2.03%

Date: 2008-05-15
Previous Quarter End: 2008-03-31
Previous Year End: 2007-12-31

TSX Values:
Current: 14828.06
Prev Quarter End: 13350.13
Prev Year End: 13833.06
QTD% (Manual): 11.07%
QTD% (Calculated): 11.07%
YTD% (Manual): 7.19%
YTD% (Calculated): 7.19%

Date: 2011-04-19
Previous Quarter End: 2011-03-31
Previous Year End: 2010-12-31

TSX Values:
Current: 13736.83
Prev Quarter End: 14116.10
Prev Year End: 13443.22
QTD% (Manual): -2.69%
QTD% (Calculated): -2.69%
YTD% (Manual): 2.18%
YTD% (Calculated): 2.18%

Date: 2018-10-29
Previous Quarter End: 2018-09-30
Previous Year End: 2017-12-31

TSX Values:
Current: 14721.75
Prev Quarter En

In [8]:
import vectorbt as vbt
from vectorbt.portfolio.enums import SizeType

# Create signals based on CAD OAS QTD being negative
signals = final_df['cad_oas_qtd'] < 0

# Create portfolio for our strategy
strategy_pf = vbt.Portfolio.from_signals(
    final_df['cad_ig_er_index'],
    entries=signals,
    exits=~signals,
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent,
    accumulate=False
)

# Create buy & hold portfolio for comparison
bh_pf = vbt.Portfolio.from_holding(
    final_df['cad_ig_er_index'],
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent
)

# Compare performance metrics
print("\nStrategy Performance:")
print(strategy_pf.stats())
print("\nBuy & Hold Performance:")
print(bh_pf.stats())


Strategy Performance:
Start                                2003-09-24 00:00:00
End                                  2025-03-12 00:00:00
Period                                5521 days 00:00:00
Start Value                                     100000.0
End Value                                  163916.288185
Total Return [%]                               63.916288
Benchmark Return [%]                           31.199195
Max Gross Exposure [%]                             100.0
Total Fees Paid                                      0.0
Max Drawdown [%]                                1.967439
Max Drawdown Duration                  508 days 00:00:00
Total Trades                                         169
Total Closed Trades                                  169
Total Open Trades                                      0
Open Trade PnL                                       0.0
Win Rate [%]                                   59.763314
Best Trade [%]                                  7.069267
Worst Tr

In [9]:
import vectorbt as vbt
from vectorbt.portfolio.enums import SizeType

# Create signals based on CAD OAS YTD being negative
signals = final_df['cad_oas_ytd'] < 0

# Create portfolio for our strategy
strategy_pf = vbt.Portfolio.from_signals(
    final_df['cad_ig_er_index'],
    entries=signals,
    exits=~signals,
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent,
    accumulate=False
)

# Create buy & hold portfolio for comparison
bh_pf = vbt.Portfolio.from_holding(
    final_df['cad_ig_er_index'],
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent
)

# Compare performance metrics
print("\nStrategy Performance:")
print(strategy_pf.stats())
print("\nBuy & Hold Performance:")
print(bh_pf.stats())


Strategy Performance:
Start                                2003-09-24 00:00:00
End                                  2025-03-12 00:00:00
Period                                5521 days 00:00:00
Start Value                                     100000.0
End Value                                  155062.518694
Total Return [%]                               55.062519
Benchmark Return [%]                           31.199195
Max Gross Exposure [%]                             100.0
Total Fees Paid                                      0.0
Max Drawdown [%]                                1.839838
Max Drawdown Duration                  485 days 00:00:00
Total Trades                                          83
Total Closed Trades                                   83
Total Open Trades                                      0
Open Trade PnL                                       0.0
Win Rate [%]                                   68.674699
Best Trade [%]                                 16.083912
Worst Tr

In [10]:
import vectorbt as vbt
from vectorbt.portfolio.enums import SizeType

# Create signals based on both CAD OAS QTD and YTD being negative
signals = (final_df['cad_oas_qtd'] < 0) & (final_df['cad_oas_ytd'] < 0)

# Create portfolio for our strategy
strategy_pf = vbt.Portfolio.from_signals(
    final_df['cad_ig_er_index'],
    entries=signals,
    exits=~signals,
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent,
    accumulate=False
)

# Create buy & hold portfolio for comparison
bh_pf = vbt.Portfolio.from_holding(
    final_df['cad_ig_er_index'],
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent
)

# Compare performance metrics
print("\nStrategy Performance:")
print(strategy_pf.stats())
print("\nBuy & Hold Performance:")
print(bh_pf.stats())


Strategy Performance:
Start                                2003-09-24 00:00:00
End                                  2025-03-12 00:00:00
Period                                5521 days 00:00:00
Start Value                                     100000.0
End Value                                  146756.341118
Total Return [%]                               46.756341
Benchmark Return [%]                           31.199195
Max Gross Exposure [%]                             100.0
Total Fees Paid                                      0.0
Max Drawdown [%]                                1.839838
Max Drawdown Duration                  509 days 00:00:00
Total Trades                                         151
Total Closed Trades                                  151
Total Open Trades                                      0
Open Trade PnL                                       0.0
Win Rate [%]                                   57.615894
Best Trade [%]                                  7.069267
Worst Tr

In [11]:
import vectorbt as vbt
from vectorbt.portfolio.enums import SizeType

# Create signals based on either CAD OAS QTD or YTD being negative
signals = (final_df['cad_oas_qtd'] < 0) | (final_df['cad_oas_ytd'] < 0)

# Create portfolio for our strategy
strategy_pf = vbt.Portfolio.from_signals(
    final_df['cad_ig_er_index'],
    entries=signals,
    exits=~signals,
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent,
    accumulate=False
)

# Create buy & hold portfolio for comparison
bh_pf = vbt.Portfolio.from_holding(
    final_df['cad_ig_er_index'],
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent
)

# Compare performance metrics
print("\nStrategy Performance:")
print(strategy_pf.stats())
print("\nBuy & Hold Performance:")
print(bh_pf.stats())


Strategy Performance:
Start                                2003-09-24 00:00:00
End                                  2025-03-12 00:00:00
Period                                5521 days 00:00:00
Start Value                                     100000.0
End Value                                  173193.691715
Total Return [%]                               73.193692
Benchmark Return [%]                           31.199195
Max Gross Exposure [%]                             100.0
Total Fees Paid                                      0.0
Max Drawdown [%]                                1.839838
Max Drawdown Duration                  405 days 00:00:00
Total Trades                                         101
Total Closed Trades                                  101
Total Open Trades                                      0
Open Trade PnL                                       0.0
Win Rate [%]                                   71.287129
Best Trade [%]                                 16.083912
Worst Tr

In [12]:
import vectorbt as vbt
from vectorbt.portfolio.enums import SizeType
import pandas as pd

# Define our indicators and their types
positive_indicators = ['cad_ig_er_index', 'us_hy_er_index', 'us_ig_er_index', 'tsx']
negative_indicators = ['cad_oas', 'us_hy_oas', 'us_ig_oas', 'vix']
intervals = ['3m', '1yr', 'qtd', 'ytd']

# Dictionary to store all portfolios
portfolios = {}

# Create strategies for positive indicators
for indicator in positive_indicators:
    for interval in intervals:
        col_name = f'{indicator}_{interval}'
        strategy_name = f'{indicator}_{interval}_pos'
        signals = final_df[col_name] > 0
        
        portfolios[strategy_name] = vbt.Portfolio.from_signals(
            final_df['cad_ig_er_index'],
            entries=signals,
            exits=~signals,
            freq='1D',
            init_cash=100000,
            size=1.0,
            size_type=SizeType.Percent,
            accumulate=False
        )

# Create strategies for negative indicators
for indicator in negative_indicators:
    for interval in intervals:
        col_name = f'{indicator}_{interval}'
        strategy_name = f'{indicator}_{interval}_neg'
        signals = final_df[col_name] < 0
        
        portfolios[strategy_name] = vbt.Portfolio.from_signals(
            final_df['cad_ig_er_index'],
            entries=signals,
            exits=~signals,
            freq='1D',
            init_cash=100000,
            size=1.0,
            size_type=SizeType.Percent,
            accumulate=False
        )

# Create buy & hold portfolio for comparison
bh_pf = vbt.Portfolio.from_holding(
    final_df['cad_ig_er_index'],
    freq='1D',
    init_cash=100000,
    size=1.0,
    size_type=SizeType.Percent
)
portfolios['buy_and_hold'] = bh_pf

# Create a comparison DataFrame
stats_list = []
for name, pf in portfolios.items():
    stats = pf.stats()
    stats_list.append(pd.Series(stats, name=name))

comparison_df = pd.DataFrame(stats_list)

# Display results
print("\nStrategy Comparison:")
print(comparison_df)


Strategy Comparison:
                             Start        End    Period  Start Value  \
cad_ig_er_index_3m_pos  2003-09-24 2025-03-12 5521 days     100000.0   
cad_ig_er_index_1yr_pos 2003-09-24 2025-03-12 5521 days     100000.0   
cad_ig_er_index_qtd_pos 2003-09-24 2025-03-12 5521 days     100000.0   
cad_ig_er_index_ytd_pos 2003-09-24 2025-03-12 5521 days     100000.0   
us_hy_er_index_3m_pos   2003-09-24 2025-03-12 5521 days     100000.0   
us_hy_er_index_1yr_pos  2003-09-24 2025-03-12 5521 days     100000.0   
us_hy_er_index_qtd_pos  2003-09-24 2025-03-12 5521 days     100000.0   
us_hy_er_index_ytd_pos  2003-09-24 2025-03-12 5521 days     100000.0   
us_ig_er_index_3m_pos   2003-09-24 2025-03-12 5521 days     100000.0   
us_ig_er_index_1yr_pos  2003-09-24 2025-03-12 5521 days     100000.0   
us_ig_er_index_qtd_pos  2003-09-24 2025-03-12 5521 days     100000.0   
us_ig_er_index_ytd_pos  2003-09-24 2025-03-12 5521 days     100000.0   
tsx_3m_pos              2003-09-24 2025-03

In [None]:
# Getting all the data 
mapping = {
    ('I05510CA Index', 'INDEX_OAS_TSY_BP'): 'cad_oas',
    ('LF98TRUU Index', 'INDEX_OAS_TSY_BP'): 'us_hy_oas',
    ('LUACTRUU Index', 'INDEX_OAS_TSY_BP'): 'us_ig_oas',
    ('SPTSX Index', 'PX_LAST'): 'tsx',
    ('VIX Index', 'PX_LAST'): 'vix',
}

# Calculate dates
end_date = datetime.now().strftime('%Y-%m-%d')
start_date ='2002-01-01'

# Fetch the data
df = fetch_bloomberg_data(
    mapping=mapping,
    start_date=start_date,
    end_date=end_date,
    periodicity='D',
    align_start=True
).dropna()

# Getting all the er_ytd data 
mapping1 = {
    ('I05510CA Index', 'INDEX_EXCESS_RETURN_YTD'): 'cad_ig_er',
    ('LF98TRUU Index', 'INDEX_EXCESS_RETURN_YTD'): 'us_hy_er',
    ('LUACTRUU Index', 'INDEX_EXCESS_RETURN_YTD'): 'us_ig_er',
}

# Fetch the er_ytd_data
df1 = fetch_bloomberg_data(
    mapping=mapping1,
    start_date=start_date,
    end_date=end_date,
    periodicity='D',
    align_start=True
).dropna()

# Convert er_ytd data to an index
df2 = trans_utils.convert_er_ytd_to_index(df1[['cad_ig_er','us_hy_er','us_ig_er']])
final_df = merge_utils.merge_dfs(df, df2, fill='ffill', start_date_align='yes')

# Handle bad data point for cad_oas on Nov 15 2005
bad_date = '2005-11-15'
if bad_date in final_df.index:
    final_df.loc[bad_date, 'cad_oas'] = final_df.loc[final_df.index < bad_date, 'cad_oas'].iloc[-1]

def calculate_qtd_changes(df):
    df_copy = df.copy()
    df_copy.index = pd.to_datetime(df_copy.index)
    
    qtd_changes = pd.DataFrame(index=df_copy.index)
    for col in df_copy.columns:
        changes = []
        for date in df_copy.index:
            # Get the start of the current quarter
            current_quarter_start = date.to_period('Q').start_time
            
            # Get the end of previous quarter (last day of previous quarter)
            prev_quarter_end = current_quarter_start - pd.Timedelta(days=1)
            
            # Get the value at the end of previous quarter
            mask = df_copy.index <= prev_quarter_end
            if not mask.any():
                changes.append(np.nan)
                continue
            
            prev_quarter_value = df_copy[col][mask].iloc[-1]
            current_value = df_copy[col][date]
            
            # Calculate change
            pct_change = (current_value / prev_quarter_value - 1) * 100
            changes.append(pct_change)
            
        qtd_changes[f'{col}_qtd'] = changes
    return qtd_changes

def calculate_ytd_changes(df):
    df_copy = df.copy()
    df_copy.index = pd.to_datetime(df_copy.index)
    
    ytd_changes = pd.DataFrame(index=df_copy.index)
    for col in df_copy.columns:
        changes = []
        for date in df_copy.index:
            # Get the start of the current year
            current_year_start = date.to_period('Y').start_time
            
            # Get the end of previous year (last day of previous year)
            prev_year_end = current_year_start - pd.Timedelta(days=1)
            
            # Get the value at the end of previous year
            mask = df_copy.index <= prev_year_end
            if not mask.any():
                changes.append(np.nan)
                continue
            
            prev_year_value = df_copy[col][mask].iloc[-1]
            current_value = df_copy[col][date]
            
            # Calculate change
            pct_change = (current_value / prev_year_value - 1) * 100
            changes.append(pct_change)
            
        ytd_changes[f'{col}_ytd'] = changes
    return ytd_changes

def add_all_changes(df):
    # Make a copy to avoid modifying original
    result_df = df.copy()
    result_df.index = pd.to_datetime(result_df.index)
    
    # Calculate regular percentage changes (3M and 1YR)
    for col in df.columns:
        # 3-month change (63 trading days approximation)
        result_df[f'{col}_3m'] = df[col].pct_change(periods=63) * 100
        # 1-year change (252 trading days approximation)
        result_df[f'{col}_1yr'] = df[col].pct_change(periods=252) * 100
    
    # Add QTD and YTD changes
    qtd_df = calculate_qtd_changes(df)
    ytd_df = calculate_ytd_changes(df)
    
    # Combine all changes
    for col in qtd_df.columns:
        result_df[col] = qtd_df[col]
    for col in ytd_df.columns:
        result_df[col] = ytd_df[col]
    
    # Drop any rows with missing values
    result_df = result_df.dropna()
    
    return result_df

def verify_calculations(df, n_random_samples=5):
    """Comprehensive verification of calculations"""
    print("Running verification checks...")
    print("\n1. Random Date Samples:")
    print("=" * 80)
    
    # Get random dates
    random_dates = df.sample(n=n_random_samples).index.sort_values()
    
    for date in random_dates:
        date = pd.to_datetime(date)
        
        # Get period boundaries
        current_quarter_start = date.to_period('Q').start_time
        current_year_start = date.to_period('Y').start_time
        prev_quarter_end = current_quarter_start - pd.Timedelta(days=1)
        prev_year_end = current_year_start - pd.Timedelta(days=1)
        
        print(f"\nDate: {date.strftime('%Y-%m-%d')}")
        print(f"Previous Quarter End: {prev_quarter_end.strftime('%Y-%m-%d')}")
        print(f"Previous Year End: {prev_year_end.strftime('%Y-%m-%d')}")
        
        # Check calculations for TSX
        col = 'tsx'
        current_value = df[col][date]
        prev_quarter_value = df[df.index <= prev_quarter_end][col].iloc[-1]
        prev_year_value = df[df.index <= prev_year_end][col].iloc[-1]
        
        manual_qtd = (current_value / prev_quarter_value - 1) * 100
        manual_ytd = (current_value / prev_year_value - 1) * 100
        
        print(f"\nTSX Values:")
        print(f"Current: {current_value:.2f}")
        print(f"Prev Quarter End: {prev_quarter_value:.2f}")
        print(f"Prev Year End: {prev_year_value:.2f}")
        print(f"QTD% (Manual): {manual_qtd:.2f}%")
        print(f"QTD% (Calculated): {df[f'{col}_qtd'][date]:.2f}%")
        print(f"YTD% (Manual): {manual_ytd:.2f}%")
        print(f"YTD% (Calculated): {df[f'{col}_ytd'][date]:.2f}%")
        
        # Check for significant differences
        qtd_diff = abs(manual_qtd - df[f'{col}_qtd'][date])
        ytd_diff = abs(manual_ytd - df[f'{col}_ytd'][date])
        
        if qtd_diff > 0.01:
            print(f"WARNING: Large QTD difference ({qtd_diff:.4f}%)")
        if ytd_diff > 0.01:
            print(f"WARNING: Large YTD difference ({ytd_diff:.4f}%)")
            
    print("\n2. Quarter/Year Boundary Checks:")
    print("=" * 80)
    
    # Find dates near quarter/year boundaries
    quarter_starts = df.index[pd.to_datetime(df.index).is_quarter_start]
    year_starts = df.index[pd.to_datetime(df.index).is_year_start]
    
    if len(quarter_starts) > 0:
        print("\nFirst day of quarter example:")
        first_quarter_date = quarter_starts[len(quarter_starts)//2]  # Take a middle example
        print(f"Date: {first_quarter_date}")
        print(f"TSX QTD%: {df.loc[first_quarter_date, 'tsx_qtd']:.2f}%")
        
    if len(year_starts) > 0:
        print("\nFirst day of year example:")
        first_year_date = year_starts[len(year_starts)//2]  # Take a middle example
        print(f"Date: {first_year_date}")
        print(f"TSX YTD%: {df.loc[first_year_date, 'tsx_ytd']:.2f}%")

# Calculate all changes
print("Calculating changes...")
final_df = add_all_changes(final_df)

# Run comprehensive verification
print("\nRunning verification...")
verify_calculations(final_df)

# Show basic info about the final dataset
print("\nFinal Dataset Info:")
print("=" * 80)
final_df.info()

Calculating changes...

Running verification...
Running verification checks...

1. Random Date Samples:

Date: 2003-10-29
Previous Quarter End: 2003-09-30
Previous Year End: 2002-12-31


IndexError: single positional indexer is out-of-bounds

In [13]:
import vectorbt as vbt
from vectorbt.portfolio.enums import SizeType
import pandas as pd
import numpy as np
from deap import base, creator, tools, algorithms
import random
import warnings
warnings.filterwarnings('ignore')

# Clear any existing DEAP types
if 'FitnessMax' in creator.__dict__:
    del creator.FitnessMax
if 'Individual' in creator.__dict__:
    del creator.Individual

# Define our indicators and their types
positive_indicators = ['cad_ig_er_index', 'us_hy_er_index', 'us_ig_er_index', 'tsx']
negative_indicators = ['cad_oas', 'us_hy_oas', 'us_ig_oas', 'vix']
intervals = ['3m', '1yr', 'qtd', 'ytd']

# Print data information
print("\nData Overview:")
print(f"final_df shape: {final_df.shape}")
print(f"final_df columns: {final_df.columns.tolist()}")
print(f"final_df index: {final_df.index[0]} to {final_df.index[-1]}")

# Vectorized signal generation
def generate_signals(indicators, intervals, final_df, condition_func):
    signals = {}
    for indicator in indicators:
        for interval in intervals:
            col_name = f'{indicator}_{interval}'
            if col_name not in final_df.columns:
                print(f"Warning: Column {col_name} not found in DataFrame")
                continue
                
            signal_name = f'{indicator}_{interval}_{condition_func.__name__}'
            signal_value = condition_func(final_df[col_name])
            signals[signal_name] = signal_value
            
            print(f"\nGenerating signal for {signal_name}:")
            print(f"Signal True values: {signal_value.sum()}")
            print(f"Signal False values: {(~signal_value).sum()}")
    return signals

# Generate signals using vectorized operations
print("\nGenerating positive signals...")
positive_signals = generate_signals(positive_indicators, intervals, final_df, lambda x: x > 0)
print("\nGenerating negative signals...")
negative_signals = generate_signals(negative_indicators, intervals, final_df, lambda x: x < 0)
signal_dict = {**positive_signals, **negative_signals}

# Convert to numpy arrays for faster operations
signal_names = list(signal_dict.keys())
signals = np.array([signal_dict[name].values for name in signal_names])

print("\nSignal Generation Summary:")
print(f"Number of signals generated: {len(signal_names)}")
print(f"Signal array shape: {signals.shape}")

# Dictionary to store all portfolios
all_portfolios = {}

def evaluate(individual):
    used_signals = set()
    operations = []
    combined_signal = None
    
    # Count active signals for debugging
    active_signals = sum(1 for i in range(len(signals)) if individual[i * 2])
    if active_signals < 2:
        return -np.inf,
    
    for i in range(len(signals)):
        if individual[i * 2]:  # If signal is used
            signal = signals[i]
            signal_name = signal_names[i]
            
            if signal_name in used_signals:
                continue
                
            used_signals.add(signal_name)
            
            if combined_signal is None:
                combined_signal = signal
            else:
                op_idx = i * 2 - 1
                if op_idx < len(individual):
                    operation = "OR" if individual[op_idx] else "AND"
                    operations.append(operation)
                    
                    if operation == "OR":
                        combined_signal = combined_signal | signal
                    else:
                        combined_signal = combined_signal & signal
    
    if combined_signal is None or len(used_signals) < 2:
        return -np.inf,
    
    try:
        # Convert combined signal to pandas Series with proper index
        combined_signal = pd.Series(combined_signal, index=final_df.index)
        
        # Create portfolio
        pf = vbt.Portfolio.from_signals(
            close=final_df['cad_ig_er_index'],
            entries=combined_signal,
            exits=~combined_signal,
            freq='1D',
            init_cash=100000,
            size=1.0,
            size_type=SizeType.Percent,
            accumulate=False
        )
        
        # Calculate total return
        total_return = pf.total_return()
        
        if not np.isfinite(total_return):
            return -np.inf,
            
        # Store successful portfolio
        strategy_key = "Combined:" + ":".join(used_signals)
        operations_str = ":".join(operations)
        key = f"{strategy_key}:{operations_str}"
        all_portfolios[key] = pf
        
        return float(total_return),
        
    except Exception as e:
        print(f"Error in evaluate function: {str(e)}")
        return -np.inf,

# Genetic Algorithm Setup
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

def create_diverse_individual():
    ind = [0] * n_bits
    n_signals_to_use = random.randint(2, 4)
    signal_positions = random.sample(range(n_signals), n_signals_to_use)
    
    for pos in signal_positions:
        ind[pos * 2] = 1
        if pos > 0:
            ind[pos * 2 - 1] = random.randint(0, 1)
    
    return creator.Individual(ind)

# Initialize genetic algorithm
toolbox = base.Toolbox()
n_signals = len(signals)
n_bits = 2 * n_signals - 1

toolbox.register("individual", create_diverse_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
toolbox.register("evaluate", evaluate)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutFlipBit, indpb=0.3)  # Increased mutation rate
toolbox.register("select", tools.selTournament, tournsize=3)

# GA Parameters
POPULATION_SIZE = 50
MAX_GENERATIONS = 30
HALL_OF_FAME_SIZE = 10

# Create hall of fame
hall_of_fame = tools.HallOfFame(HALL_OF_FAME_SIZE)

# Initialize population
population = toolbox.population(n=POPULATION_SIZE)

# Run genetic algorithm
print("\nStarting genetic algorithm optimization...")
best_fitness = -np.inf
best_individual = None

for gen in range(MAX_GENERATIONS):
    # Generate offspring
    offspring = algorithms.varAnd(population, toolbox, cxpb=0.7, mutpb=0.3)
    
    # Evaluate individuals
    fits = map(toolbox.evaluate, offspring)
    for fit, ind in zip(fits, offspring):
        ind.fitness.values = fit
        
        # Update best individual
        if fit[0] > best_fitness:
            best_fitness = fit[0]
            best_individual = ind.copy()
    
    # Select next generation
    population = toolbox.select(offspring, len(population))
    
    # Update hall of fame
    hall_of_fame.update(population)
    
    # Print progress
    gen_fits = [ind.fitness.values[0] for ind in population]
    gen_best = max(gen_fits)
    print(f"Generation {gen}: Best Fitness = {gen_best:.4f}, Overall Best = {best_fitness:.4f}")
    
    # Early stopping if we find a good solution
    if best_fitness > 0.5:  # 50% return
        print(f"Found good solution at generation {gen}")
        break

print("\nOptimization completed.")

# Process results
if all_portfolios:
    # Create results DataFrame with debugging
    all_results = []
    print("\nProcessing portfolios...")
    
    for name, pf in all_portfolios.items():
        try:
            # Get metrics with percentage formatting
            total_return = float(pf.total_return()) * 100  # Convert to percentage
            sharpe = float(pf.sharpe_ratio())
            max_dd = float(pf.max_drawdown()) * 100  # Convert to percentage
            
            result = {
                'Strategy': name,
                'Total_Return_%': total_return,
                'Sharpe_Ratio': sharpe,
                'Max_Drawdown_%': max_dd
            }
            
            print(f"\nProcessed {name}:")
            print(f"Total Return: {total_return:.2f}%")
            print(f"Sharpe Ratio: {sharpe:.2f}")
            print(f"Max Drawdown: {max_dd:.2f}%")
            
            all_results.append(result)
            
        except Exception as e:
            print(f"Error processing portfolio {name}: {str(e)}")
            continue

    if all_results:
        # Create and sort results DataFrame
        results_df = pd.DataFrame(all_results)
        results_df = results_df.sort_values('Total_Return_%', ascending=False)

        print("\nTop 10 Strategies Overall:")
        print(results_df.head(10))

        # Get best strategy
        best_strategy_name = results_df.iloc[0]['Strategy']
        best_portfolio = all_portfolios[best_strategy_name]

        # Create buy & hold portfolio for comparison
        bh_pf = vbt.Portfolio.from_holding(
            final_df['cad_ig_er_index'],
            freq='1D',
            init_cash=100000,
            size=1.0,
            size_type=SizeType.Percent
        )

        print("\nBest Strategy:", best_strategy_name)
        print("\nBest Strategy Performance:")
        print(f"Total Return: {best_portfolio.total_return():.2%}")
        print(f"Sharpe Ratio: {best_portfolio.sharpe_ratio():.2f}")
        print(f"Max Drawdown: {best_portfolio.max_drawdown():.2%}")

        print("\nBuy & Hold Performance:")
        print(f"Total Return: {bh_pf.total_return():.2%}")
        print(f"Sharpe Ratio: {bh_pf.sharpe_ratio():.2f}")
        print(f"Max Drawdown: {bh_pf.max_drawdown():.2%}")

        # Save best strategy signals for analysis
        if best_individual is not None:
            print("\nBest Strategy Signal Composition:")
            used_signals = []
            for i in range(len(signals)):
                if best_individual[i * 2]:
                    used_signals.append(signal_names[i])
            print("Signals used:", used_signals)
    else:
        print("No valid results were processed.")
else:
    print("No valid strategies were found.")

ModuleNotFoundError: No module named 'deap'