# Deployment: a trading strategy
> **Warning!** Please run `01_cleaning.ipynb` and `03_prediction.ipynb` first if you haven't already.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from functions.constants import BM_NAME, STARTDATE, ENDDATE, N_THRESHOLD_BPS, DATA_DIR, EVAL_START_DATE, TEST_START_DATE  # noqa: F401

from functions.helper_fns import remove_BBG_suffixes, melt_data

In [None]:
# Run configuration: the forecast horizon, the model whose predictions are
# traded, and whether to read the evaluation-set predictions file instead of
# the per-horizon outperformance predictions file.
chosen_period = "1w"
optimum_model_name = "Stacking"
eval_dataset_mode = False

# The two modes read differently named CSV files from DATA_DIR.
if eval_dataset_mode:
    predictions_path = f"{DATA_DIR}/{BM_NAME}_eval_predictions_{optimum_model_name}.csv"
else:
    predictions_path = f"{DATA_DIR}/{BM_NAME}_{chosen_period}_outperformance_predictions_{optimum_model_name}.csv"

# "Date" is parsed to datetimes so it can be compared with date constants later.
predictions = pd.read_csv(predictions_path, parse_dates=["Date"])
predictions

In [None]:
# Pick the rows that will be traded. In evaluation mode the whole file is
# used; otherwise only rows dated on or after TEST_START_DATE.
if not eval_dataset_mode:
    dated_before_test = predictions.Date < TEST_START_DATE
    dated_from_test = predictions.Date >= TEST_START_DATE
    # Not used by the strategy itself; kept because it is handy for debugging.
    predictions_train_and_eval = predictions[dated_before_test]
    predictions_test = predictions[dated_from_test]
else:
    predictions_test = predictions
predictions_test.head()

### Load prices by ticker and by benchmark (BM)

In [None]:
# Locations of the two raw price files: the benchmark index itself and its
# constituents.
# NOTE(review): these paths are built without a "/" between DATA_DIR and the
# file name, whereas predictions_path inserts one - presumably DATA_DIR ends
# with a path separator; confirm.
bm_index_prices_df_path = f"{DATA_DIR}{BM_NAME}_BM_prices.csv"
bm_holdings_prices_df_path = f"{DATA_DIR}{BM_NAME}_constituents_prices.csv"

def load_bm_index_df(path):
    """Load a wide price file into a DataFrame with a parsed ``Date`` column.

    The first three lines of the file are skipped and the following line is
    used as the header. The first column is renamed to ``Date`` and the first
    two rows below the header are discarded. Every other column is coerced to
    numeric, with unparseable values becoming NaN, and ``Date`` is converted
    to datetimes.

    Args:
        path: Anything ``pd.read_csv`` accepts (file path or file-like object).

    Returns:
        The cleaned DataFrame. Row labels are those assigned by ``read_csv``,
        so they start at 2 because the first two rows were dropped.
    """
    df = pd.read_csv(path, skiprows=3)
    df = df.rename(columns={df.columns[0]: "Date"})
    # Take an explicit copy of the positional slice: assigning columns to a
    # bare slice writes into a view-like object and triggers pandas'
    # SettingWithCopyWarning.
    df = df.iloc[2:].copy()
    # Ensure all columns except Date are numeric; coerce to NaN if not.
    for col in df.columns[1:]:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    df["Date"] = pd.to_datetime(df["Date"])
    return df

# Read both raw price files with the shared loader: benchmark index first,
# then the constituents.
bm_index_prices_raw, bm_holdings_prices_raw = map(
    load_bm_index_df,
    (bm_index_prices_df_path, bm_holdings_prices_df_path),
)

In [None]:
def preprocess_and_clean_data_for_trading(df):
    """Prepare a raw price table for the trading simulation.

    Passes ``df`` through ``remove_BBG_suffixes`` and then ``melt_data`` (both
    imported from ``functions.helper_fns``) and returns the result. The cells
    that consume the result read ``Date``, ``Ticker`` and ``Price`` columns
    from it.
    """
    return melt_data(remove_BBG_suffixes(df))

# Apply the same preprocessing to the benchmark index prices and to the
# constituent prices, in that order.
bm_index_prices, bm_holdings_prices = map(
    preprocess_and_clean_data_for_trading,
    (bm_index_prices_raw, bm_holdings_prices_raw),
)

In [None]:
# Build one equal-weighted target portfolio per prediction date.
#
# For every date in predictions_test the tickers whose outperformance
# probability exceeds probability_cutoff share the portfolio equally. If no
# ticker clears the cutoff, every ticker with a known price is held equally
# instead. Each per-date frame (Date, Ticker, decision_port_weight, Price) is
# appended to portfolio_df_array.
portfolio_df_array = []
unique_dates = predictions_test.Date.unique()

# Settings that do not change between dates.
outperform_prob_column = f"outperform_{chosen_period}_probability"
outperform_predicted_column = f"outperform_{chosen_period}_predicted"
probability_cutoff = 0.50
lookback_window = 5  # calendar days to step back when a date has no prices

for pricing_date in unique_dates:
    this_date_predictions = predictions_test[predictions_test.Date == pricing_date]

    # Use the prices of the pricing date itself; if there are none, step back
    # one calendar day at a time (up to lookback_window days) until a date
    # with prices is found.
    for lookback_days in range(lookback_window + 1):
        proxy_pricing_date = pd.Timestamp(pricing_date) - pd.Timedelta(days=lookback_days)
        this_date_prices = bm_holdings_prices[bm_holdings_prices.Date == proxy_pricing_date]
        if not this_date_prices.empty:
            break
    else:
        raise ValueError(f"Could not find a non-empty prices df in the last {lookback_window} days for {pricing_date}")

    # Drop the price table's Date so the merged frame keeps the prediction
    # date even when proxy prices from an earlier day are used.
    this_date_prices = this_date_prices.drop(columns=["Date"])
    this_date_predictions = this_date_predictions.merge(this_date_prices, on="Ticker", how="left")

    # A ticker without a price cannot be bought: zero its probability and its
    # predicted label so it is never selected as an outperformer.
    has_price = this_date_predictions.Price.notnull()
    this_date_predictions[outperform_prob_column] = this_date_predictions[outperform_prob_column].where(has_price, 0)
    this_date_predictions[outperform_predicted_column] = this_date_predictions[outperform_predicted_column].where(has_price, 0)

    is_outperformer = this_date_predictions[outperform_prob_column].gt(probability_cutoff)
    num_outperformers = is_outperformer.sum()
    print(f"Pricing Date: {pricing_date}, outperformer count: {num_outperformers}")

    if num_outperformers > 0:
        selected = is_outperformer
    else:
        print(f"No outperformers forecasted on {pricing_date}")
        # Fall back to holding every ticker that has a price.
        selected = has_price

    # Equal weight across the selected tickers. When nothing is selectable
    # (no outperformers and no prices) all weights are 0 rather than the
    # result of a division by zero.
    num_selected = selected.sum()
    wt_per_outperformer = 1 / num_selected if num_selected > 0 else 0.0
    this_date_predictions["decision_port_weight"] = selected.astype(float) * wt_per_outperformer

    this_date_portfolio = this_date_predictions[["Date", "Ticker", "decision_port_weight", "Price"]]
    portfolio_df_array.append(this_date_portfolio)

In [None]:
# Stack the per-date target portfolios into a single long table for inspection.
proposed_buys_df = pd.concat(objs=portfolio_df_array)
proposed_buys_df

In [None]:
# Simulate trading the target portfolios with full turnover at every date.
#
# On the first date seed_capital is invested according to
# decision_port_weight. On every later date all shares bought on the previous
# date are sold at the current prices and the proceeds are reinvested
# according to the new weights. One frame per date, extended with
# shares_buying, shares_selling, cash_flow and value_at_close, is appended to
# trades_df_array.
portfolio_df = pd.concat(portfolio_df_array)
trades_df_array = []

seed_capital = 1e6

# Iterate chronologically: each step sells what the previous step bought, so
# trades_df_array[-1] must always be the preceding date.
for pricing_date in sorted(unique_dates):
    # .copy() so the new columns are written to an independent frame instead
    # of a slice of portfolio_df (avoids SettingWithCopyWarning).
    this_period_portfolio = portfolio_df[portfolio_df.Date == pricing_date].copy()

    if not trades_df_array:
        # First date: deploy the seed capital.
        this_period_portfolio['shares_buying'] = seed_capital * this_period_portfolio.decision_port_weight / this_period_portfolio.Price
        this_period_portfolio['shares_selling'] = 0
        this_period_portfolio['cash_flow'] = -seed_capital * this_period_portfolio.decision_port_weight
    else:
        last_period_portfolio = trades_df_array[-1]

        # Match last period's holdings to this period's prices by Ticker.
        # The row index of each per-date frame is only a positional counter,
        # so aligning on it would pair shares of one ticker with the price of
        # another whenever the ticker order or membership differs.
        last_by_ticker = last_period_portfolio.groupby("Ticker")
        shares_held = last_by_ticker['shares_buying'].sum()
        last_prices = last_by_ticker['Price'].first()
        current_prices = this_period_portfolio.groupby("Ticker")['Price'].first()
        # A held ticker with no current price is valued at its last purchase
        # price so that its capital does not silently drop out of the
        # proceeds.
        sell_prices = current_prices.reindex(shares_held.index).fillna(last_prices)

        this_period_portfolio['shares_selling'] = this_period_portfolio['Ticker'].map(shares_held).fillna(0)
        revenue_from_selling = (shares_held * sell_prices).sum()
        print(f"Revenue from selling as of {pricing_date}: ${revenue_from_selling}")
        this_period_portfolio['shares_buying'] = revenue_from_selling * this_period_portfolio.decision_port_weight / this_period_portfolio.Price
        # Proceeds are fully reinvested, so there is no net external cash flow.
        this_period_portfolio['cash_flow'] = 0

    this_period_portfolio['value_at_close'] = this_period_portfolio['shares_buying'] * this_period_portfolio['Price']
    trades_df_array.append(this_period_portfolio)

In [None]:
# Combine the per-date trade frames and total the closing value of all
# positions on each date.
trades_df = pd.concat(trades_df_array)
portfolio_value = trades_df["value_at_close"].groupby(trades_df["Date"]).sum()

In [None]:
# Compare the simulated portfolio with the benchmark over the same window and
# report annualized portfolio, benchmark and active returns.

# Restrict the benchmark to the span covered by the simulated portfolio.
min_date = portfolio_value.index.min()
max_date = portfolio_value.index.max()
in_portfolio_window = (bm_index_prices.Date >= min_date) & (bm_index_prices.Date <= max_date)
# Work on a sorted, independent copy: sorting in place and assigning a column
# on a filtered slice of bm_index_prices triggers SettingWithCopyWarning.
bm_index_prices_test_period = bm_index_prices[in_portfolio_window].sort_values("Date").copy()
if bm_index_prices_test_period.empty:
    raise ValueError(f"No benchmark prices found between {min_date} and {max_date}")

# Rescale the benchmark so it starts at the same capital as the portfolio.
bm_index_prices_test_period["Price"] = bm_index_prices_test_period["Price"] / bm_index_prices_test_period["Price"].iloc[0] * seed_capital

# Annualize assuming 252 trading days per year.
# NOTE(review): the exponent uses the number of benchmark rows in the window;
# the number of elapsed periods is one less than that, and the count is only
# a day count if bm_index_prices holds a single series - confirm.
num_trading_days = len(bm_index_prices_test_period)
annualized_port_return = (portfolio_value.iloc[-1] / portfolio_value.iloc[0]) ** (252/num_trading_days) - 1
annualized_bm_return = (bm_index_prices_test_period["Price"].iloc[-1] / bm_index_prices_test_period["Price"].iloc[0]) ** (252/num_trading_days) - 1
annualized_active_return = annualized_port_return - annualized_bm_return
print(f"Annualized Portfolio Return: {annualized_port_return:.2%}")
print(f"Annualized Benchmark Return: {annualized_bm_return:.2%}")
print(f"Annualized Active Return: {annualized_active_return:.2%}")
portfolio_value

In [None]:
# Draw the portfolio value and the rescaled benchmark on one large chart,
# annotate it with the annualized returns, save it under DATA_DIR and show it.
fig = plt.figure(figsize=(12, 6))
plt.plot(portfolio_value, label="Portfolio", color="blue")
plt.plot(
    bm_index_prices_test_period["Date"],
    bm_index_prices_test_period["Price"],
    label="Benchmark Index",
    color="red",
)
plt.xlabel("Date")
plt.ylabel("Portfolio Value")

# The title prefix and the output file name both depend on the dataset mode.
base_title = "Portfolio Value vs Benchmark Index over Time (USD)"
if eval_dataset_mode:
    period_prefix = "Evaluation Period: "
    plot_file_path = f"{DATA_DIR}/{BM_NAME}_plot_eval_{optimum_model_name}_port_vs_bm.png"
else:
    period_prefix = "Test Period: "
    plot_file_path = f"{DATA_DIR}/{BM_NAME}_plot_test_{optimum_model_name}_port_vs_bm.png"
plot_title = period_prefix + base_title + ", using " + optimum_model_name
plt.title(plot_title)

# Small-font annualized return figures, positioned in figure coordinates.
return_annotations = [
    (0.75, f"Annualized Portfolio Return: {annualized_port_return:.2%}"),
    (0.725, f"Annualized Benchmark Return: {annualized_bm_return:.2%}"),
    (0.7, f"Annualized Active Return: {annualized_active_return:.2%}"),
]
for y_position, annotation in return_annotations:
    plt.text(0.15, y_position, annotation, fontsize=9, transform=fig.transFigure)

plt.legend(loc="upper left")
# Save before show() so the written file contains the rendered figure.
plt.savefig(plot_file_path)
plt.show()