In [14]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import bocd
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
import yfinance as yf

from machine_learning_finance import (analyze_trades, 
    calc_probabilties_without_lookahead, plot_backtest_analysis, 
    metrics_to_dataframe, create_train_test_windows, make_inverse_env_for,
    calculate_polynomial_regression)

import plotly.graph_objs as go
from plotly.offline import init_notebook_mode

# Initialise plotly's offline notebook mode so figures can be shown in the notebook.
init_notebook_mode(connected=True)
# NOTE(review): `windows` is not referenced anywhere else in this cell --
# presumably window sizes kept around for other experiments; confirm or remove.
windows = [300, 600, 900, 1500]

# Ticker to analyse; also used to build the ledger path below.
symbol = "QQQ"
# CSV ledger that the analysis functions below load with pd.read_csv.
file = f"../backtests/{symbol}-model-back-test.csv"
# Forwarded to analyze_trades() at the bottom of the cell
# (presumably a number of days -- confirm against analyze_trades).
period = 365
# Never truncate rows when a DataFrame is displayed.
pd.options.display.max_rows = None
# Forwarded as `inverse=` to plot_backtest_analysis() by analyze_and_graph().
inverse = None # set to anything to graph inverse longs

def plot_win_loss(file, metrics):
    """Draw a bar chart of the profit and loss summary statistics.

    Args:
        file: Path of the back-test ledger. It is not needed to draw the
            chart and is only kept so existing callers keep working.
        metrics: Mapping with ``'profit_stats'`` and ``'loss_stats'``
            entries, each holding the values for min, max, mean, median
            and std.

    Returns:
        None. The figure is displayed with ``plt.show()``.
    """
    # The ledger was previously re-read from `file` here although nothing
    # used it; that wasted I/O (and the spurious failure on a missing file)
    # has been dropped.
    sns.set(style='whitegrid')

    stat_names = ['min', 'max', 'mean', 'median', 'std']
    profit_loss_df = pd.DataFrame(
        [metrics['profit_stats'], metrics['loss_stats']],
        columns=stat_names,
        index=['profit', 'loss'],
    )

    plt.figure(figsize=(12, 6))
    # Transposed so that 'profit' and 'loss' become the columns handed to
    # seaborn and the statistic names become the rows.
    ax = sns.barplot(data=profit_loss_df.transpose(), palette='muted')
    ax.set_title('Profit and Loss Statistics')
    ax.set_ylabel('Value')

    plt.show()


def plot_polynomial(fig, df, name, chop=0):
    """Add a polynomial-regression line for `df` to `fig`.

    Args:
        fig: Plotly figure that receives the line trace.
        df: Frame handed to ``calculate_polynomial_regression``.
        name: Legend name of the trace.
        chop: Number of trailing rows to leave out of the fit (default 0).
    """
    trimmed = df.head(len(df) - chop).copy()
    # calculate_polynomial_regression returns (y_pred, model, x, y), as
    # unpacked in plot_regressions_between_change_points; only the fitted
    # values are plotted. Passing the whole tuple as `y` was a bug.
    pred, _model, _x, _y = calculate_polynomial_regression(trimmed)
    # Plot against the index of the rows that were actually fitted so x and
    # y have the same length when `chop` is non-zero.
    fig.add_trace(go.Scatter(x=trimmed.index, y=pred, mode="lines", name=name))

def plot_change_points(fig, df, data, name, hazard, color):
    """Detect change points in `data` and mark them on `fig`.

    Every observation in `data` is fed to a Bayesian online change point
    detector, and the detector's ``rt`` value is recorded after each
    update (presumably the most likely run length -- confirm against
    bocd). Rows where that value drops compared with the previous
    observation are treated as change points.

    Args:
        fig: Plotly figure that receives the marker trace.
        df: Frame with a ``'Close'`` column. A column called `name` is
            written to it in place, holding the close at change points and
            NaN everywhere else.
        data: One-dimensional sequence of observations exposing ``.shape``.
        name: Name of both the new column and the trace.
        hazard: Argument passed to ``bocd.ConstantHazard``.
        color: Marker colour.

    Returns:
        The same `df`, with the new column added.
    """
    detector = bocd.BayesianOnlineChangePointDetection(
        bocd.ConstantHazard(hazard),
        bocd.StudentT(mu=0, kappa=1, alpha=1, beta=1),
    )

    run_lengths = np.empty(data.shape)
    for pos, observation in enumerate(data):
        detector.update(observation)
        run_lengths[pos] = detector.rt

    # Left-pad with zeros up to len(df) + 1 entries so that the first
    # difference below yields exactly one value per row of df.
    padding = np.full(len(df) - len(run_lengths) + 1, 0)
    padded = np.insert(run_lengths, 0, padding)
    dropped = np.diff(padded) < 0
    df[name] = np.where(dropped, df['Close'], np.nan)

    markers = go.Scatter(
        x=df.index,
        y=df[name],
        mode='markers',
        name=name,
        marker=dict(symbol='diamond', size=8, color=color),
    )
    fig.add_trace(markers)
    return df
    

def plot_regressions_between_change_points(fig, df, column_name):
    """Draw one polynomial regression line per stretch between change points.

    Args:
        fig: Plotly figure; one line trace per segment is appended to it.
        df: Frame whose `column_name` column is NaN except at change points.
        column_name: Name of the change-point column in `df`.

    Returns:
        The same `fig`, to allow chaining.

    Note:
        Only stretches between consecutive change points are fitted; rows
        after the last change point get no regression line.
    """
    # The first row acts as a dummy change point so the rows before the
    # first detected change point form a segment too.
    change_points = [df.index[0], *df[column_name].dropna().index]

    consecutive = zip(change_points, change_points[1:])
    for number, (start, end) in enumerate(consecutive, start=1):
        # Label-based slicing includes both ends, so neighbouring segments
        # share their boundary row.
        segment = df.loc[start:end]

        # Only the fitted values are needed; the fitted model and the x/y
        # arrays that are also returned are not used for plotting.
        y_pred, _model, _x, _y = calculate_polynomial_regression(segment)

        fig.add_trace(go.Scatter(x=segment.index, y=y_pred, mode='lines',
                                 name=f'Poly Regression {number}'))

    return fig

    
def analyze_and_graph_range(file, symbol, start, end, df, hist_cutoff="2023-05-15"):
    """Plot the back-test analysis for an explicit date range.

    Args:
        file: Path of the CSV ledger.
        symbol: Ticker symbol; forwarded as `inverse` to
            ``plot_backtest_analysis`` (see the note in the body).
        start: Forwarded as `start` to ``create_train_test_windows``.
        end: Forwarded as `end` to ``create_train_test_windows``.
        df: Price frame to split into history and test windows.
        hist_cutoff: Forwarded to ``calc_probabilties_without_lookahead``.
            Used to be hard-coded; the default keeps the old value.

    Returns:
        Tuple ``(ledger, test_df)``: the loaded ledger and the test window
        returned by ``calc_probabilties_without_lookahead``.
    """
    ledger = pd.read_csv(file)
    hist_df, test_df = create_train_test_windows(df, start=start, end=end)
    test_df = calc_probabilties_without_lookahead(
        test_df, hist_df, hist_cutoff=hist_cutoff)
    # NOTE(review): `symbol` is passed as `inverse`, whereas
    # analyze_and_graph passes the module-level `inverse` flag -- confirm
    # that this difference is intended.
    plot_backtest_analysis(test_df, ledger, inverse=symbol)
    return ledger, test_df
   
def analyze_and_graph(file, symbol, period, df):
    """Show the back-test chart with change points, regressions and a moving average.

    Reads the module-level `inverse` flag and forwards it to
    ``plot_backtest_analysis``. `symbol` and `period` are accepted but not
    used inside this function.

    Args:
        file: Path of the CSV ledger.
        symbol: Unused.
        period: Unused.
        df: Price frame that is split into history and test windows.

    Returns:
        Tuple ``(ledger, test_df)``. The test frame additionally carries
        the columns ``"cp moving avg"``, ``"cp price"`` and
        ``"moving_avg"``.
    """
    trades = pd.read_csv(file)
    history, test_window = create_train_test_windows(df)
    test_window = calc_probabilties_without_lookahead(test_window, history)

    # 30-row rolling mean of the close, computed over history + test rows
    # and then trimmed to the last len(test_window) rows.
    combined = pd.concat([history, test_window])
    rolling_close = combined["Close"].rolling(30).mean()
    moving_avg = rolling_close.tail(len(test_window))

    fig = plot_backtest_analysis(test_window, trades, inverse=inverse)

    # Change points on the smoothed series and on the raw close, followed
    # by regression lines between the smoothed-series change points.
    plot_change_points(fig, test_window, moving_avg, "cp moving avg", 30, "blue")
    plot_change_points(fig, test_window, test_window["Close"].values,
                       "cp price", 90, "purple")
    plot_regressions_between_change_points(fig, test_window, "cp moving avg")

    test_window["moving_avg"] = moving_avg

    average_line = go.Scatter(x=test_window.index, y=moving_avg,
                              mode="lines", name="Moving Average")
    fig.add_trace(average_line)

    fig.update_layout(title='Backtest Analysis', xaxis_title='Date', height=800)
    fig.show()

    return trades, test_window

# Download the price history for `symbol` with yfinance.
ticker_obj = yf.download(tickers=symbol)
df = pd.DataFrame(ticker_obj)


# NOTE: the third argument (`period`) is not used inside analyze_and_graph.
ledger, df2 = analyze_and_graph(file, symbol, 365*4, df)    
# Alternative: analyse an explicit date range instead.
#ledger, df2 = analyze_and_graph_range(file, symbol, "03-27-2020", "12-17-2021", df)

# Display metrics as text
metrics = analyze_trades(ledger, symbol, period)
# Record which ledger file the metrics came from.
metrics["file"] = file
metrics_df = metrics_to_dataframe(metrics) 
# Bare expression as the last statement so the notebook renders the frame.
metrics_df
#df2[["moving_avg", "change_points", "rt", "Close"]]

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,test,duration_min,duration_max,duration_mean,duration_median,duration_std,total_return,buy_and_hold_performance,volatility,maximum_drawdown,...,profit_std,loss_min,loss_max,loss_mean,loss_median,loss_std,patience_min,patience_max,patience_mean,patience_median
0,../backtests/QQQ-model-back-test.csv,1,281,41.916667,11.0,78.808696,42.017003,4.681267,0.893602,0.105414,...,4.653745,-6.38317,-0.071407,-3.154137,-3.080986,3.012526,-15.402005,17.491131,-1.772741,-1.461655
