In [None]:
#####################
#Bezier Density Network - constrained version - applied to valuation model
#####################

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math
# Default size (inches) for every figure drawn in this notebook.
plt.rcParams["figure.figsize"] = (10, 10)
# Tab-separated model input read by the data-loading cell.
model_data_path = './model_input.txt'

In [None]:
#### Define the Bezier functions
def bezier(n,t):
    """Evaluate the Bernstein basis polynomials of degree ``n`` at ``t``.

    Args:
        n: Degree of the basis (non-negative int); ``n + 1`` polynomials are produced.
        t: 1-D tensor of curve parameters.

    Returns:
        Tensor of shape ``(len(t), n + 1)`` whose column ``i`` holds
        ``C(n, i) * t**i * (1 - t)**(n - i)``. It lives on the same device as ``t``
        and keeps ``t``'s floating dtype.
    """
    # Allocate next to the input rather than on the notebook-level `device`, so the
    # helper does not depend on when (or whether) that global has been assigned.
    dtype = t.dtype if t.is_floating_point() else torch.get_default_dtype()
    b = torch.zeros(t.shape[0], n+1, dtype=dtype, device=t.device)
    one_minus_t = 1-t
    for i in range(n+1):
        # Exact integer binomial coefficient C(n, i).
        coeff = math.factorial(n)//(math.factorial(i)*math.factorial(n-i))
        b[:,i] = coeff*(t**i)*one_minus_t**(n-i)
    return b

def bezier_pdf(t,p):
    """Density of the Bezier-parameterised distribution at curve parameter ``t``.

    ``p`` holds one curve per row: the first half of the columns are the x
    control points and the second half the z (cumulative probability) control
    points. The result is the ratio of the two curve derivatives; the common
    degree factor of both derivatives cancels, so it is not applied.

    Args:
        t: 1-D tensor with one curve parameter per row of ``p``.
        p: 2-D tensor of control points laid out as ``[x | z]``.

    Returns:
        1-D tensor with one density value per row.
    """
    half = int(p.shape[1]/2)
    x_ctrl = p[:,:half]
    z_ctrl = p[:,half:]
    # The derivative of a Bezier curve is a Bezier curve of one degree less
    # built on the differences of consecutive control points.
    basis = bezier(half-2,t)
    dx_dt = torch.sum(basis*(x_ctrl[:,1:]-x_ctrl[:,:-1]),1)
    dz_dt = torch.sum(basis*(z_ctrl[:,1:]-z_ctrl[:,:-1]),1)
    return dz_dt/dx_dt

def bezier_cdf(t,p):
    """Evaluate the z (cumulative probability) coordinate of each curve at ``t``.

    Args:
        t: 1-D tensor with one curve parameter per row of ``p``.
        p: 2-D tensor of control points laid out as ``[x | z]``; only the z half
            is used here.

    Returns:
        1-D tensor with the z value of every curve at its ``t``.
    """
    n_points = int(p.shape[1]/2)
    basis = bezier(n_points-1,t)
    return torch.sum(basis*p[:,n_points:],1)

def bezier_inv_cdf(probs,p):
    """Find, by bisection, the curve parameter at which each CDF reaches ``probs``.

    Args:
        probs: Target cumulative probabilities, one per row of ``p``; shape
            ``(l, 1)`` or ``(l,)``.
        p: 2-D tensor of Bezier control points laid out as ``[x | z]``.

    Returns:
        Tensor of shape ``(l, 1)`` with the parameter ``t`` in ``[0, 1]`` for which
        ``bezier_cdf(t, p)`` is approximately ``probs``.

    NOTE(review): the value returned is the curve parameter ``t``; it is not
    mapped through ``bezier_x``. ``Bezier_loss`` also evaluates the CDF directly
    at the normalised target, so callers appear to treat ``t`` as the normalised
    value itself -- confirm before changing.
    """
    l = p.shape[0]
    threshold =0.000001
    max_loops=200
    # Keep everything on the device of the curves and force a column layout so a
    # flat `probs` cannot broadcast against the (l, 1) brackets into (l, l).
    probs = probs.to(p.device).reshape(-1,1)
    t_min=torch.zeros(l,1,device=p.device)
    t_max=torch.ones(l,1,device=p.device)
    # Defined up front so an empty batch (the loop never runs) returns an empty
    # tensor instead of hitting an unbound local.
    t_mid = (t_min+t_max)/2
    # Stop once the summed width of all brackets is below the threshold.
    while((max_loops>0) and (torch.sum(t_max-t_min)>=threshold)):
        max_loops-=1
        t_mid = (t_min+t_max)/2
        prob_mid = bezier_cdf(t_mid.view(-1),p).view(-1,1)
        # Target below the midpoint CDF -> shrink from above, otherwise from below.
        t_max = t_max*(probs>=prob_mid)+t_mid*(probs<prob_mid)
        t_min = t_min*(probs<prob_mid)+t_mid*(probs>=prob_mid)
    return t_mid

def bezier_rnd(p):
    """Draw one random sample per Bezier curve by inverse-transform sampling.

    A uniform probability in ``[0, 1)`` is drawn for every row of ``p`` and
    inverted with ``bezier_inv_cdf`` (same bisection, tolerance and iteration cap
    as the loop this function previously duplicated).

    Args:
        p: 2-D tensor of Bezier control points laid out as ``[x | z]``.

    Returns:
        1-D tensor with one sampled curve parameter per row of ``p`` (see the
        return value of ``bezier_inv_cdf``).
    """
    l = p.shape[0]
    # Draw on the CPU, then move next to the curves.
    probs = torch.FloatTensor(l).uniform_(0.0, 1.0).to(p.device)
    return bezier_inv_cdf(probs.view(-1,1),p).view(-1)

def bezier_x(p,t):
    """Evaluate the x coordinate of each Bezier curve in ``p`` at parameter ``t``.

    Args:
        p: 2-D tensor of control points laid out as ``[x | z]``; only the x half
            is used here.
        t: 1-D tensor with one curve parameter per row of ``p``.

    Returns:
        1-D tensor with the x value of every curve at its ``t``.
    """
    n_points = int(p.shape[1]/2)
    weights = bezier(n_points-1,t)
    x_ctrl = p[:,:n_points]
    return torch.sum(weights*x_ctrl,1)

def bezier_t(p,x):
    """Find, by bisection, the curve parameter ``t`` at which each curve has x value ``x``.

    Args:
        p: 2-D tensor of Bezier control points laid out as ``[x | z]``.
        x: 2-D tensor; column 0 holds the target x value for each row of ``p``.

    Returns:
        1-D tensor with one parameter ``t`` in ``[0, 1]`` per row, such that
        ``bezier_x(p, t)`` is approximately ``x[:, 0]``.
    """
    l = x.shape[0]
    threshold =0.0001
    max_loops=50
    target = x[:,0]
    # Allocate next to the curves instead of on the notebook-level `device`.
    t_min=torch.zeros(l,device=p.device)
    t_max=torch.ones(l,device=p.device)
    # Defined up front so an empty batch returns an empty tensor instead of
    # hitting an unbound local when the loop never runs.
    t_mid = (t_min+t_max)/2
    # Stop once the summed width of all brackets is below the threshold.
    while((max_loops>0) and (torch.sum(t_max-t_min)>=threshold)):
        max_loops-=1
        t_mid = (t_min+t_max)/2
        x_mid = bezier_x(p,t_mid)
        # Target below the midpoint x -> shrink from above, otherwise from below.
        t_max = t_max*(target>=x_mid)+t_mid*(target<x_mid)
        t_min = t_min*(target<x_mid)+t_mid*(target>=x_mid)
    return t_mid


In [None]:
def random_choice_prob_index(a, axis=1):
    """Sample one index per slice of ``a`` using the values along ``axis`` as weights.

    One uniform number is drawn per slice (i.e. per entry of the other axis of the
    2-D array ``a``); the returned index is the first position along ``axis`` whose
    cumulative sum exceeds that draw.
    """
    draws = np.random.rand(a.shape[1-axis])
    thresholds = np.expand_dims(draws, axis=axis)
    exceeded = np.cumsum(a, axis=axis) > thresholds
    # argmax over booleans returns the first True position.
    return np.argmax(exceeded, axis=axis)

def BDN_predict(x,percentile,denorm_fn):
    """Return the denormalised prediction at a given percentile of each distribution.

    Args:
        x: NumPy array of Bezier control points, one distribution per row.
        percentile: Cumulative probability at which to invert each CDF
            (0.5 gives the median).
        denorm_fn: Callable mapping the normalised tensor back to original units.

    Returns:
        NumPy array of shape ``(len(x), 1)`` with the denormalised values.
    """
    pnt_np = np.ones((x.shape[0],1))*percentile
    probs = torch.from_numpy(pnt_np).type(torch.FloatTensor).to(device)
    curves = torch.from_numpy(x).type(torch.FloatTensor).to(device)
    raw_result = bezier_inv_cdf(probs,curves)
    # Bring the result back to the CPU before denormalising: Tensor.numpy() only
    # works on CPU tensors, so the previous `.to(device).numpy()` failed whenever
    # `device` was a GPU.
    result = denorm_fn(raw_result.detach().cpu()).numpy()
    return result

class FinancialDataset(Dataset):
    """Torch dataset of three-row company sequences with a market-cap target.

    Each item is ``(X, Y, future_marketcaps, symbol)``:
      * ``X``  - the first three rows found for a symbol, signed-log scaled and
        mean / range normalised per feature;
      * ``Y``  - log market cap at the first evaluation date, min-max scaled;
      * ``future_marketcaps`` - raw market caps at the remaining evaluation dates;
      * ``symbol`` - the company symbol.

    Items are stored in a fixed pseudo-random order (NumPy seed 42).
    """

    def __init__(self,train_dates,eval_dates,df_in,cutoff_year):
        # Every valuation-date column that may be present in the input frame.
        self.all_valuation_dates = [f"{year}-12-31" for year in range(2019, 2006, -1)]

        # Filter the frame and split off symbols and evaluation market caps.
        features, symbols, marketcaps = self.__process_df__(train_dates,eval_dates,df_in,cutoff_year)

        # Build the per-symbol sequences and targets.
        raw_X, raw_Y, caps_future, symbol_arr = self.prepare_data(features,symbols,marketcaps)

        # Normalise inputs and targets, keeping the parameters for later inversion.
        norm_X, self.X_mean, self.X_min_max = self.normalise_X_data(raw_X)
        norm_Y, self.Y_min, self.Y_min_max = self.normalise_Y_data(raw_Y)

        # Reproducible shuffle (note: this reseeds NumPy's global generator).
        order = np.arange(symbol_arr.shape[0])
        np.random.seed(42)
        np.random.shuffle(order)
        self.rnd_idx = order

        # Apply the same permutation to every component.
        self.marketcaps_future = caps_future[order,:]
        self.symbols = symbol_arr[order]
        self.model_X = norm_X[order,:]
        self.model_Y = norm_Y[order]

    def __process_df__(self,train_dates,eval_dates,df,cutoff_year):
        """Filter ``df`` and return ``(feature_frame, symbols, eval_marketcaps)``."""
        kept = df.loc[df['Source_Year'] <cutoff_year].reset_index(drop=True)
        # Keep only rows with a positive valuation at every date of interest.
        for date_col in train_dates+eval_dates:
            kept = kept.loc[kept[date_col] >0].reset_index(drop=True)
        # Evaluation-date market caps and symbols are stored separately.
        marketcaps = kept[eval_dates]
        symbols = kept['Symbol']
        # Valuation dates that are not training inputs must not reach the model.
        excluded_dates = list(set(self.all_valuation_dates)-set(train_dates))
        features = kept.drop(columns=excluded_dates+['Symbol'])
        return (features,symbols,marketcaps)

    def __get_norm_params__(self):
        """Return ``((X_mean, X_range), (Y_min, Y_range))`` used for normalisation."""
        x_params = (self.X_mean,self.X_min_max)
        y_params = (self.Y_min,self.Y_min_max)
        return (x_params,y_params)

    def normalise_X_data(self,model_data):
        """Signed-log transform, then centre and scale over the first two axes.

        Returns ``(normalised, mean, value_range)``.
        """
        signed_log = torch.sign(model_data)*torch.log(torch.abs(model_data)+torch.ones(model_data.shape))
        highest = signed_log.max(0)[0].max(0)[0]
        lowest = signed_log.min(0)[0].min(0)[0]
        value_range = highest - lowest
        centre = signed_log.mean(0).mean(0)
        # A zero range becomes one so the division below is always defined.
        value_range = value_range + (value_range==0)
        return ((signed_log-centre)/value_range,centre,value_range)

    def normalise_Y_data(self,model_data):
        """Log transform, then min-max scale.

        Returns ``(normalised, log_min, log_range)``.
        """
        logged = torch.log(model_data)
        lowest = logged.min(0)[0].min(0)[0]
        value_range = logged.max(0)[0].max(0)[0] - lowest
        # A zero range becomes one so the division below is always defined.
        value_range = value_range + (value_range==0)
        return ((logged - lowest)/value_range,lowest,value_range)

    def __denormalise__(self,Y_in):
        """Invert ``normalise_Y_data``: undo the scaling, then exponentiate."""
        log_value = Y_in*self.Y_min_max+self.Y_min
        return torch.exp(log_value)

    def __len__(self):
        return len(self.model_X)

    def __getitem__(self,idx):
        return (self.model_X[idx],self.model_Y[idx],self.marketcaps_future[idx],self.symbols[idx])

    def prepare_data(self,df,symbols,marketcaps):
        """Group consecutive rows by symbol into three-row sequences.

        For every run of rows sharing a symbol, the first three rows form the
        input sequence; the target and the future market caps are taken from the
        first row of the run. Runs shorter than three rows produce nothing.

        Returns ``(X, Y, future_marketcaps, symbols)`` as two float tensors and
        two NumPy arrays.
        """
        sequences = []
        targets = []
        futures = []
        labels = []
        previous = ''
        seen = 0
        for row in range(len(df)):
            current = symbols[row]
            if current != previous:
                # A new run starts: reset the window and capture its targets.
                seen = 0
                window = []
                target = marketcaps.iloc[row,0]
                future = marketcaps.iloc[row,1:].tolist()
                label = current
            previous = current
            # Only the first three rows of a run are collected.
            if seen < 3:
                window.append(df.iloc[row,:].to_list())
            seen += 1
            # Emit the sample as soon as its third row has been added.
            if seen == 3:
                sequences.append(window)
                targets.append(target)
                futures.append(future)
                labels.append(label)
        return (torch.FloatTensor(sequences),torch.FloatTensor(targets),np.array(futures),np.array(labels))


In [None]:
############################
#load the data for the model
############################

# Read the tab-separated model input and discard the unnamed first column
# (presumably an index written by an earlier export -- TODO confirm).
df_data = pd.read_csv(model_data_path,sep='\t').drop(columns='Unnamed: 0')

#one hot encodings
# Every object-typed column except the company symbol is treated as categorical.
# The builtin `object` is used because the `np.object` alias was removed from NumPy.
onehot_cols = [col for col in df_data.columns if df_data[col].dtype==object and col !="Symbol"]
# dtype=float keeps the indicator columns numeric (newer pandas defaults to bool),
# so complete rows can be turned into float tensors by FinancialDataset.
df_data = pd.get_dummies(df_data,columns=onehot_cols,prefix=onehot_cols,dtype=float)

# In both eval_dates lists below the first date appears twice on purpose:
# FinancialDataset takes the first entry as the target and every later entry
# (starting with the same date again) as the stored "future" market caps.

#Build the validation dataset
train_dates = []
eval_dates = ['2009-12-31','2009-12-31','2010-12-31','2011-12-31','2012-12-31']
cutoff_year = 2010
dataset_validation = FinancialDataset(train_dates,eval_dates,df_data,cutoff_year)

#Build the test dataset
train_dates = []
eval_dates = ['2016-12-31','2016-12-31','2017-12-31','2018-12-31','2019-12-31']
cutoff_year = 2020
dataset_test = FinancialDataset(train_dates,eval_dates,df_data,cutoff_year)

In [None]:
class ModelResults():
    """Accumulates per-batch model outputs in one DataFrame and summarises them.

    Each appended batch contributes one row per sample with the columns
    ``Y`` (normalised target), ``Y_0..Y_k`` (network outputs), ``Z_0..Z_m``
    (auxiliary values passed through unchanged), ``Symbol``, ``Version`` and
    ``Type`` ("train", "val" or "test").
    """

    def __init__(self):
        self.df = None          # all appended rows; None until the first append
        self.Znames = []        # names of the Z_* columns of the latest batch
        self.Ynames = []        # names of the Y_* columns of the latest batch
        self.denorm_fn=None     # denormalisation callable of the latest batch

    def append(self,Y,Yhat,Z,Symbols,version,type_data,denorm_fn):
        """Append one batch of results.

        Args:
            Y: 1-D tensor of normalised targets.
            Yhat: 2-D tensor of network outputs (one row per sample).
            Z: 2-D tensor of auxiliary values (one row per sample).
            Symbols: Sequence of symbols, one per sample.
            version: Run identifier stored in the ``Version`` column.
            type_data: Split label stored in the ``Type`` column.
            denorm_fn: Callable that maps normalised targets to original units.
        """
        self.denorm_fn=denorm_fn
        df_temp = pd.DataFrame(Y.cpu().numpy(),columns=['Y'])

        Y_temp = Yhat.detach().cpu().numpy()
        self.Ynames = ['Y_' + str(i) for i in range(Y_temp.shape[1])]
        for i,name in enumerate(self.Ynames):
            df_temp[name] = Y_temp[:,i]

        Z_temp = Z.cpu().numpy()
        self.Znames = ['Z_' + str(i) for i in range(Z_temp.shape[1])]
        for i,name in enumerate(self.Znames):
            df_temp[name] = Z_temp[:,i]

        df_temp['Symbol'] = Symbols
        df_temp['Version']=version
        df_temp['Type']=type_data

        if (self.df is None):
            self.df = df_temp
        else:
            # DataFrame.append no longer exists in current pandas; concat is the
            # supported way to stack frames.
            self.df = pd.concat([self.df,df_temp],ignore_index=True)
        return

    def append_train(self,Y,Yhat,Z,Symbols,version,denorm_fn):
        """Append a batch labelled "train"."""
        self.append(Y,Yhat,Z,Symbols,version,"train",denorm_fn)
        return

    def append_val(self,Y,Yhat,Z,Symbols,version,denorm_fn):
        """Append a batch labelled "val"."""
        self.append(Y,Yhat,Z,Symbols,version,"val",denorm_fn)
        return

    def append_test(self,Y,Yhat,Z,Symbols,version,denorm_fn):
        """Append a batch labelled "test"."""
        self.append(Y,Yhat,Z,Symbols,version,"test",denorm_fn)
        return

    def save_results(self,file_path):
        """Write all accumulated rows to ``file_path`` as CSV (without the index)."""
        self.df.to_csv(file_path,index=False)

    def get_all_predictions(self):
        """Average every numeric column per symbol.

        Returns:
            ``(Y, Yhat, Z, symbols)``: the mean normalised target, the mean
            network outputs (2-D, one column per ``Y_*``), the mean auxiliary
            values and the symbols, all ordered by symbol.
        """
        # numeric_only skips the string `Type` column, which cannot be averaged.
        df_avg = self.df.groupby(['Symbol']).mean(numeric_only=True).reset_index()
        # The outputs are stored as Y_0..Y_k; there is no single 'Yhat' column.
        return (df_avg['Y'].to_numpy(),df_avg[self.Ynames].to_numpy(),df_avg[self.Znames].to_numpy(),df_avg['Symbol'])

    def get_predictions_cap_band(self,cap_from,cap_to,percentile):
        """Per-symbol predictions for one band of the actual-value ranking.

        Every stored row is turned into a point prediction at ``percentile``,
        rows are averaged per symbol, symbols are sorted by their denormalised
        actual value and the slice ``[cap_from, cap_to)`` (as fractions of the
        number of symbols) is returned.

        Returns:
            ``(Y, predictions, Z, symbols)`` for the selected band.
        """
        df_avg = self.df.copy()
        total_rows = len(df_avg)
        # Predict in chunks of at most 1000 rows. Stepping through the range
        # never yields an empty trailing chunk, and the column is assigned once
        # instead of through chained indexing.
        chunk_size=1000
        curves = df_avg[self.Ynames].to_numpy()
        chunks = [
            BDN_predict(curves[start:start+chunk_size,:],percentile,self.denorm_fn).reshape(-1)
            for start in range(0,total_rows,chunk_size)
        ]
        df_avg['predictions'] = np.concatenate(chunks) if chunks else np.zeros(0)

        df_avg['Y'] =self.denorm_fn(torch.from_numpy(df_avg['Y'].to_numpy()).type(torch.FloatTensor)).numpy()
        df_avg = df_avg.groupby(['Symbol']).mean(numeric_only=True).reset_index()
        df_avg = df_avg.sort_values(by='Y',ascending=True).reset_index(drop=True)
        total_entries = len(df_avg)
        # The small epsilon stops float noise in the band edges (for example
        # 0.7999999999999999 * 1000) from truncating one position too low.
        idx_from = int(cap_from*total_entries+1e-9)
        idx_to = int(cap_to*total_entries+1e-9)
        df_avg = df_avg.iloc[idx_from:idx_to,:].reset_index(drop=True)
        return (df_avg['Y'].to_numpy(),df_avg['predictions'].to_numpy(),df_avg[self.Znames].to_numpy(),df_avg['Symbol'])

In [None]:
#######################
#### Use a neural network to fit the function
#######################
is_cuda = torch.cuda.is_available()

# Pick the compute device once: the GPU when CUDA is available, otherwise the CPU.
# Later cells read this notebook-level `device` variable.
device = torch.device("cuda" if is_cuda else "cpu")
print("GPU is available" if is_cuda else "GPU not available, CPU used")
 
#######################
#### Bezier Density Network
#######################

def Bezier_Output_Layer(x):
    """Turn raw network outputs into constrained Bezier control points.

    The ``2n - 2`` input columns are split into two halves of ``n - 1`` logits.
    Each half becomes a non-decreasing sequence that starts at 0 and ends at 1:
    the logits are exponentiated, normalised to sum to one, cumulatively summed
    and prefixed with a fixed 0.

    Args:
        x: 2-D tensor of shape ``(batch, 2n - 2)``.

    Returns:
        Tensor of shape ``(batch, 2n)`` laid out as ``[x control points | z
        control points]``, on the same device and with the same dtype as ``x``.
    """
    # Number of control points per coordinate.
    n = (x.shape[1]+2)//2
    l = x.shape[0]

    def _unit_ramp(logits):
        # Normalised exponentials, cumulatively summed. Subtracting the row
        # maximum first leaves the ratio unchanged but keeps exp() from
        # overflowing to inf (and inf/inf from producing NaN) for large logits.
        if logits.shape[1] == 0:
            return logits
        shifted = logits - logits.max(1,keepdim=True)[0].detach()
        weights = torch.exp(shifted)
        return torch.cumsum(weights,1)/torch.sum(weights,1).view(-1,1)

    #constrain the x's
    p_x = _unit_ramp(x[:,:(n-1)])
    #constrain the z's
    p_z = _unit_ramp(x[:,(n-1):(2*n-2)])

    # Every curve starts at the origin. The zeros are created from `x` so they
    # share its device and dtype instead of relying on the notebook-level `device`.
    p_x0 = x.new_zeros((l,1))
    p_z0 = x.new_zeros((l,1))
    p_x = torch.cat((p_x0,p_x),1)
    p_z = torch.cat((p_z0,p_z),1)
    #we create an output of the form p_x p_z
    p = torch.cat((p_x,p_z),1)
    return p

def Bezier_loss(x,y):
    """Mean negative log-density of the targets under the predicted Bezier CDFs.

    The density is approximated with a central finite difference of the CDF
    around each target (step 0.0001 on either side).

    Args:
        x: 2-D tensor of Bezier control points, one curve per row.
        y: Targets, flattened to one value per row of ``x`` and used directly as
            the curve parameter (assumes they are normalised to [0, 1] -- TODO confirm).

    Returns:
        Scalar tensor: minus the summed log-density divided by the batch size.
    """
    t = y.view(-1)
    cdf_above = bezier_cdf(t+0.0001,x)
    cdf_below = bezier_cdf(t-0.0001,x)
    density = (cdf_above-cdf_below)/.0002
    # Non-positive densities are replaced by 1 so that their log is 0, not -inf/NaN.
    guarded = density*(density>0)+(density<=0)
    log_density = torch.log(guarded)
    return -torch.sum(log_density)/x.shape[0]

class Net(nn.Module):
    """LSTM encoder followed by an MLP head that emits Bezier control points.

    Args:
        input_dim: Number of features per time step (default 213).
        hidden_dim: LSTM hidden size (default 200).
        n_layers: Number of stacked LSTM layers (default 3).
        n_control_points: Control points per Bezier coordinate; the last linear
            layer emits ``2 * n_control_points - 2`` values (default 20 -> 38).
        dropout: Dropout probability for the LSTM and the dropout layer (default 0.5).

    Calling ``Net()`` with no arguments builds the same architecture as before
    these sizes were made configurable.
    """
    def __init__(self, input_dim=213, hidden_dim=200, n_layers=3, n_control_points=20, dropout=0.5):
        super(Net, self).__init__()

        self.hidden_dim = hidden_dim
        self.n_layers = n_layers

        # 'ReLU' and 'Sigmoid' are not used by forward(); they are kept so the
        # keys of `self.model` stay unchanged.
        self.model = nn.ModuleDict({
            'LSTM': nn.LSTM(input_dim,self.hidden_dim,num_layers = self.n_layers,batch_first = True,dropout=dropout),
            # The head's input size follows the LSTM hidden size.
            'Linear1': nn.Linear(self.hidden_dim,50),
            'ReLU': nn.ReLU(),
            'Tanh': nn.Tanh(),
            'Sigmoid': nn.Sigmoid(),
            'Dropout': nn.Dropout(p=dropout),
            'Linear2': nn.Linear(50,50),
            'Linear3': nn.Linear(50,50),
            'Linear4': nn.Linear(50,n_control_points*2-2)
        })

    def init_hidden(self, batch_size):
        """Return an all-zero LSTM state of shape ``(n_layers, batch_size, hidden_dim)``.

        The tensor is created on the device (and with the dtype) of the model's
        own parameters, so it always matches wherever the network has been moved.
        """
        reference = next(self.parameters())
        return torch.zeros(self.n_layers, batch_size, self.hidden_dim,
                           device=reference.device, dtype=reference.dtype)

    def forward(self, x):
        """Map a batch of sequences ``(batch, seq, input_dim)`` to Bezier control points."""
        batch_size = x.size(0)
        h0 = self.init_hidden(batch_size)
        c0 = self.init_hidden(batch_size)
        output, (h_n,c_n) = self.model['LSTM'](x,(h0,c0))
        # Only the output of the last time step feeds the head.
        x = output[:,-1,:]
        x = self.model['Dropout'](x)
        x = self.model['Linear1'](x)
        x = self.model['Tanh'](x)
        x = self.model['Linear2'](x)
        x = self.model['Tanh'](x)
        x = self.model['Linear3'](x)
        x = self.model['Tanh'](x)
        x = self.model['Linear4'](x)
        x = Bezier_Output_Layer(x)
        return x

    
def run_model(train_dataset,val_dataset,net_results,run_version,batch_size=64,lr=0.001,window=40):
    """Train a fresh ``Net`` with early stopping and record its outputs.

    Training continues for at least ``window`` epochs and then for as long as
    the mean validation loss of the last ``window`` epochs is lower than the
    mean of the window that ends one epoch earlier. Afterwards the train and
    validation loss curves are plotted and the model outputs for every
    validation and training sample are appended to ``net_results``.

    Args:
        train_dataset: Dataset yielding ``(X, Y, Z, symbol)`` items for training.
        val_dataset: Dataset yielding the same items for validation. It must
            expose ``.dataset.__denormalise__`` (e.g. a ``Subset`` of a
            ``FinancialDataset``).
        net_results: ``ModelResults`` instance that receives the outputs.
        run_version: Identifier stored with every appended row.
        batch_size: Mini-batch size of both loaders (default 64).
        lr: Adam learning rate (default 0.001).
        window: Moving-average window, in epochs, of the stopping rule (default 40).
    """
    train_data_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    val_data_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=True)
    net = Net().to(device)
    criterion = Bezier_loss
    optimizer = optim.Adam(net.parameters(), lr=lr)
    denorm_fn = val_dataset.dataset.__denormalise__
    #store the losses
    loss_v=[]
    loss_t=[]

    epoch=0
    while ( (epoch < window) or ( np.mean( np.array(loss_v)[-window:] ) < np.mean( np.array(loss_v)[-(window+1):-1] ) )):
        net.train()
        t_loss=0
        t_count=0
        for X, Y, _, _ in train_data_loader:
            # forward + backward + optimize
            optimizer.zero_grad()
            outputs = net(X.to(device))
            loss = criterion(outputs, Y.to(device))
            t_loss+=loss.item()
            t_count+=1
            loss.backward()
            optimizer.step()
        loss_t.append(t_loss/t_count)

        # Evaluate on the validation set in eval mode (dropout disabled) and
        # without autograd. Measuring it in train mode made the loss that drives
        # the stopping rule noisy and kept a graph alive for every batch.
        net.eval()
        v_loss=0
        v_count=0
        with torch.no_grad():
            for X_val, Y_val, _, _ in val_data_loader:
                v_loss+= criterion(net(X_val.to(device)), Y_val.to(device)).item()
                v_count+=1
        loss_v.append(v_loss/v_count)
        print(f"epoch = {epoch} train loss = {loss_t[-1]} validation loss = {loss_v[-1]}")
        epoch+=1

    print('Finished Training')
    #put model into eval mode
    net.eval()
    plt.plot(loss_t, "-b", label="train")
    plt.plot(loss_v , "-r", label="validation")
    plt.legend(loc="upper right")
    plt.xlabel("epochs")
    plt.ylabel("loss")
    plt.title("Train vs validation loss")
    plt.show()

    # Inference only from here on, so no gradients are needed.
    with torch.no_grad():
        #generate validation results
        for X_val, Y_val, Z, sym in val_data_loader:
            net_results.append_val(Y_val, net(X_val.to(device)), Z, sym, run_version, denorm_fn)

        #generate train results
        for X_trn, Y_trn, Z, sym in train_data_loader:
            net_results.append_train(Y_trn, net(X_trn.to(device)), Z, sym, run_version, denorm_fn)

In [None]:
#set a random seed on the split 
# Fix torch's seed so the random fold split below is reproducible.
torch.manual_seed(41)

CV_Folds = 5
# All folds share one size except the last, which also takes the remainder.
fold_len = int(len(dataset_test)/CV_Folds)
split_sizes = [fold_len]*(CV_Folds-1) + [len(dataset_test) - (CV_Folds-1)*fold_len]
CV_data = list(torch.utils.data.random_split(dataset_test, split_sizes))
net_results = ModelResults()

# Number of complete passes over all folds.
Repeat = 1
cnt = 0
for rep in range(Repeat):
    for fold in range(CV_Folds):
        # The fold at the head of the list validates; all others train.
        print(f"running fold {fold+1} on repeat {rep+1}")
        val_dataset = CV_data[0]
        train_dataset = torch.utils.data.ConcatDataset(CV_data[1:])
        print(f"train dataset size ={len(train_dataset)} val dataset size = {len(val_dataset)}")
        run_model(train_dataset, val_dataset, net_results,cnt)
        cnt += 1
        # Rotate so the next fold moves to the head of the list.
        CV_data = CV_data[1:] + CV_data[:1]

# Point the notebook-level `device` at the CPU; code that reads `device` after
# this cell (e.g. BDN_predict) then allocates on the CPU.
device = torch.device("cpu")

In [None]:
#Create P&L charts
def p_and_l_chart(return_dist,title):
    """Bar chart of sorted returns on a signed-log scale, with mean and median markers.

    Each return ``r`` is drawn at height ``log(1 + r)`` when non-negative (green)
    and ``-log(1 - r)`` when negative (red); the y tick labels show the
    corresponding percentages.

    Args:
        return_dist: 1-D array of returns expressed as fractions (1.0 = 100%).
        title: Chart title.
    """
    returns = np.sort(return_dist)
    n_returns = returns.shape[0]
    mean_ret = np.mean(returns)
    median_ret = np.median(returns)
    # Height of the mean on the same signed-log scale as the bars.
    mean_height = np.log(1+mean_ret*(mean_ret>=0)) -np.log(1-mean_ret*(mean_ret<0))
    gain_heights = np.log(1+returns*(returns>=0))
    loss_heights = -np.log(1-returns*(returns<0))
    positions = np.arange(n_returns)

    # Fixed tick positions, given as returns and mapped through the same transform.
    tick_returns = np.array([-10,-1,0,1,10,100,1000,10000,100000,1000000])
    tick_heights = np.log(1+tick_returns*(tick_returns>=0)) -np.log(1-tick_returns*(tick_returns<0))
    tick_text = [f"{pct:,.0f}%" for pct in (tick_returns*100).tolist()]
    plt.yticks(tick_heights, tick_text)
    plt.bar(positions,gain_heights,width=1,color='green',label="Positive Return")
    plt.bar(positions,loss_heights,width=1,color='red',label="Negative Return")
    plt.title(title)
    ax = plt.gca()
    # Horizontal dashed line at the mean, short vertical dashed line at the middle rank.
    ax.plot([0., positions[-1]], [mean_height, mean_height], "k--",label="Mean Return = " + f"{mean_ret*100:,.0f}%")
    ax.plot([n_returns/2, n_returns/2], [-np.log(2), np.log(2)], "b--",label="Median Return = "+ f"{median_ret*100:,.0f}%")
    plt.ylabel('Compound Return % after 3 Years')
    plt.xlabel('Companies - sorted from low to high return')
    ax.legend(loc='upper left', frameon=False)
    plt.show()

def training_v_actual_chart(Y,Y_hat,Ycheap,Y_hatcheap,title):
    """Log-log scatter of predicted against actual market caps.

    All companies are drawn in black and the "cheap" subset is overlaid in red.

    Args:
        Y, Y_hat: Actual and predicted values for all companies.
        Ycheap, Y_hatcheap: The same for the highlighted subset.
        title: Chart title.
    """
    # NOTE(review): the correlation is computed but never displayed or returned.
    _correlation = np.corrcoef(Y,Y_hat)[1,0]
    point_style = dict(marker="o",s=1.5)
    plt.scatter(Y, Y_hat, color='k', label="all companies", **point_style)
    plt.scatter(Ycheap, Y_hatcheap, color='r', label="cheap companies", **point_style)

    plt.legend(loc="upper left")
    plt.xlabel("log(Actual Market Cap)")
    plt.ylabel("log(Predicted Market Cap)")
    plt.loglog()
    plt.gca()
    plt.title(title)
    plt.show()

def n_cheapest(Y,Yhat,n):
    """Flag the ``n`` entries with the largest predicted-to-actual ratio.

    Args:
        Y: 1-D array of actual values.
        Yhat: 1-D array of predicted values, aligned with ``Y``.
        n: Number of entries to flag. Values larger than ``len(Y)`` flag every
            entry; values of zero or below flag none.

    Returns:
        Boolean array shaped like ``Y`` that is True for the flagged entries.
    """
    ratio = Yhat/Y
    #negate the sign to sort descending
    idxs = (-ratio).argsort()
    res = np.zeros(Y.shape,dtype=bool)
    # Slicing clamps to the available entries, so a band holding fewer than `n`
    # companies no longer raises an IndexError.
    res[idxs[:max(n,0)]] = True
    return res


In [None]:
#Y, Y_hat, Z, Symbols = net_results.get_all_predictions()
percentile =0.5
# Lower edges of the ranking bands (fractions of the companies sorted by actual value).
bands = [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
# How many of the highest predicted/actual ratios to select within each band.
cheapest = [40]
summary_frames = []


def _summary_row(label, n_companies, yearly_returns):
    """One-row frame with the formatted mean return of each year."""
    row = {'Results': label, 'Companies': n_companies}
    for year, ret in enumerate(yearly_returns, start=1):
        row[f'Yr{year} Mean Return %'] = [f"{100*np.mean(ret):,.0f}%"]
    return pd.DataFrame(row)


for b in bands:
    # Rounding removes float noise such as 0.7+0.1 == 0.7999999999999999, which
    # would otherwise leave a row between two neighbouring bands unassigned.
    b_up = round(b+bands[1],10)
    Y, Y_hat, Z, Symbols = net_results.get_predictions_cap_band(b,b_up,percentile)
    # Year-on-year returns between consecutive Z columns.
    yearly_returns = [Z[:,i+1]/Z[:,i]-1 for i in range(3)]
    summary_frames.append(_summary_row(f"All {b:,.1f}-{b_up:,.1f}", Y.shape[0], yearly_returns))

    # Total return from the actual value to the last Z column.
    all_ret_dist = Z[:,-1]/Y - 1
    p_and_l_chart(all_ret_dist,f"3 Year Results - all companies - market caps {b:,.1f}-{b_up:,.1f}")

    #generate a return chart for different margin levels
    for cheap in cheapest:
        cheap_idx = n_cheapest(Y,Y_hat,cheap)

        #generate a scatter chart of the predictions vs actual market caps
        training_v_actual_chart(Y,Y_hat,Y[cheap_idx],Y_hat[cheap_idx],f"Actual vs Predicted Market Caps in {b:,.1f}-{b_up:,.1f}")

        long_ret_dist = all_ret_dist[cheap_idx]
        p_and_l_chart(long_ret_dist,f"3 Year Results - cheapest {cheap:.0f} companies - market caps {b:,.1f}-{b_up:,.1f}")
        summary_frames.append(_summary_row(
            f"Model {b:,.1f}-{b_up:,.1f} Cheapest = {cheap:.0f}",
            long_ret_dist.shape[0],
            [ret[cheap_idx] for ret in yearly_returns]))

# Build the summary table once; DataFrame.append no longer exists in current pandas.
df_results = pd.concat(summary_frames,ignore_index=True)
net_results.save_results('test_results_bdn_model.csv')
display(df_results)