# Building Portfolios from Synthetic Assets

In [None]:
################
# Import dependencies
################
import numpy as np
import matplotlib.pyplot as plt
#import correlations as corr
import distcorr as corr
#import data

In [None]:
##################
# Define global variables
##################
L = 20  # lookback window
rebalance = 5  # rebalancing frequency
# Load in data.
# Default data for testing: prices for a few tickers fetched with yfinance.
import yfinance as yf

tickers = ['AAPL', 'MSFT', 'GOOG']
# auto_adjust=False keeps the separate 'Adj Close' column; when yfinance
# auto-adjusts prices that column is dropped and the lookup below fails.
data = yf.download(
    tickers, start="2020-01-01", end="2020-06-01", auto_adjust=False
)['Adj Close']
# Simple period-over-period returns; the first row is NaN and is dropped.
returns = data.pct_change().dropna()

## Lead-Lag Metrics

In [None]:
def ccf(x, y, lags=None):
    """
    Cross-correlation function of two series over a set of lags.

    Assumes x and y are already restricted to the lookback window.
    The dependence measure is ``corr.distance_correlation``; swap that call
    to use a different correlation measure.

    Parameters
    ----------
    x, y : sliceable sequences (presumably 1-D arrays of equal length --
        TODO confirm against callers).
    lags : iterable of int, optional
        Lags to evaluate. Defaults to every integer from ``-len(x) + 1`` to
        ``len(y) - 1`` inclusive.

    Returns
    -------
    numpy.ndarray
        One value per lag, in the order the lags were given.
    """
    if lags is None:
        lags = np.arange(-len(x) + 1, len(y))

    ccf_values = []
    for lag in lags:
        if lag < 0:
            # x leading y: x[t] is paired with y[t + |lag|].
            x_part, y_part = x[:lag], y[-lag:]
        elif lag > 0:
            # y leading x: y[t] is paired with x[t + lag].
            x_part, y_part = x[lag:], y[:-lag]
        else:
            # Zero lag: compare the full series. Slicing with y[:-0] would
            # give an empty sequence, so this case is handled explicitly.
            x_part, y_part = x, y
        ccf_values.append(corr.distance_correlation(x_part, y_part))

    return np.array(ccf_values)

def ccf_auc(x, y, lag):
    """
    Signed, normalised area under the cross-correlation function.

    Sums the absolute ``ccf`` values over lags ``1..lag`` in both directions
    (``ccf(x, y, ...)`` and ``ccf(y, x, ...)``) and returns

        sign(I_xy - I_yx) * max(I_xy, I_yx) / (I_xy + I_yx)

    Swapping x and y flips the sign of the result.

    NOTE(review): I_xy is built from positive lags of ``ccf(x, y, ...)``,
    which ``ccf`` itself labels as "y leading x" -- confirm the intended
    sign convention against downstream ranking code.

    Parameters
    ----------
    x, y : series accepted by ``ccf``.
    lag : int
        Largest lag to include; lags ``1..lag`` are used.

    Returns
    -------
    float
        The signed score, or 0.0 when both areas are zero (including when
        ``lag < 1`` so that no lags are evaluated).
    """
    lags = list(range(1, lag + 1))
    # Areas for the two directions.
    I_xy = float(np.sum(np.abs(ccf(x, y, lags))))
    I_yx = float(np.sum(np.abs(ccf(y, x, lags))))

    total = I_xy + I_yx
    if total == 0:
        # No measurable dependence in either direction; avoid 0/0.
        return 0.0

    return np.sign(I_xy - I_yx) * (max(I_xy, I_yx) / total)

def compute_lead_lag_matrix(assets, lag):
    """
    Compute the skew-symmetric lead-lag matrix for a set of assets.

    Entry ``[i, j]`` is ``ccf_auc(assets[i], assets[j], lag)``; the diagonal
    is zero.

    Parameters
    ----------
    assets : indexable collection of series (``assets[i]`` is passed to
        ``ccf_auc`` as one series).
    lag : int
        Maximum lag forwarded to ``ccf_auc``.

    Returns
    -------
    numpy.ndarray
        ``(n, n)`` array with ``n = len(assets)``.
    """
    n = len(assets)
    lead_lag_matrix = np.zeros((n, n))

    # ccf_auc(a, b) == -ccf_auc(b, a) by construction, so only the upper
    # triangle is evaluated and the lower triangle is filled by negation.
    for i in range(n):
        for j in range(i + 1, n):
            score = ccf_auc(assets[i], assets[j], lag)
            lead_lag_matrix[i, j] = score
            lead_lag_matrix[j, i] = -score

    return lead_lag_matrix



# GlobalRank

In [None]:
## functions needed
def find_global_rank(lead_lag_matrix):
    """
    Score each asset by the mean of its column in the lead-lag matrix.

    Returns a 1-D float array with one entry per row of the matrix
    (``lead_lag_matrix.shape[0]`` entries); entry ``j`` is the mean of
    column ``j``.
    """
    size = lead_lag_matrix.shape[0]
    column_means = (np.mean(lead_lag_matrix[:, col]) for col in range(size))
    return np.fromiter(column_means, dtype=float, count=size)

def sort_assets_by_rank(assets, global_rank):
    """
    Order assets from highest to lowest global rank.

    Returns a tuple ``(ordered_assets, order)`` where ``order`` holds the
    original positions in descending-rank order and ``ordered_assets`` is
    the list of assets picked in that order.
    """
    # argsort is ascending; reverse it to get the largest rank first.
    order = np.argsort(global_rank)[::-1]
    ordered_assets = []
    for position in order:
        ordered_assets.append(assets[position])
    return ordered_assets, order

def get_top_m_assets(assets, global_rank, m):
    """
    Return the first m assets (and their original indices) after sorting
    by global rank in descending order.
    """
    ranked_assets, ranked_indices = sort_assets_by_rank(assets, global_rank)
    top_assets = ranked_assets[:m]
    top_indices = ranked_indices[:m]
    return top_assets, top_indices

def get_bottom_n_assets(assets, global_rank, n):
    """
    Get the bottom n assets based on global rank.

    Assets are sorted by rank in descending order and the last ``n`` are
    returned, together with their original indices.

    Parameters
    ----------
    assets : indexable collection of assets.
    global_rank : array-like of rank scores, one per asset.
    n : int
        Number of assets to return. Values of ``n`` larger than the number
        of assets return everything; ``n <= 0`` returns nothing.

    Returns
    -------
    tuple
        ``(bottom_assets, bottom_indices)``.
    """
    sorted_assets, sorted_indices = sort_assets_by_rank(assets, global_rank)
    # A plain [-n:] slice returns the *whole* list when n == 0, so compute
    # the start position explicitly.
    start = max(len(sorted_assets) - n, 0) if n > 0 else len(sorted_assets)
    return sorted_assets[start:], sorted_indices[start:]

def compute_returns_daily_rebalance(leaders, followers, r_mkt, t):
    """
    Compute the portfolio return at time t for a leader-signal strategy.

    The signal is the equal-weighted average of the leaders' returns at
    ``t - 1``. If it is non-negative the portfolio return is the
    equal-weighted average follower return at ``t`` minus the market return
    at ``t``; otherwise it is the market return minus the average follower
    return.

    Parameters
    ----------
    leaders : sequence of return series (an m x T matrix).
    followers : sequence of return series (an n x T matrix).
    r_mkt : sequence of market returns, indexed by time.
    t : int
        Time index of the return to compute. Must not be 0.

    Returns
    -------
    The portfolio return at time ``t``.

    Raises
    ------
    ValueError
        If ``t == 0``: the signal would be read from index ``-1``, i.e. the
        last observation, which is a look-ahead rather than the prior day.
    ZeroDivisionError
        If ``leaders`` or ``followers`` is empty.
    """
    if t == 0:
        raise ValueError("t must not be 0: no previous observation for the leader signal")

    # Signal: average leader return on the previous step.
    av_leader_returns = sum(series[t - 1] for series in leaders) / len(leaders)
    # Traded leg: average follower return on the current step.
    av_follower_returns = sum(series[t] for series in followers) / len(followers)

    if av_leader_returns >= 0:
        return av_follower_returns - r_mkt[t]
    return r_mkt[t] - av_follower_returns

## Running Code