In [1]:
import pandas as pd
import numpy as np

import helper
import project_helper
import project_tests

# Market Data

In [2]:
# Load the end-of-day quote data from CSV.
df = pd.read_csv('./data/eod-quotemedia.csv')

# Keep only the tickers returned by the project helper.
# NOTE(review): judging by the helper and argument names this is presumably the
# top 20% of stocks by dollar volume - confirm against project_helper.
percent_top_dollar = 0.2
high_volume_symbols = project_helper.large_dollar_volume_stocks(df, 'adj_close', 'adj_volume', percent_top_dollar)
df = df[df['ticker'].isin(high_volume_symbols)]

# Reshape to wide form: one row per date, one column per ticker.
close = df.reset_index().pivot(index='date', columns='ticker', values='adj_close')
volume = df.reset_index().pivot(index='date', columns='ticker', values='adj_volume')
dividends = df.reset_index().pivot(index='date', columns='ticker', values='dividends')

In [3]:
# Show the wide close-price table through the project's display helper.
project_helper.print_dataframe(close)

# Part 1: Smart Beta Portfolio

## Index Weights

In [4]:
def generate_dollar_volume_weights(close, volume):
    """
    Generate dollar volume weights.

    A ticker's weight on a date is its dollar volume (close * volume) divided
    by the total dollar volume of all tickers on that same date.

    Parameters
    ----------
    close : DataFrame
        Close price for each ticker and date
    volume : DataFrame
        Volume for each ticker and date

    Returns
    -------
    dollar_volume_weights : DataFrame
        The dollar volume weights for each ticker and date

    Raises
    ------
    AssertionError
        If `close` and `volume` do not have equal indexes and columns
    """
    assert close.index.equals(volume.index)
    assert close.columns.equals(volume.columns)

    dollar_volume = close * volume

    # Total traded value per date; DataFrame.sum skips NaN entries by default.
    total_per_date = dollar_volume.sum(axis=1)

    # axis=0 aligns the per-date totals on the row index, so every row is
    # divided by its own total in a single vectorised operation.
    return dollar_volume.div(total_per_date, axis=0)

In [5]:
# Run the project-supplied check against generate_dollar_volume_weights.
project_tests.test_generate_dollar_volume_weights(generate_dollar_volume_weights)

Tests Passed


In [6]:
# Dollar-volume weights for every date in the loaded data, then plot them.
index_weights = generate_dollar_volume_weights(close, volume)
project_helper.plot_weights(index_weights, 'Index Weights')

## Portfolio Weights

In [7]:
def calculate_dividend_weights(dividends):
    """
    Calculate dividend weights.

    A stock's weight on a date is its cumulative dividend up to and including
    that date divided by the cumulative dividends of all stocks up to that
    date. Rows whose cumulative total is zero produce NaN weights (0 / 0).

    Parameters
    ----------
    dividends : DataFrame
        Dividend for each stock and date

    Returns
    -------
    dividend_weights : DataFrame
        Weights for each stock and date
    """
    # Running total per stock. skipna=False: a missing dividend is not
    # treated as zero, it makes that stock's running total NaN from then on.
    cumulative_dividends = dividends.cumsum(skipna=False)

    # skipna=False here as well, so a NaN running total makes the whole row's
    # denominator (and therefore every weight on that date) NaN.
    total_per_date = cumulative_dividends.sum(axis=1, skipna=False)

    # axis=0 aligns the per-date totals on the row index.
    return cumulative_dividends.div(total_per_date, axis=0)

In [8]:
# Run the project-supplied check against calculate_dividend_weights.
project_tests.test_calculate_dividend_weights(calculate_dividend_weights)

Tests Passed


In [9]:
# Dividend-based weights for every date in the loaded data, then plot them.
etf_weights = calculate_dividend_weights(dividends)
project_helper.plot_weights(etf_weights, 'ETF Weights')

## Returns

In [10]:
def generate_returns(prices):
    """
    Generate returns for ticker and date.

    The return on a date is the price change since the previous row divided
    by the previous row's price. The first row has no previous row, so it is
    NaN for every ticker.

    Parameters
    ----------
    prices : DataFrame
        Price for each ticker and date

    Returns
    -------
    returns : DataFrame
        The returns for each ticker and date
    """
    # Each row holds the price from one row earlier.
    previous_prices = prices.shift(1)
    price_change = prices - previous_prices

    return price_change / previous_prices

In [11]:
# Run the project-supplied check against generate_returns.
project_tests.test_generate_returns(generate_returns)

Tests Passed


In [12]:
# Row-over-row returns of the close prices, then plot them.
returns = generate_returns(close)
project_helper.plot_returns(returns, 'Close Returns')

## Weighted Returns

In [13]:
def generate_weighted_returns(returns, weights):
    """
    Generate weighted returns.

    Multiplies each return by the weight at the same date and ticker.

    Parameters
    ----------
    returns : DataFrame
        Returns for each ticker and date
    weights : DataFrame
        Weights for each ticker and date

    Returns
    -------
    weighted_returns : DataFrame
        Weighted returns for each ticker and date

    Raises
    ------
    AssertionError
        If `returns` and `weights` do not have equal indexes and columns
    """
    # Both frames must be laid out identically before the element-wise product.
    assert returns.index.equals(weights.index)
    assert returns.columns.equals(weights.columns)

    weighted_returns = returns.mul(weights)
    return weighted_returns

In [14]:
# Run the project-supplied check against generate_weighted_returns.
project_tests.test_generate_weighted_returns(generate_weighted_returns)

Tests Passed


In [15]:
# Apply both weighting schemes (index and ETF) to the same returns, then plot each.
index_weighted_returns = generate_weighted_returns(returns, index_weights)
etf_weighted_returns = generate_weighted_returns(returns, etf_weights)
project_helper.plot_returns(index_weighted_returns, 'Index Returns')
project_helper.plot_returns(etf_weighted_returns, 'ETF Returns')

## Cumulative Returns

In [16]:
def calculate_cumulative_returns(returns):
    """
    Calculate cumulative returns.

    The returns of all tickers are summed per date (NaN entries are skipped)
    and compounded over time as the running product of (1 + summed return).
    The first entry of the result is always overwritten with NaN.

    Parameters
    ----------
    returns : DataFrame
        Returns for each ticker and date

    Returns
    -------
    cumulative_returns : Pandas Series
        Cumulative returns for each date
    """
    returns_by_date = returns.sum(axis=1, skipna=True)
    cumulative_returns = (1 + returns_by_date).cumprod()

    # Blank out the first date by position. `.iloc` is used because `series[0]`
    # is a label lookup on integer indexes and would append a new row when no
    # label 0 exists. The guard keeps an empty input from raising IndexError.
    # NOTE(review): presumably the first date is blanked because its returns
    # are undefined (no earlier price) - confirm.
    if len(cumulative_returns) > 0:
        cumulative_returns.iloc[0] = np.nan

    return cumulative_returns

In [17]:
# Run the project-supplied check against calculate_cumulative_returns.
project_tests.test_calculate_cumulative_returns(calculate_cumulative_returns)

Tests Passed


In [18]:
# Compound the weighted returns of the index and of the ETF, then plot the two series together.
index_weighted_cumulative_returns = calculate_cumulative_returns(index_weighted_returns)
etf_weighted_cumulative_returns = calculate_cumulative_returns(etf_weighted_returns)
project_helper.plot_benchmark_returns(index_weighted_cumulative_returns, etf_weighted_cumulative_returns, 'Smart Beta ETF vs Index')

## Tracking Error

In [19]:
def tracking_error(benchmark_returns_by_date, etf_returns_by_date, n_trading_days_in_year=252):
    """
    Calculate the tracking error.

    The tracking error is the sample standard deviation (ddof=1) of the
    per-date difference between ETF and benchmark returns, annualised by
    multiplying with the square root of the number of trading days in a year.

    Parameters
    ----------
    benchmark_returns_by_date : Pandas Series
        The benchmark returns for each date
    etf_returns_by_date : Pandas Series
        The ETF returns for each date
    n_trading_days_in_year : int
        Number of trading days in a year, used as the annualisation factor

    Returns
    -------
    tracking_error : float
        The tracking error

    Raises
    ------
    AssertionError
        If the two series do not have equal indexes
    """
    assert benchmark_returns_by_date.index.equals(etf_returns_by_date.index)

    return_difference = etf_returns_by_date - benchmark_returns_by_date
    per_period_tracking_error = np.std(return_difference, ddof=1)

    return np.sqrt(n_trading_days_in_year) * per_period_tracking_error

In [20]:
# Run the project-supplied check against tracking_error.
project_tests.test_tracking_error(tracking_error)

Tests Passed


In [21]:
# np.sum(..., 1) adds up the ticker columns, leaving one total weighted return per date
# for the index (benchmark) and for the ETF.
smart_beta_tracking_error = tracking_error(np.sum(index_weighted_returns, 1), np.sum(etf_weighted_returns, 1))
print('Smart Beta Tracking Error: {}'.format(smart_beta_tracking_error))

Smart Beta Tracking Error: 0.15765422631922218


# Part 2: Portfolio Optimization

## Covariance

In [22]:
def get_covariance_returns(returns):
    """
    Calculate covariance matrices.

    Missing returns are replaced with 0 before the covariance is computed.
    The frame is transposed so that each ticker is one variable (row) and
    each date is one observation, which is the layout np.cov uses by default.

    Parameters
    ----------
    returns : DataFrame
        Returns for each ticker and date

    Returns
    -------
    returns_covariance  : 2 dimensional Ndarray
        The covariance of the returns
    """
    filled_returns = returns.fillna(0)

    # Tickers become rows: np.cov treats every row as one variable.
    returns_by_ticker = filled_returns.T

    return np.cov(returns_by_ticker)

In [23]:
# Run the project-supplied check against get_covariance_returns.
project_tests.test_get_covariance_returns(get_covariance_returns)

Tests Passed


In [24]:
# Covariance of the returns, re-labelled with the tickers on both axes.
covariance_returns = get_covariance_returns(returns)
covariance_returns = pd.DataFrame(covariance_returns, returns.columns, returns.columns)

# Step 1: the name temporarily holds D^-1, the inverse of the diagonal matrix built from
# the square roots of the covariance diagonal (the per-ticker standard deviations).
covariance_returns_correlation = np.linalg.inv(np.diag(np.sqrt(np.diag(covariance_returns))))
# Step 2: the same name is rebound to D^-1 . Cov . D^-1, i.e. the covariance rescaled
# by the standard deviations, labelled like the covariance frame.
covariance_returns_correlation = pd.DataFrame(
    covariance_returns_correlation.dot(covariance_returns).dot(covariance_returns_correlation),
    covariance_returns.index,
    covariance_returns.columns)

# Plot the rescaled matrix via the project helper.
project_helper.plot_covariance_returns_correlation(
    covariance_returns_correlation,
    'Covariance Returns Correlation Matrix')

## Optimization

In [25]:
import cvxpy as cvx

In [26]:
def get_optimal_weights(covariance_returns, index_weights, scale=2.0):
    """
    Find the optimal weights.

    Minimises `portfolio variance + scale * norm(x - index_weights)` subject
    to every weight being non-negative and the weights summing to 1. The
    solver status is not checked; whatever value the solver leaves in the
    variable is returned.

    Parameters
    ----------
    covariance_returns : 2 dimensional Ndarray
        The covariance of the returns
    index_weights : Pandas Series
        Index weights for all tickers at a period in time
    scale : float
        The penalty factor for weights that deviate from the index

    Returns
    -------
    x : 1 dimensional Ndarray
        The solution for x

    Raises
    ------
    AssertionError
        If `covariance_returns` is not a square 2-D array or its size does
        not match the length of the 1-D `index_weights`
    """
    assert len(covariance_returns.shape) == 2
    assert len(index_weights.shape) == 1
    assert covariance_returns.shape[0] == covariance_returns.shape[1]  == index_weights.shape[0]

    # One decision variable per ticker.
    weights = cvx.Variable(covariance_returns.shape[0])

    # Objective terms: risk of the portfolio and its distance from the index.
    variance_term = cvx.quad_form(weights, covariance_returns)
    deviation_term = cvx.norm(weights - index_weights)
    objective = cvx.Minimize(variance_term + scale * deviation_term)

    # Long-only and fully invested.
    no_short_positions = weights >= 0
    fully_invested = sum(weights) == 1

    problem = cvx.Problem(objective, [no_short_positions, fully_invested])
    problem.solve()

    return weights.value

In [27]:
# Run the project-supplied check against get_optimal_weights.
project_tests.test_get_optimal_weights(get_optimal_weights)

Tests Passed


## Optimized Portfolio

In [28]:
# Optimise once, against the index weights of the last date in the data ...
raw_optimal_single_rebalance_etf_weights = get_optimal_weights(covariance_returns.values, index_weights.iloc[-1])
# ... then repeat that single weight vector on every date (np.tile) so the frame
# has the same index and columns as `returns`.
optimal_single_rebalance_etf_weights = pd.DataFrame(
    np.tile(raw_optimal_single_rebalance_etf_weights, (len(returns.index), 1)),
    returns.index,
    returns.columns)

In [29]:
# Weighted and cumulative returns of the single-rebalance optimised ETF, plotted against the index.
optim_etf_returns = generate_weighted_returns(returns, optimal_single_rebalance_etf_weights)
optim_etf_cumulative_returns = calculate_cumulative_returns(optim_etf_returns)
project_helper.plot_benchmark_returns(index_weighted_cumulative_returns, optim_etf_cumulative_returns, 'Optimized ETF vs Index')

# Tracking error of the optimised ETF against the index, using per-date totals across tickers.
optim_etf_tracking_error = tracking_error(np.sum(index_weighted_returns, 1), np.sum(optim_etf_returns, 1))
print('Optimized ETF Tracking Error: {}'.format(optim_etf_tracking_error))

Optimized ETF Tracking Error: 0.08866738037078957


## Rebalance Portfolio Over Time

In [30]:
def rebalance_portfolio(returns, index_weights, shift_size, chunk_size):
    """
    Get weights for each rebalancing of the portfolio.

    Starting at row `chunk_size` and stepping by `shift_size`, each rebalance
    computes the covariance of the preceding `chunk_size` rows of returns and
    optimises against the index weights of the row just before the rebalance.

    NOTE(review): `chunk_size == 0` passes the assertion below, but the first
    rebalance then uses an empty lookback window and reads
    `index_weights.iloc[-1]` (the last row) - confirm whether 0 is meant to
    be allowed.

    Parameters
    ----------
    returns : DataFrame
        Returns for each ticker and date
    index_weights : DataFrame
        Index weight for each ticker and date
    shift_size : int
        The number of days between each rebalance
    chunk_size : int
        The number of days to look in the past for rebalancing

    Returns
    -------
    all_rebalance_weights  : list of Ndarrays
        The ETF weights for each point they are rebalanced

    Raises
    ------
    AssertionError
        If the frames are laid out differently, `shift_size` is not positive
        or `chunk_size` is negative
    """
    assert returns.index.equals(index_weights.index)
    assert returns.columns.equals(index_weights.columns)
    assert shift_size > 0
    assert chunk_size >= 0

    # One entry per rebalance event, in chronological order.
    all_rebalance_weights = []

    for window_end in range(chunk_size, len(returns), shift_size):
        window_start = window_end - chunk_size

        # Covariance over the lookback window [window_start, window_end).
        lookback_covariance = get_covariance_returns(returns[window_start:window_end])

        # Index weights of the last row inside the lookback window.
        latest_index_weights = index_weights.iloc[window_end - 1]

        all_rebalance_weights.append(
            get_optimal_weights(lookback_covariance, latest_index_weights))

    return all_rebalance_weights

In [31]:
# Run the project-supplied check against rebalance_portfolio.
project_tests.test_rebalance_portfolio(rebalance_portfolio)

Tests Passed


In [32]:
# Rebalance every 5 rows of `returns`, each time looking back over the previous 250 rows.
chunk_size = 250
shift_size = 5
all_rebalance_weights = rebalance_portfolio(returns, index_weights, shift_size, chunk_size)

## Portfolio Turnover

In [33]:
def get_portfolio_turnover(all_rebalance_weights, shift_size, rebalance_count, n_trading_days_in_year=252):
    """
    Calculate portfolio turnover.

    Sums the absolute weight changes between every pair of consecutive
    rebalances, divides by `rebalance_count` and scales by the number of
    rebalance events per year (`n_trading_days_in_year / shift_size`).
    With fewer than two weight sets there is nothing to compare and the
    turnover is 0.

    Parameters
    ----------
    all_rebalance_weights : list of Ndarrays
        The ETF weights for each point they are rebalanced
    shift_size : int
        The number of days between each rebalance
    rebalance_count : int
        Number of times the portfolio was rebalanced
    n_trading_days_in_year: int
        Number of trading days in a year

    Returns
    -------
    portfolio_turnover  : float
        The portfolio turnover

    Raises
    ------
    AssertionError
        If `shift_size` or `rebalance_count` is not positive
    """
    assert shift_size > 0
    assert rebalance_count > 0

    # Pair every weight set with its successor and add up the absolute
    # changes; np.sum reduces each array natively instead of iterating over
    # its elements in Python.
    consecutive_weights = zip(all_rebalance_weights[:-1], all_rebalance_weights[1:])
    sum_total_turnover = sum(
        np.sum(np.abs(current_weights - previous_weights))
        for previous_weights, current_weights in consecutive_weights
    )

    # number of rebalance events per year
    rebalance_events_per_year = n_trading_days_in_year / shift_size

    # annualized turnover
    return sum_total_turnover * rebalance_events_per_year / rebalance_count

In [34]:
# Run the project-supplied check against get_portfolio_turnover.
project_tests.test_get_portfolio_turnover(get_portfolio_turnover)

Tests Passed


In [35]:
# rebalance_count is passed as len(all_rebalance_weights) - 1, which equals the number of
# consecutive weight pairs that get_portfolio_turnover compares.
print(get_portfolio_turnover(all_rebalance_weights, shift_size, len(all_rebalance_weights) - 1))

15.155411754607377
