In [142]:
%matplotlib inline
import yahoo_finance
from yahoo_finance import Share
import numpy as np
import pandas
import matplotlib.pyplot as plt
import datetime
import cvxopt as opt
from cvxopt import blas, solvers
solvers.options['show_progress'] = False

getTimeSeries( ticker, start_date, end_date) 
--------------
#### What it does:
getTimeSeries() takes in a date range and a ticker and returns a timeseries of adjusted closing prices.
#### Inputs
* *ticker*: a string indicating the security for which the time series will be generated.
* *start_date*: a string of the form 'YYYY-MM-DD' declaring the beginning of the historical window.
* *end_date*: a string of the form 'YYYY-MM-DD' declaring the end of the historical window

#### Returns
* *time_series*: a single column Pandas DataFrame containing the time series of adjusted close prices
    for the indicated ticker.


In [152]:
def getTimeSeries( ticker, start_date='2012-01-01', end_date='2012-02-01'):
    """Return the adjusted-close price history of one ticker.

    Parameters
    ----------
    ticker : str
        Symbol handed to ``Share``; also used as the column label.
    start_date, end_date : str
        Bounds of the historical window, formatted 'YYYY-MM-DD'; passed
        straight through to ``Share.get_historical``.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (column named after ``ticker``) of float
        'Adj_Close' values indexed by ``datetime.date``, in the reverse of
        the order the API returned them.
    """
    date_format = '%Y-%m-%d'
    # The API hands back a list of dicts, most recent date first; flip it so
    # the series runs oldest -> newest.
    history = Share(ticker).get_historical(start_date, end_date)
    rows = list(reversed(history))
    # Date strings become datetime.date objects for easier manipulation.
    index = [datetime.datetime.strptime(row['Date'], date_format).date() for row in rows]
    # Price strings become floats for numerical work.
    adjusted_closes = [float(row['Adj_Close']) for row in rows]
    return pandas.DataFrame(adjusted_closes, index=index, columns=[ticker])

getMultTimeSeries( tickers, start_date, end_date) 
--------------
#### What it does:
getMultTimeSeries() takes in a list of tickers and a specified date range and returns a Pandas DataFrame containing timeseries of adjusted closing prices. 
#### Inputs
* *tickers*: a list of strings indicating which tickers to include. Defaults to these 9 SPDR ETFs: 'XLY','XLP','XLE','XLF','XLV','XLI','XLB','XLK','XLU'.
* *start_date*: a string of the form 'YYYY-MM-DD' declaring the beginning of the historical window.
* *end_date*: a string of the form 'YYYY-MM-DD' declaring the end of the historical window

#### Returns
* *time_series_dataframe*: a dataframe of adjusted closing price timeseries over the specified date range for the specified group of tickers

In [144]:
def getMultTimeSeries( tickers = ['XLY','XLP','XLE','XLF','XLV','XLI','XLB','XLK','XLU'],
                     start_date = '2012-01-01', end_date = '2012-02-01'):
    """Return adjusted-close price histories for several tickers side by side.

    Parameters
    ----------
    tickers : list of str
        Symbols to fetch; each becomes one column of the result. The default
        list is only read, never mutated.
    start_date, end_date : str
        Bounds of the historical window, formatted 'YYYY-MM-DD'; forwarded
        unchanged to ``getTimeSeries``.

    Returns
    -------
    pandas.DataFrame
        One column per ticker (in the order given), joined on the index of
        the per-ticker frames. An empty ``tickers`` list yields an empty
        DataFrame.
    """
    # Fetch every single-column frame first and concatenate once, rather than
    # re-copying the accumulated frame on every loop iteration.
    frames = [getTimeSeries( ticker, start_date, end_date) for ticker in tickers]
    if not frames:
        # pandas.concat raises on an empty list; keep returning an empty frame.
        return pandas.DataFrame()
    # Concatenate on axis = 1 so each ticker is a column.
    return pandas.concat(frames, axis = 1)

markowitzReturns( returns)
-------------------------
#### What it does
markowitzReturns() takes in a Pandas DataFrame (or any container which can be converted to a numpy matrix) of returns and uses mean-variance portfolio theory to return an optimally weighted portfolio. It does so by minimizing $\omega^{T}\Sigma\omega - qR^{T}\omega$ (the Markowitz mean-variance framework) over the portfolio weights $\omega$, where $\Sigma$ is the covariance matrix of the securities, $R$ is the vector of expected (mean) returns of the securities and $q \geq 0$ is a risk-tolerance factor. The optimization is performed with the CVXOPT package using its solvers.qp() quadratic programming method, which minimizes $(1/2)x^{T}Px + q^{T}x$ subject to $Gx \preceq h$ and $Ax = b$ (note that this $q$ is CVXOPT's linear-term vector, which the code sets to the negated mean returns; it is not the risk-tolerance factor above). CVXOPT's BLAS methods are also used for the linear algebra computations. Inspiration for this process was found in Dr. Thomas Starke, David Edwards and Dr. Thomas Wiecki's Quantopian blog post located at: http://blog.quantopian.com/markowitz-portfolio-optimization-2/.

#### Inputs
* *returns*: a Pandas DataFrame (or other container which can be converted to a numpy matrix) with one row per security and one column per observation. NOTE: the dataframe produced by getMultTimeSeries has one column per security, so it must be transposed (returns.T) for meaningful results. 

#### Returns:
* *optimal_weights*: the weights of the optimal portfolio in array form.
* *returns*: the returns of all portfolios calculated across the efficient frontier.
* *risks*: list of risks of all portfolios calculated across the efficient frontier.

In [145]:
def markowitzReturns( returns):
    """Trace a long-only efficient frontier and pick a portfolio from it.

    Parameters
    ----------
    returns : array-like
        Anything ``np.asmatrix`` accepts. ``len(returns)`` is used as the
        number of portfolio weights and ``np.cov`` treats each row as one
        variable, so rows are expected to be securities and columns
        observations.

    Returns
    -------
    optimal_weights : numpy.ndarray
        Solution vector of the final quadratic program, as an array.
    returns : list of float
        Return (mean-return vector dotted with the weights) of each frontier
        portfolio.
    risks : list of float
        Square root of w' * Sigma * w for each frontier portfolio.
    """
    asset_count = len(returns)
    return_matrix = np.asmatrix(returns)
    # 50 log-spaced scalings of the covariance term, from 10**-1 to 10**3.9.
    frontier_points = 50
    risk_scalings = [10**(5.0 * t/frontier_points - 1.0) for t in range(frontier_points)]
    # Convert the inputs of the quadratic program to cvxopt matrices.
    covariance = opt.matrix(np.cov(return_matrix))
    mean_returns = opt.matrix(np.mean(return_matrix, axis=1))
    # Constraints: -I*w <= 0 (no short positions) and 1'*w = 1 (fully invested).
    neg_identity = -opt.matrix(np.eye(asset_count))
    zero_column = opt.matrix(0.0, (asset_count ,1))
    ones_row = opt.matrix(1.0, (1, asset_count))
    budget = opt.matrix(1.0)
    # Solve one quadratic program per scaling to trace out the frontier.
    frontier = []
    for scaling in risk_scalings:
        solution = solvers.qp(scaling*covariance, -mean_returns,
                              neg_identity, zero_column, ones_row, budget)
        frontier.append(solution['x'])
    # Return and risk of every frontier portfolio.
    frontier_returns = [blas.dot(mean_returns, weights) for weights in frontier]
    frontier_risks = [np.sqrt(blas.dot(weights, covariance*weights)) for weights in frontier]
    # Fit risk as a quadratic in return and derive the final scaling from the
    # constant and leading coefficients of the fit.
    coefficients = np.polyfit(frontier_returns, frontier_risks, 2)
    final_scaling = np.sqrt(coefficients[2]/coefficients[0])
    # Re-solve with that scaling to obtain the reported weights.
    final_solution = solvers.qp(opt.matrix(final_scaling * covariance), -mean_returns,
                                neg_identity, zero_column, ones_row, budget)
    optimal_weights = np.asarray(final_solution['x'])
    return optimal_weights, frontier_returns, frontier_risks

backtest()
----------

In [156]:
def backtest( tickers = ['XLY','XLP','XLE','XLF','XLV','XLI','XLB','XLK','XLU'],
             start_date = '2012-01-01', end_date = '2012-02-01', lookback = 10,
             justify = False):
    """Print rolling mean-variance weights over a historical price window.

    Prices are fetched with ``getMultTimeSeries``, converted to simple
    percentage returns, and for every step that has ``lookback`` prior return
    rows available the weights from ``markowitzReturns`` are printed.

    Parameters
    ----------
    tickers : list of str
        Symbols to include; forwarded to ``getMultTimeSeries``.
    start_date, end_date : str
        Bounds of the historical window, formatted 'YYYY-MM-DD'.
    lookback : int
        Number of return rows in each rolling window.
    justify : bool
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    None
        Results are only printed.
    """
    timeseries = getMultTimeSeries( tickers, start_date, end_date)
    returns = timeseries.pct_change().dropna()
    for i in range(len(returns)):
        print(i)
        # `>=` so the first full window (rows 0 .. lookback-1) is not skipped.
        if i >= lookback:
            # Positional slice of the `lookback` rows preceding row i.
            window = returns.iloc[(i-lookback):i]
            # markowitzReturns sizes the weight vector from the number of rows,
            # so securities must be rows: transpose the (dates x tickers) window.
            weights, frontier_returns, frontier_risks = markowitzReturns(window.T)
            print(weights)
    return