# Factor Model of Portfolio Return


In [None]:
import sys
# Install the packages listed in requirements.txt with the same interpreter
# that runs this kernel (sys.executable), so they land in the kernel's environment.
!{sys.executable} -m pip install -r requirements.txt

In [None]:
import cvxpy as cvx
import numpy as np
import pandas as pd
import time
import os
import quiz_helper
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline
# Apply the ggplot style first, then set the default figure size for the notebook.
plt.style.use('ggplot')
plt.rc('figure', figsize=(14, 8))

### data bundle

In [None]:
import os
import quiz_helper
from zipline.data import bundles

In [None]:
# Point zipline at ../../data/project_4_eod, resolved from the current working directory.
os.environ['ZIPLINE_ROOT'] = os.path.join(
    os.getcwd(), '..', '..', 'data', 'project_4_eod'
)

# Build a csvdir ingest function for the 'daily' data and register it under
# the bundle name supplied by quiz_helper.
ingest_func = bundles.csvdir.csvdir_equities(
    ['daily'],
    quiz_helper.EOD_BUNDLE_NAME,
)
bundles.register(quiz_helper.EOD_BUNDLE_NAME, ingest_func)
print('Data Registered')

### Build pipeline engine

In [None]:
from zipline.pipeline import Pipeline
from zipline.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar

# Load the registered bundle and build a pipeline engine on the NYSE calendar.
trading_calendar = get_calendar('NYSE')
bundle_data = bundles.load(quiz_helper.EOD_BUNDLE_NAME)
engine = quiz_helper.build_pipeline_engine(bundle_data, trading_calendar)

# Universe filter: the top 500 assets ranked by average dollar volume
# computed over a window_length of 120.
universe = AverageDollarVolume(window_length=120).top(500)

### View Data
With the pipeline engine built, let's get the stocks that are in our universe at the end of the period. We'll use these tickers to generate the returns data for our risk model.

In [None]:
universe_end_date = pd.Timestamp('2016-01-05', tz='UTC')

# Run a screen-only pipeline for the single end date, then take the second
# level of the resulting index (the assets) as a plain Python list.
universe_tickers = (
    engine
    .run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date,
    )
    .index
    .get_level_values(1)
    .values
    .tolist()
)

universe_tickers

In [None]:
# Number of assets that passed the universe screen on universe_end_date.
len(universe_tickers)

In [None]:
from zipline.data.data_portal import DataPortal

# Data portal over the bundle's daily bars; no minute reader is supplied.
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    equity_minute_reader=None,
    adjustment_reader=bundle_data.adjustment_reader,
)

## Get pricing data helper function

In [None]:
from quiz_helper import get_pricing

## get pricing data into a dataframe

In [None]:
# Prices for the universe over the five years ending at universe_end_date,
# converted into returns.
returns_df = (
    get_pricing(
        data_portal,
        trading_calendar,
        universe_tickers,
        universe_end_date - pd.DateOffset(years=5),
        universe_end_date,
    )
    .pct_change()  # prices -> period-over-period returns
    .iloc[1:]      # the first row has no previous price to compare against
    .fillna(0)     # any remaining missing values are treated as a zero return
)

returns_df

## Let's look at a two stock portfolio

Let's pretend we have a portfolio of two stocks.  We'll pick Apple and Microsoft in this example.

In [None]:
# NOTE(review): positions 3 and 312 are assumed to be the AAPL and MSFT columns
# for this particular universe -- confirm if the universe or its ordering changes.
aapl_col, msft_col = returns_df.columns[3], returns_df.columns[312]

# Pull out the two return series under descriptive names and place them
# side by side, one column per asset.
asset_return_1 = returns_df[aapl_col].rename('asset_return_aapl')
asset_return_2 = returns_df[msft_col].rename('asset_return_msft')
asset_return_df = pd.concat((asset_return_1, asset_return_2), axis=1)
asset_return_df.head(2)

## Factor returns
Let's make up a "factor" by taking an average of all stocks in our list.  You can think of this as an equal weighted index of the 490 stocks, kind of like a measure of the "market".  We'll also make another factor by calculating the median of all the stocks.  These are mainly intended to help us generate some data to work with.  We'll go into how some common risk factors are generated later in the lessons.

Also note that we're setting axis=1 so that we calculate a value for each time period (row) instead of one value for each column (assets).

In [None]:
# Two made-up factors, each computed across assets (axis=1) so there is one
# value per time period: the cross-sectional mean and the cross-sectional median.
factor_return_l = [returns_df.mean(axis=1), returns_df.median(axis=1)]
factor_return_1, factor_return_2 = factor_return_l

## Factor exposures

Factor exposures refer to how "exposed" a stock is to each factor.  We'll get into this more later.  For now, just think of this as one number for each stock, for each of the factors.

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
"""
For now, just assume that we're calculating a number for each 
stock, for each factor, which represents how "exposed" each stock is
to each factor. 
We'll discuss how factor exposure is calculated later in the lessons.
"""
def get_factor_exposures(factor_return_l, asset_return):
    """
    Regress one asset's returns on the factor returns and return the fitted
    coefficients.

    Parameters
    ----------
    factor_return_l : list
        One return series per factor; each is expected to cover the same
        time periods as `asset_return`.
    asset_return : object exposing `.values` (e.g. a pandas Series)
        The asset's returns, one value per time period.

    Returns
    -------
    The `coef_` attribute of the fitted LinearRegression: one coefficient
    per factor, in the order the factors appear in `factor_return_l`.
    """
    # Transpose so that rows are time periods and columns are factors.
    factor_matrix = np.array(factor_return_l).T
    asset_values = np.array(asset_return.values)
    return LinearRegression().fit(factor_matrix, asset_values).coef_

In [None]:
# Fit one regression per asset column: each entry of factor_exposure_l holds
# that asset's exposures to the factors in factor_return_l.
# Iterating the columns directly avoids the index loop and does not leave a
# stray loop counter in the notebook namespace.
factor_exposure_l = [
    get_factor_exposures(factor_return_l, asset_return_df[column])
    for column in asset_return_df.columns
]

# Stack into a 2-D array: one row per asset, one column per factor.
factor_exposure_a = np.array(factor_exposure_l)

In [None]:
# Each row of factor_exposure_a holds one asset's exposures (one value per factor).
print(f"factor_exposures for asset 1 {factor_exposure_a[0]}")
print(f"factor_exposures for asset 2 {factor_exposure_a[1]}")

## Quiz 1 Portfolio's factor exposures

Let's make up some portfolio weights for now; in a later lesson, we'll look at how portfolio optimization combines alpha factors and a risk factor model to choose asset weights.

$\beta_{p,k} = \sum_{i=1}^{N}(x_i \times \beta_{i,k})$

In [None]:
# Made-up portfolio weights: 60% in AAPL (asset 1) and 40% in MSFT (asset 2).
weight_1, weight_2 = 0.60, 0.40
# Same weights as an array, in asset order, for use in dot products.
weight_a = np.array((weight_1, weight_2))

For the sake of understanding, try saving each of the values
into a separate variable to perform the multiplications and additions.
Check that your calculations for portfolio factor exposure match
the output of this dot product:
```
weight_a.dot(factor_exposure_a)
```

In [None]:
# TODO: calculate portfolio's exposure to factor 1
# factor_exposure_a has one row per asset and one column per factor.
# Presumably the names follow factor_exposure_<asset>_<factor>, with "p"
# standing for the portfolio: weight each asset's exposure to factor 1 by
# the asset's portfolio weight and add the two products.
factor_exposure_1_1 = # ...
factor_exposure_2_1 = # ...
factor_exposure_p_1 = # ...
factor_exposure_p_1

In [None]:
# TODO: calculate portfolio's exposure to factor 2
# factor_exposure_a has one row per asset and one column per factor.
# Presumably the names follow factor_exposure_<asset>_<factor>, with "p"
# standing for the portfolio: weight each asset's exposure to factor 2 by
# the asset's portfolio weight and add the two products.
factor_exposure_1_2 = # ...
factor_exposure_2_2 = # ...
factor_exposure_p_2 = # ...
factor_exposure_p_2

## Quiz 2 Calculate portfolio return

For clarity, try storing the pieces into their own 
named variables and writing out the multiplications and addition.

You can check if your answer matches this output:
```
asset_return_df.values.dot(weight_a)
```

In [None]:
# TODO calculate the portfolio return
# For each time period, the portfolio return is the weighted sum of the two
# asset returns, using the portfolio weights.
asset_return_1 = # ...
asset_return_2 = # ...
portfolio_return = # ...

# Wrap the result in a Series that shares asset_return_df's index and give it a name.
portfolio_return = pd.Series(portfolio_return,index=asset_return_df.index).rename('portfolio_return')
portfolio_return.head(2)

## Quiz 3 Contribution of Factors

The sum of the products of factor exposure times factor return is the contribution of the factors.  It's also called the "common return." Calculate the common return of the portfolio, given the two factor exposures and the two factor returns.

In [None]:
# TODO: Calculate the contribution of the two factors to the return of the portfolio
# (portfolio factor exposure times factor return, summed over both factors).
common_return = # ...
# The result needs to be a pandas Series: it is renamed here and previewed below.
common_return = common_return.rename('common_return')

common_return.head(2)

## Quiz 4 Specific Return
The specific return is the part of the portfolio return that isn't explained by the factors.  So it's the actual return minus the common return.  
Calculate the specific return of the portfolio.

In [None]:
# TODO: calculate the specific return of the portfolio
# (the actual portfolio return minus the common return).
specific_return = # ...
# The result needs to be a pandas Series so that it can be renamed.
specific_return = specific_return.rename('specific_return')

## Visualize the common return and specific return


In [None]:
# Place the two return components side by side, one column each, and preview them.
return_components = pd.concat((common_return, specific_return), axis=1)
return_components.head(2)

In [None]:
# First chart: the common and specific components. Second chart: the portfolio
# return itself. The trailing semicolons keep the notebook from echoing the
# objects returned by plot().
return_components.plot(title="asset return = common return + specific return");
portfolio_return.to_frame().plot(color='purple');

## Solution
[Solution notebook](factor_model_portfolio_return_solution.ipynb)