# 历史方差

下面我们看看如何不通过因子模型计算资产的协方差矩阵。

In [None]:
# Install the notebook's dependencies using IPython shell escapes (`!`).
import sys
# `sys.executable` is the interpreter running this kernel, so pip installs
# into the same environment the notebook imports from.
!{sys.executable} -m pip install --upgrade pip 
!{sys.executable} -m pip install --upgrade wheel setuptools build 
# Project dependencies are read from requirements.txt in the working directory.
!{sys.executable} -m pip install -r requirements.txt

In [None]:
import numpy as np
import pandas as pd
import time
import os
import quiz_helper
import matplotlib.pyplot as plt

In [None]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Plot defaults for the rest of the notebook: 'ggplot' style sheet and a
# 14 x 8 default figure size.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (14, 8)

### 数据包

In [None]:
import os
import quiz_helper
from zipline.data import bundles

In [None]:
# Point zipline at the EOD data directory, located two levels above the
# current working directory.
zipline_root = os.path.join(os.getcwd(), '..', '..', 'data', 'module_4_quizzes_eod')
os.environ['ZIPLINE_ROOT'] = zipline_root

# Create a csvdir ingest function for the 'daily' timeframe and register it
# under the quiz's bundle name so it can be loaded later.
ingest_func = bundles.csvdir.csvdir_equities(
    ['daily'],
    quiz_helper.EOD_BUNDLE_NAME,
)
bundles.register(quiz_helper.EOD_BUNDLE_NAME, ingest_func)
print('Data Registered')

### 构建管道引擎

In [None]:
from zipline.pipeline import Pipeline
from zipline.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar

# Universe filter: top 500 assets ranked by average dollar volume over a
# 120-bar window.
universe = AverageDollarVolume(window_length=120).top(500) 
# Trading calendar looked up by the name 'NYSE'.
trading_calendar = get_calendar('NYSE') 
# Load the bundle registered under the quiz bundle name and build the
# pipeline engine from it via the quiz helper.
bundle_data = bundles.load(quiz_helper.EOD_BUNDLE_NAME)
engine = quiz_helper.build_pipeline_engine(bundle_data, trading_calendar)

### 查看数据
构建管道引擎后，我们获取时段结束时股票池中的股票。我们将使用这些 ticker 生成风险模型的收益率数据。

In [None]:
# Date on which the universe is sampled (timezone-aware, UTC).
universe_end_date = pd.Timestamp('2016-01-05', tz='UTC')

# Run the universe screen for that single day (start == end) and pull the
# values of index level 1 out as a plain Python list.
# NOTE(review): level 1 is presumably the asset level of zipline's
# (date, asset) pipeline index -- confirm against the zipline docs.
universe_tickers = (
    engine
    .run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date,
    )
    .index
    .get_level_values(1)
    .values
    .tolist()
)

universe_tickers

In [None]:
# Number of entries returned by the universe screen.
len(universe_tickers)

In [None]:
from zipline.data.data_portal import DataPortal

# Build a DataPortal on top of the loaded bundle. Only the daily bar reader
# is supplied; the minute reader is explicitly None.
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    # First trading day is taken from the bundle's daily bar reader.
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_minute_reader=None,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader)

## 获取股价数据的辅助函数

In [None]:
from quiz_helper import get_pricing

## 将股价数据放入 dataframe 中

In [None]:
# Fetch pricing for the universe over the five years ending at
# universe_end_date, then convert prices into returns.
# NOTE(review): assumes get_pricing returns a pandas DataFrame of prices
# with one row per date -- confirm in quiz_helper.
returns_df = (
    get_pricing(
        data_portal,
        trading_calendar,
        universe_tickers,
        universe_end_date - pd.DateOffset(years=5),
        universe_end_date,
    )
    .pct_change()[1:]  # the first row has no previous row to compare to; drop it
    .fillna(0)         # remaining missing returns are replaced with 0
)

returns_df

## 小测验 1

请参阅 [numpy.cov 文档](https://numpy.org/doc/stable/reference/generated/numpy.cov.html)，然后思考一下：下面对 numpy.cov 的用法有什么问题？

In [None]:
# What's wrong with this?
# (Quiz cell: the call below is intentionally left as-is for the exercise.)
# 252 is presumably the number of trading days per year, used to scale the
# covariance of daily returns to an annual figure.
annualization_factor = 252
covariance_assets_not_correct = annualization_factor*np.cov(returns_df)

In [None]:
## TODO: Check the shape of the covariance matrix


## 在此输入你的答案：



## 小测验 2
如何调整输入，从而获得期望的资产协方差矩阵？

In [None]:
# TODO: calculate the covariance matrix of assets
# NOTE: the two assignments below are exercise placeholders. Replace each
# `# ...` with an expression before running; as written, the cell is not
# valid Python and raises a SyntaxError.
annualization_factor = # ...
covariance_assets = # ...

In [None]:
# Inspect the dimensions of covariance_assets.
covariance_assets.shape

## 在此输入你的答案：

## 可视化协方差矩阵

In [None]:
import seaborn as sns

In [None]:
# View a heatmap of the covariance matrix.
# The trailing semicolon keeps the notebook from echoing the call's return value.
sns.heatmap(covariance_assets,cmap='Paired');
# If the colors aren't distinctive, try one of these color schemes instead:
# cmap = 'tab10'
# cmap = 'Accent'

## 小测验 3
观察热图后，你认为协方差更有可能为正还是为负？协方差更有可能高于 0.10 还是低于 0.10？

## 在此输入你的答案：


## 趣味问题
你知道 [seaborn 可视化软件包](https://seaborn.pydata.org/index.html)是以什么命名的吗？

## 趣味问题答案 
请直接查看解答 notebook

## 解答
[这是解答 notebook](historical_variance_solution.ipynb)