# Feature Engineering and Labeling

We'll use the price-volume data and generate features that we can feed into a model.  We'll use this notebook for all the coding exercises of this lesson, so please open this notebook in a separate tab of your browser.  

Please run the following code up to and including "Make Factors."  Then continue on with the lesson.

In [1]:
# import sys
#!{sys.executable} -m pip install --quiet -r requirements.txt

In [1]:
import numpy as np
import pandas as pd
import time

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (14, 8)

#### Registering data

In [3]:
import os
import project_helper
from zipline.data import bundles

BUNDLE_NAME = 'quandl'
# os.environ['ZIPLINE_ROOT'] = os.path.join(os.getcwd(), 'data', 'module_4_quizzes_eod')

#ingest_func = bundles.csvdir.csvdir_equities(['daily'], project_helper.EOD_BUNDLE_NAME)
#ingest_func = bundles.csvdir.csvdir_equities(['daily'], BUNDLE_NAME)
#bundles.register(BUNDLE_NAME, ingest_func)

#print('Data Registered')

In [4]:
from zipline.pipeline import Pipeline
from zipline.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar


universe = AverageDollarVolume(window_length=120).top(500) 
trading_calendar = get_calendar('NYSE') 
bundle_data = bundles.load(BUNDLE_NAME)
engine = project_helper.build_pipeline_engine(bundle_data, trading_calendar)

In [8]:
for asset in bundle_data.asset_finder.retrieve_all(bundle_data.asset_finder.equities_sids):
    print(asset)

Equity(0 [A])
Equity(1 [AA])
Equity(2 [AAL])
Equity(3 [AAMC])
Equity(4 [AAN])
Equity(5 [AAOI])
Equity(6 [AAON])
Equity(7 [AAP])
Equity(8 [AAPL])
Equity(9 [AAT])
Equity(10 [AAWW])
Equity(11 [ABAX])
Equity(12 [ABBV])
Equity(13 [ABC])
Equity(14 [ABCB])
Equity(15 [ABCO])
Equity(16 [ABFS])
Equity(17 [ABG])
Equity(18 [ABM])
Equity(19 [ABMD])
Equity(20 [ABT])
Equity(21 [ACAD])
Equity(22 [ACAS])
Equity(23 [ACAT])
Equity(24 [ACC])
Equity(25 [ACCL])
Equity(26 [ACCO])
Equity(27 [ACE])
Equity(28 [ACET])
Equity(29 [ACFN])
Equity(30 [ACGL])
Equity(31 [ACHC])
Equity(32 [ACHN])
Equity(33 [ACI])
Equity(34 [ACIW])
Equity(35 [ACLS])
Equity(36 [ACM])
Equity(37 [ACN])
Equity(38 [ACO])
Equity(39 [ACOR])
Equity(40 [ACRE])
Equity(41 [ACRX])
Equity(42 [ACTG])
Equity(43 [ACW])
Equity(44 [ACXM])
Equity(45 [ADBE])
Equity(46 [ADC])
Equity(47 [ADES])
Equity(48 [ADI])
Equity(49 [ADM])
Equity(50 [ADMS])
Equity(51 [ADNC])
Equity(52 [ADP])
Equity(53 [ADS])
Equity(54 [ADSK])
Equity(55 [ADT])
Equity(56 [ADTN])
Equity(57 

Equity(1811 [MGAM])
Equity(1812 [MGEE])
Equity(1813 [MGI])
Equity(1814 [MGLN])
Equity(1815 [MGM])
Equity(1816 [MGNX])
Equity(1817 [MGRC])
Equity(1818 [MHFI])
Equity(1819 [MHGC])
Equity(1820 [MHK])
Equity(1821 [MHLD])
Equity(1822 [MHO])
Equity(1823 [MHR])
Equity(1824 [MIDD])
Equity(1825 [MIG])
Equity(1826 [MIL])
Equity(1827 [MILL])
Equity(1828 [MIND])
Equity(1829 [MINI])
Equity(1830 [MITK])
Equity(1831 [MITT])
Equity(1832 [MJN])
Equity(1833 [MKC])
Equity(1834 [MKL])
Equity(1835 [MKSI])
Equity(1836 [MKTO])
Equity(1837 [MKTX])
Equity(1838 [MLAB])
Equity(1839 [MLHR])
Equity(1840 [MLI])
Equity(1841 [MLM])
Equity(1842 [MLNK])
Equity(1843 [MLR])
Equity(1844 [MM])
Equity(1845 [MMC])
Equity(1846 [MMI])
Equity(1847 [MMM])
Equity(1848 [MMS])
Equity(1849 [MMSI])
Equity(1850 [MN])
Equity(1851 [MNI])
Equity(1852 [MNK])
Equity(1853 [MNKD])
Equity(1854 [MNR])
Equity(1855 [MNRO])
Equity(1856 [MNST])
Equity(1857 [MNTA])
Equity(1858 [MNTX])
Equity(1859 [MO])
Equity(1860 [MOD])
Equity(1861 [MODN])
Equity(

In [9]:
bundle_data.asset_finder.

<zipline.assets.assets.AssetFinder at 0x7f1c2822a668>

In [12]:
universe_start_date = pd.Timestamp('2017-01-01', tz='UTC')
universe_end_date = pd.Timestamp('2017-12-31', tz='UTC')

universe_tickers = engine\
    .run_pipeline(
        Pipeline(screen=universe),
        universe_start_date,
        universe_end_date)\
    .index.get_level_values(1)\
    .values.tolist()

ValueError: The first date of the lifetimes matrix does not match the start date of the pipeline. Did you forget to align the start_date to the trading calendar?

In [None]:
from zipline.data.data_portal import DataPortal

data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_minute_reader=None,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader)

def get_pricing(data_portal, trading_calendar, assets, start_date, end_date, field='close'):
    end_dt = pd.Timestamp(end_date.strftime('%Y-%m-%d'), tz='UTC', offset='C')
    start_dt = pd.Timestamp(start_date.strftime('%Y-%m-%d'), tz='UTC', offset='C')

    end_loc = trading_calendar.closes.index.get_loc(end_dt)
    start_loc = trading_calendar.closes.index.get_loc(start_dt)

    return data_portal.get_history_window(
        assets=assets,
        end_dt=end_dt,
        bar_count=end_loc - start_loc,
        frequency='1d',
        field=field,
        data_frequency='daily')

# Make Factors

- We'll use the same factors we have been using in the lessons about alpha factor research.  Factors can be features that we feed into the model.


In [None]:
from zipline.pipeline.factors import CustomFactor, DailyReturns, Returns, SimpleMovingAverage
from zipline.pipeline.data import USEquityPricing

factor_start_date = universe_end_date - pd.DateOffset(years=3, days=2)
sector = project_helper.Sector()

def momentum_1yr(window_length, universe, sector):
    return Returns(window_length=window_length, mask=universe) \
        .demean(groupby=sector) \
        .rank() \
        .zscore()

def mean_reversion_5day_sector_neutral(window_length, universe, sector):
    return -Returns(window_length=window_length, mask=universe) \
        .demean(groupby=sector) \
        .rank() \
        .zscore()

def mean_reversion_5day_sector_neutral_smoothed(window_length, universe, sector):
    unsmoothed_factor = mean_reversion_5day_sector_neutral(window_length, universe, sector)
    return SimpleMovingAverage(inputs=[unsmoothed_factor], window_length=window_length) \
        .rank() \
        .zscore()

class CTO(Returns):
    """
    Computes the overnight return, per hypothesis from
    https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2554010
    """
    inputs = [USEquityPricing.open, USEquityPricing.close]
    
    def compute(self, today, assets, out, opens, closes):
        """
        The opens and closes matrix is 2 rows x N assets, with the most recent at the bottom.
        As such, opens[-1] is the most recent open, and closes[0] is the earlier close
        """
        out[:] = (opens[-1] - closes[0]) / closes[0]

        
class TrailingOvernightReturns(Returns):
    """
    Sum of trailing 1m O/N returns
    """
    window_safe = True
    
    def compute(self, today, asset_ids, out, cto):
        out[:] = np.nansum(cto, axis=0)

        
def overnight_sentiment(cto_window_length, trail_overnight_returns_window_length, universe):
    cto_out = CTO(mask=universe, window_length=cto_window_length)
    return TrailingOvernightReturns(inputs=[cto_out], window_length=trail_overnight_returns_window_length) \
        .rank() \
        .zscore()

def overnight_sentiment_smoothed(cto_window_length, trail_overnight_returns_window_length, universe):
    unsmoothed_factor = overnight_sentiment(cto_window_length, trail_overnight_returns_window_length, universe)
    return SimpleMovingAverage(inputs=[unsmoothed_factor], window_length=trail_overnight_returns_window_length) \
        .rank() \
        .zscore()

universe = AverageDollarVolume(window_length=120).top(500)
sector = project_helper.Sector()

pipeline = Pipeline(screen=universe)
pipeline.add(
    momentum_1yr(252, universe, sector),
    'Momentum_1YR')
pipeline.add(
    mean_reversion_5day_sector_neutral_smoothed(20, universe, sector),
    'Mean_Reversion_Sector_Neutral_Smoothed')
pipeline.add(
    overnight_sentiment_smoothed(2, 10, universe),
    'Overnight_Sentiment_Smoothed')

all_factors = engine.run_pipeline(pipeline, factor_start_date, universe_end_date)

all_factors.head()


#### Stop here and continue with the lesson section titled "Features".

# Universal Quant Features

* stock volatility: zipline has a custom factor called AnnualizedVolatility.  The [source code is here](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/factors/basic.py) and also pasted below:

```
class AnnualizedVolatility(CustomFactor):
    """
    Volatility. The degree of variation of a series over time as measured by
    the standard deviation of daily returns.
    https://en.wikipedia.org/wiki/Volatility_(finance)
    **Default Inputs:** :data:`zipline.pipeline.factors.Returns(window_length=2)`  # noqa
    Parameters
    ----------
    annualization_factor : float, optional
        The number of time units per year. Defaults is 252, the number of NYSE
        trading days in a normal year.
    """
    inputs = [Returns(window_length=2)]
    params = {'annualization_factor': 252.0}
    window_length = 252

    def compute(self, today, assets, out, returns, annualization_factor):
        out[:] = nanstd(returns, axis=0) * (annualization_factor ** .5)
```

In [None]:
from zipline.pipeline.factors import AnnualizedVolatility
AnnualizedVolatility()

#### Quiz
We can see that the returns `window_length` is 2, because we're dealing with daily returns, which are calculated as the percent change from one day to the following day (2 days).  The `AnnualizedVolatility` `window_length` is 252 by default, because it's the one-year volatility.  Try to adjust the call to the constructor of `AnnualizedVolatility` so that this represents one-month volatility (still annualized, but calculated over a time window of 20 trading days)

#### Answer

In [None]:
# TODO


#### Quiz: Create one-month and six-month annualized volatility.
Create `AnnualizedVolatility` objects for 20 day and 120 day (one month and six-month) time windows.  Remember to set the `mask` parameter to the `universe` object created earlier (this filters the stocks to match the list in the `universe`).  Convert these to ranks, and then convert the ranks to zscores.

In [None]:
# TODO
volatility_20d # ...
volatility_120d # ...

#### Add to the pipeline

In [None]:
pipeline.add(volatility_20d, 'volatility_20d')
pipeline.add(volatility_120d, 'volatility_120d')

#### Quiz: Average Dollar Volume feature
We've been using [AverageDollarVolume](http://www.zipline.io/appendix.html#zipline.pipeline.factors.AverageDollarVolume) to choose the stock universe based on stocks that have the highest dollar volume.  We can also use it as a feature that is input into a predictive model.  
Use 20 day and 120 day `window_length` for average dollar volume.  Then rank it and convert to a zscore.

In [None]:
"""already imported earlier, but shown here for reference"""
#from zipline.pipeline.factors import AverageDollarVolume 

# TODO: 20-day and 120 day average dollar volume
adv_20d = # ...
adv_120d = # ...

#### Add average dollar volume features to pipeline

In [None]:
pipeline.add(adv_20d, 'adv_20d')
pipeline.add(adv_120d, 'adv_120d')

### Market Regime Features
We are going to try to capture market-wide regimes:  Market-wide means we'll look at the aggregate movement of the universe of stocks.

High and low dispersion: dispersion is looking at the dispersion (standard deviation) of the cross section of all stocks at each period of time (on each day).  We'll inherit from [CustomFactor](http://www.zipline.io/appendix.html?highlight=customfactor#zipline.pipeline.CustomFactor).  We'll feed in [DailyReturns](http://www.zipline.io/appendix.html?highlight=dailyreturns#zipline.pipeline.factors.DailyReturns) as the `inputs`.  

#### Quiz
If the `inputs` to our market dispersion factor are the daily returns, and we plan to calculate the market dispersion on each day, what should be the `window_length` of the market dispersion class?

#### Answer


#### Quiz: market dispersion feature
Create a class that inherits from `CustomFactor`.  Override the `compute` function to calculate the population standard deviation of all the stocks over a specified window of time.

**mean returns**

$\mu = \sum_{t=0}^{T}\sum_{i=1}^{N}r_{i,t}$

**Market Dispersion**

$\sqrt{\frac{1}{T} \sum_{t=0}^{T}  \frac{1}{N}\sum_{i=1}^{N}(r_{i,t} - \mu)^2}$

Use [numpy.nanmean](https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.nanmean.html) to calculate the average market return $\mu$ and to calculate the average of the squared differences.


In [None]:
class MarketDispersion(CustomFactor):
    inputs = [DailyReturns()]
    window_length = # ...
    window_safe = True

    def compute(self, today, assets, out, returns):
        
        # TODO: calculate average returns
        mean_returns = # ...
        
        #TODO: calculate standard deviation of returns
        out[:] = # ...

#### Quiz

Create the MarketDispersion object.  Apply two separate smoothing operations using [SimpleMovingAverage](https://www.zipline.io/appendix.html?highlight=simplemovingaverage#zipline.pipeline.factors.SimpleMovingAverage).  One with a one-month window, and another with a 6-month window.  Add both to the pipeline.

In [None]:
# TODO: create MarketDispersion object
dispersion = # ...

# TODO: apply one-month simple moving average
dispersion_20d = # ...

# TODO: apply 6-month simple moving average
dispersion_120d = # ...

# Add to pipeline
pipeline.add(dispersion_20d, 'dispersion_20d')
pipeline.add(dispersion_120d, 'dispersion_120d')

#### Market volatility feature
* High and low volatility  
We'll also build a class for market volatility, which inherits from [CustomFactor](http://www.zipline.io/appendix.html?highlight=customfactor#zipline.pipeline.CustomFactor).  This will measure the standard deviation of the returns of the "market".  In this case, we're approximating the "market" as the equal weighted average return of all the stocks in the stock universe.

##### Market return
$r_{m,t} = \frac{1}{N}\sum_{i=1}^{N}r_{i,t}$ for each day $t$ in `window_length`.  

##### Average market return
Also calculate the average market return over the `window_length` $T$ of days:  
$\mu_{m} = \frac{1}{T}\sum_{t=1}^{T} r_{m,t}$

#### Standard deviation of market return
Then calculate the standard deviation of the market return  
$\sigma_{m,t} = \sqrt{252 \times \frac{1}{N} \sum_{t=1}^{T}(r_{m,t} - \mu_{m})^2 } $ 

##### Hints
* Please use [numpy.nanmean](https://docs.scipy.org/doc/numpy-1.15.0/reference/generated/numpy.nanmean.html) so that it ignores null values.
* When using `numpy.nanmean`:  
axis=0 will calculate one average for every column (think of it like creating a new row in a spreadsheet)  
axis=1 will calculate one average for every row (think of it like creating a new column in a spreadsheet)  
* The returns data in `compute` has one day in each row, and one stock in each column.
* Notice that we defined a dictionary `params` that has a key `annualization_factor`.  This `annualization_factor` can be used as a regular variable, and you'll be using it in the `compute` function.  This is also done in the definition of AnnualizedVolatility (as seen earlier in the notebook).

In [None]:
class MarketVolatility(CustomFactor):
    inputs = [DailyReturns()]
    window_length = 1  # We'll want to set this in the constructor when creating the object.
    window_safe = True
    params = {'annualization_factor': 252.0}
    
    def compute(self, today, assets, out, returns, annualization_factor):
        
        # TODO
        """ 
        For each row (each row represents one day of returns), 
        calculate the average of the cross-section of stock returns
        So that market_returns has one value for each day in the window_length
        So choose the appropriate axis (please see hints above)
        """
        mkt_returns = # ...
        
        # TODO
        # Calculate the mean of market returns
        mkt_returns_mu = # ...
        
        # TODO
        # Calculate the standard deviation of the market returns, then annualize them.
        out[:] = # ...

In [None]:
# TODO: create market volatility features using one month and six-month windows
market_vol_20d = # ...
market_vol_120d = # ...

In [None]:
# add market volatility features to pipeline
pipeline.add(market_vol_20d, 'market_vol_20d')
pipeline.add(market_vol_120d, 'market_vol_120d')

#### Stop here and continue with the lesson section "Sector and Industry"

# Sector and Industry

#### Add sector code

Note that after we run the pipeline and get the data in a dataframe, we can work on enhancing the sector code feature with one-hot encoding.

In [None]:
pipeline.add(sector, 'sector_code')

#### Run pipeline to calculate features


In [None]:
all_factors = engine.run_pipeline(pipeline, factor_start_date, universe_end_date)
all_factors.head()

#### One-hot encode sector

Let's get all the unique sector codes.  Then we'll use the `==` comparison operator to check when the sector code equals a particular value.  This returns a series of True/False values.  For some functions that we'll use in a later lesson, it's easier to work with numbers instead of booleans.  We can convert the booleans to type int.  So False becomes 0, and 1 becomes True.

In [None]:
sector_code_l = set(all_factors['sector_code'])

In [None]:
sector_0 = all_factors['sector_code'] == 0
sector_0[0:5]

In [None]:
sector_0_numeric = sector_0.astype(int)
sector_0_numeric[0:5]

#### Quiz: One-hot encode sector
Choose column names that look like "sector_code_0", "sector_code_1" etc.  Store the values as 1 when the row matches the sector code of the column, 0 otherwise.

In [None]:
# TODO: one-hot encode sector and store into dataframe
for s in sector_code_l:
    # ...

In [None]:
all_factors.head()

#### Stop here and continue with the lesson section "Date Parts".

# Date Parts
* We will make features that might capture trader/investor behavior due to calendar anomalies.
* We can get the dates from the index of the dataframe that is returned from running the pipeline.

#### Accessing index of dates
* Note that we can access the date index. using `Dataframe.index.get_level_values(0)`, since the date is stored as index level 0, and the asset name is stored in index level 1.  This is of type [DateTimeIndex](https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.DatetimeIndex.html).

In [None]:
all_factors.index.get_level_values(0)

#### [DateTimeIndex attributes](https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.DatetimeIndex.html)

* The `month` attribute is a numpy array with a 1 for January, 2 for February ... 12 for December etc.  
* We can use a comparison operator such as `==` to return True or False.

* It's usually easier to have all data of a similar type (numeric), so we recommend converting booleans to integers.  
The numpy ndarray has a function `.astype()` that can cast the data to a specified type.  
For instance, `astype(int)` converts False to 0 and True to 1.


In [None]:
# Example
print(all_factors.index.get_level_values(0).month)
print(all_factors.index.get_level_values(0).month == 1)
print( (all_factors.index.get_level_values(0).month == 1).astype(int) )

## Quiz
* Create a numpy array that has 1 when the month is January, and 0 otherwise.  Store it as a column in the all_factors dataframe.
* Add another similar column to indicate when the month is December

In [None]:
# TODO: create a feature that indicate whether it's January
all_factors['is_January'] = # ...

# TODO: create a feature to indicate whether it's December
all_factors['is_December'] = # ...

## Weekday, quarter
* add columns to the all_factors dataframe that specify the weekday, quarter and year.
* As you can see in the [documentation for DateTimeIndex](https://pandas.pydata.org/pandas-docs/version/0.23.4/generated/pandas.DatetimeIndex.html), `weekday`, `quarter`, and `year` are attributes that you can use here.

In [None]:
# we can see that 0 is for Monday, 4 is for Friday
set(all_factors.index.get_level_values(0).weekday)

In [None]:
# Q1, Q2, Q3 and Q4 are represented by integers too
set(all_factors.index.get_level_values(0).quarter)

#### Quiz
Add features for weekday, quarter and year.

In [None]:
# TODO
all_factors['weekday'] = # ...
all_factors['quarter'] = # ...
all_factors['year'] = # ...

## Start and end-of features

* The start and end of the week, month, and quarter may have structural differences in trading activity.
* [Pandas.date_range](https://pandas.pydata.org/pandas-docs/stable/generated/pandas.date_range.html) takes the start_date, end_date, and frequency.
* The [frequency](http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases) for end of month is `BM`.

In [None]:
# Example
tmp = pd.date_range(start=factor_start_date, end=universe_end_date, freq='BM')
tmp

#### Example

Create a DatetimeIndex that stores the dates which are the last business day of each month.  
Use the `.isin` function, passing in these last days of the month, to create a series of booleans.  
Convert the booleans to integers.  

In [None]:
last_day_of_month = pd.date_range(start=factor_start_date, end=universe_end_date, freq='BM')
last_day_of_month

In [None]:
tmp_month_end = all_factors.index.get_level_values(0).isin(last_day_of_month)
tmp_month_end

In [None]:
tmp_month_end_int = tmp_month_end.astype(int)
tmp_month_end_int

In [None]:
all_factors['month_end'] = tmp_month_end_int

#### Quiz: Start of Month
Create a feature that indicates the first business day of each month.

**Hint:** The frequency for first business day of the month uses the code `BMS`.

In [None]:
# TODO: month_start feature
first_day_of_month = # pd.date_range()
all_factors['month_start'] = # ...

#### Quiz: Quarter end and quarter start

Create features for the last business day of each quarter, and first business day of each quarter.  
**Hint**: use `freq=BQ` for business day end of quarter, and `freq=BQS` for business day start of quarter.

In [None]:
# TODO: qtr_end feature
last_day_qtr = # ...
all_factors['qtr_end'] = # ...

In [None]:
# TODO: qtr_start feature
first_day_qtr = # ...
all_factors['qtr_start'] = # ...

## View all features

In [None]:
list(all_factors.columns)

Note that we can skip the sector_code feature, since we one-hot encoded it into separate features.

In [None]:
features = ['Mean_Reversion_Sector_Neutral_Smoothed',
 'Momentum_1YR',
 'Overnight_Sentiment_Smoothed',
 'adv_120d',
 'adv_20d',
 'dispersion_120d',
 'dispersion_20d',
 'market_vol_120d',
 'market_vol_20d',
 #'sector_code', # removed sector_code
 'volatility_120d',
 'volatility_20d',
 'sector_code_0',
 'sector_code_1',
 'sector_code_2',
 'sector_code_3',
 'sector_code_4',
 'sector_code_5',
 'sector_code_6',
 'sector_code_7',
 'sector_code_8',
 'sector_code_9',
 'sector_code_10',
 'sector_code_-1',
 'is_January',
 'is_December',
 'weekday',
 'quarter',
 'year',
 'month_start',
 'qtr_end',
 'qtr_start']

#### Stop here and continue to the lesson section "Targets"

# Targets (Labels)

- We are going to try to predict the go forward 1-week return
- Very important! Quantize the target. Why do we do this?
  - Makes it market neutral return
  - Normalizes changing volatility and dispersion over time
  - Make the target robust to changes in market regimes
- The factor we create is the trailing 5-day return.



In [None]:
# we'll create a separate pipeline to handle the target
pipeline_target = Pipeline(screen=universe)

#### Example

We'll convert weekly returns into 2-quantiles.

In [None]:
return_5d_2q = Returns(window_length=5, mask=universe).quantiles(2)
return_5d_2q

In [None]:
pipeline_target.add(return_5d_2q, 'return_5d_2q')

#### Quiz
Create another weekly return target that's converted to 5-quantiles.

In [None]:
# TODO: create a target using 5-quantiles
return_5d_5q = # ...

# TODO: add the feature to the pipeline
# ...

# Let's run the pipeline to get the dataframe
targets_df = engine.run_pipeline(pipeline_target, factor_start_date, universe_end_date)
targets_df.head()

In [None]:
targets_df.columns

## Solution

[solution notebook](feature_engineering_solution.ipynb)