# In [None]:
#########################################################################################
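# We use a Gaussian Naive Bayes model to predict if a stock will have a high return
# or low return next Monday (num_holding_days = 5), using as input decision variables
# the asset's growth up to yesterday from 2, 3, 4, 5, 6, 7, 8, 9 and 10 days before.
# NOTE: the code below currently instantiates a DecisionTreeClassifier (the GaussianNB
# import and call are commented out), and the Predictor inputs are volume, several
# fundamentals and the 2-, 3- and 4-day returns rather than the list above.
# We use the code from the post [“How to Leverage the Pipeline to Conduct Machine Learning in the IDE”][2]
# by Jim Obreen to preprocess the data.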
#########################################################################################
 
##################################################
# Imports
##################################################

from __future__ import division
from collections import OrderedDict
import time

# Pipeline, Morningstar, and Quantopian Trading Functions
import quantopian.algorithm as algo
import quantopian.optimize as opt
from quantopian.pipeline.experimental import risk_loading_pipeline
from quantopian.algorithm import attach_pipeline, pipeline_output, order_optimal_portfolio
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.optimize import TargetWeights
from quantopian.pipeline.factors import Returns


# The basics
import pandas as pd
import numpy as np

# SKLearn :)
#from sklearn.naive_bayes import GaussianNB
#from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
##################################################
# Globals
##################################################

# --- Model / labelling parameters -------------------------------------------

# Number of rows the 'Return' input is shifted forward by when building the
# training labels in Predictor.compute (holding our stocks for five trading days).
num_holding_days = 5

# Passed as window_length to Predictor in make_pipeline.
days_for_fundamentals_analysis = 30

# Per-day cross-sectional percentile cut-offs used to label forward returns:
# at or above the upper cut-off -> +1, at or below the lower cut-off -> -1.
upper_percentile = 70
lower_percentile = 100 - upper_percentile

# --- Portfolio construction parameters --------------------------------------

# Total number of names targeted; make_pipeline takes half of this per side.
TOTAL_POSITIONS = 300

# Gross leverage cap handed to opt.MaxGrossExposure in rebalance.
MAX_GROSS_LEVERAGE = 1.0

# Not referenced by the code visible in this file; kept for compatibility.
MAX_GROSS_EXPOSURE = 1.0
MAX_POSITION_CONCENTRATION = 0.05

# Symmetric per-position weight caps (long and short) used by
# opt.PositionConcentration in rebalance.
MAX_LONG_POSITION_SIZE = MAX_SHORT_POSITION_SIZE = 2.0 / TOTAL_POSITIONS

##################################################
# Initialize
##################################################

def initialize(context):
    """Attach the pipelines and register the scheduled callbacks.

    Called once at the start of the algorithm.

    Parameters
    ----------
    context : AlgorithmContext
        Algorithm state object; not read or written by this function.
    """
    # Alpha pipeline; its output is fetched under this name in
    # before_trading_start.
    alpha_pipeline = make_pipeline()
    algo.attach_pipeline(alpha_pipeline, 'long_short_equity_template')

    # Risk-model factor loadings that the optimizer neutralizes in rebalance.
    # The 'risk_factors' name is the key used to fetch them in
    # before_trading_start.
    risk_pipeline = risk_loading_pipeline()
    algo.attach_pipeline(risk_pipeline, 'risk_factors')

    # Rebalance at the start of each week, 30 minutes after the market open.
    algo.schedule_function(
        func=rebalance,
        date_rule=algo.date_rules.week_start(),
        time_rule=algo.time_rules.market_open(hours=0, minutes=30),
        half_days=True,
    )

    # Record portfolio variables every day at the market close.
    algo.schedule_function(
        func=record_vars,
        date_rule=algo.date_rules.every_day(),
        time_rule=algo.time_rules.market_close(),
        half_days=True,
    )

##################################################
# Pipeline-Related Code
##################################################
        
            
class Momentum(CustomFactor):
    """Ratio of the newest close to the oldest close in the lookback window.

    No window_length is set on the class; the caller supplies it
    (make_pipeline uses 10).
    """

    # Default inputs: daily close prices.
    inputs = [USEquityPricing.close]

    def compute(self, today, assets, out, close):
        """Write newest_close / oldest_close for every asset into ``out``."""
        # Row 0 is the oldest day in the window, row -1 the most recent.
        oldest_close = close[0]
        newest_close = close[-1]
        out[:] = newest_close / oldest_close
        
class Average_True_Range(CustomFactor):
    """Mean true range over the lookback window.

    For each day after the first row of the window, the true range is the
    largest of: high - low, |high - previous close|, |low - previous close|.
    The factor value is the average of those ``window_length - 1`` values.
    """

    inputs = [USEquityPricing.close, USEquityPricing.high, USEquityPricing.low]
    window_length = 15

    def compute(self, today, assets, out, close, high, low):
        """Write the average true range of every asset into ``out``."""
        n_days = self.window_length

        # Days 1..n-1 are paired with the close of the day before (0..n-2).
        day_high = high[1:n_days]
        day_low = low[1:n_days]
        prev_close = close[0:(n_days - 1)]

        # Stack the three candidate ranges: shape (3, n_days - 1, n_assets).
        candidates = np.array([
            day_high - day_low,
            abs(day_high - prev_close),
            abs(day_low - prev_close),
        ])

        # Transposed shape is (n_assets, n_days - 1, 3); reduce over the
        # candidate axis to get each day's true range per asset.
        true_range = candidates.T.max(axis=2)

        # Average the daily true ranges per asset.
        out[:] = true_range.sum(axis=1) / (n_days - 1)

class Predictor(CustomFactor):
    """Machine-learning factor that classifies each asset's forward return.

    On every pipeline day a DecisionTreeClassifier is trained on the lookback
    window and then used to predict a label for the most recent day:

    * ``+1`` - forward return at or above the day's ``upper_percentile``
    * ``-1`` - forward return at or below the day's ``lower_percentile``
    * `` 0`` - everything in between

    The forward return of a row is the ``'Return'`` input observed
    ``num_holding_days`` rows later. ``'Return'`` is also kept as a feature.
    """

    # The factors passed to compute(). An ordered dict keeps the feature
    # columns in a fixed, clearly labelled order.
    factor_dict = OrderedDict()
    factor_dict['Volume'] = USEquityPricing.volume
    factor_dict['earning_yield'] = Fundamentals.earning_yield
    factor_dict['cash_return'] = Fundamentals.cash_return
    factor_dict['Asset_Growth_2d'] = Returns(window_length=2)
    factor_dict['Asset_Growth_3d'] = Returns(window_length=3)
    factor_dict['Asset_Growth_4d'] = Returns(window_length=4)
    factor_dict['style_score'] = Fundamentals.style_score
    factor_dict['value_score'] = Fundamentals.value_score
    factor_dict['Return'] = Returns(inputs=[USEquityPricing.open], window_length=5)

    # Materialized as lists: on Python 3 ``keys()``/``values()`` are views,
    # which cannot be indexed.
    columns = list(factor_dict.keys())
    inputs = list(factor_dict.values())

    def compute(self, today, assets, out, *inputs):
        """Train on the lookback window and predict the latest day's labels.

        Each item of ``inputs`` arrives with days as rows and securities as
        columns; the oldest day is at index 0 and the most recent at -1.
        The per-day frames are flattened ("stacked") day by day so that every
        (day, security) pair becomes one training sample.

        Parameters
        ----------
        today : the date being computed (unused).
        assets : identifiers of the securities in the columns (unused).
        out : 1-D output array with one slot per security; receives the
            predicted label for the most recent day.
        *inputs : one 2-D array per entry of ``factor_dict``, in that order.
        """
        # Bring in the data with some null handling (missing values -> 0).
        frames = OrderedDict(
            (name, pd.DataFrame(window).fillna(0))
            for name, window in zip(self.columns, inputs)
        )

        returns = frames['Return']
        num_secs = returns.shape[1]
        if num_secs == 0:
            # Empty universe: ``out`` has no slots and there is nothing to fit.
            return

        # Forward returns: row t of ``future`` is the 'Return' observed
        # num_holding_days rows after row t. The last num_holding_days rows
        # of the window have no forward return and are left out of training.
        future = returns.values[num_holding_days:]

        # Per-day (row-wise) percentile thresholds across securities.
        upper = np.nanpercentile(future, upper_percentile, axis=1, keepdims=True)
        lower = np.nanpercentile(future, lower_percentile, axis=1, keepdims=True)

        # The lower check takes precedence when both conditions hold, e.g.
        # when the two thresholds coincide.
        labels = np.where(future <= lower, -1.0,
                          np.where(future >= upper, 1.0, 0.0))

        # One row per (day, security), ordered day by day; one column per
        # input. This ordering matches ``labels.ravel()``.
        features = np.column_stack(
            [frame.values.ravel() for frame in frames.values()]
        )

        # Training samples are exactly the rows that have a forward return.
        num_train = future.shape[0] * num_secs

        # Fixed seed: scikit-learn permutes features at each split, so an
        # unseeded tree can differ between otherwise identical backtests.
        model = DecisionTreeClassifier(random_state=0)
        model.fit(features[:num_train], labels.ravel())

        # Predict from the most recent day's features.
        out[:] = model.predict(features[-num_secs:])

def make_pipeline():
    """Build the alpha pipeline that ranks and screens the trading universe.

    Returns
    -------
    Pipeline
        Columns ``'longs'`` and ``'shorts'`` (the top and bottom
        ``TOTAL_POSITIONS // 2`` names by the combined factor) and
        ``'combined_factor'``, screened to the union of longs and shorts.
    """
    tradable = QTradableStocksUS()

    # ============ FACTORS ============
    # Both hand-written factors use a 10-day window here (the ATR class
    # default of 15 is overridden).
    ml_prediction = Predictor(window_length=days_for_fundamentals_analysis,
                              mask=tradable)
    momentum_10 = Momentum(window_length=10)
    atr_10 = Average_True_Range(window_length=10)
    # =================================

    # Factor weights: (0.5, 1, -1) with the overall sign flipped.
    w_prediction, w_momentum, w_atr = -0.5, -1.0, 1.0

    combined_factor = (ml_prediction * w_prediction
                       + momentum_10 * w_momentum
                       + atr_10 * w_atr)

    per_side = TOTAL_POSITIONS // 2
    longs = combined_factor.top(per_side, mask=tradable)
    shorts = combined_factor.bottom(per_side, mask=tradable)

    # The final output only includes names selected on either side, i.e. at
    # most TOTAL_POSITIONS stocks in total.
    return Pipeline(
        columns={
            'longs': longs,
            'shorts': shorts,
            'combined_factor': combined_factor,
        },
        screen=(longs | shorts),
    )

##################################################
# Execution Functions
##################################################

def before_trading_start(context, data):
    """Fetch the day's pipeline outputs and store them on ``context``.

    Optional core function called automatically before the open of each
    market day.

    Parameters
    ----------
    context : AlgorithmContext
        Receives ``pipeline_data`` and ``risk_loadings``.
    data : BarData
        An object that provides methods to get price and volume data, check
        whether a security exists, and check the last time a security traded.
        Unused here.
    """
    # Dataframe indexed by the securities that passed the screen, with one
    # column per pipeline column defined in make_pipeline.
    alpha_frame = algo.pipeline_output('long_short_equity_template')
    context.pipeline_data = alpha_frame

    # Dataframe holding all of the risk loadings.
    risk_frame = algo.pipeline_output('risk_factors')
    context.risk_loadings = risk_frame


def record_vars(context, data):
    """Record strategy information; scheduled daily at the market close.

    Parameters
    ----------
    context : AlgorithmContext
        Its ``portfolio.positions`` collection is measured.
    data : BarData
        Unused here.
    """
    # Plot the number of positions over time.
    position_count = len(context.portfolio.positions)
    algo.record(num_positions=position_count)


# Called at the start of every week (see the schedule_function call in
# initialize) in order to rebalance the longs and shorts lists
def rebalance(context, data):
    """Re-optimize the long/short portfolio from the latest pipeline output.

    Scheduled in initialize for the start of each week, 30 minutes after
    the market open.

    Parameters
    ----------
    context : AlgorithmContext
        Supplies ``pipeline_data`` and ``risk_loadings`` as stored by
        before_trading_start.
    data : BarData
        Unused here.
    """
    # Latest pipeline output and risk loadings.
    alpha_frame = context.pipeline_data
    loadings = context.risk_loadings

    # Objective for the Optimize API. MaximizeAlpha treats the combined
    # factor as proportional to expected returns: the optimizer goes long
    # the highest values and short the lowest.
    objective = opt.MaximizeAlpha(alpha_frame.combined_factor)

    constraints = [
        # Cap the gross leverage.
        opt.MaxGrossExposure(MAX_GROSS_LEVERAGE),
        # Keep the portfolio dollar neutral.
        opt.DollarNeutral(),
        # Default risk-model exposure constraints.
        opt.experimental.RiskModelExposure(
            risk_model_loadings=loadings,
            version=0,
        ),
        # No single position may exceed MAX_SHORT_POSITION_SIZE on the short
        # side or MAX_LONG_POSITION_SIZE on the long side, so the portfolio
        # is not concentrated in a few securities.
        opt.PositionConcentration.with_equal_bounds(
            min=-MAX_SHORT_POSITION_SIZE,
            max=MAX_LONG_POSITION_SIZE,
        ),
    ]

    # Hand the objective and constraints to the optimizer, which computes
    # the target weights and places the orders.
    algo.order_optimal_portfolio(
        objective=objective,
        constraints=constraints,
    )