# This file will serve as the main testing ground for a variety of signals for the CRSP data set

## The strategies under consideration are:

### 1. Univariate Strategies
   
### 2. Multivariate Strategies
   
### 3. NN-based methodologies
   * LSTM
   * other?
   
### 4. Others? (e.g. from academic literature)

# Libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import statsmodels.tsa

# Preprocessing

In [61]:
# Load the returns table and index it by its 'date' column, parsed as datetimes.
data = pd.read_csv('data/returns.csv').set_index('date')
data.index = pd.to_datetime(data.index)

# Univariate strategies
* Simple strategies
    + moving average
    + MACD
* Forecasting strategies (require a cross-validation procedure)
    + AR
    + MA
    + auto_arima

In [123]:
def _plot_macd(ret, macd_line, signal, position):
    """Plot the MACD and signal lines with positions as stems (first 100 rows)."""
    # Plain-Python equivalent of the ``%matplotlib widget`` magic: the magic
    # syntax is only valid inside IPython, and ``get_ipython`` only exists there.
    try:
        get_ipython().run_line_magic('matplotlib', 'widget')
    except NameError:
        pass  # not running under IPython; keep the current backend

    window = 100
    dates = ret.index[:window]

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(dates, signal.iloc[:window], label='signal', color='g')
    ax.plot(dates, macd_line.iloc[:window], label='macd', color='b')
    ax.legend(loc=1)
    ax.set_xlabel("time")
    ax.set_ylabel("MACD/SIGNAL")

    months = mdates.MonthLocator()
    ax.xaxis.set_major_locator(months)
    fig.autofmt_xdate()

    # Positions share the x-axis but get their own y-axis on the right.
    ax2 = ax.twinx()
    # Use the series that was passed in, not a module-level variable.
    ax2.stem(dates, position[:window])
    ax2.set_ylabel("position (1 for long, 0 for none, -1 for short)")
    ax2.set_ylim(-3, 3)
    ax.set_title("MACD strategy visualised with positions as stems for first 100 observations")
    plt.show()


def macd(ret, long=26, short=12, signal_span=9, plot=False):
    """Apply a MACD long/short rule to a series and return position * ret.

    The MACD line is the difference between a short-span and a long-span
    exponentially weighted mean of ``ret``; the signal line is an
    exponentially weighted mean of the MACD line. For each observation the
    position is:

    * ``1``  when the MACD line is >= 0 and above the signal line,
    * ``-1`` when the MACD line is < 0 and below the signal line,
    * ``0``  otherwise.

    Parameters
    ----------
    ret : pandas.Series
        Input series (the callers in this file pass columns of the returns
        table).
    long, short : int
        Spans of the long and short exponentially weighted means.
    signal_span : int
        Span of the exponentially weighted mean applied to the MACD line.
    plot : bool
        If true, plot the first 100 observations of the MACD line, the
        signal line and the positions.

    Returns
    -------
    numpy.ndarray
        Element-wise product of the position and ``ret``, same length as
        ``ret``.

    NOTE(review): the position for observation ``i`` is computed from
    averages that already include ``ret[i]`` and is multiplied by ``ret[i]``
    itself -- confirm whether a one-period lag is intended for backtesting.
    """
    short_signal = ret.ewm(span=short, adjust=False).mean()
    long_signal = ret.ewm(span=long, adjust=False).mean()
    macd_line = short_signal - long_signal
    signal = macd_line.ewm(span=signal_span, adjust=False).mean()

    # Work on raw arrays so the rule is purely positional and does not depend
    # on the index labels of ``ret``.
    macd_values = macd_line.to_numpy()
    signal_values = signal.to_numpy()
    position = np.select(
        [
            (macd_values >= 0) & (macd_values > signal_values),
            (macd_values < 0) & (macd_values < signal_values),
        ],
        [1.0, -1.0],
        default=0.0,
    )

    if plot:
        _plot_macd(ret, macd_line, signal, position)

    return position * ret.to_numpy()

def macd_signals(returns, long=26, short=12, signal_span=9):
    """Run ``macd`` on every column of ``returns``.

    Parameters
    ----------
    returns : pandas.DataFrame
        One series per column; columns are processed by position.
    long, short, signal_span : int
        Passed through unchanged to ``macd``.

    Returns
    -------
    pandas.DataFrame
        Same index as ``returns``, with one column ``signal_<i>`` per input
        column ``i`` holding the output of ``macd`` for that column. A frame
        with no columns yields a frame with no columns.
    """
    # Build all columns first and construct the frame once: inserting columns
    # one at a time fragments wide frames, and assigning the index to a
    # column-less frame afterwards fails with a length mismatch.
    columns = {
        'signal_{}'.format(i): macd(returns.iloc[:, i], long, short, signal_span)
        for i in range(returns.shape[1])
    }
    return pd.DataFrame(columns, index=returns.index)
        



### Testing the univariate strategies

In [124]:
# 1. test macd
# Single series: column 200 of the returns table; multiple series: the first ten columns.
stock = data.iloc[:, 200]
macd_single = macd(stock, long=26, short=12, signal_span=9)
macd_multiple = macd_signals(data.iloc[:, :10], long=26, short=12, signal_span=9)

# 2. test ma


Unnamed: 0_level_0,signal_0,signal_1,signal_2,signal_3,signal_4,signal_5,signal_6,signal_7,signal_8,signal_9
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2000-01-03,0.000000,-0.000000,0.000000,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000,-0.000000
2000-01-04,0.084310,0.033780,0.083217,-0.023493,0.036610,0.039252,0.059355,-0.034561,-0.019242,-0.017586
2000-01-05,-0.014634,-0.010544,0.148741,-0.006173,0.010556,-0.019455,0.010974,0.016432,-0.019025,0.054516
2000-01-06,0.086538,0.033498,0.060036,0.014197,0.035515,0.010913,0.071913,0.041570,0.046061,0.049383
2000-01-07,-0.047368,-0.013068,-0.061010,0.018373,0.038332,0.075564,-0.013924,0.008869,0.079954,-0.000735
...,...,...,...,...,...,...,...,...,...,...
2019-12-24,0.000951,-0.000000,-0.000000,-0.002770,-0.000000,-0.004033,-0.001422,0.000044,-0.002562,-0.000000
2019-12-26,0.019840,0.008197,0.044467,0.010612,0.000685,-0.000084,0.008518,-0.004346,-0.000000,0.000000
2019-12-27,-0.000379,0.001828,0.000551,0.000000,0.000549,-0.000586,0.004786,0.001369,0.006948,0.003422
2019-12-30,0.005935,0.008619,-0.000000,0.003665,0.003087,0.001589,0.005658,0.001680,0.012848,0.005866


# Multivariate Econometric
   * Vector Autoregressive (VAR)
   * Vector error correction models (VECM)
   * Kernel Ridge Regression (KRR)

# NN-based methodologies
   * LSTM
   * other?