In [203]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

In [204]:
df = pd.read_csv('data/train.csv')
df = df.rename({'Company ': 'Company'}, axis=1)
df = df.sort_values(['Company', 'ID'])
df = df.reset_index()

importantCols = ['SMA', 'EMA', 'WMA', 'DEMA', 'TEMA', 'TRIMA', 'KAMA', 'FAMA', 'MAMA', 'T3']
for col in importantCols:
    for i in range(1, 4):
        df.insert(df.shape[1], '{0}_{1}'.format(col, i), np.nan)
        
for index, row in df.iterrows():
    for col in importantCols:
        for i in range(1, 4):
            if index + i < df.shape[0] and df.at[index + 1, 'Company'] == row['Company']:
                df.at[index, '{0}_{1}'.format(col, i)] = df.at[index + i, col]

df = df.dropna()
df[:5]

Unnamed: 0,index,ID,Date,Company,SMA,EMA,WMA,DEMA,TEMA,TRIMA,...,KAMA_3,FAMA_1,FAMA_2,FAMA_3,MAMA_1,MAMA_2,MAMA_3,T3_1,T3_2,T3_3
54,162,163,03-04-2148,ABC,26.936,26.916,27.1042,27.5128,27.2546,26.9737,...,25.6033,1.1623,1.205,1.2482,9.5367,9.6995,9.855,27.5508,27.545,27.4724
55,165,166,06-04-2148,ABC,27.042,26.9313,27.1158,27.432,27.1422,27.074,...,25.5289,1.205,1.2482,1.292,9.6995,9.855,10.007,27.545,27.4724,27.3319
56,168,169,07-04-2148,ABC,26.986,26.7274,26.8918,26.9703,26.5223,27.093,...,25.5335,1.2482,1.292,1.3364,9.855,10.007,10.1631,27.4724,27.3319,27.149
57,171,172,08-04-2148,ABC,26.749,26.4588,26.5762,26.4377,25.8552,27.0033,...,25.5449,1.292,1.3364,1.3813,10.007,10.1631,10.3252,27.3319,27.149,26.9623
58,174,175,09-04-2148,ABC,26.58,26.2045,26.2691,25.9792,25.3354,26.8447,...,25.5607,1.3364,1.3813,1.4269,10.1631,10.3252,10.4939,27.149,26.9623,26.8113


In [205]:
corr = df[df.columns[3:]].corr().sort_values('Price', ascending=False)['Price']
corr

Price         1.000000
TEMA_2        0.999734
DEMA_3        0.999662
DEMA_2        0.999603
TEMA_3        0.999591
WMA_3         0.999499
TEMA_1        0.999445
EMA_3         0.999281
WMA_2         0.999244
DEMA_1        0.999218
EMA_2         0.999027
SMA_3         0.998983
TRIMA_3       0.998904
TEMA          0.998848
WMA_1         0.998789
KAMA_3        0.998670
DEMA          0.998630
SMA_2         0.998591
EMA_1         0.998582
TRIMA_2       0.998410
T3_3          0.998326
KAMA_2        0.998319
WMA           0.998212
SMA_1         0.998087
EMA           0.998017
MIDPRICE      0.997942
TRIMA_1       0.997857
T3_2          0.997821
KAMA_1        0.997816
MIDPOINT      0.997781
                ...   
SlowD         0.090743
PHASE         0.088746
FastD         0.084793
SlowK         0.084793
PPO           0.083061
AROONOSC      0.076934
FastK         0.071163
Aroon Up      0.070774
ADXR          0.064644
ADOSC         0.061550
CCI           0.059824
ADX           0.057177
MFI        

In [206]:
model_vars = [name for name in corr.index if corr[name] < 1.0 and corr[name] > 0.98]
model = sm.OLS(df['Price'], df[model_vars]).fit()
model.summary()

0,1,2,3
Dep. Variable:,Price,R-squared:,1.0
Model:,OLS,Adj. R-squared:,1.0
Method:,Least Squares,F-statistic:,1583000.0
Date:,"Thu, 04 Jul 2019",Prob (F-statistic):,0.0
Time:,03:43:00,Log-Likelihood:,-25826.0
No. Observations:,11826,AIC:,51730.0
Df Residuals:,11788,BIC:,52010.0
Df Model:,38,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
TEMA_2,123.1520,247.800,0.497,0.619,-362.577,608.881
DEMA_3,17.5773,14.783,1.189,0.234,-11.401,46.555
DEMA_2,-15.9846,13.087,-1.221,0.222,-41.637,9.668
TEMA_3,-245.2698,202.460,-1.211,0.226,-642.126,151.586
WMA_3,-479.5997,490.205,-0.978,0.328,-1440.482,481.282
TEMA_1,293.4570,261.322,1.123,0.261,-218.778,805.692
EMA_3,1088.8644,520.170,2.093,0.036,69.246,2108.483
WMA_2,63.4968,556.934,0.114,0.909,-1028.187,1155.180
DEMA_1,-0.0926,0.103,-0.897,0.370,-0.295,0.110

0,1,2,3
Omnibus:,22849.925,Durbin-Watson:,2.04
Prob(Omnibus):,0.0,Jarque-Bera (JB):,304330675.247
Skew:,14.338,Prob(JB):,0.0
Kurtosis:,788.363,Cond. No.,1.01e+16


In [207]:
abbreviated_names = ['SMA', 'EMA', 'WMA', 'DEMA', 'TEMA', 'TRIMA', 'KAMA', 'FAMA', 'MAMA', 'T3', 'MACD', 'MACD_Hist', 'MACD_Signal', 'MAC', 'MAC_Hist', 'MAC_Signal', 'SlowD', 'SlowK', 'FastD', 'FastK', 'RSI', 'FatD', 'FatK', 'WILLR', 'ADX', 'ADXR', 'APO', 'PPO', 'MOM', 'BOP', 'CCI', 'CMO', 'ROC', 'ROCR', 'Aroon Down', 'Aroon Up', 'AROONOSC', 'MFI', 'TRIX', 'ULTOSC', 'DX', 'MINUS_DI', 'PLUS_DI', 'MINUS_DM', 'PLUS_DM', 'Real Lower Band', 'Real Middle Band', 'Real Upper Band', 'MIDPOINT', 'MIDPRICE', 'SAR', 'TRANGE', 'ATR', 'NATR', 'Chaikin A/D', 'ADOSC', 'OBV', 'HT_TRENDLINE', 'LEAD SINE', 'SINE', 'TRENDMODE', 'DCPERIOD', 'HT_DCPHASE', 'PHASE', 'QUADRATURE']
full_names = ['simpleMovingAverage', 'exponentialMovingAverage', 'waysAndMeansAdvances', 'doubleExponentialMovingAverage', 'tripleExponentialMovingAverage', 'triangularMovingAverage', 'kaufmansAdaptiveMovingAverage', 'fAdaptiveMovingAverage', 'mesaAdaptiveMovingAverage', 't3MovingAverage', 'movingAverageConvergenceDivergence', 'movingAverageConvergenceDivergenceHistogram', 'movingAverageConvergenceDivergenceSignal', 'movingAverageConvergence', 'movingAverageConvergenceHistogram', 'movingAverageConvergenceSignal', 'slowStochasticOscillatorSecondLine', 'slowStochasticOscillatorMainLine', 'fastStochasticOscillatorSecondLine', 'fastStochasticOscillatorMainLine', 'relativeStrengthIndex', 'fastStochasticSecondLine', 'fastStochasticMainLine', 'willr', 'averageDirectionalIndex', 'averageDirectionalIndexRating', 'absolutePriceOscillator', 'percentagePriceOscillator', 'momentumIndicator', 'balanceOfPower', 'commodityChannelIndex', 'chandeMomentumOscillator', 'rateOfChange', 'rateOfChangeRating', 'aroonOscillatorDown', 'aroonOscillatorUp', 'aroonOscillator', 'moneyFlowIndex', 'tripleSmoothedExponentialMovingAverage', 'ultimateOscillatorDefinitionAndStrategies', 'directionalMovementIndex', 'minusDirectionalIndex', 'plusDirectionalIndex', 'minusDirectionalMovementIndex', 'plusDirectionalMovementIndex', 'relativeStrengthIndexLowerBand', 'relativeStrengthIndexMiddleBand', 'relativeStrengthIndexHighBand', 'midPoint', 'midPrice', 'parabolicStopAndReverse', 'trange', 'averageTrueRange', 'normalizedAverageTrueRange', 'chaikinAccumulationDistributionLine', 'accumulationDistributionOscillatorSC', 'onBalanceVolume', 'htTrendline', 'leadSine', 'sine', 'trendMode', 'dcPeriod', 'htDcPhase', 'phase', 'quadrature']

indexes = [int(name.split('_')[1]) for name in model.params.index if '_' in name and name not in abbreviated_names]

if len(indexes) > 0:
    print('if (nextRecords.size < {0}) {{'.format(max(indexes)))
    print('    return record.tripleExponentialMovingAverage')
    print('}')
    print()

print('return listOf(')
    
for name in model.params.index:
    modifier = model.params[name]
    comma = ',' if name != model.params.index[-1] else ''
    
    if '_' in name and name not in abbreviated_names:
        parts = name.split('_')
        index = int(parts[1]) - 1
        full_name = full_names[abbreviated_names.index(parts[0])]
        
        print('    nextRecords[{0}].{1} * {2}{3}'.format(index, full_name, modifier, comma))
    else:
        full_name = full_names[abbreviated_names.index(name)]
        
        print('    record.{0} * {1}{2}'.format(full_name, modifier, comma))

print(').sum()')

if (nextRecords.size < 3) {
    return record.tripleExponentialMovingAverage
}

return listOf(
    nextRecords[1].tripleExponentialMovingAverage * 123.15203358224193,
    nextRecords[2].doubleExponentialMovingAverage * 17.577344039041236,
    nextRecords[1].doubleExponentialMovingAverage * -15.984571190011554,
    nextRecords[2].tripleExponentialMovingAverage * -245.2698224864371,
    nextRecords[2].waysAndMeansAdvances * -479.5996636225168,
    nextRecords[0].tripleExponentialMovingAverage * 293.4570099420041,
    nextRecords[2].exponentialMovingAverage * 1088.864351708784,
    nextRecords[1].waysAndMeansAdvances * 63.49676268963094,
    nextRecords[0].doubleExponentialMovingAverage * -0.09261893252123932,
    nextRecords[1].exponentialMovingAverage * -377.4972663029023,
    nextRecords[2].simpleMovingAverage * 0.8758034381174127,
    nextRecords[2].triangularMovingAverage * 1.120192857895411,
    record.tripleExponentialMovingAverage * -188.0585459748954,
    nextRecords[0].waysAndMe