In [2]:
from functools import reduce
from itertools import combinations_with_replacement  
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd
import seaborn as sns
import sys

In [3]:
sys.path.append("./utilities")
from preprocess import percentChange, movingAverage

In [4]:
# Load the raw price history and put it in its final row order in one step.
# Doing the reversal in a cell of its own is not idempotent: re-running that
# cell would flip the order back again.
# NOTE(review): presumably the CSV is stored newest-first and the reversal
# makes it chronological -- confirm against the data file.
df = (
    pd.read_csv('etc_orig.csv')
    .iloc[::-1]               # reverse the row order
    .reset_index(drop=True)   # renumber rows 0..n-1 after the reversal
)

In [7]:
# preprocessing

In [7]:
# Moving averages of the opening price over Fibonacci-sized windows.
ma3, ma5, ma8, ma13, ma21 = (
    movingAverage(df.Open, window) for window in (3, 5, 8, 13, 21)
)

In [23]:
# Exponentially weighted moving average of the opening price (com=0.5).
ema_com_05 = df["Open"].ewm(com=0.5).mean()

In [None]:
# feature collection. 

In [7]:
# calculate features

class CalculateFeatures():
    """Builds percentage-change features from a window of a time series."""

    def PercentageChangePermutations(self, ts, window_right_edge, window_size):
        """Percentage change from the window's anchor point to each following point.

        The anchor is ``ts[window_right_edge - window_size]``. It is compared,
        via ``percentChange``, with ``ts[window_right_edge]``,
        ``ts[window_right_edge - 1]``, ... down to
        ``ts[window_right_edge - window_size + 1]`` (highest index first).

        Parameters:
        ts: indexable time series
        window_right_edge: index of the last point in the window
        window_size: number of points compared against the anchor

        Returns a list with ``window_size`` entries.
        """
        anchor = window_right_edge - window_size
        return [
            percentChange(ts[anchor], ts[window_right_edge - offset])
            for offset in range(window_size)
        ]

    def PercentageChangeSecondOrder(self, features):
        """Percentage change between every unordered pair of first-order features.

        Pairs are produced by ``combinations_with_replacement(features, 2)``,
        so each feature is also paired with itself. Every result is divided
        by 100 to normalise it.

        Parameters:
        features: sequence of first-order features

        Returns a list with one entry per pair, in the order the pairs are generated.
        """
        return [
            percentChange(first, second) / 100  # normalise
            for first, second in combinations_with_replacement(features, 2)
        ]
    

In [None]:
# Create a class for calculating what to predict in the future

In [8]:
class CalculatePred():
    """Derives prediction targets (future values and class labels) from a time series."""

    def PredOutcome(self, ts, t, y):
        '''
        Takes a value at time t+y

        Parameters:
        ts: time series dataset
        t: index of current value
        y: offset from t of the future value we are trying to predict

        Returns ts[t+y], or 0 when that position does not exist in ts.
        '''
        try:
            return ts[t + y]
        except (IndexError, KeyError):
            # Looking past the end of the series: there is no outcome to report.
            return 0

    def PredAveOutcome(self, ts, t, l_window, r_window):
        '''
        Takes a value at time t, "cuts a chunk" from the future and averages it,
        ignoring NaN entries.

        Parameters:
        ts: time series dataset
        t: index of the current value
        l_window: offset from t where the cut in the future begins (inclusive)
        r_window: offset from t where the cut in the future ends (exclusive)

        Returns the mean of the non-NaN values in ts[t+l_window:t+r_window],
        or 0 when the cut holds no usable values.
        '''
        # Converting to a float array lets plain lists be used as well as
        # numpy arrays / pandas Series.
        outcome_range = np.asarray(ts[t + l_window:t + r_window], dtype=float)
        outcome_range = outcome_range[~np.isnan(outcome_range)]
        if outcome_range.size == 0:
            return 0
        return float(outcome_range.mean())

    def BinaryClassification(self, perc_change):
        '''
        Converts percentage change of val at time t and y into binary classification
        where:
            1 = increase (or no change)
            0 = decrease
        '''
        return 1 if perc_change >= 0 else 0

    def MultiClassification(self, perc_change):
        '''
        Converts percentage change into bins based on values of percentiles taken
        from bitcoin up/down swings.

        Bins are half-open: each one includes its lower edge and excludes its
        upper edge, so every value (including the exact edges and 0) lands in
        exactly one bin. Anything at or above 18 -- and NaN -- gets label 10.
        '''
        # (exclusive upper edge, label), in ascending order.
        # NOTE(review): label 4 is never produced (the labels jump from 3 to 5);
        # the numbering is kept as-is so existing labels stay stable.
        bins = (
            (-18.0, 0),
            (-10.0, 1),
            (-5.0, 2),
            (-2.5, 3),
            (0.0, 5),
            (2.5, 6),
            (5.0, 7),
            (10.0, 8),
            (18.0, 9),
        )
        for upper_edge, label in bins:
            if perc_change < upper_edge:
                return label
        return 10
        

In [9]:
# Shared helper instances: CF builds features, CP builds prediction targets.
CF, CP = CalculateFeatures(), CalculatePred()

In [27]:
# Slide a fixed-size window over a time series and return, for every window position, the percentage-change features and the future value to predict
def LoopThroughTime(window_right_edge, ts, pred_gap=8):
    """Slide a fixed-size window along ``ts`` and collect features and targets.

    The window initially covers ``ts[0:window_right_edge]`` and moves one step
    at a time. For every position the first-order and second-order
    percentage-change features from the module-level ``CF`` helper are
    concatenated into one feature row, and the module-level ``CP`` helper
    supplies the target ``ts[right_edge + pred_gap]``.

    Parameters:
    window_right_edge: initial (exclusive) right edge of the window; this is
        also used as the window size
    ts: time series to scan
    pred_gap: how many steps past the window's right edge the target lies
        (defaults to 8, the value that used to be hard-coded)

    Returns ``[feats, preds]``: a list of feature rows and the list of
    matching targets.

    NOTE(review): the window slice is handed to CF together with positions
    relative to the full series (``right_edge - 1``), so this presumably
    expects ``ts`` to keep its original index labels when sliced (e.g. a
    pandas Series) -- confirm.
    NOTE(review): the last point inside the window is ``right_edge - 1``, so
    the target is ``pred_gap + 1`` steps after the newest feature point --
    confirm this is intended.
    """
    window_size = window_right_edge
    # Stop early enough that ts[right_edge + pred_gap] always exists.
    stop = len(ts) - pred_gap

    feats = []
    preds = []

    for right_edge in range(window_right_edge, stop):
        left_edge = right_edge - window_size
        data_chunk = ts[left_edge:right_edge]

        first_order = CF.PercentageChangePermutations(data_chunk, right_edge - 1, window_size - 1)
        second_order = CF.PercentageChangeSecondOrder(first_order)
        feats.append(first_order + second_order)

        preds.append(CP.PredOutcome(ts, right_edge, pred_gap))

    return [feats, preds]

In [28]:
# Build features and targets from the 3-period moving average, using a window that starts at right edge 10.
output = LoopThroughTime(window_right_edge=10, ts=ma3)

In [35]:
import pandas as pd

In [36]:
# One row per window position, one column per feature.
features_1 = pd.DataFrame.from_records(data=output[0])

In [46]:
# Target values as a single-column frame. Naming the column at construction
# also works when there are no predictions at all, whereas assigning
# `.columns` afterwards fails on an empty, zero-column frame.
preds_1 = pd.DataFrame(output[1], columns=['Y'])

In [50]:
# Put the features and the target column side by side.
result = pd.concat(objs=[features_1, preds_1], axis="columns")

In [51]:
# Preview the first rows only instead of dumping the whole frame.
result.head(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,45,46,47,48,49,50,51,52,53,Y
0,,,,,,,,,,,...,,,,,,,,,,1.945307
1,,,,,,,,,,,...,,,,,,,,,,1.835153
2,237.509247,202.717034,150.089232,119.245332,115.709843,115.319725,150.408566,102.039987,74.527697,0.0,...,0.304274,-0.115156,-0.353730,0.0,-0.321581,-0.504498,0.0,-0.269623,0.0,1.805997
3,93.030407,93.384346,73.449280,43.294867,25.622084,23.596338,23.372810,43.477837,15.763853,0.0,...,-0.009473,0.842567,-0.331936,0.0,0.860189,-0.325547,0.0,-0.637428,0.0,1.856293
4,62.634136,66.744974,67.050717,49.830258,23.782047,8.515811,6.765916,6.572826,23.940102,0.0,...,-0.205488,-0.228162,1.811253,0.0,-0.028539,2.538339,0.0,2.642284,0.0,1.884120
5,33.190451,31.219947,34.536741,34.783427,20.889249,-0.127525,-12.444956,-13.856844,-14.012636,0.0,...,96.588219,107.659665,108.881326,-0.0,0.113451,0.125969,-0.0,0.011243,-0.0,1.886900
6,51.037869,54.895377,52.603756,56.461060,56.747946,40.589551,16.147851,1.823152,0.181181,0.0,...,-0.602167,-0.955083,-0.995536,0.0,-0.887096,-0.988780,0.0,-0.900622,0.0,1.884230
7,42.441911,50.764712,54.615244,52.327767,56.178096,56.464463,40.335290,15.937794,1.639002,0.0,...,-0.285652,-0.717738,-0.970973,0.0,-0.604867,-0.959366,0.0,-0.897163,0.0,1.822497
8,25.039596,40.144933,48.333523,52.121961,49.871372,53.659611,53.941361,38.072283,14.068213,0.0,...,0.005251,-0.290485,-0.737825,0.0,-0.294191,-0.739194,0.0,-0.630487,0.0,1.762177
9,1.942272,9.618265,22.860636,30.039315,33.360519,31.387499,34.708529,34.955529,21.043610,0.0,...,0.105807,0.113677,-0.329554,0.0,0.007116,-0.393705,0.0,-0.397989,0.0,1.735407
