# Objectives
- Acceleration and Velocity columns
- Percent Change over past few columns
- Direction(y) 0 or 1 goes up or down


In [1]:
!ls

[34mOLD_ALGOS[m[m                 Preproccesing_tests.ipynb spy_data_clean.pickle
Preproccesing.ipynb       [34m__pycache__[m[m


In [7]:
import pickle
import pandas as pd
import numpy as np

In [5]:
with open('spy_data_clean.pickle', 'rb') as f:
    tickData = pickle.load(f)

In [6]:
tickData.head()

Unnamed: 0,price,DateTime
0,1496.4,2000-01-03 08:30:34
1,1496.0,2000-01-03 08:30:36
2,1495.5,2000-01-03 08:30:37
3,1495.0,2000-01-03 08:30:46
4,1495.5,2000-01-03 08:30:53


### Idea:

    Instead of using the actual time difference between the points while calculating the velocity, use 1 or the sqrt of the time difference.

### Reasoning:

    The prices are being updated each tick, yet the data appears in time stamps. To make the calculus more applicable one aproach could be to treat it as a continous function, with each next point being discrete.


In [13]:
tickData["price"].iloc[4]

1495.5

In [111]:
tickData = tickData.drop_duplicates(subset='DateTime')


In [114]:
def get_prctChange(index, window_size=3):
    """
    Args:
        index: which row of data to find the percent change for
        window_size: how many previous rows to include 
        
    Output:
         A list of the perecentage changes on the past window_size rows
         index/ index-1 would be the first element of the list
    
    """
    
    window = tickData['price'].iloc[index-window_size:index]
    window = np.array(window)
    changeList = []
    for i in range(1,window_size):
        changeList.append(window[i]/window[i-1])
        
    changeList.reverse()
    
    return changeList

In [112]:
def get_velocity(index, method="SQRT"):
    
    p1 = tickData["price"].iloc[index]
    p0 = tickData["price"].iloc[index-1]
    
    timeDiff = 1
    if method == "SQRT":
        timeDiff = (tickData["DateTime"].iloc[index] - tickData["DateTime"].iloc[index-1])
        timeDiff = np.sqrt(timeDiff.total_seconds())
        
    velocity = (p1-p0)/timeDiff
    
    return velocity
        
    

In [113]:
def get_acceleration(index, method="SQRT"):
    
    v1 = get_velocity(index)
    v0 = get_velocity(index-1)
    
    timeDiff = 1
    if method == "SQRT":
        timeDiff = (tickData["DateTime"].iloc[index] - tickData["DateTime"].iloc[index-1])
        timeDiff = np.sqrt(timeDiff.total_seconds())
        
    acceleration = (v1-v0)/timeDiff
    
    return acceleration

In [115]:
tickData['price'].iloc[index-4:index]


0    1496.4
1    1496.0
2    1495.5
3    1495.0
Name: price, dtype: float64

In [132]:
def get_momentum(index):
    pC = get_prctChange(index)[0]
    v = get_velocity(index)
    
    m = (v*pC)
    if m <0:
        return ((v*pC)**2)*-1
    return (v*pC)**2

In [133]:
def get_direction(index):
    if tickData['price'].iloc[index+1] > tickData['price'].iloc[index]:
        return 1
    else:
        return 0

In [139]:
vList=[]
aList=[]
mList=[]
pC_0=[]
pC_1=[]
dList=[]

for i in range(3,len(tickData)-4):
    vList.append(get_velocity(i))
    aList.append(get_acceleration(i))
    mList.append(get_momentum(i))
    pc = get_prctChange(i)
    pC_0.append(pc[0])
    pC_1.append(pc[1])
    dList.append(get_direction(i))

In [140]:
featuredDict = {"Velocity":vList,"Acceleration":aList,"Momentum":mList,"pC_0":pC_0,"pC_1":pC_1,"Direction":dList}

In [141]:
featuredData= pd.DataFrame(featuredDict)

In [142]:
featuredData.head()

Unnamed: 0,Velocity,Acceleration,Momentum,pC_0,pC_1,Direction
0,-0.166667,0.111111,-0.027759,0.999666,0.999733,1
1,0.188982,0.134423,0.03569,0.999666,0.999666,0
2,-0.5,-0.688982,-0.250167,1.000334,0.999666,0
3,-0.223607,0.123607,-0.049967,0.999666,1.000334,1
4,0.223607,0.2,0.049967,0.999666,0.999666,0


In [143]:
featuredData.to_pickle('spy_classification_data.pkl')
