In [None]:
def norm(data):
    """Add a log-return column to ``data`` and draw its probability plot.

    A ``'Norm'`` column is written onto ``data`` holding the natural log of
    each ``'Adj Close'`` value divided by the value in the previous row.
    Rows containing a NaN in *any* column are then dropped in place (this
    always removes the first row, which has no previous value), and the
    ``'Norm'`` values are passed to ``stats.probplot`` against a normal
    distribution, plotted through ``pylab``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain an ``'Adj Close'`` column. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object that was passed in.
    """
    adj_close = data['Adj Close']
    previous_adj_close = adj_close.shift(1)

    data['Norm'] = np.log(adj_close / previous_adj_close)

    # Drops every row with a NaN in any column, not only in 'Norm'.
    data.dropna(axis=0, inplace=True)

    stats.probplot(data['Norm'], dist="norm", plot=pylab)
    return data


In [None]:
import pandas as pd

def MACD(df, period1, period2, periodSignal):
    """Return the MACD histogram: the MACD line minus its signal line.

    The MACD line is the EMA with span ``period1`` minus the EMA with span
    ``period2``. The signal line is an EMA of the MACD line with span
    ``periodSignal``.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Values to compute the indicator on (applied column-wise to a frame).
    period1 : int or float
        Span of the first EMA (the minuend of the MACD line).
    period2 : int or float
        Span of the second EMA (the subtrahend of the MACD line).
    periodSignal : int or float
        Span of the EMA applied to the MACD line to obtain the signal line.

    Returns
    -------
    pandas.Series or pandas.DataFrame
        ``MACD line - signal line``, same shape and index as ``df``.
    """
    ema_first = df.ewm(span=period1).mean()
    ema_second = df.ewm(span=period2).mean()

    macd_line = ema_first - ema_second

    # `span=` must be spelled out: the first positional parameter of ewm()
    # is `com`, which would give a different decay than the span-based
    # EMAs above.
    signal_line = macd_line.ewm(span=periodSignal).mean()

    return macd_line - signal_line

In [None]:
import pandas as pd

def stochastics_oscillator(df, period):
    """Compute the stochastic oscillator %K over a rolling window.

    %K = 100 * (Close - lowest Low) / (highest High - lowest Low), where the
    lowest Low and highest High are taken over the last ``period`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'Low'``, ``'High'`` and ``'Close'`` columns.
    period : int
        Rolling window length, in rows.

    Returns
    -------
    pandas.Series
        %K per row; rows without a full window are NaN.
    """
    lowest_low = df['Low'].rolling(window=period).min()
    highest_high = df['High'].rolling(window=period).max()

    # Width of the trading range seen inside the window.
    price_range = highest_high - lowest_low

    return 100 * (df['Close'] - lowest_low) / price_range

In [None]:
def ATR(df, period):
    '''
    Compute the per-row True Range from the 'High', 'Low' and 'Close' columns.

    Three helper columns are written onto ``df`` as a side effect:
      'H-L'  : |High - Low|
      'H-PC' : |High - previous row's Close|
      'L-PC' : |Low  - previous row's Close|
    The True Range of a row is the largest of the three.

    Returns a one-column DataFrame holding the True Range.

    NOTE(review): ``period`` is never used and no averaging is applied, so
    the result is the raw True Range rather than an averaged one despite the
    function name - confirm what callers expect before changing it.
    '''
    previous_close = df['Close'].shift(1)

    df['H-L'] = (df['High'] - df['Low']).abs()
    df['H-PC'] = (df['High'] - previous_close).abs()
    df['L-PC'] = (df['Low'] - previous_close).abs()

    # The first row has no previous close, so its 'H-PC'/'L-PC' are NaN;
    # max() skips NaN by default and that row falls back to 'H-L'.
    true_range = df[['H-L', 'H-PC', 'L-PC']].max(axis=1)

    return true_range.to_frame()

In [None]:
from sklearn.preprocessing import RobustScaler
from collections import deque
import random
import numpy as np
def preprocess_df(data, shuffle=True, seq_len=60):
    """Scale ``data`` and cut it into fixed-length sequences with targets.

    Every column of ``data`` (the target column included) is scaled with a
    ``RobustScaler`` fitted on ``data`` itself. The last column is treated
    as the target; all other columns are features. For every row that has
    at least ``seq_len - 1`` rows before it, one sample is produced:

    * the sequence: the scaled features of that row and the ``seq_len - 1``
      rows before it, shape ``(seq_len, n_columns - 1)``;
    * the target: the scaled last-column value of that same row.

    Parameters
    ----------
    data : array-like of shape (n_rows, n_columns)
        Anything ``RobustScaler.fit_transform`` accepts; target in the last
        column.
    shuffle : bool, default True
        Shuffle the samples with ``random.shuffle`` (seed the ``random``
        module beforehand for reproducible order).
    seq_len : int, default 60
        Number of consecutive rows in each sequence.

    Returns
    -------
    X : numpy.ndarray
        Stacked sequences, shape ``(n_samples, seq_len, n_columns - 1)``;
        an empty 1-D array when ``data`` has fewer than ``seq_len`` rows.
    y : list
        Scaled target values, aligned with ``X``.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.

    NOTE(review): the fitted scaler is not returned, so the scaled targets
    cannot be mapped back to the original units by the caller.
    """
    if seq_len < 1:
        raise ValueError("seq_len must be a positive integer")

    scaled = RobustScaler().fit_transform(data)
    features, targets = scaled[:, :-1], scaled[:, -1]

    # `end` is one past the row whose target is predicted; the window covers
    # that row and the seq_len - 1 rows before it.
    samples = [
        (features[end - seq_len:end], targets[end - 1])
        for end in range(seq_len, len(scaled) + 1)
    ]

    if shuffle:
        random.shuffle(samples)

    X = [window for window, _ in samples]
    y = [target for _, target in samples]

    return np.array(X), y