In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
%matplotlib inline  

data = pd.read_csv('data.csv')

In [22]:
def RSI(dataframe, period):
    '''
    Computes the RSI of a given price series for a given period length
    :param dataframe:
    :param period:
    :return dataframe with rsi:
    '''

    rsi = []

    for stock in dataframe['Symbol'].unique():
        all_prices = dataframe[dataframe['Symbol'] == stock]['Close']
        diff = np.diff(all_prices) # length is 1 less than the all_prices
        for i in range(period):
            rsi.append(None) # because RSI can't be calculated until period prices have occured

        for i in range(len(diff) - period + 1):
            avg_gain = diff[i:period + i]
            avg_loss = diff[i:period + i]
            avg_gain = abs(sum(avg_gain[avg_gain >= 0]) / period)
            avg_loss = abs(sum(avg_loss[avg_loss < 0]) / period)
            if avg_loss == 0:
                rsi.append(100)
            elif avg_gain == 0:
                rsi.append(0)
            else:
                rs = avg_gain / avg_loss
                rsi.append(100 - (100 / (1 + rs)))

    dataframe['RSI'] = rsi
    return dataframe


def PROC(dataframe, period):
    '''
    Computes the PROC(price rate of change) of a given price series for a given period length
    :param dataframe:
    :param period:
    :return proc:
    '''

    proc = []

    for stock in dataframe['Symbol'].unique():
        all_prices = list(dataframe[dataframe['Symbol'] == stock]['Close'])
        for i in range(period):
            proc.append(None) # because proc can't be calculated until period prices have occured
        for i in range(len(all_prices) - period):
            if len(all_prices) <= period:
                proc.append(None)
            else:
                calculated = (all_prices[i + period] - all_prices[i]) / all_prices[i]
                proc.append(calculated)
    dataframe['PROC'] = proc
    return dataframe


def SO(dataframe, period):
    
    so = []
    
    for stock in dataframe['Symbol'].unique():
        all_prices = list(dataframe[dataframe['Symbol'] == stock]['Close'])
        
        for i in range(period):
            so.append(None)
 
        for i in range(len(all_prices) - period):
            C = all_prices[i]
            H = max(all_prices[i:i+period])
            L = min(all_prices[i:i+period])
            so.append(100 * ((C - L) / (H - L)))

    dataframe['SO'] = so
    return dataframe


def Williams_R(dataframe, period):
    '''
    Williams %R
    Calculates fancy shit for late usage. Nice!

    EXAMPLE USAGE:
    data = pandas.read_csv("./data/ALL.csv", sep=",",header=0,quotechar='"')
    wr = Williams_R(data)
    print(wr)

    '''
    
    wr = []
    
    for stock in dataframe['Symbol'].unique():
        all_prices = list(dataframe[dataframe['Symbol'] == stock]['Close'])
        for i in range(period):
            wr.append(None) # because proc can't be calculated until period prices have occured
            
        for i in range(period-1,len(all_prices)-1):
            C = all_prices[i]
            H = max(all_prices[i-period+1:i])
            L = min(all_prices[i-period+1:i])
            wr_one = (
                ((H - C) 
                 / (H - L)) * -100
            )
            if wr_one <=-100:
                wr.append(-100)
            elif wr_one >= 100:
                wr.append(100)
            else:
                wr.append(wr_one)
    dataframe["WR"] = wr
    return dataframe


def calculate_targets(df, period):
    
    targets = []

    for stock in df['Symbol'].unique():
        all_prices = list(df[df['Symbol'] == stock]['Close'])
        
        for i in range(0, len(all_prices)-period):
            targets.append(np.sign(all_prices[i+period] - all_prices[i]))
        for i in range(len(all_prices)-period, len(all_prices)):
            targets.append(None)

    df["Target({})".format(period)] = targets
    return df


def On_Balance_Volume(dataframe):
    '''
    Williams %R
    Calculates fancy shit for late usage. Nice!

    EXAMPLE USAGE:
    data = pandas.read_csv("./data/ALL.csv", sep=",",header=0,quotechar='"')
    wr = Williams_R(data)
    print(wr)

    '''
    obv = []
    
    for stock in dataframe['Symbol'].unique():
        all_prices = list(dataframe[dataframe['Symbol'] == stock]['Close'])
        all_volumes = list(dataframe[dataframe['Symbol'] == stock]['Volume'])
    
        obv.append(dataframe.iloc[0]["Volume"])
        for i in range(1,len(all_prices)):
            C_old = all_prices[i-1]
            C = all_prices[i]
            if(C > C_old):
                obv.append(obv[i-1]+ all_volumes[i])
            elif (C < C_old):
                obv.append(obv[i - 1] - all_volumes[i])
            else:
                obv.append(obv[i-1])

    dataframe['OBV'] = obv
    return dataframe


def delete_bad_data(df):
    for stock in df['Symbol'].unique():
        if df[df["Symbol"]==stock]["Close"].any() < 1:
            df = df.drop(df[df["Symbol"]==stock], axis=0)
        if df[df["Symbol"]==stock]["Volume"].any() == 0:
            df = df.drop(df[df["Symbol"]==stock], axis=0)
        if df[df["Symbol"]==stock]["Open"].any() == None:
            df = df.drop(df[df["Symbol"]==stock], axis=0)
    return df

def detrend_data(df):
    trend = None
    for stock in df['Symbol'].unique():
        all_prices = list(df[df['Symbol'] == stock]['Close'])
#        trend.append(signal.detrend(all_prices))
        if(trend is None):
            trend = list(signal.detrend(all_prices))
        else:
            trend.extend(signal.detrend(all_prices))
        print("len(trend):{} len(df['Symbol']):{}".format(len(trend),len(all_prices)))

    print("len(trend):{} len(df):{}".format(len(trend),len(df)))
    df['Close_detrend'] = trend
    return df

In [7]:
data = delete_bad_data(data)
print("Test of deleting bad data:")
try:
    print("Deletion successful:", data[data["Symbol"]=="AAP"]["Close"][5] )
except:
    print("True")

Test of deleting bad data:
True


In [None]:
data = RSI(data,14)
print("RSI: Done")

In [None]:
data = PROC(data, 14)
print("PROC: Done")

In [None]:
data = SO(data,14)
print("SO: Done")

In [None]:
data = Williams_R(data, 14 )
print("Williams_R: Done")

In [None]:
data["EWMA"] = pd.ewma(data["Close"], com=.5)
print("EWMA: Done")

In [24]:
data = detrend_data(data)
print("Date detrend: Done")

len(trend):1867 len(df['Symbol']):1867
len(trend):2744 len(df['Symbol']):877
len(trend):4278 len(df['Symbol']):1534
len(trend):6144 len(df['Symbol']):1866
len(trend):7272 len(df['Symbol']):1128
len(trend):9139 len(df['Symbol']):1867
len(trend):11006 len(df['Symbol']):1867
len(trend):12872 len(df['Symbol']):1866
len(trend):14173 len(df['Symbol']):1301
len(trend):16039 len(df['Symbol']):1866
len(trend):17906 len(df['Symbol']):1867
len(trend):19772 len(df['Symbol']):1866
len(trend):21639 len(df['Symbol']):1867
len(trend):23267 len(df['Symbol']):1628
len(trend):25134 len(df['Symbol']):1867
len(trend):27001 len(df['Symbol']):1867
len(trend):28868 len(df['Symbol']):1867
len(trend):29793 len(df['Symbol']):925
len(trend):31660 len(df['Symbol']):1867
len(trend):33527 len(df['Symbol']):1867
len(trend):35394 len(df['Symbol']):1867
len(trend):37261 len(df['Symbol']):1867
len(trend):39127 len(df['Symbol']):1866
len(trend):40994 len(df['Symbol']):1867
len(trend):42622 len(df['Symbol']):1628
len(tren

In [None]:
data = calculate_targets(data, 1)
data = calculate_targets(data, 3)
data = calculate_targets(data, 5)
data = calculate_targets(data, 10)
data = calculate_targets(data, 14)
data = calculate_targets(data, 30)
print('Targets Done - except 60')

In [None]:
data

In [None]:
data.to_csv("./data_preprocessed.csv")

In [None]:
data_processed = pd.read_csv("data_preprocessed.csv")
print(data_processed[:10])

In [None]:
stock= data_processed['Symbol'].unique()[0]
plt.figure(figsize=(10,10))

plt.plot(data_processed[data_processed["Symbol"]==stock]["WR"],label="WR")
plt.plot(data_processed[data_processed["Symbol"]==stock]["SO"],label="So")
plt.plot(data_processed[data_processed["Symbol"]==stock]["RSI"],label="RSI")
plt.plot(data_processed[data_processed["Symbol"]==stock]["PROC"],label="PROC")

plt.plot(data_processed[data_processed["Symbol"]==stock]["Close"],label="Close")
plt.legend()