In [5]:
import pandas as pd
import numpy as np
from pandas import DataFrame # specify datatype while passing df object

In [6]:
# Load the raw price history. The indicator functions defined below read the
# 'High', 'Low', 'Close', 'Adj Close' and 'Volume' columns of the frame they are given,
# and normalizing() selects its feature columns by position (iloc[:, 8:233]),
# so the column layout of this file matters.
df=pd.read_csv('WMT.csv')

In [17]:
import pandas as pd
import numpy as np
from pandas import DataFrame

def rsi(df):
    """Add Relative Strength Index columns ``RSI_6`` ... ``RSI_25`` to ``df``.

    The RSI here is built from simple rolling means of the gains and losses of
    ``df['Close']`` (one column per look-back period from 6 to 25).

    Parameters
    ----------
    df : DataFrame
        Must contain a ``'Close'`` column. It is modified in place.

    Returns
    -------
    DataFrame
        The same ``df`` object with the RSI columns added. Rows that do not
        yet have a full window are NaN.
    """
    delta = df['Close'].diff()
    # Series.where keeps df's index, so the results line up with df even when
    # the frame is indexed by dates (a bare np.where result would be re-indexed
    # 0..n-1 and align to all-NaN on assignment).
    # The first (undefined) difference counts as "no gain / no loss".
    gain = delta.where(delta > 0, 0.0)
    loss = (-delta).where(delta < 0, 0.0)
    for period in range(6, 26):
        avg_gain = gain.rolling(window=period).mean()
        avg_loss = loss.rolling(window=period).mean()
        rs = avg_gain / avg_loss
        df[f'RSI_{period}'] = 100 - 100 / (1 + rs)
    return df
    
def cmo(df):
    """Add Chande Momentum Oscillator columns ``cmo_6`` ... ``cmo_20`` to ``df``.

    For each period the column is the rolling sum of the day-over-day change of
    ``df['Close']`` divided by the rolling sum of its absolute value, times 100.
    ``df`` is modified in place and returned.
    """
    delta = df["Close"].diff()
    magnitude = delta.abs()
    for period in range(6, 21):
        net_move = delta.rolling(window=period).sum()
        total_move = magnitude.rolling(window=period).sum()
        df[f'cmo_{period}'] = (net_move / total_move) * 100
    return df
    
def williams(df):
    """Add Williams %R columns ``Williams_6`` ... ``Williams_20`` to ``df``.

    For each period: ``(highest High - Close) / (highest High - lowest Low) * -100``
    over the rolling window. ``df`` is modified in place and returned.
    """
    close = df['Close']
    for period in range(6, 21):
        window_high = df["High"].rolling(window=period).max()
        window_low = df["Low"].rolling(window=period).min()
        spread = window_high - window_low
        df[f'Williams_{period}'] = ((window_high - close) / spread) * -100
    return df
    
def sma(df: DataFrame):
    """Add simple moving averages ``sma_6`` ... ``sma_30`` of ``'Adj Close'`` to ``df``.

    ``df`` is modified in place and returned; rows without a full window are NaN.
    """
    prices = df["Adj Close"]
    for period in range(6, 31):
        df[f'sma_{period}'] = prices.rolling(window=period).mean()
    return df
    
def wma(df: DataFrame):
    """Add linearly weighted moving averages ``wma_6`` ... ``wma_20`` of ``'Adj Close'``.

    Within each window the oldest value gets weight 1 and the newest gets
    weight ``period``. ``df`` is modified in place and returned; rows without a
    full window are NaN.
    """
    prices = df["Adj Close"]
    for period in range(6, 21):
        weights = np.arange(1, period + 1)
        denominator = weights.sum()
        # raw=True hands each window to the lambda as a plain ndarray instead of
        # building a Series per window (same result, far less overhead).
        df[f'wma_{period}'] = prices.rolling(window=period).apply(
            lambda window: np.sum(weights * window) / denominator, raw=True)
    return df
    
def ppo(df):
    """Add Percentage Price Oscillator columns to ``df``.

    For each period in 6..20 two columns are written, in this order:
    ``ppo_{period}``        -- percentage gap between the EMA(span=period) and the
                               EMA(span=period + 14) of ``'Adj Close'``;
    ``ppo_signal_{period}`` -- EMA(span=period - 3) of that PPO line.
    ``df`` is modified in place and returned.
    """
    price = df["Adj Close"]
    for period in range(6, 21):
        fast = price.ewm(span=period).mean()
        slow = price.ewm(span=period + 14).mean()
        line = (fast - slow) * 100 / slow
        df[f'ppo_{period}'] = line
        df[f'ppo_signal_{period}'] = line.ewm(span=period - 3).mean()
    return df
    
def ema(df: DataFrame):
    """Add exponential moving averages ``ema_6`` ... ``ema_20`` of ``'Adj Close'`` to ``df``.

    Uses pandas' ``ewm(span=..., adjust=True)``. ``df`` is modified in place and returned.
    """
    source = df["Adj Close"]
    for span in range(6, 21):
        df[f'ema_{span}'] = source.ewm(span=span, adjust=True).mean()
    return df
    
def roc(df):
    """Add rate-of-change columns ``roc_6`` ... ``roc_20`` of ``'Adj Close'`` to ``df``.

    ``roc_n`` is the percentage change of the price relative to its value
    ``n`` rows earlier. ``df`` is modified in place and returned; the first
    ``n`` rows of ``roc_n`` are NaN.
    """
    price = df['Adj Close']
    for n in range(6, 21):
        change = price.diff(n)
        base = price.shift(n)
        df[f'roc_{n}'] = (change / base) * 100
    return df
    
def cmfi(df):
    """Add Chaikin Money Flow columns ``cmfi_6`` ... ``cmfi_20`` to ``df``.

    For each bar the money-flow multiplier is
    ``((Close - Low) - (High - Close)) / (High - Low)``; multiplied by
    ``Volume`` it gives the money-flow volume. Each output column is the rolling
    sum of money-flow volume divided by the rolling sum of volume.

    Parameters
    ----------
    df : DataFrame
        Must contain ``'High'``, ``'Low'``, ``'Close'`` and ``'Volume'``.
        Modified in place.

    Returns
    -------
    DataFrame
        The same ``df`` object with the new columns added.
    """
    bar_range = df['High'] - df['Low']
    multiplier = ((df['Close'] - df['Low']) - (df['High'] - df['Close'])) / bar_range
    # A bar with High == Low has no range, so the ratio above is 0/0 (NaN) and
    # would blank out every rolling window that contains it. Treat such bars as
    # contributing no money flow instead.
    multiplier = multiplier.mask(bar_range == 0, 0.0)
    flow_volume = multiplier * df['Volume']
    for period in range(6, 21):
        df[f'cmfi_{period}'] = (flow_volume.rolling(window=period).sum()) / (df['Volume'].rolling(window=period).sum())
    return df

def hma(df):
    """Add Hull moving average columns ``hma_6`` ... ``hma_20`` of ``'Adj Close'`` to ``df``.

    For each length: take twice the weighted MA over ``length // 2`` rows minus
    the weighted MA over ``length`` rows, then smooth that with a weighted MA
    over ``int(sqrt(length))`` rows. ``df`` is modified in place and returned.
    """
    def _weighted_ma(series, span):
        # Linearly weighted mean: oldest value weight 1, newest weight `span`.
        ramp = np.arange(1, span + 1)
        return series.rolling(span).apply(lambda win: np.dot(win, ramp) / ramp.sum(), raw=True)

    price = df['Adj Close']
    for length in range(6, 21):
        smoothing_span = int(np.sqrt(length))
        fast = _weighted_ma(price, length // 2)
        slow = _weighted_ma(price, length)
        df[f'hma_{length}'] = _weighted_ma((2 * fast) - slow, smoothing_span)
    return df
    
def tripleema(df):
    """Add triple exponential moving average columns ``tripleema_6`` ... ``tripleema_20``.

    With ``e1`` the EMA of ``'Adj Close'``, ``e2`` the EMA of ``e1`` and ``e3``
    the EMA of ``e2`` (all with the same span), the column is ``3*e1 - 3*e2 + e3``.
    ``df`` is modified in place and returned.
    """
    price = df['Adj Close']
    for period in range(6, 21):
        single = price.ewm(span=period).mean()
        double = single.ewm(span=period).mean()
        triple = double.ewm(span=period).mean()
        df[f'tripleema_{period}'] = ((3 * single) - (3 * double) + triple)
    return df
    
def psi(df):        #Psychology Line Index Indicator
    """Add Psychological Line columns ``psi_6`` ... ``psi_20`` to ``df``.

    ``psi_n`` is the percentage of the last ``n`` rows whose ``'Close'`` was
    higher than the previous row's. ``df`` is modified in place and returned;
    rows without a full window are NaN.
    """
    # 1.0 where the close rose, 0.0 otherwise (the first row has no previous
    # close and counts as "not up"). Derived from df['Close'] directly so the
    # flags keep df's index and align on assignment even for a date-indexed frame.
    rose = (df['Close'].diff() > 0).astype(float)
    for period in range(6, 21):
        df[f'psi_{period}'] = (rose.rolling(window=period).sum() / period) * 100
    return df
    
def cci(df):
    """Add Commodity Channel Index columns ``cci_6`` ... ``cci_20`` to ``df``.

    With the typical price ``TP = (High + Low + Close) / 3``::

        CCI = (TP - SMA(TP)) / (0.015 * mean absolute deviation of TP)

    where the mean absolute deviation is taken around the window's own mean.
    The 0.015 scaling constant belongs with the mean deviation, so that (not
    the standard deviation) is what is used here.

    Parameters
    ----------
    df : DataFrame
        Must contain ``'High'``, ``'Low'`` and ``'Close'``. Modified in place.

    Returns
    -------
    DataFrame
        The same ``df`` object with the new columns added. Rows without a full
        window are NaN.
    """
    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
    for period in range(6, 21):
        window = typical_price.rolling(window=period)
        mean_deviation = window.apply(
            lambda values: np.mean(np.abs(values - values.mean())), raw=True)
        df[f'cci_{period}'] = (typical_price - window.mean()) / (0.015 * mean_deviation)
    return df


In [18]:
def label_data(df):
  """
  Label every row of ``df`` as Sell / Buy / Hold with a sliding window.

  A window of 11 consecutive rows is slid over ``df['Close']`` one row at a
  time. The row in the middle of each window is labelled:

    0 (SELL) if it holds the window's maximum,
    1 (BUY)  if it holds the window's minimum,
    2 (HOLD) otherwise.

  When the extreme value occurs more than once in a window, only its first
  occurrence counts as "the" maximum / minimum.

  Parameter : df ==> Dataframe with OHLCV data (only 'Close' is read).
  Returns : df ==> The same Dataframe with 1 added column of "Label".
            Rows that are never the middle of a complete window (the first
            and last 5 rows) keep the empty string '' as their label.
  """
  window_size = 11
  half = window_size // 2

  # Work on a plain positional array so the result does not depend on df's index.
  close = df['Close'].to_numpy(dtype=float)
  labels = np.full(len(df), '', dtype=object)

  # Every complete window is visited, including the one ending on the last row.
  for begin in range(len(df) - window_size + 1):
    window = close[begin:begin + window_size]
    if np.isnan(window[half]):
      label = 2       #HOLD: a missing middle value can be neither extreme
    elif np.nanargmax(window) == half:
      label = 0       #SELL
    elif np.nanargmin(window) == half:
      label = 1       #BUY
    else:
      label = 2       #HOLD
    labels[begin + half] = label

  # Single column assignment instead of chained df['Label'][i] = ... writes.
  df['Label'] = labels
  return df

In [19]:
def normalizing(df):
  """
  Min-max scale the feature columns of ``df`` in place and return it.

  The feature columns are selected by position: columns 8 up to (not
  including) 233. Each is replaced by the output of ``MinMaxScaler().fit_transform``.

  NOTE(review): ``preprocessing`` is not imported in the cells visible here --
  presumably ``sklearn.preprocessing``; confirm it is imported before this runs.
  """
  feature_columns = df.columns[8:233]
  scaler = preprocessing.MinMaxScaler()
  scaled_values = scaler.fit_transform(df[feature_columns])
  df[feature_columns] = scaled_values
  return df

In [21]:
# rsi() writes its RSI_* columns into df itself and returns that same object,
# so df1 and df refer to the same DataFrame after this call.
df1=rsi(df)

In [22]:
# Display the frame with the RSI_6 ... RSI_25 columns added.
df1

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,RSI_6,RSI_7,RSI_8,...,RSI_16,RSI_17,RSI_18,RSI_19,RSI_20,RSI_21,RSI_22,RSI_23,RSI_24,RSI_25
0,2002-01-24,59.860001,59.950001,59.299999,59.669998,41.282154,7984100,,,,...,,,,,,,,,,
1,2002-01-25,59.360001,59.480000,58.060001,58.400002,40.403519,7322100,,,,...,,,,,,,,,,
2,2002-01-28,58.700001,59.180000,58.320000,58.630001,40.562641,5142700,,,,...,,,,,,,,,,
3,2002-01-29,59.080002,59.480000,57.910000,57.910000,40.064510,7373900,,,,...,,,,,,,,,,
4,2002-01-30,57.799999,59.950001,57.750000,59.750000,41.337513,10006100,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4526,2020-01-16,115.500000,115.930000,115.279999,115.900002,114.884079,5369400,47.111094,43.265350,35.392335,...,31.990830,27.939728,29.429154,30.924881,27.203420,31.499217,32.838168,35.510517,38.991491,38.607611
4527,2020-01-17,116.050003,116.300003,114.639999,114.959999,113.952309,10045000,21.698183,38.970565,36.301390,...,25.136620,28.868147,25.528030,26.936950,28.356908,25.196256,29.302679,30.589956,33.169706,36.551272
4528,2020-01-21,114.589996,115.690002,114.519997,115.589996,114.576790,7353700,39.845764,31.827505,45.304718,...,29.887428,29.959082,33.235868,29.600692,30.861031,32.133887,28.721020,32.459240,33.635088,35.997299
4529,2020-01-22,115.220001,116.529999,115.180000,116.099998,115.082321,4926100,52.820519,46.818200,38.289963,...,32.908946,33.365785,33.430509,36.397409,32.585210,33.741830,34.911721,31.340069,34.815321,35.911229
