In [2]:
# Import libraries
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import plot_roc_curve
from sklearn.metrics import accuracy_score, classification_report

# Load the price history; the indicator cells below add their columns to this frame.
price_data = pd.read_csv('price_data_pre_indicator.csv')
# Show the loaded frame as the cell output.
price_data

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price
0,BSESN,2003-07-14,3720.750000,3726.560059,3704.629883,3704.629883,21200.0,
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912
2,BSESN,2003-07-16,3721.649902,3729.020020,3662.699951,3702.139893,18400.0,35.309814
3,BSESN,2003-07-17,3668.909912,3750.000000,3660.340088,3728.840088,22000.0,-52.739990
4,BSESN,2003-07-18,3647.580078,3684.229980,3625.729980,3651.389893,22000.0,-21.329834
...,...,...,...,...,...,...,...,...
4255,BSESN,2020-11-03,40261.128906,40354.730469,39952.789063,39990.750000,21400.0,503.550781
4256,BSESN,2020-11-04,40616.140625,40693.511719,40076.468750,40171.710938,20900.0,355.011719
4257,BSESN,2020-11-05,41340.160156,41370.910156,41030.171875,41112.121094,42600.0,724.019531
4258,BSESN,2020-11-06,41893.058594,41954.929688,41383.289063,41438.761719,19000.0,552.898438


In [3]:
# Calculate the 14 day RSI
n = 14

# Work on two independent copies of the price changes: one will keep only the
# gains ("up" days), the other only the losses ("down" days).
up_df, down_df = price_data[['symbol','change_in_price']].copy(), price_data[['symbol','change_in_price']].copy()

# For up days, if the change is less than 0 set to 0.
# Only the masked cells are written. Assigning to `up_df.loc['change_in_price']`
# as well would append a spurious row labelled 'change_in_price' to the frame.
up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0

# For down days, if the change is greater than 0 set to 0.
down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0

# We need change in price to be absolute.
down_df['change_in_price'] = down_df['change_in_price'].abs()

# Calculate the EWMA (Exponential Weighted Moving Average) of gains and losses
# separately for each symbol; older values get less weight than newer ones.
ewma_up = up_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df.groupby('symbol')['change_in_price'].transform(lambda x: x.ewm(span = n).mean())

# Calculate the Relative Strength (average gain over average loss).
# When the average loss is 0 the ratio is inf and the RSI below evaluates to 100.
relative_strength = ewma_up / ewma_down

# Calculate the Relative Strength Index
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))

# Add the info to the data frame (assignment aligns on the row index).
price_data['down_days'] = down_df['change_in_price']
price_data['up_days'] = up_df['change_in_price']
price_data['RSI'] = relative_strength_index

# Display the first 30 rows.
price_data.head(30)

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI
0,BSESN,2003-07-14,3720.75,3726.560059,3704.629883,3704.629883,21200.0,,,,
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912,34.409912,0.0,0.0
2,BSESN,2003-07-16,3721.649902,3729.02002,3662.699951,3702.139893,18400.0,35.309814,0.0,35.309814,54.212916
3,BSESN,2003-07-17,3668.909912,3750.0,3660.340088,3728.840088,22000.0,-52.73999,52.73999,0.0,28.026871
4,BSESN,2003-07-18,3647.580078,3684.22998,3625.72998,3651.389893,22000.0,-21.329834,21.329834,0.0,22.871527
5,BSESN,2003-07-21,3569.580078,3664.639893,3564.409912,3655.26001,19400.0,-78.0,78.0,0.0,12.877123
6,BSESN,2003-07-22,3554.129883,3582.080078,3534.060059,3557.129883,22400.0,-15.450195,15.450195,0.0,11.707825
7,BSESN,2003-07-23,3577.889893,3596.590088,3551.97998,3571.75,31800.0,23.76001,0.0,23.76001,23.9599
8,BSESN,2003-07-24,3668.070068,3677.23999,3572.639893,3595.600098,44400.0,90.180175,0.0,90.180175,52.70299
9,BSESN,2003-07-25,3726.459961,3737.76001,3661.689941,3685.310059,25800.0,58.389893,0.0,58.389893,63.118375


In [4]:
# Stochastic oscillator (%K) over a look-back window of n rows.
n = 14

# Per symbol, track the lowest low and the highest high inside the rolling
# window. Rows whose window is not yet full come out as NaN.
low_14 = price_data.groupby('symbol')['low'].transform(
    lambda lows: lows.rolling(window=n).min()
)
high_14 = price_data.groupby('symbol')['high'].transform(
    lambda highs: highs.rolling(window=n).max()
)

# %K: where the close sits inside the [low_14, high_14] range, scaled to 0-100.
k_percent = 100 * (
    (price_data['close'] - low_14) / (high_14 - low_14)
)

# Store the window extremes and the oscillator on the data frame.
price_data['low_14'] = low_14
price_data['high_14'] = high_14
price_data['k_percent'] = k_percent

# Show the first 30 rows.
price_data.head(30)

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent
0,BSESN,2003-07-14,3720.75,3726.560059,3704.629883,3704.629883,21200.0,,,,,,,
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912,34.409912,0.0,0.0,,,
2,BSESN,2003-07-16,3721.649902,3729.02002,3662.699951,3702.139893,18400.0,35.309814,0.0,35.309814,54.212916,,,
3,BSESN,2003-07-17,3668.909912,3750.0,3660.340088,3728.840088,22000.0,-52.73999,52.73999,0.0,28.026871,,,
4,BSESN,2003-07-18,3647.580078,3684.22998,3625.72998,3651.389893,22000.0,-21.329834,21.329834,0.0,22.871527,,,
5,BSESN,2003-07-21,3569.580078,3664.639893,3564.409912,3655.26001,19400.0,-78.0,78.0,0.0,12.877123,,,
6,BSESN,2003-07-22,3554.129883,3582.080078,3534.060059,3557.129883,22400.0,-15.450195,15.450195,0.0,11.707825,,,
7,BSESN,2003-07-23,3577.889893,3596.590088,3551.97998,3571.75,31800.0,23.76001,0.0,23.76001,23.9599,,,
8,BSESN,2003-07-24,3668.070068,3677.23999,3572.639893,3595.600098,44400.0,90.180175,0.0,90.180175,52.70299,,,
9,BSESN,2003-07-25,3726.459961,3737.76001,3661.689941,3685.310059,25800.0,58.389893,0.0,58.389893,63.118375,,,


In [5]:
# Williams %R over a look-back window of n rows.
n = 14

# Per symbol, find the rolling lowest low and highest high of the window.
# Rows whose window is not yet full come out as NaN.
low_14 = price_data.groupby('symbol')['low'].transform(
    lambda lows: lows.rolling(window=n).min()
)
high_14 = price_data.groupby('symbol')['high'].transform(
    lambda highs: highs.rolling(window=n).max()
)

# %R: distance of the close from the window high, as a share of the window
# range, scaled to the interval -100..0.
r_percent = -100 * (
    (high_14 - price_data['close']) / (high_14 - low_14)
)

# Store the indicator on the data frame.
price_data['r_percent'] = r_percent

# Show the first 30 rows.
price_data.head(30)

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent
0,BSESN,2003-07-14,3720.75,3726.560059,3704.629883,3704.629883,21200.0,,,,,,,,
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912,34.409912,0.0,0.0,,,,
2,BSESN,2003-07-16,3721.649902,3729.02002,3662.699951,3702.139893,18400.0,35.309814,0.0,35.309814,54.212916,,,,
3,BSESN,2003-07-17,3668.909912,3750.0,3660.340088,3728.840088,22000.0,-52.73999,52.73999,0.0,28.026871,,,,
4,BSESN,2003-07-18,3647.580078,3684.22998,3625.72998,3651.389893,22000.0,-21.329834,21.329834,0.0,22.871527,,,,
5,BSESN,2003-07-21,3569.580078,3664.639893,3564.409912,3655.26001,19400.0,-78.0,78.0,0.0,12.877123,,,,
6,BSESN,2003-07-22,3554.129883,3582.080078,3534.060059,3557.129883,22400.0,-15.450195,15.450195,0.0,11.707825,,,,
7,BSESN,2003-07-23,3577.889893,3596.590088,3551.97998,3571.75,31800.0,23.76001,0.0,23.76001,23.9599,,,,
8,BSESN,2003-07-24,3668.070068,3677.23999,3572.639893,3595.600098,44400.0,90.180175,0.0,90.180175,52.70299,,,,
9,BSESN,2003-07-25,3726.459961,3737.76001,3661.689941,3685.310059,25800.0,58.389893,0.0,58.389893,63.118375,,,,


In [6]:
# Calculate the MACD: the 12-period EMA of the close minus the 26-period EMA,
# each computed separately per symbol.
ema_26 = price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 26).mean())
ema_12 = price_data.groupby('symbol')['close'].transform(lambda x: x.ewm(span = 12).mean())
macd = ema_12 - ema_26

# Calculate the signal line: the 9-period EMA of the MACD.
# Group by symbol here as well, so the average never carries over from the
# rows of one symbol into the rows of the next.
ema_9_macd = macd.groupby(price_data['symbol']).transform(lambda x: x.ewm(span = 9).mean())

# Store the data in the data frame.
price_data['MACD'] = macd
price_data['MACD_EMA'] = ema_9_macd

# Display the data frame.
price_data

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA
0,BSESN,2003-07-14,3720.750000,3726.560059,3704.629883,3704.629883,21200.0,,,,,,,,,0.000000,0.000000
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912,34.409912,0.000000,0.000000,,,,,-0.772017,-0.428898
2,BSESN,2003-07-16,3721.649902,3729.020020,3662.699951,3702.139893,18400.0,35.309814,0.000000,35.309814,54.212916,,,,,0.111252,-0.207525
3,BSESN,2003-07-17,3668.909912,3750.000000,3660.340088,3728.840088,22000.0,-52.739990,52.739990,0.000000,28.026871,,,,,-1.393904,-0.609415
4,BSESN,2003-07-18,3647.580078,3684.229980,3625.729980,3651.389893,22000.0,-21.329834,21.329834,0.000000,22.871527,,,,,-3.031201,-1.329841
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4255,BSESN,2020-11-03,40261.128906,40354.730469,39952.789063,39990.750000,21400.0,503.550781,0.000000,503.550781,55.461668,39241.871094,41048.050781,56.431695,-43.568305,251.256095,370.293241
4256,BSESN,2020-11-04,40616.140625,40693.511719,40076.468750,40171.710938,20900.0,355.011719,0.000000,355.011719,62.076880,39241.871094,40976.019531,79.247514,-20.752486,277.246758,351.683945
4257,BSESN,2020-11-05,41340.160156,41370.910156,41030.171875,41112.121094,42600.0,724.019531,0.000000,724.019531,71.898697,39241.871094,41370.910156,98.555686,-1.444314,352.206850,351.788526
4258,BSESN,2020-11-06,41893.058594,41954.929688,41383.289063,41438.761719,19000.0,552.898438,0.000000,552.898438,77.120077,39241.871094,41954.929688,97.719508,-2.280492,451.028372,371.636495


In [7]:
# Price Rate of Change: fractional change of the close versus n rows earlier.
n = 9

# Evaluate the change inside each symbol's own rows, then attach the result to
# the data frame. The first n rows of a symbol have no earlier close and are NaN.
price_rate_of_change = price_data.groupby('symbol')['close'].transform(
    lambda closes: closes.pct_change(periods=n)
)
price_data['Price_Rate_Of_Change'] = price_rate_of_change

# Show the data frame.
price_data

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA,Price_Rate_Of_Change
0,BSESN,2003-07-14,3720.750000,3726.560059,3704.629883,3704.629883,21200.0,,,,,,,,,0.000000,0.000000,
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912,34.409912,0.000000,0.000000,,,,,-0.772017,-0.428898,
2,BSESN,2003-07-16,3721.649902,3729.020020,3662.699951,3702.139893,18400.0,35.309814,0.000000,35.309814,54.212916,,,,,0.111252,-0.207525,
3,BSESN,2003-07-17,3668.909912,3750.000000,3660.340088,3728.840088,22000.0,-52.739990,52.739990,0.000000,28.026871,,,,,-1.393904,-0.609415,
4,BSESN,2003-07-18,3647.580078,3684.229980,3625.729980,3651.389893,22000.0,-21.329834,21.329834,0.000000,22.871527,,,,,-3.031201,-1.329841,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4255,BSESN,2020-11-03,40261.128906,40354.730469,39952.789063,39990.750000,21400.0,503.550781,0.000000,503.550781,55.461668,39241.871094,41048.050781,56.431695,-43.568305,251.256095,370.293241,-0.010961
4256,BSESN,2020-11-04,40616.140625,40693.511719,40076.468750,40171.710938,20900.0,355.011719,0.000000,355.011719,62.076880,39241.871094,40976.019531,79.247514,-20.752486,277.246758,351.683945,0.001421
4257,BSESN,2020-11-05,41340.160156,41370.910156,41030.171875,41112.121094,42600.0,724.019531,0.000000,724.019531,71.898697,39241.871094,41370.910156,98.555686,-1.444314,352.206850,351.788526,0.016091
4258,BSESN,2020-11-06,41893.058594,41954.929688,41383.289063,41438.761719,19000.0,552.898438,0.000000,552.898438,77.120077,39241.871094,41954.929688,97.719508,-2.280492,451.028372,371.636495,0.043531


In [25]:
def obv(group):
    """Return the On Balance Volume series for one group of rows.

    Starting from 0, the running total gains the row's volume when the close
    rose versus the previous row, loses it when the close fell, and is carried
    over unchanged otherwise (including the first row, which has no previous
    close to compare against).

    Parameters
    ----------
    group : DataFrame
        Rows providing a 'volume' and a 'close' column.

    Returns
    -------
    Series
        One running-total value per row, indexed like ``group``.
    """
    volumes = group['volume']
    close_deltas = group['close'].diff()

    running_total = 0
    totals = []
    for delta, traded in zip(close_deltas, volumes):
        # A NaN delta fails both comparisons, so the total is left as it is.
        if delta > 0:
            running_total = running_total + traded
        elif delta < 0:
            running_total = running_total - traded
        totals.append(running_total)

    return pd.Series(totals, index = group.index)

# Apply the function to each symbol's rows and stitch the pieces back together.
# Every piece keeps the row labels of its group, so the combined series lines up
# with price_data by index whether there is one symbol or many, and whatever
# index the frame happens to use.
obv_pieces = [obv(group) for _, group in price_data.groupby('symbol')]
# pd.concat refuses an empty list, so fall back to an empty series in that case.
obv_groups = pd.concat(obv_pieces) if obv_pieces else pd.Series(dtype='float64')
price_data['On Balance Volume'] = obv_groups

In [26]:
# Show the frame with every indicator column added so far; long frames are
# rendered with the middle rows elided.
price_data

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA,Price_Rate_Of_Change,On Balance Volume
0,BSESN,2003-07-14,3720.750000,3726.560059,3704.629883,3704.629883,21200.0,,,,,,,,,0.000000,0.000000,,0.0
1,BSESN,2003-07-15,3686.340088,3733.879883,3657.179932,3719.169922,22800.0,-34.409912,34.409912,0.000000,0.000000,,,,,-0.772017,-0.428898,,-22800.0
2,BSESN,2003-07-16,3721.649902,3729.020020,3662.699951,3702.139893,18400.0,35.309814,0.000000,35.309814,54.212916,,,,,0.111252,-0.207525,,-4400.0
3,BSESN,2003-07-17,3668.909912,3750.000000,3660.340088,3728.840088,22000.0,-52.739990,52.739990,0.000000,28.026871,,,,,-1.393904,-0.609415,,-26400.0
4,BSESN,2003-07-18,3647.580078,3684.229980,3625.729980,3651.389893,22000.0,-21.329834,21.329834,0.000000,22.871527,,,,,-3.031201,-1.329841,,-48400.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4255,BSESN,2020-11-03,40261.128906,40354.730469,39952.789063,39990.750000,21400.0,503.550781,0.000000,503.550781,55.461668,39241.871094,41048.050781,56.431695,-43.568305,251.256095,370.293241,-0.010961,63790400.0
4256,BSESN,2020-11-04,40616.140625,40693.511719,40076.468750,40171.710938,20900.0,355.011719,0.000000,355.011719,62.076880,39241.871094,40976.019531,79.247514,-20.752486,277.246758,351.683945,0.001421,63811300.0
4257,BSESN,2020-11-05,41340.160156,41370.910156,41030.171875,41112.121094,42600.0,724.019531,0.000000,724.019531,71.898697,39241.871094,41370.910156,98.555686,-1.444314,352.206850,351.788526,0.016091,63853900.0
4258,BSESN,2020-11-06,41893.058594,41954.929688,41383.289063,41438.761719,19000.0,552.898438,0.000000,552.898438,77.120077,39241.871094,41954.929688,97.719508,-2.280492,451.028372,371.636495,0.043531,63872900.0


In [27]:
# Persist the frame with all indicator columns. `to_csv` returns None when it is
# given a path, so there is nothing meaningful to capture or print afterwards.
# NOTE(review): the output name has no ".csv" extension and the row index is
# written as an extra column; both are left unchanged in case other code reads
# this file. Confirm before changing.
price_data.to_csv("price_data_indicators")

None
