In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')

# Acquire Data with yfinance

## Data import

In [2]:
# Stock universe: bare ticker codes from the snapshot CSV
base_symbols = pd.read_csv('./stock_list/stock_all_20230510.csv')['ticker'].tolist()
print('Number of stocks:', len(base_symbols))
# Append the '.JK' suffix used for the yfinance symbols requested below
ticker_list = [symbol + '.JK' for symbol in base_symbols]
# Look-back window handed to history() when downloading prices
period = '1y'

Number of stocks: 869


In [3]:
import yfinance as yf
# Download the price history of every ticker and stack it into one long frame.
# Frames are collected in a list and concatenated once: calling pd.concat
# inside the loop re-copies every previously accumulated row on each iteration.
price_frames = []
for ticker in ticker_list:
    stock = yf.Ticker(ticker)
    print(stock)
    stock_df = stock.history(period=period)
    if stock_df.empty:
        # Nothing was returned for this symbol, so it contributes no rows.
        continue
    stock_df['Ticker'] = ticker
    price_frames.append(stock_df)

# Keep a 'Ticker' column even when nothing was downloaded so the summary below still runs.
df = pd.concat(price_frames) if price_frames else pd.DataFrame(columns=['Ticker'])

# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only added a stray "None" line to the output.
df.info()
print('Number of ticker data acquired: ', df['Ticker'].nunique())

yfinance.Ticker object <AALI.JK>
yfinance.Ticker object <ABBA.JK>
yfinance.Ticker object <ABDA.JK>
yfinance.Ticker object <ABMM.JK>
yfinance.Ticker object <ACES.JK>
yfinance.Ticker object <ACST.JK>
yfinance.Ticker object <ADCP.JK>
yfinance.Ticker object <ADES.JK>
yfinance.Ticker object <ADFO.JK>
ADFO.JK: No data found, symbol may be delisted
yfinance.Ticker object <ADHI.JK>
yfinance.Ticker object <ADMF.JK>
yfinance.Ticker object <ADMG.JK>
yfinance.Ticker object <ADMR.JK>
yfinance.Ticker object <ADRO.JK>
yfinance.Ticker object <AGAR.JK>
yfinance.Ticker object <AGII.JK>
yfinance.Ticker object <AGRO.JK>
yfinance.Ticker object <AGRS.JK>
yfinance.Ticker object <AHAP.JK>
yfinance.Ticker object <AIMS.JK>
yfinance.Ticker object <AISA.JK>
yfinance.Ticker object <AKKU.JK>
yfinance.Ticker object <AKPI.JK>
yfinance.Ticker object <AKRA.JK>
yfinance.Ticker object <AKSI.JK>
yfinance.Ticker object <ALDO.JK>
yfinance.Ticker object <ALKA.JK>
yfinance.Ticker object <ALMI.JK>
yfinance.Ticker object <ALTO.

In [4]:
# Convert the Date index to timezone-aware UTC timestamps, then display it
utc_index = pd.to_datetime(df.index, utc=True)
df.index = utc_index
df.index

DatetimeIndex(['2022-05-16 17:00:00+00:00', '2022-05-17 17:00:00+00:00',
               '2022-05-18 17:00:00+00:00', '2022-05-19 17:00:00+00:00',
               '2022-05-22 17:00:00+00:00', '2022-05-23 17:00:00+00:00',
               '2022-05-24 17:00:00+00:00', '2022-05-26 17:00:00+00:00',
               '2022-05-29 17:00:00+00:00', '2022-05-30 17:00:00+00:00',
               ...
               '2023-05-03 17:00:00+00:00', '2023-05-04 17:00:00+00:00',
               '2023-05-07 17:00:00+00:00', '2023-05-08 17:00:00+00:00',
               '2023-05-09 17:00:00+00:00', '2023-05-10 17:00:00+00:00',
               '2023-05-11 17:00:00+00:00', '2023-05-14 17:00:00+00:00',
               '2023-05-15 17:00:00+00:00', '2023-05-16 17:00:00+00:00'],
              dtype='datetime64[ns, UTC]', name='Date', length=202407, freq=None)

## Add 'change' and 'range'

In [5]:
# Per-row move: closing price minus opening price
df['change'] = df['Close'].sub(df['Open'])

# Per-row spread: high minus low
df['range'] = df['High'].sub(df['Low'])

# Preview only the ticker and the two derived columns
df.loc[:, ['Ticker', 'change', 'range']]

Unnamed: 0_level_0,Ticker,change,range
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2022-05-16 17:00:00+00:00,AALI.JK,-142.670239,214.005358
2022-05-17 17:00:00+00:00,AALI.JK,-142.670241,237.783734
2022-05-18 17:00:00+00:00,AALI.JK,237.783743,523.124236
2022-05-19 17:00:00+00:00,AALI.JK,499.345837,642.016076
2022-05-22 17:00:00+00:00,AALI.JK,-784.686335,808.464709
...,...,...,...
2023-05-10 17:00:00+00:00,ZYRX.JK,4.000000,6.000000
2023-05-11 17:00:00+00:00,ZYRX.JK,-12.000000,12.000000
2023-05-14 17:00:00+00:00,ZYRX.JK,8.000000,12.000000
2023-05-15 17:00:00+00:00,ZYRX.JK,0.000000,10.000000


# Parameters for Technical Analysis

In [46]:
# Moving-average crossover windows: (short, long)
window_short_cross, window_long_cross = 9, 26

# Exponential moving average window
window_ema = 20

# MACD windows: (short, long, signal)
window_short_macd, window_long_macd, window_sign_macd = 12, 26, 9

# MA Cross Calculation

In [47]:
import ta
def get_sma_cross(df, window_short, window_long, verbose=True):
    """Compute the short-minus-long simple-moving-average spread per ticker.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format price frame with a 'Ticker' column and a 'Close' column.
    window_short, window_long : int
        Window lengths passed to ``ta.trend.sma_indicator`` for the short and
        the long moving average.
    verbose : bool, default True
        Print one progress line per ticker (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One column per ticker, in order of first appearance in ``df``, holding
        ``sma_short - sma_long``. Rows are aligned to the index of the first
        ticker, exactly as the previous column-by-column construction did.
        An empty ``df`` yields an empty DataFrame.
    """
    spreads = {}
    # groupby splits the frame in a single pass instead of re-scanning every
    # row with a boolean mask for each ticker; sort=False keeps the
    # first-appearance order that df['Ticker'].unique() produced.
    for ticker, ticker_df in df.groupby('Ticker', sort=False):
        close = ticker_df['Close']
        sma_short = ta.trend.sma_indicator(close=close, window=window_short, fillna=True)
        sma_long = ta.trend.sma_indicator(close=close, window=window_long, fillna=True)
        if verbose:
            print(ticker, ' SMA Cross Calculated')
        spreads[ticker] = sma_short - sma_long

    if not spreads:
        return pd.DataFrame()

    # Build the wide frame once rather than inserting hundreds of columns one
    # at a time into a growing DataFrame.
    first_index = next(iter(spreads.values())).index
    return pd.DataFrame(spreads, index=first_index)

In [48]:
# Run the SMA-cross calculation with the configured windows and print the latest row
sma_cross_df = get_sma_cross(
    df,
    window_short=window_short_cross,
    window_long=window_long_cross,
)
print(sma_cross_df.tail(1))

AALI.JK  SMA Cross Calculated
ABBA.JK  SMA Cross Calculated
ABDA.JK  SMA Cross Calculated
ABMM.JK  SMA Cross Calculated
ACES.JK  SMA Cross Calculated
ACST.JK  SMA Cross Calculated
ADCP.JK  SMA Cross Calculated
ADES.JK  SMA Cross Calculated
ADHI.JK  SMA Cross Calculated
ADMF.JK  SMA Cross Calculated
ADMG.JK  SMA Cross Calculated
ADMR.JK  SMA Cross Calculated
ADRO.JK  SMA Cross Calculated
AGAR.JK  SMA Cross Calculated
AGII.JK  SMA Cross Calculated
AGRO.JK  SMA Cross Calculated
AGRS.JK  SMA Cross Calculated
AHAP.JK  SMA Cross Calculated
AIMS.JK  SMA Cross Calculated
AISA.JK  SMA Cross Calculated
AKKU.JK  SMA Cross Calculated
AKPI.JK  SMA Cross Calculated
AKRA.JK  SMA Cross Calculated
AKSI.JK  SMA Cross Calculated
ALDO.JK  SMA Cross Calculated
ALKA.JK  SMA Cross Calculated
ALMI.JK  SMA Cross Calculated
ALTO.JK  SMA Cross Calculated
AMAG.JK  SMA Cross Calculated
AMAN.JK  SMA Cross Calculated
AMAR.JK  SMA Cross Calculated
AMFG.JK  SMA Cross Calculated
AMIN.JK  SMA Cross Calculated
AMMS.JK  S

# EMA Calculation

In [49]:
import ta
def get_ema(df, window, verbose=True):
    """Compute the exponential moving average of the close price per ticker.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format price frame with a 'Ticker' column and a 'Close' column.
    window : int
        Window length passed to ``ta.trend.EMAIndicator``.
    verbose : bool, default True
        Print one progress line per ticker (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One column per ticker, in order of first appearance in ``df``. Rows are
        aligned to the index of the first ticker, exactly as the previous
        column-by-column construction did. An empty ``df`` yields an empty
        DataFrame.
    """
    ema_by_ticker = {}
    # One groupby pass replaces a full-frame boolean filter per ticker;
    # sort=False keeps the order df['Ticker'].unique() produced.
    for ticker, ticker_df in df.groupby('Ticker', sort=False):
        indicator = ta.trend.EMAIndicator(close=ticker_df['Close'], window=window, fillna=True)
        if verbose:
            print(ticker, ' EMA Calculated')
        ema_by_ticker[ticker] = indicator.ema_indicator()

    if not ema_by_ticker:
        return pd.DataFrame()

    # Assemble all columns in one step instead of growing the frame column by column.
    first_index = next(iter(ema_by_ticker.values())).index
    return pd.DataFrame(ema_by_ticker, index=first_index)

In [50]:
# Run the EMA calculation with the configured window and print the latest rows
ema_df = get_ema(df, window=window_ema)
print(ema_df.tail())

AALI.JK  EMA Calculated
ABBA.JK  EMA Calculated
ABDA.JK  EMA Calculated
ABMM.JK  EMA Calculated
ACES.JK  EMA Calculated
ACST.JK  EMA Calculated
ADCP.JK  EMA Calculated
ADES.JK  EMA Calculated
ADHI.JK  EMA Calculated
ADMF.JK  EMA Calculated
ADMG.JK  EMA Calculated
ADMR.JK  EMA Calculated
ADRO.JK  EMA Calculated
AGAR.JK  EMA Calculated
AGII.JK  EMA Calculated
AGRO.JK  EMA Calculated
AGRS.JK  EMA Calculated
AHAP.JK  EMA Calculated
AIMS.JK  EMA Calculated
AISA.JK  EMA Calculated
AKKU.JK  EMA Calculated
AKPI.JK  EMA Calculated
AKRA.JK  EMA Calculated
AKSI.JK  EMA Calculated
ALDO.JK  EMA Calculated
ALKA.JK  EMA Calculated
ALMI.JK  EMA Calculated
ALTO.JK  EMA Calculated
AMAG.JK  EMA Calculated
AMAN.JK  EMA Calculated
AMAR.JK  EMA Calculated
AMFG.JK  EMA Calculated
AMIN.JK  EMA Calculated
AMMS.JK  EMA Calculated
AMOR.JK  EMA Calculated
AMRT.JK  EMA Calculated
ANDI.JK  EMA Calculated
ANJT.JK  EMA Calculated
ANTM.JK  EMA Calculated
APEX.JK  EMA Calculated
APIC.JK  EMA Calculated
APII.JK  EMA Cal

# MACD Calculation

In [51]:
def get_macd(df, window_short, window_long, window_sign, verbose=True):
    """Compute the MACD line, signal line and their difference per ticker.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format price frame with a 'Ticker' column and a 'Close' column.
    window_short, window_long, window_sign : int
        Passed to ``ta.trend.MACD`` as ``window_fast``, ``window_slow`` and
        ``window_sign`` respectively.
    verbose : bool, default True
        Print one progress line per ticker (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        Three columns per ticker, named ``<ticker>_MACD_Line``,
        ``<ticker>_MACD_Signal`` and ``<ticker>_MACD_Diff``, in order of first
        appearance of the ticker in ``df``. Rows are aligned to the index of
        the first ticker, exactly as the previous column-by-column
        construction did. An empty ``df`` yields an empty DataFrame.
    """
    macd_columns = {}
    # One groupby pass replaces a full-frame boolean filter per ticker;
    # sort=False keeps the order df['Ticker'].unique() produced.
    for ticker, ticker_df in df.groupby('Ticker', sort=False):
        # Build the indicator once and read all three outputs from it; the
        # previous code constructed an identical MACD object three times.
        indicator = ta.trend.MACD(
            ticker_df['Close'],
            window_slow=window_long,
            window_fast=window_short,
            window_sign=window_sign,
        )
        if verbose:
            print(ticker, ' MACD Calculated')
        macd_columns[f'{ticker}_MACD_Line'] = indicator.macd()
        macd_columns[f'{ticker}_MACD_Signal'] = indicator.macd_signal()
        macd_columns[f'{ticker}_MACD_Diff'] = indicator.macd_diff()

    if not macd_columns:
        return pd.DataFrame()

    # Assemble all columns in one step instead of growing the frame column by column.
    first_index = next(iter(macd_columns.values())).index
    return pd.DataFrame(macd_columns, index=first_index)

In [52]:
# Run the MACD calculation with the configured windows and show the latest row
macd_df = get_macd(
    df,
    window_short=window_short_macd,
    window_long=window_long_macd,
    window_sign=window_sign_macd,
)
macd_df.tail(1)

AALI.JK  MACD Calculated
ABBA.JK  MACD Calculated
ABDA.JK  MACD Calculated
ABMM.JK  MACD Calculated
ACES.JK  MACD Calculated
ACST.JK  MACD Calculated
ADCP.JK  MACD Calculated
ADES.JK  MACD Calculated
ADHI.JK  MACD Calculated
ADMF.JK  MACD Calculated
ADMG.JK  MACD Calculated
ADMR.JK  MACD Calculated
ADRO.JK  MACD Calculated
AGAR.JK  MACD Calculated
AGII.JK  MACD Calculated
AGRO.JK  MACD Calculated
AGRS.JK  MACD Calculated
AHAP.JK  MACD Calculated
AIMS.JK  MACD Calculated
AISA.JK  MACD Calculated
AKKU.JK  MACD Calculated
AKPI.JK  MACD Calculated
AKRA.JK  MACD Calculated
AKSI.JK  MACD Calculated
ALDO.JK  MACD Calculated
ALKA.JK  MACD Calculated
ALMI.JK  MACD Calculated
ALTO.JK  MACD Calculated
AMAG.JK  MACD Calculated
AMAN.JK  MACD Calculated
AMAR.JK  MACD Calculated
AMFG.JK  MACD Calculated
AMIN.JK  MACD Calculated
AMMS.JK  MACD Calculated
AMOR.JK  MACD Calculated
AMRT.JK  MACD Calculated
ANDI.JK  MACD Calculated
ANJT.JK  MACD Calculated
ANTM.JK  MACD Calculated
APEX.JK  MACD Calculated


Unnamed: 0_level_0,AALI.JK_MACD_Line,AALI.JK_MACD_Signal,AALI.JK_MACD_Diff,ABBA.JK_MACD_Line,ABBA.JK_MACD_Signal,ABBA.JK_MACD_Diff,ABDA.JK_MACD_Line,ABDA.JK_MACD_Signal,ABDA.JK_MACD_Diff,ABMM.JK_MACD_Line,...,ZBRA.JK_MACD_Diff,ZINC.JK_MACD_Line,ZINC.JK_MACD_Signal,ZINC.JK_MACD_Diff,ZONE.JK_MACD_Line,ZONE.JK_MACD_Signal,ZONE.JK_MACD_Diff,ZYRX.JK_MACD_Line,ZYRX.JK_MACD_Signal,ZYRX.JK_MACD_Diff
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2023-05-16 17:00:00+00:00,-81.754019,-58.661677,-23.092343,-2.700418,-2.359527,-0.340891,,,,104.947305,...,-0.121483,-0.000716,-0.001052,0.000337,-19.478745,-16.204144,-3.274601,-1.687079,-1.872041,0.184962


In [53]:
# Inclusive bounds on the latest MACD difference used to shortlist tickers
macd_diff_low = -1.5
macd_diff_high = 1.5

# Most recent row of the wide MACD frame
last_row = macd_df.iloc[-1]

# Per-ticker '<ticker>_MACD_Diff' entries of that row, in ticker order
tickers = df['Ticker'].unique()
macd_diff_columns = [f'{ticker}_MACD_Diff' for ticker in tickers]
latest_macd_diff = last_row[macd_diff_columns]

# True where the latest difference lies within [macd_diff_low, macd_diff_high]
mask = latest_macd_diff.between(macd_diff_low, macd_diff_high)

# Keep only the entries (ticker label and value) that satisfy the mask
filtered_tickers_macd = latest_macd_diff[mask]

print(filtered_tickers_macd)

ABBA.JK_MACD_Diff   -0.340891
ACST.JK_MACD_Diff    0.034712
ADCP.JK_MACD_Diff   -0.030557
ADMG.JK_MACD_Diff    1.180016
AGII.JK_MACD_Diff    0.516844
                       ...   
YPAS.JK_MACD_Diff    0.944314
ZATA.JK_MACD_Diff   -0.322988
ZBRA.JK_MACD_Diff   -0.121483
ZINC.JK_MACD_Diff    0.000337
ZYRX.JK_MACD_Diff    0.184962
Name: 2023-05-16 17:00:00+00:00, Length: 431, dtype: float64


# RSI Calculation

In [54]:
import ta
def get_rsi(df, window=14, verbose=True):
    """Compute the Relative Strength Index of the close price per ticker.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format price frame with a 'Ticker' column and a 'Close' column.
    window : int, default 14
        Window length passed to ``ta.momentum.RSIIndicator``. 14 is that
        class's own default, so calls that omit it give the same values as
        before this parameter existed.
    verbose : bool, default True
        Print one progress line per ticker (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        One column per ticker, in order of first appearance in ``df``. Rows are
        aligned to the index of the first ticker, exactly as the previous
        column-by-column construction did. An empty ``df`` yields an empty
        DataFrame.
    """
    rsi_by_ticker = {}
    # One groupby pass replaces a full-frame boolean filter per ticker;
    # sort=False keeps the order df['Ticker'].unique() produced.
    for ticker, ticker_df in df.groupby('Ticker', sort=False):
        indicator = ta.momentum.RSIIndicator(ticker_df['Close'], window=window)
        if verbose:
            print(ticker, ' RSI Calculated')
        rsi_by_ticker[ticker] = indicator.rsi()

    if not rsi_by_ticker:
        return pd.DataFrame()

    # Assemble all columns in one step instead of growing the frame column by column.
    first_index = next(iter(rsi_by_ticker.values())).index
    return pd.DataFrame(rsi_by_ticker, index=first_index)

In [None]:
# Compute RSI for every ticker and summarise the resulting frame.
rsi_df = get_rsi(df)
# DataFrame.info() prints its report itself and returns None; wrapping it in
# print() only added a stray "None" line to the output.
rsi_df.info()

AALI.JK  RSI Calculated
ABBA.JK  RSI Calculated
ABDA.JK  RSI Calculated
ABMM.JK  RSI Calculated
ACES.JK  RSI Calculated
ACST.JK  RSI Calculated
ADCP.JK  RSI Calculated
ADES.JK  RSI Calculated
ADHI.JK  RSI Calculated
ADMF.JK  RSI Calculated
ADMG.JK  RSI Calculated
ADMR.JK  RSI Calculated
ADRO.JK  RSI Calculated


# Compilation of last close price and last technicals

Final dataframe `last_df`, indexed by ticker, containing last_close, last_sma_cross, last_ema, last_rsi, and last_macd_diff.

In [None]:
# Latest close per ticker (last non-null 'Close' within each ticker's rows).
last_close = df.groupby('Ticker')['Close'].last()

# Latest row of each indicator frame. .iloc[-1] always returns a Series keyed
# by column name, whereas .tail(1).T.squeeze() collapses to a bare scalar when
# the frame has a single column (i.e. a single ticker).
last_ema = ema_df.iloc[-1]
last_sma_cross = sma_cross_df.iloc[-1]
last_rsi = rsi_df.iloc[-1]
last_macd_diff = macd_df.filter(regex='_MACD_Diff$').iloc[-1]

# Recover the ticker label from each column name instead of overwriting the
# labels positionally with df['Ticker'].unique(), which silently mislabels
# values if the two orderings ever differ.
last_macd_diff.index = last_macd_diff.index.str.replace('_MACD_Diff$', '', regex=True)

# One row per ticker, one column per metric; the Series are aligned on ticker.
last_df = pd.DataFrame({
    'last_close': last_close,
    'last_sma_cross': last_sma_cross,
    'last_ema': last_ema,
    'last_rsi': last_rsi,
    'last_macd_diff': last_macd_diff,
})
last_df.head()

Add index-membership flags to `last_df` (one boolean column per stock index list)

In [None]:
def load_index_tickers(index_name, directory='./stock_list'):
    """Return the constituents of one stock index as '.JK'-suffixed tickers.

    Reads ``<directory>/<index_name>.csv`` and takes its 'Ticker' column.

    Parameters
    ----------
    index_name : str
        File stem of the constituent list, e.g. ``'lq45'``.
    directory : str, default './stock_list'
        Folder that holds the constituent CSV files.

    Returns
    -------
    list of str
        Each value of the 'Ticker' column with '.JK' appended.
    """
    constituents = pd.read_csv(f'{directory}/{index_name}.csv')['Ticker'].tolist()
    return [ticker + '.JK' for ticker in constituents]


# One list per index, loaded through the shared helper instead of seven
# copy-pasted read-and-suffix pairs.
lq45 = load_index_tickers('lq45')
idx30 = load_index_tickers('idx30')
idx80 = load_index_tickers('idx80')
bisnis27 = load_index_tickers('bisnis27')
mnc36 = load_index_tickers('mnc36')
pefindo25 = load_index_tickers('pefindo25')
kompas100 = load_index_tickers('kompas100')

print('Index list imported')

In [None]:
# For every index list, add a boolean column telling whether each ticker in
# last_df is one of its constituents (column order follows this mapping)
last_df_index_lists = {
    'kompas100': kompas100,
    'lq45': lq45,
    'idx30': idx30,
    'idx80': idx80,
    'bisnis27': bisnis27,
    'mnc36': mnc36,
    'pefindo25': pefindo25,
}
for flag_column, constituents in last_df_index_lists.items():
    last_df[flag_column] = last_df.index.isin(constituents)
last_df.head()

# Recommended Stocks (Technical Criteria)

In [None]:
#Mask 0: Index-related filter (the membership column is already boolean)
mask0 = last_df['kompas100']

#Mask 1: SMA Cross related filter --> SMA cross between x1 and x2
mask1 = last_df['last_sma_cross'].between(-5,5)

#Mask 2: EMA-related filter --> last close between 100% and 110% of the EMA
mask2 = (last_df['last_close']/last_df['last_ema']).between(1, 1.1)

#Mask 3: MACD-related filter --> MACD difference between x1 and x2
mask3 = last_df['last_macd_diff'].between(-1,1)

#Mask 4: RSI-related filter --> RSI below x
mask4 = last_df['last_rsi'] < 60

# Combine the masks into one boolean Series and filter once. Chaining
# last_df[mask0][mask2][mask4] applied full-length masks to already-filtered
# frames, which forces pandas to reindex each mask and warn about it.
# NOTE(review): mask1 (SMA cross) and mask3 (MACD) are defined but not applied,
# exactly as before; add them to the conjunction below to enable them.
rec_df = last_df[mask0 & mask2 & mask4]
print('Number of recommended stocks: ', len(rec_df), '\n')
rec_df

# Pivot Points

In [None]:
# Calculate Pivot Points
def calculate_pivot_points(df):
    """Compute the pivot point and three support/resistance levels per row.

    The input frame is left untouched: the levels are written to a copy, so
    calling this function no longer adds 'PP', 'S1'..'S3' and 'R1'..'R3'
    columns to the caller's DataFrame as a side effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Ticker', 'Open', 'High', 'Low', 'Close' and 'Volume'.

    Returns
    -------
    pandas.DataFrame
        The six input columns followed by 'PP', 'S1', 'S2', 'S3', 'R1', 'R2'
        and 'R3', on the same index as ``df``.

    Raises
    ------
    KeyError
        If any of the required columns is missing.
    """
    pivots = df[['Ticker', 'Open', 'High', 'Low', 'Close', 'Volume']].copy()
    high, low, close = pivots['High'], pivots['Low'], pivots['Close']

    # Pivot Point (PP): mean of high, low and close
    pp = (high + low + close) / 3
    pivots['PP'] = pp

    # Support levels
    pivots['S1'] = (2 * pp) - high
    pivots['S2'] = pp - (high - low)
    pivots['S3'] = low - 2 * (high - pp)

    # Resistance levels
    pivots['R1'] = (2 * pp) - low
    pivots['R2'] = pp + (high - low)
    pivots['R3'] = high + 2 * (pp - low)

    return pivots

In [None]:
# Pivot levels for every row, then the last non-null value of each column per ticker
pivot_points = calculate_pivot_points(df)
pivot_points_by_ticker = pivot_points.groupby('Ticker')
last_pivot_points = pivot_points_by_ticker.last()

In [None]:
# Latest pivot levels for the tickers recommended by the technical filters
last_pivot_points.loc[rec_df.index, :]

# Logistic Regression Classification Model

Using coefficients and intercepts from stock_list_classification_technical.py

In [None]:
# Score every ticker that made it into last_df
chosen_ticker_list = list(last_df.index)

In [None]:
# Feature windows used in this notebook, laid out like the saved model's parameter file
current_feature_parameters = pd.DataFrame.from_dict(
    {
        'window_short_cross': window_short_cross,
        'window_long_cross': window_long_cross,
        'window_ema': window_ema,
        'window_short_macd': window_short_macd,
        'window_long_macd': window_long_macd,
        'window_sign_macd': window_sign_macd,
    },
    orient='index',
    columns=['Value'],
)
model_feature_parameters = pd.read_csv('./lr_model/feature_parameters.csv', index_col=0)


def sigmoid(z):
    """Logistic function: map a linear score to a probability in (0, 1)."""
    return 1 / (1 + np.exp(-z))


def latest_pct_change(series):
    """Return the last value of ``series.pct_change()``."""
    return series.pct_change().iloc[-1]


# Rows are collected in a list and turned into a frame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the frame on every call.
lr_records = []

# Apply the model only if the notebook's feature windows match the ones it was saved with
if current_feature_parameters.equals(model_feature_parameters):
    print('Feature parameters match!', '\n')
    lr = pd.read_csv('./lr_model/lr_model.csv').set_index('Ticker')
    for chosen_ticker in chosen_ticker_list:
        # Skip tickers for which no model row was saved
        if chosen_ticker not in lr.index:
            print(f'{chosen_ticker} model is not available.')
            continue

        # First model row for this ticker, selected by label. The previous
        # `series[0]` lookup relied on positional fallback for a label-indexed
        # Series, which pandas has deprecated.
        model_row = lr.loc[[chosen_ticker]].iloc[0]

        # 'coef' is stored as text such as "[0.1 -0.2 ...]"; parse it back into floats
        coef = [float(x) for x in model_row['coef'].strip('[]').split()]
        intercept = model_row['intercept']

        # Latest percentage change of each indicator.
        # NOTE(review): the order presumably has to match the order used when
        # the coefficients were fitted — confirm against the training script.
        feature = [
            latest_pct_change(ema_df[chosen_ticker]),
            latest_pct_change(sma_cross_df[chosen_ticker]),
            latest_pct_change(macd_df[chosen_ticker + '_MACD_Line']),
            latest_pct_change(macd_df[chosen_ticker + '_MACD_Signal']),
            latest_pct_change(macd_df[chosen_ticker + '_MACD_Diff']),
            latest_pct_change(rsi_df[chosen_ticker]),
        ]

        # Linear combination of features and coefficients plus intercept,
        # squashed into the predicted probability of the positive class
        z = np.dot(feature, coef) + intercept
        y_pred_proba = sigmoid(z)

        lr_records.append({
            'Ticker': chosen_ticker,
            'Accuracy': model_row['Accuracy'],
            'increase_proba': y_pred_proba,
        })
else:
    print('Model parameters do not match')

# Passing the columns explicitly keeps set_index working when no record was produced
lr_df = pd.DataFrame(lr_records, columns=['Ticker', 'Accuracy', 'increase_proba']).set_index('Ticker')

In [None]:
# For every index list, add a boolean column telling whether each ticker in
# lr_df is one of its constituents (column order follows this mapping)
lr_df_index_lists = {
    'kompas100': kompas100,
    'lq45': lq45,
    'idx30': idx30,
    'idx80': idx80,
    'bisnis27': bisnis27,
    'mnc36': mnc36,
    'pefindo25': pefindo25,
}
for flag_column, constituents in lr_df_index_lists.items():
    lr_df[flag_column] = lr_df.index.isin(constituents)
lr_df.head()

# Recommended Stocks (Classification Model)

In [None]:
#Mask 0: Index-related filter (the membership column is already boolean)
mask0 = lr_df['kompas100']

#Mask 1: Model accuracy filter
mask1 = lr_df['Accuracy'].between(0.5,0.9)

#Mask 2: Price increase probability filter
mask2 = lr_df['increase_proba'] > 0.5

# Combine the masks and filter once. Chaining lr_df[mask0][mask1][mask2]
# applied full-length masks to already-filtered frames (pandas has to reindex
# each mask and warns about it), and the whole filter was evaluated twice.
rec_lr_df = lr_df[mask0 & mask1 & mask2]
print('Number of recommended stocks: ', len(rec_lr_df), '\n')
rec_lr_df

# Additional
For customized analysis