In [260]:
import pandas as pd
import yfinance as yf

Input ticker code and target.
For target, 'up' means the price increase/decrease for the same day as the technical indicator while 'next_up' does for the next day.

In [261]:
# Input ticker code
ticker = 'ZYRX.JK'

# Select 'up' or 'next_up' as target
target_col = 'next_up'

# Data Acquisition (yfinance)

In [262]:
obj = yf.Ticker(ticker)
print(obj)
df = obj.history(period='5y')
df.head()

yfinance.Ticker object <ZYRX.JK>


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-03-30 00:00:00+07:00,306.78894,306.78894,306.78894,306.78894,1596900,0.0,0.0
2021-03-31 00:00:00+07:00,383.486176,383.486176,383.486176,383.486176,14900,0.0,0.0
2021-04-01 00:00:00+07:00,477.882782,477.882782,440.517462,477.882782,226436800,0.0,0.0
2021-04-05 00:00:00+07:00,496.565428,550.646811,452.317023,489.682343,167673500,0.0,0.0
2021-04-06 00:00:00+07:00,489.682339,565.396275,464.116594,535.897339,109912200,0.0,0.0


# Feature Calculation (Technical Indicators)

In [263]:
import ta
ema = ta.trend.EMAIndicator(close=df['Close'], window=9).ema_indicator()
macd_line = ta.trend.MACD(df['Close']).macd()
macd_signal = ta.trend.MACD(df['Close']).macd_signal()
macd_diff = ta.trend.MACD(df['Close']).macd_diff()
rsi = ta.momentum.RSIIndicator(df['Close']).rsi()
feature = pd.concat([sma, macd_line, macd_signal, macd_diff, rsi], axis=1)
feature = feature.pct_change().dropna()
feature

Unnamed: 0_level_0,sma_9,MACD_12_26,MACD_sign_12_26,MACD_diff_12_26,rsi
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2021-05-21 00:00:00+07:00,0.032036,0.032898,0.144619,-0.134295,-0.006607
2021-05-24 00:00:00+07:00,0.014026,-0.046486,0.087082,-0.310772,0.062615
2021-05-25 00:00:00+07:00,0.014512,-0.049592,0.050988,-0.363489,0.000000
2021-05-27 00:00:00+07:00,0.004470,-0.023882,0.033109,-0.317556,-0.023038
2021-05-28 00:00:00+07:00,0.000000,-0.253591,-0.031582,-1.985467,0.197302
...,...,...,...,...,...
2023-05-08 00:00:00+07:00,0.006571,-0.092655,-0.041063,0.322861,-0.058813
2023-05-09 00:00:00+07:00,0.005341,-0.091402,-0.049406,0.153778,0.000000
2023-05-10 00:00:00+07:00,-0.002952,0.003404,-0.041040,-0.210377,-0.063854
2023-05-11 00:00:00+07:00,-0.001776,-0.195333,-0.066616,0.556586,0.151553


In [264]:
feature.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 485 entries, 2021-05-21 00:00:00+07:00 to 2023-05-12 00:00:00+07:00
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   sma_9            485 non-null    float64
 1   MACD_12_26       485 non-null    float64
 2   MACD_sign_12_26  485 non-null    float64
 3   MACD_diff_12_26  485 non-null    float64
 4   rsi              485 non-null    float64
dtypes: float64(5)
memory usage: 22.7 KB


# Target / Label

In [265]:
df['up'] = False  # initialize 'up' column with 0
df.loc[df['Close'] > df['Close'].shift(), 'up'] = True
df['next_up'] = df['up'].shift(-1)
df[['Close', 'up', 'next_up']]

Unnamed: 0_level_0,Close,up,next_up
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2021-03-30 00:00:00+07:00,306.788940,False,True
2021-03-31 00:00:00+07:00,383.486176,True,True
2021-04-01 00:00:00+07:00,477.882782,True,True
2021-04-05 00:00:00+07:00,489.682343,True,True
2021-04-06 00:00:00+07:00,535.897339,True,True
...,...,...,...
2023-05-08 00:00:00+07:00,298.000000,False,False
2023-05-09 00:00:00+07:00,298.000000,False,False
2023-05-10 00:00:00+07:00,296.000000,False,True
2023-05-11 00:00:00+07:00,300.000000,True,False


In [266]:
# up or next_up as target?
target = df[df.index.isin(feature.index) == True][target_col].dropna().astype('bool')
target.info()

<class 'pandas.core.series.Series'>
DatetimeIndex: 484 entries, 2021-05-21 00:00:00+07:00 to 2023-05-11 00:00:00+07:00
Series name: next_up
Non-Null Count  Dtype
--------------  -----
484 non-null    bool 
dtypes: bool(1)
memory usage: 4.3 KB


# Align Data Length Between Feature and Target

In [267]:
feature = feature[feature.index.isin(target.index) == True]
feature.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 484 entries, 2021-05-21 00:00:00+07:00 to 2023-05-11 00:00:00+07:00
Data columns (total 5 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   sma_9            484 non-null    float64
 1   MACD_12_26       484 non-null    float64
 2   MACD_sign_12_26  484 non-null    float64
 3   MACD_diff_12_26  484 non-null    float64
 4   rsi              484 non-null    float64
dtypes: float64(5)
memory usage: 22.7 KB


# Classification: Logistic Regression

In [268]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Split the data into training and testing sets
test_size = int(0.2 * len(target))
print(test_size)

# Split the data into training and testing sets
X_train, y_train = feature[:-test_size], target[:-test_size]
X_test, y_test = feature[-test_size:], target[-test_size:]

# Initialize the logistic regression model
model = LogisticRegression()

# Train the model on the training data
model.fit(X_train, y_train)

# Make predictions on the testing data
y_pred = model.predict(X_test)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

96
Accuracy: 0.75


In [269]:
# Make probability predictions on the testing data
y_pred_proba = model.predict_proba(X_test)
y_pred_proba[-1]

array([0.62728242, 0.37271758])

In [270]:
pd.concat([y_test,pd.DataFrame(y_test).set_index(y_test.index)], axis=1)

Unnamed: 0_level_0,next_up,next_up
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-12-15 00:00:00+07:00,False,False
2022-12-16 00:00:00+07:00,False,False
2022-12-19 00:00:00+07:00,False,False
2022-12-20 00:00:00+07:00,False,False
2022-12-21 00:00:00+07:00,False,False
...,...,...
2023-05-05 00:00:00+07:00,False,False
2023-05-08 00:00:00+07:00,False,False
2023-05-09 00:00:00+07:00,False,False
2023-05-10 00:00:00+07:00,True,True


# Summary

In [271]:
print('Ticker:', ticker)
print('Training data size: ', len(y_train))
print('Test data size: ', len(y_test))
print("Accuracy:", accuracy)

Ticker: ZYRX.JK
Training data size:  388
Test data size:  96
Accuracy: 0.75
