In [1]:
%pip install yfinance

Collecting yfinance
  Downloading yfinance-0.2.40-py2.py3-none-any.whl.metadata (11 kB)
Collecting requests>=2.31 (from yfinance)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.11-py3-none-any.whl.metadata (5.5 kB)
Collecting lxml>=4.9.1 (from yfinance)
  Downloading lxml-5.2.2-cp311-cp311-macosx_10_9_universal2.whl.metadata (3.4 kB)
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.4-py311-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.17.5.tar.gz (3.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
[?25hCollecting beautifulsoup4>=4.11.1 (from yfinance)
  Usin

In [14]:
import yfinance as yf
import pandas as pd

# Fetch ETH data
eth_data = yf.download('ETH-USD', start='2020-01-01', end='2024-06-01')
eth_data.head()


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-01-01,129.630661,132.835358,129.198288,130.802002,130.802002,7935230330
2020-01-02,130.820038,130.820038,126.95491,127.410179,127.410179,8032709256
2020-01-03,127.411263,134.554016,126.490021,134.171707,134.171707,10476845358
2020-01-04,134.168518,136.052719,133.040558,135.069366,135.069366,7430904515
2020-01-05,135.072098,139.410202,135.045624,136.276779,136.276779,7526675353


In [15]:
# Calculate EMA
eth_data['EMA_12'] = eth_data['Close'].ewm(span=12, adjust=False).mean()
eth_data['EMA_26'] = eth_data['Close'].ewm(span=26, adjust=False).mean()

# Calculate MACD
eth_data['MACD'] = eth_data['EMA_12'] - eth_data['EMA_26']
eth_data['Signal_Line'] = eth_data['MACD'].ewm(span=9, adjust=False).mean()

# Calculate RSI
def calculate_rsi(data, window):
    delta = data['Close'].diff(1)
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

eth_data['RSI'] = calculate_rsi(eth_data, 14)

eth_data.dropna(inplace=True)
eth_data.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,EMA_12,EMA_26,MACD,Signal_Line,RSI
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2020-01-14,144.251053,167.681549,144.064926,165.955353,165.955353,16712318373,144.399865,139.061449,5.338416,3.126303,80.32819
2020-01-15,165.734619,170.425674,161.109619,166.230682,166.230682,15173694057,147.758452,141.073985,6.684468,3.837936,80.421204
2020-01-16,166.332825,167.058151,159.709641,164.391006,164.391006,13735193018,150.317307,142.801172,7.516135,4.573575,82.623564
2020-01-17,164.48938,173.069214,162.775604,170.779953,170.779953,14997091826,153.465406,144.873674,8.591732,5.377207,82.508581
2020-01-18,170.747726,178.528778,166.789017,175.365677,175.365677,14929342256,156.834679,147.132341,9.702338,6.242233,83.583854


In [16]:
import numpy as np

# Bollinger Bands
eth_data['BB_Middle'] = eth_data['Close'].rolling(window=20).mean()
eth_data['BB_Upper'] = eth_data['BB_Middle'] + (eth_data['Close'].rolling(window=20).std() * 2)
eth_data['BB_Lower'] = eth_data['BB_Middle'] - (eth_data['Close'].rolling(window=20).std() * 2)

# Stochastic Oscillator
low_14 = eth_data['Low'].rolling(window=14).min()
high_14 = eth_data['High'].rolling(window=14).max()
eth_data['Stochastic'] = ((eth_data['Close'] - low_14) / (high_14 - low_14)) * 100

# Average True Range (ATR)
high_low = eth_data['High'] - eth_data['Low']
high_close = np.abs(eth_data['High'] - eth_data['Close'].shift())
low_close = np.abs(eth_data['Low'] - eth_data['Close'].shift())
tr = high_low.combine(high_close, max).combine(low_close, max)
eth_data['ATR'] = tr.rolling(window=14).mean()

# On-Balance Volume (OBV)
eth_data['OBV'] = (np.sign(eth_data['Close'].diff()) * eth_data['Volume']).fillna(0).cumsum()

# MACD Histogram
eth_data['MACD_Hist'] = eth_data['MACD'] - eth_data['Signal_Line']

# Volume-weighted Average Price (VWAP)
vwap = (eth_data['Volume'] * (eth_data['High'] + eth_data['Low'] + eth_data['Close']) / 3).cumsum() / eth_data['Volume'].cumsum()
eth_data['VWAP'] = vwap

eth_data.dropna(inplace=True)

In [20]:
# Additional RSI for different periods
eth_data['RSI_7'] = calculate_rsi(eth_data, 7)
eth_data['RSI_21'] = calculate_rsi(eth_data, 21)

# Momentum
eth_data['Momentum'] = eth_data['Close'].diff(10)

# Rate of Change (ROC)
eth_data['ROC'] = eth_data['Close'].pct_change(periods=10) * 100

# Commodity Channel Index (CCI)
def calculate_cci(data, ndays): 
    TP = (data['High'] + data['Low'] + data['Close']) / 3 
    CCI = pd.Series((TP - TP.rolling(ndays).mean()) / (0.015 * TP.rolling(ndays).std()), name = 'CCI') 
    return CCI

eth_data['CCI'] = calculate_cci(eth_data, 20)

# Williams %R
eth_data['Williams_%R'] = ((high_14 - eth_data['Close']) / (high_14 - low_14)) * -100

# Chaikin Money Flow (CMF)
def calculate_cmf(data, ndays):
    mfv = ((data['Close'] - data['Low']) - (data['High'] - data['Close'])) / (data['High'] - data['Low']) * data['Volume']
    cmf = mfv.rolling(ndays).sum() / data['Volume'].rolling(ndays).sum()
    return cmf

eth_data['CMF'] = calculate_cmf(eth_data, 20)

# Money Flow Index (MFI)
def calculate_mfi(data, window):
    typical_price = (data['High'] + data['Low'] + data['Close']) / 3
    raw_money_flow = typical_price * data['Volume']
    positive_flow = raw_money_flow.copy()
    negative_flow = raw_money_flow.copy()
    positive_flow[data['Close'] <= data['Close'].shift(1)] = 0
    negative_flow[data['Close'] > data['Close'].shift(1)] = 0
    positive_mf = positive_flow.rolling(window).sum()
    negative_mf = negative_flow.rolling(window).sum()
    mfi = 100 - (100 / (1 + positive_mf / negative_mf))
    return mfi

eth_data['MFI'] = calculate_mfi(eth_data, 14)

# Force Index
eth_data['Force_Index'] = eth_data['Close'].diff(1) * eth_data['Volume']

eth_data.dropna(inplace=True)


In [21]:
# Create target variable (1 if next day's close price is higher, else 0)
eth_data['Target'] = (eth_data['Close'].shift(-1) > eth_data['Close']).astype(int)

# Features
features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'MACD', 'Signal_Line', 'RSI',
    'BB_Middle', 'BB_Upper', 'BB_Lower',
    'Stochastic', 'ATR', 'OBV', 'MACD_Hist', 'VWAP',
    'RSI_7','RSI_21','Momentum','ROC','CCI','Williams_%R','CMF','MFI','Force_Index'
]

X = eth_data[features]
y = eth_data['Target']

# Drop the last row as it will have NaN target value
X = X[:-1]
y = y[:-1]


In [22]:
X_train, X_test, y_train, y_test = X[:1200], X[1200:], y[:1200], y[1200:]

In [23]:
# from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Split data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predict and evaluate the model
y_pred = rf_model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.5083333333333333
Classification Report:
               precision    recall  f1-score   support

           0       0.49      0.53      0.51       173
           1       0.53      0.49      0.51       187

    accuracy                           0.51       360
   macro avg       0.51      0.51      0.51       360
weighted avg       0.51      0.51      0.51       360

