In [2]:
import numpy as np
import pandas as pd
import yfinance as yf

from sklearn.model_selection import train_test_split

In [14]:
# Import BTC data using yfinance library
df = yf.download('BTC-USD',start='2021-03-26')
# Show the data
df = df.reset_index()
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-03-25,52726.746094,53392.386719,50856.570312,51704.160156,51704.160156,67999812841
1,2021-03-26,51683.011719,55137.312500,51579.855469,55137.312500,55137.312500,56652197978
2,2021-03-27,55137.566406,56568.214844,54242.910156,55973.511719,55973.511719,47266542233
3,2021-03-28,55974.941406,56610.312500,55071.113281,55950.746094,55950.746094,47686580918
4,2021-03-29,55947.898438,58342.097656,55139.339844,57750.199219,57750.199219,57625587027
...,...,...,...,...,...,...,...
414,2022-05-13,29030.910156,30924.802734,28782.330078,29283.103516,29283.103516,42841124537
415,2022-05-14,29285.642578,30192.802734,28702.910156,30101.265625,30101.265625,28579868620
416,2022-05-15,30098.585938,31308.191406,29527.740234,31305.113281,31305.113281,25835372065
417,2022-05-16,31304.375000,31305.341797,29251.884766,29862.917969,29862.917969,32613897286


In [15]:
# Create a function to calculate the Exponential Moving Average (EMA) Indicator
# Gives more importance to recent price data

def EMA(data, period=20, column='Close'):
    return data[column].ewm(span=period, adjust=False).mean()

In [16]:
# Create a function to calculate the Relative Strength Index (RSI)
def RSI(data, period=14, column='Close'):
    delta = data[column].diff(1)
    delta = delta.dropna()
    up = delta.copy()
    down = delta.copy()
    up[up < 0] = 0
    down[down > 0] = 0
    data['up'] = up
    data['down'] = down
    avg_gain = EMA(data, period, column='up')
    avg_loss = abs(EMA(data, period, column='down'))
    RS = avg_gain/avg_loss
    RSI = 100.0 - (100.0/(1.0+RS))

    data['RSI'+str(period)] = RSI
    return data

In [17]:
# Add indicators to dataset
RSI(df, 7)
RSI(df, 14)
RSI(df, 20)
df['EMA15'] = EMA(df, 15)
df['EMA20'] = EMA(df, 20)
df['EMA50'] = EMA(df, 50)

# Show the data
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,up,down,RSI7,RSI14,RSI20,EMA15,EMA20,EMA50
0,2021-03-25,52726.746094,53392.386719,50856.570312,51704.160156,51704.160156,67999812841,,,,,,51704.160156,51704.160156,51704.160156
1,2021-03-26,51683.011719,55137.312500,51579.855469,55137.312500,55137.312500,56652197978,3433.152344,0.000000,100.000000,100.000000,100.000000,52133.304199,52031.127046,51838.793581
2,2021-03-27,55137.566406,56568.214844,54242.910156,55973.511719,55973.511719,47266542233,836.199219,0.000000,100.000000,100.000000,100.000000,52613.330139,52406.592253,52000.939391
3,2021-03-28,55974.941406,56610.312500,55071.113281,55950.746094,55950.746094,47686580918,0.000000,-22.765625,99.728156,99.886668,99.924836,53030.507133,52744.130714,52155.833771
4,2021-03-29,55947.898438,58342.097656,55139.339844,57750.199219,57750.199219,57625587027,1799.453125,0.000000,99.788694,99.897285,99.929468,53620.468644,53220.899143,52375.220652
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,2022-05-13,29030.910156,30924.802734,28782.330078,29283.103516,29283.103516,42841124537,235.351562,0.000000,19.999581,21.584215,23.861224,33752.713117,34863.979739,38169.637622
415,2022-05-14,29285.642578,30192.802734,28702.910156,30101.265625,30101.265625,28579868620,818.162109,0.000000,36.749670,29.385364,29.281081,33296.282181,34410.387918,37853.230877
416,2022-05-15,30098.585938,31308.191406,29527.740234,31305.113281,31305.113281,25835372065,1203.847656,0.000000,55.166004,39.588924,36.618485,33047.386068,34114.647477,37596.441951
417,2022-05-16,31304.375000,31305.341797,29251.884766,29862.917969,29862.917969,32613897286,0.000000,-1442.195312,37.653809,32.998015,32.195457,32649.327556,33709.720857,37293.166501


In [18]:
# Create the target column to determine if tomorrow's price will be greater than today's price then put 1 else put 0 - BUT THIS IS NOT KNOWABLE!!!!
df['Target'] = np.where(df['Close'].shift(-1) > df['Close'], 1, 0)

In [19]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,up,down,RSI7,RSI14,RSI20,EMA15,EMA20,EMA50,Target
0,2021-03-25,52726.746094,53392.386719,50856.570312,51704.160156,51704.160156,67999812841,,,,,,51704.160156,51704.160156,51704.160156,1
1,2021-03-26,51683.011719,55137.312500,51579.855469,55137.312500,55137.312500,56652197978,3433.152344,0.000000,100.000000,100.000000,100.000000,52133.304199,52031.127046,51838.793581,1
2,2021-03-27,55137.566406,56568.214844,54242.910156,55973.511719,55973.511719,47266542233,836.199219,0.000000,100.000000,100.000000,100.000000,52613.330139,52406.592253,52000.939391,0
3,2021-03-28,55974.941406,56610.312500,55071.113281,55950.746094,55950.746094,47686580918,0.000000,-22.765625,99.728156,99.886668,99.924836,53030.507133,52744.130714,52155.833771,1
4,2021-03-29,55947.898438,58342.097656,55139.339844,57750.199219,57750.199219,57625587027,1799.453125,0.000000,99.788694,99.897285,99.929468,53620.468644,53220.899143,52375.220652,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,2022-05-13,29030.910156,30924.802734,28782.330078,29283.103516,29283.103516,42841124537,235.351562,0.000000,19.999581,21.584215,23.861224,33752.713117,34863.979739,38169.637622,1
415,2022-05-14,29285.642578,30192.802734,28702.910156,30101.265625,30101.265625,28579868620,818.162109,0.000000,36.749670,29.385364,29.281081,33296.282181,34410.387918,37853.230877,1
416,2022-05-15,30098.585938,31308.191406,29527.740234,31305.113281,31305.113281,25835372065,1203.847656,0.000000,55.166004,39.588924,36.618485,33047.386068,34114.647477,37596.441951,0
417,2022-05-16,31304.375000,31305.341797,29251.884766,29862.917969,29862.917969,32613897286,0.000000,-1442.195312,37.653809,32.998015,32.195457,32649.327556,33709.720857,37293.166501,1


In [20]:
# Remove the first row of data
df = df[1:]
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,up,down,RSI7,RSI14,RSI20,EMA15,EMA20,EMA50,Target
1,2021-03-26,51683.011719,55137.312500,51579.855469,55137.312500,55137.312500,56652197978,3433.152344,0.000000,100.000000,100.000000,100.000000,52133.304199,52031.127046,51838.793581,1
2,2021-03-27,55137.566406,56568.214844,54242.910156,55973.511719,55973.511719,47266542233,836.199219,0.000000,100.000000,100.000000,100.000000,52613.330139,52406.592253,52000.939391,0
3,2021-03-28,55974.941406,56610.312500,55071.113281,55950.746094,55950.746094,47686580918,0.000000,-22.765625,99.728156,99.886668,99.924836,53030.507133,52744.130714,52155.833771,1
4,2021-03-29,55947.898438,58342.097656,55139.339844,57750.199219,57750.199219,57625587027,1799.453125,0.000000,99.788694,99.897285,99.929468,53620.468644,53220.899143,52375.220652,1
5,2021-03-30,57750.132812,59447.222656,57251.550781,58917.691406,58917.691406,54414116432,1167.492188,0.000000,99.822826,99.904016,99.932453,54282.621489,53763.450787,52631.788132,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
414,2022-05-13,29030.910156,30924.802734,28782.330078,29283.103516,29283.103516,42841124537,235.351562,0.000000,19.999581,21.584215,23.861224,33752.713117,34863.979739,38169.637622,1
415,2022-05-14,29285.642578,30192.802734,28702.910156,30101.265625,30101.265625,28579868620,818.162109,0.000000,36.749670,29.385364,29.281081,33296.282181,34410.387918,37853.230877,1
416,2022-05-15,30098.585938,31308.191406,29527.740234,31305.113281,31305.113281,25835372065,1203.847656,0.000000,55.166004,39.588924,36.618485,33047.386068,34114.647477,37596.441951,0
417,2022-05-16,31304.375000,31305.341797,29251.884766,29862.917969,29862.917969,32613897286,0.000000,-1442.195312,37.653809,32.998015,32.195457,32649.327556,33709.720857,37293.166501,1


In [22]:
# Split the dataset into a feature (X) and target (y) dataset
# Get a list of columns to keep

keep_columns = df.drop(['Date', 'High', 'Low', 'Open', 'Volume', 'Adj Close', 'up', 'down', 'Close', 'Target'], axis=1).columns
X = df[keep_columns].values
y = df.Target.values

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [25]:
# Create the model
from sklearn.ensemble import RandomForestClassifier
forest = RandomForestClassifier(n_estimators=1, criterion='entropy', random_state=123)
print(forest)

RandomForestClassifier(criterion='entropy', n_estimators=1, random_state=123)


In [27]:
forest.fit(X_train, y_train)

RandomForestClassifier(criterion='entropy', n_estimators=1, random_state=123)

In [28]:
# See how well the model did on the test data - just better than a coin toss
forest.score(X_test, y_test)

0.5357142857142857

In [29]:
# Models predicted values
forest_predictions = forest.predict(X_test)
forest_predictions

array([1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1,
       1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,
       0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])

In [30]:
# Actual values
y_test

array([0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])