## 70% Buy and Sell Signals using RSI and Python

In [1]:
# Import libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import yfinance as yf

In [2]:
# Create a function to get the relative strength index (RSI) values
def RSI(data, time_period=14, column="Close", MA='SMA'):
    delta = data[column].diff(1)
    delta = delta.dropna()
    up = delta.copy()
    down = delta.copy()
    up[up < 0] = 0
    down[down > 0] = 0
    if MA == 'SMA':
        AVG_Gain = up.rolling(window=time_period).mean()
        AVG_Loss = abs(down.rolling(window=time_period).mean())
    elif MA == 'EMA':
        AVG_Gain = up.ewm(span=time_period, adjust=False).mean()
        AVG_Loss = abs(down.ewm(span=time_period, adjust=False).mean())

    RS = AVG_Gain / AVG_Loss
    RSI = 100.0 - (100.0 / (1.0 + RS))
    data['RSI' + str(time_period) + '_' + str(MA)] = RSI

In [3]:
# Create a function to select the best model to use
def select_best_model(X_train, y_train):
    # Create a list of machine learning models
    models = [
        RandomForestClassifier(),
        SVC(),
        LogisticRegression()
    ]

    # Evaluate each model on the training dataset
    model_scores = []

    for model in models:
        model.fit(X_train, y_train)
        # Calculate the accuracy of the model
        score = model.score(X_train, y_train)
        model_scores.append((model, score))

    # Select a model with the highest score
    best_model = max(model_scores, key= lambda x: x[1])[0]

    return best_model

In [9]:
# Load the data
df = yf.download(tickers=['BTC-USD'], start='2018-09-30', end='2024-09-30')

df = df.reset_index()

# Create a new column called Percentage Change
df['Percentage Change'] = df['Close'].pct_change()

# Create a buy and sell signal (1=Bye, -1=Sell)
df['Signal'] = np.where(df['Percentage Change'] >= 0, 1, -1)
df.dropna(inplace=True) # Remove NaN values

# Create RSI columns 
RSI(df, 6, MA="SMA")
RSI(df, 2, MA="SMA")

df.dropna(inplace=True)

# Create the buy (1) and sell (-1) and hold (0) signals based on the RSI values for RSI6_SMA and RSI2_SMA
df['RSI6_Signal'] = np.where(df['RSI6_SMA'] <= 10, 1, 0)
df['RSI6_Signal'] = np.where(df['RSI6_SMA'] >= 90, -1, df['RSI6_Signal'])

df['RSI2_Signal'] = np.where(df['RSI2_SMA'] <= 10, 1, 0)
df['RSI2_Signal'] = np.where(df['RSI2_SMA'] >= 90, -1, df['RSI2_Signal'])

# Set the index to be the date
df.set_index(df['Date'])

# Drop the Date column
df.drop("Date", axis=1, inplace=True)

# Show the data
df

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,Percentage Change,Signal,RSI6_SMA,RSI2_SMA,RSI6_Signal,RSI2_Signal
7,6590.680176,6641.490234,6557.040039,6602.950195,6602.950195,3306630000,0.002222,1,52.606275,29.994098,0,0
8,6600.189941,6675.060059,6576.040039,6652.229980,6652.229980,3979460000,0.007463,1,67.704114,100.000000,0,-1
9,6653.080078,6661.410156,6606.939941,6642.640137,6642.640137,3580810000,-0.001442,-1,80.770840,83.710032,0,0
10,6640.290039,6640.290039,6538.959961,6585.529785,6585.529785,3787650000,-0.008598,-1,52.098927,0.000000,0,1
11,6586.740234,6586.740234,6243.740234,6256.240234,6256.240234,5181640000,-0.050002,-1,12.937171,0.000000,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2187,64302.589844,64804.503906,62945.375000,63143.144531,63143.144531,25078377700,-0.018022,-1,53.208953,45.620438,0,0
2188,63138.546875,65790.796875,62669.269531,65181.019531,65181.019531,36873129847,0.032274,1,70.107598,63.749351,0,0
2189,65180.664062,66480.695312,64852.992188,65790.664062,65790.664062,32058813449,0.009353,1,72.385471,100.000000,0,-1
2190,65792.179688,66255.531250,65458.035156,65887.648438,65887.648438,15243637984,0.001474,1,71.551429,100.000000,0,-1


In [11]:
# Get the features to train and test on and the target data set to predict/classify
features = ['Volume', 'Close', 'RSI6_Signal', 'RSI2_Signal']
target = 'Signal'
X = df[features]
y = df[target]

# Split the dataset into (80%) training and (20%) testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Select the best model 
best_model = select_best_model(X_train, y_train)

# Evaluate the best model on the test data and get the model predictions/classifications
y_pred = best_model.predict(X_test)

# Calculate the score of the model
score = best_model.score(X_test, y_test)

# Print the accuracy of the best model on the test data
print("Accuracy of the best model: ", score)

Accuracy of the best model:  0.7116704805491991
