In [4]:
import pandas as pd
import numpy as np
import yfinance as yf
from scipy.stats import linregress
import hvplot.pandas  # Make sure hvplot is installed

# Download monthly bars for the S&P 500 index (ticker ^GSPC) via yfinance
sp500 = yf.download('^GSPC', period="60y", interval="1mo")

# Keep only rows that have an adjusted close; RSI and future returns below
# are both derived from that column
sp500 = sp500.dropna(subset=['Adj Close'])

# Calculate the RSI
def calculate_rsi(data, window=14):
    """Compute a simple-moving-average Relative Strength Index.

    Parameters
    ----------
    data : pandas.Series
        Price series. The result is indexed exactly like this input, so it
        can be assigned straight back into the originating DataFrame.
    window : int, default 14
        Number of periods over which gains and losses are averaged. Because
        ``min_periods=1`` is used, early rows are averaged over however many
        observations are available so far.

    Returns
    -------
    pandas.Series
        RSI values on a 0-100 scale. A row is 100 when the average loss is
        zero and the average gain is positive, and NaN when both averages
        are zero (e.g. the very first row).
    """
    delta = data.diff()
    # Series.where keeps the original index (np.where + pd.Series() would
    # silently replace it with 0..n-1 and break index-aligned assignment).
    # Comparisons against the leading NaN are False, so it becomes 0.
    gain = delta.where(delta > 0, 0.0)
    loss = (-delta).where(delta < 0, 0.0)
    avg_gain = gain.rolling(window=window, min_periods=1).mean()
    avg_loss = loss.rolling(window=window, min_periods=1).mean()
    # Division by a zero average loss yields inf, which maps to RSI == 100.
    rs = avg_gain / avg_loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

sp500['RSI'] = calculate_rsi(sp500['Adj Close'])


def _rsi_window_slope(window):
    """Return the least-squares slope of one rolling window, or NaN if it has gaps."""
    # With raw=True the window arrives as a NumPy array, which has no
    # .isnull() method, so missing values are detected with np.isnan.
    if np.isnan(window).any():
        return np.nan
    return linregress(np.arange(len(window)), window).slope


# Calculate RSI Slope: linear-regression slope over each 5-row rolling window
sp500['RSI_Slope'] = sp500['RSI'].rolling(window=5).apply(_rsi_window_slope, raw=True)

# Generate Buy/Sell Signals from the sign of the slope. A NaN slope compares
# False both ways and is labelled 'Hold' here; those rows are removed by the
# dropna() at the end of this section.
sp500['Signal'] = np.where(sp500['RSI_Slope'] > 0, 'Buy', np.where(sp500['RSI_Slope'] < 0, 'Sell', 'Hold'))

# Calculate future returns (1 month ahead): shift(-1) places the next row's
# percentage change on the current row
sp500['Future Returns'] = sp500['Adj Close'].pct_change().shift(-1)

# A signal is correct when 'Buy' is followed by a positive return or 'Sell'
# by a negative one; each half is named so the &/| grouping is explicit
buy_was_right = (sp500['Signal'] == 'Buy') & (sp500['Future Returns'] > 0)
sell_was_right = (sp500['Signal'] == 'Sell') & (sp500['Future Returns'] < 0)
sp500['Correct'] = np.where(buy_was_right | sell_was_right, 1, 0)

# Remove rows with any NaN values that could disrupt plotting or analysis
sp500_cleaned = sp500.dropna()

# Plotting: RSI line overlaid with green up-triangles on 'Buy' rows and red
# down-triangles on 'Sell' rows
rsi_line = sp500_cleaned['RSI'].hvplot(title='S&P 500 Monthly RSI & Signals', ylabel='RSI', width=800, height=400)

buy_rsi = sp500_cleaned.loc[sp500_cleaned['Signal'] == 'Buy', 'RSI']
sell_rsi = sp500_cleaned.loc[sp500_cleaned['Signal'] == 'Sell', 'RSI']
buy_markers = buy_rsi.hvplot.scatter(color='green', legend='top', marker='^', size=100)
sell_markers = sell_rsi.hvplot.scatter(color='red', legend='top', marker='v', size=100)

# NOTE: the overlay is only assigned to `plot`; nothing here displays it.
# In a notebook, make `plot` the last expression of a cell to render it.
plot = rsi_line * buy_markers * sell_markers

# Save the cleaned data with signals and correctness indicator to CSV
export_columns = ['Adj Close', 'RSI', 'RSI_Slope', 'Signal', 'Future Returns', 'Correct']
sp500_cleaned[export_columns].to_csv('sp500_signal_accuracy.csv', index=True)

print("Analysis complete and saved to 'sp500_signal_accuracy.csv'.")


[*********************100%%**********************]  1 of 1 completed

Analysis complete and saved to 'sp500_signal_accuracy.csv'.



