In [72]:
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np
import datetime

def get_sp500_tickers():
    table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    tickers = table[0]['Symbol'].tolist()
    return tickers

def download_data(tickers, start, end):
    #data = yf.download(tickers, start=start, end=end, group_by='ticker')
    data = yf.download(tickers, start=start, end=end, interval='1d', group_by='ticker')
    
    return data

def calculate_features(stock_data):
    result = pd.DataFrame()

    for ticker in stock_data.columns.levels[0]:
        stock = stock_data[ticker].copy()
        stock.loc[:, 'return'] = stock['Adj Close'].pct_change()
        stock.loc[:, 'log_return'] = np.log(stock['Adj Close'] / stock['Adj Close'].shift(1))
        stock.loc[:, 'volume_ma'] = stock['Volume'].rolling(window=30*6).mean()
        stock.loc[:, 'detrended_vol'] = stock['Volume'] - stock['volume_ma']
        stock.loc[:, 'log_return_mean'] = stock['log_return'].rolling(window=252).mean()
        stock.loc[:, 'demeaned_log_return'] = stock['log_return'] - stock['log_return_mean']
        stock.loc[:, 'momentum'] = (stock['Adj Close'] / stock['Adj Close'].shift(30)) - 1
        stock.loc[:, 'ticker'] = ticker
        stock.loc[:, 'date'] = stock.index  
        
        stock.loc[:, 'realized_variance'] = stock['return'].rolling(window=30*6).var()
        stock.loc[:, 'realized_volatility'] = np.sqrt(stock['realized_variance'])  

        min_detrended = stock['detrended_vol'].min()
        max_detrended = stock['detrended_vol'].max()
        stock.loc[:, 'normalized_detrended_vol'] = (stock['detrended_vol'] - min_detrended) / (max_detrended - min_detrended)

        result = pd.concat([result, stock[['date', 'momentum', 'detrended_vol', 'normalized_detrended_vol', 'demeaned_log_return', 'realized_volatility', 'ticker']]])

    return result

def analyze_short_reversals_on_date(stock_data, specific_date):
    short_reversal_results = []
    specific_date = stock_data.index[-1].date().strftime('%Y-%m-%d')
    specific_date = pd.to_datetime(specific_date).strftime('%Y-%m-%d')

    for ticker in stock_data.columns.levels[0]:
        stock = stock_data[ticker].copy()

        # Filter to get data for the specific date only
        #print(ticker)
        stock_on_date = stock.loc[specific_date]

        # Find the last hour of the specific day
        if not stock_on_date.empty:
            latest_time = stock_on_date.index[-1]  # Get the latest timestamp of the day
            latest_index = stock.index.get_loc(latest_time)  # Find the position of this timestamp

            adj_close = stock['Adj Close']
            volume = stock['Volume']

            # Calculate the maximum closing volume for the last 5 hours
            max_volume_last_5_hours = volume.iloc[latest_index - 5:latest_index].max()
            price_increase = adj_close.iloc[latest_index] - adj_close.iloc[latest_index - 30]
            current_volume = volume.iloc[latest_index]

            # Short reversal condition: current volume less than the max of last 5 hour closes
            if price_increase > 0:  
                if current_volume > max_volume_last_5_hours:
                    short_reversal_percent = 0.0
                else:
                    short_reversal_percent = ((max_volume_last_5_hours - current_volume) / max_volume_last_5_hours) * 100
            else:
                short_reversal_percent = 0.0

            short_reversal_results.append({
                'ticker': ticker,
                'Short Reversal (%)': short_reversal_percent,
            })
        else:
            print(f"{ticker} does not have data for {specific_date}.")
    
    return pd.DataFrame(short_reversal_results)

def rank_stocks(stock_data):
    stock_data = stock_data.dropna()

    if 'date' not in stock_data.columns:
        stock_data = stock_data.reset_index()  

    stock_data['momentum_rank'] = stock_data.groupby('date')['momentum'].rank(ascending=False)
    stock_data['detrended_vol_rank'] = stock_data.groupby('date')['detrended_vol'].rank(ascending=False)
    stock_data['demeaned_log_return_rank'] = stock_data.groupby('date')['demeaned_log_return'].rank(ascending=False)
    stock_data['normalized_detrended_vol_rank'] = stock_data.groupby('date')['normalized_detrended_vol'].rank(ascending=False)
    stock_data['realized_volatility_rank'] = stock_data.groupby('date')['realized_volatility'].rank(ascending=True)  # Lower volatility is better
    stock_data['SR_rank'] = stock_data['Short Reversal (%)'].rank(ascending=True)
    stock_data['SR_rank'] = (stock_data['SR_rank'] - stock_data['SR_rank'].min()) / (stock_data['SR_rank'].max() - stock_data['SR_rank'].min())

    stock_data['final_rank'] = stock_data[['momentum_rank', 'detrended_vol_rank', 'demeaned_log_return_rank','SR_rank']].mean(axis=1)
    stock_data['norm_final_rank'] = stock_data[['momentum_rank', 'normalized_detrended_vol_rank', 'realized_volatility_rank','SR_rank']].mean(axis=1)

    ranked_stocks = stock_data.sort_values(by=['date', 'final_rank'], ascending=[True, True])
    ranked_stocks_normalized = stock_data.sort_values(by=['date', 'norm_final_rank'], ascending=[True, True])
    
    return ranked_stocks, ranked_stocks_normalized



if __name__ == "__main__":
    tickers = get_sp500_tickers()
    
    #тут менять даты
    start_date = '2024-01-01'
    end_date = '2024-10-05'
    #последняя доступная дата для проверки шорт реверса
    #specific_date = '2024-10-04'
    
    stock_data = yf.download(tickers, start=start_date, end=end_date, interval='1h',group_by = 'ticker')
    specific_date = stock_data.index[-1].date().strftime('%Y-%m-%d')
    short_reversal_analysis = analyze_short_reversals_on_date(stock_data, specific_date)
    
    stock_features = calculate_features(stock_data)
    stock_features = stock_features.merge(short_reversal_analysis,on = 'ticker', how = 'left').reset_index(drop=True)
    
    ranked_stocks, ranked_stocks_normalized = rank_stocks(stock_features)    
    latest_date = ranked_stocks['date'].max()
    
    top_long_stocks = ranked_stocks[ranked_stocks['date'] == latest_date].head(20)
    top_long_stocks.index = [latest_date] * len(top_long_stocks)
    #print(top_long_stocks[['ticker','realized_volatility_rank','detrended_vol', 'SR_rank', 'final_rank']])
    
    top_long_stocks_normalized = ranked_stocks_normalized[ranked_stocks_normalized['date'] == latest_date].head(20)
    top_long_stocks_normalized.index = [latest_date] * len(top_long_stocks)
    #print(top_long_stocks_normalized[['ticker', 'momentum','realized_volatility_rank','normalized_detrended_vol', 'norm_final_rank']])
    #top_long_stocks.to_csv(f'top_long_stocks_{latest_date}.csv', index=False)


[*********************100%%**********************]  503 of 503 completed

2 Failed downloads:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1h 2024-01-01 -> 2024-10-05)')
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')


In [108]:
from pytz import timezone
import pandas as pd

def backtest_strategy(stock_data, start_backtest, end_backtest, top_n=5):
    ny_tz = timezone('America/New_York')
    
    backtest_results = []
    portfolio_value = 100000
    positions = {}

    last_dates = stock_data.index.normalize().drop_duplicates()[-5:]
    
    for current_date in last_dates:
        print(f"Backtesting on {current_date.strftime('%Y-%m-%d')}")

        stock_data_up_to_date = stock_data[stock_data.index <= current_date]
        stock_features = calculate_features(stock_data_up_to_date)
        short_reversal_analysis = analyze_short_reversals_on_date(stock_data_up_to_date, current_date)
        stock_features = stock_features.merge(short_reversal_analysis, on='ticker', how='left').reset_index(drop=True)

        ranked_stocks, ranked_stocks_normalized = rank_stocks(stock_features)
        top_stocks = ranked_stocks_normalized[ranked_stocks_normalized['date'] == ranked_stocks_normalized['date'].iloc[-1]].head(top_n)
        current = ranked_stocks_normalized['date'].iloc[-1]

        if positions:
            print("Selling previous day's stocks")
            portfolio_value -= 100000
            for ticker, num_shares in positions.items():
                if ticker in stock_data.columns.levels[0]:
                    sell_price = stock_data[ticker]['Adj Close'].loc[current]
                    portfolio_value += num_shares * sell_price
                    print(f"Sold {ticker} at ${sell_price}, Total Cash: {portfolio_value}")
            print("\n\n")
            positions.clear()

        equal_allocation = portfolio_value / top_n
        temp = 0
        for ticker in top_stocks['ticker']:
            if ticker in stock_data.columns.levels[0]:
                buy_price = stock_data[ticker]['Adj Close'].loc[current]
                num_shares = equal_allocation / buy_price
                positions[ticker] = num_shares
                temp += buy_price * num_shares
                print(f"Bought {ticker} at ${buy_price} with {num_shares} shares, port val {temp}")
        print('\n')

        portfolio_value = 0
        for ticker, shares in positions.items():
            if ticker in stock_data.columns.levels[0]:
                current_price = stock_data[ticker]['Adj Close'].loc[current]
                portfolio_value += shares * current_price

        backtest_results.append({
            'date': current,
            'portfolio_value': portfolio_value,
            'stocks_held': list(positions.keys())
        })

    return pd.DataFrame(backtest_results)

if __name__ == "__main__":
    tickers = get_sp500_tickers()
    start_date = '2024-01-01'
    end_date = '2024-10-05'
    backtest_start_date = '2024-09-30'
    backtest_end_date = '2024-10-05'
    backtest_results = backtest_strategy(stock_data, backtest_start_date, backtest_end_date)
    backtest_results['percentage_change'] = (backtest_results['portfolio_value'] - 100000) / 100000 * 100
    backtest_results['percentage_change'].fillna(0, inplace=True)

    print(backtest_results)


Backtesting on 2024-09-30
Bought YUM at $139.94000244140625 with 142.91839110388844 shares, port val 20000.0
Bought ES at $67.51000213623047 with 296.25239767644206 shares, port val 40000.0
Bought J at $129.9749298095703 with 153.87582843324114 shares, port val 60000.0
Bought LNT at $60.7400016784668 with 329.272299099891 shares, port val 80000.0
Bought AIZ at $199.91000366210938 with 100.04501842641288 shares, port val 100000.0


Backtesting on 2024-10-01
Selling previous day's stocks
Sold YUM at $139.64999389648438, Total Cash: 19958.552445353387
Sold ES at $68.05000305175781, Total Cash: 40118.52901132584
Sold J at $130.88999938964844, Total Cash: 60259.33610103442
Sold LNT at $60.689998626708984, Total Cash: 80242.87148122012
Sold AIZ at $198.82000732421875, Total Cash: 100133.82277751113



Bought AFL at $111.79000091552734 with 179.14629565693616 shares, port val 20026.764555502225
Bought WTW at $294.5299987792969 with 67.99566984179795 shares, port val 40053.52911100445
Bought Y