In [91]:
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np
import datetime

def get_sp500_tickers():
    table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    tickers = table[0]['Symbol'].tolist()
    return tickers

def download_data(tickers, start, end):
    data = yf.download(tickers, start=start, end=end, group_by='ticker')
    #data = yf.download(tickers, start=start, end=end, interval='1h', group_by='ticker')
    
    return data

def calculate_features(stock_data):
    result = pd.DataFrame()

    for ticker in stock_data.columns.levels[0]:
        stock = stock_data[ticker].copy()
        stock.loc[:, 'return'] = stock['Adj Close'].pct_change()
        stock.loc[:, 'log_return'] = np.log(stock['Adj Close'] / stock['Adj Close'].shift(1))
        stock.loc[:, 'volume_ma'] = stock['Volume'].rolling(window=30).mean()
        stock.loc[:, 'detrended_vol'] = stock['Volume'] - stock['volume_ma']
        stock.loc[:, 'log_return_mean'] = stock['log_return'].rolling(window=252).mean()
        stock.loc[:, 'demeaned_log_return'] = stock['log_return'] - stock['log_return_mean']
        stock.loc[:, 'momentum'] = (stock['Adj Close'] / stock['Adj Close'].shift(30)) - 1
        stock.loc[:, 'ticker'] = ticker
        stock.loc[:, 'date'] = stock.index  
        
        stock.loc[:, 'realized_variance'] = stock['return'].rolling(window=63).var()
        stock.loc[:, 'realized_volatility'] = np.sqrt(stock['realized_variance'])  

        min_detrended = stock['detrended_vol'].min()
        max_detrended = stock['detrended_vol'].max()
        stock.loc[:, 'normalized_detrended_vol'] = (stock['detrended_vol'] - min_detrended) / (max_detrended - min_detrended)

        result = pd.concat([result, stock[['date', 'momentum', 'detrended_vol', 'normalized_detrended_vol', 'demeaned_log_return', 'realized_volatility', 'ticker']]])

    return result

def analyze_short_reversals_on_date(stock_data, specific_date):
    short_reversal_results = []
    specific_date = pd.to_datetime(specific_date).strftime('%Y-%m-%d')

    for ticker in stock_data.columns.levels[0]:
        stock = stock_data[ticker].copy()
        
        if specific_date in stock.index:
            latest_index = stock.index.get_loc(specific_date)
            
            if latest_index >= 30:
                adj_close = stock['Adj Close']
                volume = stock['Volume']
                volume_ma = volume.rolling(window=30).mean()
                price_increase = adj_close.iloc[latest_index] - adj_close.iloc[latest_index - 30]
                current_volume = volume.iloc[latest_index]
                volume_moving_avg = volume_ma.iloc[latest_index]
                
                if price_increase > 0:  
                    if current_volume > volume_moving_avg:
                        short_reversal_percent = 0.0  
                    else:
                        short_reversal_percent = ((volume_moving_avg - current_volume) / volume_moving_avg) * 100
                else:
                    short_reversal_percent = 0.0  

                short_reversal_results.append({
                    'ticker': ticker,
                    'Short Reversal (%)': short_reversal_percent,
                })
            else:
                print(f"mb data is missing for {ticker} before {specific_date}.")
        else:
            print(f"{ticker} does not have data for {specific_date}.")

    return pd.DataFrame(short_reversal_results)

def rank_stocks(stock_data):
    stock_data = stock_data.dropna()

    if 'date' not in stock_data.columns:
        stock_data = stock_data.reset_index()  

    stock_data['momentum_rank'] = stock_data.groupby('date')['momentum'].rank(ascending=False)
    stock_data['detrended_vol_rank'] = stock_data.groupby('date')['detrended_vol'].rank(ascending=False)
    stock_data['demeaned_log_return_rank'] = stock_data.groupby('date')['demeaned_log_return'].rank(ascending=False)
    stock_data['normalized_detrended_vol_rank'] = stock_data.groupby('date')['normalized_detrended_vol'].rank(ascending=False)
    stock_data['realized_volatility_rank'] = stock_data.groupby('date')['realized_volatility'].rank(ascending=True)  # Lower volatility is better
    stock_data['SR_rank'] = stock_data['Short Reversal (%)'].rank(ascending=True)
    stock_data['SR_rank'] = (stock_data['SR_rank'] - stock_data['SR_rank'].min()) / (stock_data['SR_rank'].max() - stock_data['SR_rank'].min())

    stock_data['final_rank'] = stock_data[['momentum_rank', 'detrended_vol_rank', 'demeaned_log_return_rank','SR_rank']].mean(axis=1)
    stock_data['norm_final_rank'] = stock_data[['momentum_rank', 'normalized_detrended_vol_rank', 'realized_volatility_rank','SR_rank']].mean(axis=1)

    ranked_stocks = stock_data.sort_values(by=['date', 'final_rank'], ascending=[True, True])
    ranked_stocks_normalized = stock_data.sort_values(by=['date', 'norm_final_rank'], ascending=[True, True])
    
    return ranked_stocks, ranked_stocks_normalized

if __name__ == "__main__":
    tickers = get_sp500_tickers()
    
    #тут менять даты
    start_date = '2021-01-01'
    end_date = '2024-10-05'
    #последняя доступная дата для проверки шорт реверса
    specific_date = '2024-10-04'
    
    stock_data = download_data(tickers, start=start_date, end=end_date)
    short_reversal_analysis = analyze_short_reversals_on_date(stock_data, specific_date)
    
    stock_features = calculate_features(stock_data)
    stock_features = stock_features.merge(short_reversal_analysis,on = 'ticker', how = 'left').reset_index(drop=True)
    
    ranked_stocks, ranked_stocks_normalized = rank_stocks(stock_features)    
    latest_date = ranked_stocks['date'].max()
    
    top_long_stocks = ranked_stocks[ranked_stocks['date'] == latest_date].head(20)
    top_long_stocks.index = [latest_date] * len(top_long_stocks)
    #print(top_long_stocks[['ticker','realized_volatility_rank','detrended_vol', 'SR_rank', 'final_rank']])
    
    top_long_stocks_normalized = ranked_stocks_normalized[ranked_stocks_normalized['date'] == latest_date].head(20)
    top_long_stocks_normalized.index = [latest_date] * len(top_long_stocks)
    #print(top_long_stocks_normalized[['ticker', 'momentum','realized_volatility_rank','normalized_detrended_vol', 'norm_final_rank']])
    #top_long_stocks.to_csv(f'top_long_stocks_{latest_date}.csv', index=False)


[*********************100%%**********************]  503 of 503 completed

2 Failed downloads:
['BF.B']: Exception('%ticker%: No price data found, symbol may be delisted (1d 2021-01-01 -> 2024-10-05)')
['BRK.B']: Exception('%ticker%: No timezone found, symbol may be delisted')


In [90]:
top_long_stocks_normalized[['ticker','realized_volatility','normalized_detrended_vol','Short Reversal (%)','SR_rank', 'norm_final_rank']]

Unnamed: 0,ticker,realized_volatility,normalized_detrended_vol,Short Reversal (%),SR_rank,norm_final_rank
2024-10-04,KMI,0.011952,0.224135,10.945036,0.392344,51.848086
2024-10-04,WMB,0.013706,0.431647,0.0,0.0,52.25
2024-10-04,BLK,0.011913,0.231784,0.0,0.0,58.25
2024-10-04,HD,0.012916,0.236751,0.0,0.0,58.25
2024-10-04,J,0.01428,0.297472,0.0,0.0,60.5
2024-10-04,XEL,0.010881,0.218558,10.367283,0.384667,64.596167
2024-10-04,MET,0.015276,0.458778,0.0,0.0,66.25
2024-10-04,MA,0.011932,0.299703,18.530896,0.479355,68.369839
2024-10-04,MCD,0.011533,0.281528,6.97461,0.330924,71.832731
2024-10-04,HLT,0.013758,0.279631,19.619638,0.497269,72.874317


In [88]:
top_long_stocks[['ticker','realized_volatility_rank','detrended_vol','Short Reversal (%)', 'SR_rank', 'final_rank']]

Unnamed: 0,ticker,realized_volatility_rank,detrended_vol,Short Reversal (%),SR_rank,final_rank
2024-10-04,UAL,459.0,3835090.0,0.0,0.0,3.25
2024-10-04,VST,494.0,4590173.0,0.0,0.0,4.5
2024-10-04,TSLA,484.0,8126507.0,0.0,0.0,6.0
2024-10-04,ALB,489.0,2266610.0,0.0,0.0,7.5
2024-10-04,CCL,452.0,2442893.0,0.0,0.0,11.25
2024-10-04,AMD,469.0,10684620.0,0.0,0.0,13.5
2024-10-04,MET,229.0,1648023.0,0.0,0.0,19.75
2024-10-04,WYNN,400.0,1182187.0,0.0,0.0,19.75
2024-10-04,LUV,406.0,552710.0,0.0,0.0,25.5
2024-10-04,WMB,147.0,2572330.0,0.0,0.0,26.25


In [None]:
import warnings
warnings.filterwarnings("ignore")

import yfinance as yf
import pandas as pd
import numpy as np
import datetime

def get_sp500_tickers():
    table = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')
    tickers = table[0]['Symbol'].tolist()
    return tickers

def download_data(tickers, start, end):
    data = yf.download(tickers, start=start, end=end, group_by='ticker')
    #data = yf.download(tickers, start=start, end=end, interval='1h', group_by='ticker')
    
    return data

def calculate_features(stock_data):
    result = pd.DataFrame()

    for ticker in stock_data.columns.levels[0]:
        stock = stock_data[ticker].copy()
        stock.loc[:, 'return'] = stock['Adj Close'].pct_change()
        stock.loc[:, 'log_return'] = np.log(stock['Adj Close'] / stock['Adj Close'].shift(1))
        stock.loc[:, 'volume_ma'] = stock['Volume'].rolling(window=30).mean()
        stock.loc[:, 'detrended_vol'] = stock['Volume'] - stock['volume_ma']
        stock.loc[:, 'log_return_mean'] = stock['log_return'].rolling(window=252).mean()
        stock.loc[:, 'demeaned_log_return'] = stock['log_return'] - stock['log_return_mean']
        stock.loc[:, 'momentum'] = (stock['Adj Close'] / stock['Adj Close'].shift(30)) - 1
        stock.loc[:, 'ticker'] = ticker
        stock.loc[:, 'date'] = stock.index  
        
        stock.loc[:, 'realized_variance'] = stock['return'].rolling(window=63).var()
        stock.loc[:, 'realized_volatility'] = np.sqrt(stock['realized_variance'])  

        min_detrended = stock['detrended_vol'].min()
        max_detrended = stock['detrended_vol'].max()
        stock.loc[:, 'normalized_detrended_vol'] = (stock['detrended_vol'] - min_detrended) / (max_detrended - min_detrended)

        result = pd.concat([result, stock[['date', 'momentum', 'detrended_vol', 'normalized_detrended_vol', 'demeaned_log_return', 'realized_volatility', 'ticker']]])

    return result

def analyze_short_reversals_on_date(stock_data, specific_date):
    short_reversal_results = []
    specific_date = pd.to_datetime(specific_date).strftime('%Y-%m-%d')

    for ticker in stock_data.columns.levels[0]:
        stock = stock_data[ticker].copy()
        
        if specific_date in stock.index:
            latest_index = stock.index.get_loc(specific_date)
            
            if latest_index >= 30:
                adj_close = stock['Adj Close']
                volume = stock['Volume']
                volume_ma = volume.rolling(window=30).mean()
                price_increase = adj_close.iloc[latest_index] - adj_close.iloc[latest_index - 30]
                current_volume = volume.iloc[latest_index]
                volume_moving_avg = volume_ma.iloc[latest_index]
                
                if price_increase > 0:  
                    if current_volume > volume_moving_avg:
                        short_reversal_percent = 0.0  
                    else:
                        short_reversal_percent = ((volume_moving_avg - current_volume) / volume_moving_avg) * 100
                else:
                    short_reversal_percent = 0.0  

                short_reversal_results.append({
                    'ticker': ticker,
                    'Short Reversal (%)': short_reversal_percent,
                })
            else:
                print(f"mb data is missing for {ticker} before {specific_date}.")
        else:
            print(f"{ticker} does not have data for {specific_date}.")

    return pd.DataFrame(short_reversal_results)

def rank_stocks(stock_data):
    stock_data = stock_data.dropna()

    if 'date' not in stock_data.columns:
        stock_data = stock_data.reset_index()  

    stock_data['momentum_rank'] = stock_data.groupby('date')['momentum'].rank(ascending=False)
    stock_data['detrended_vol_rank'] = stock_data.groupby('date')['detrended_vol'].rank(ascending=False)
    stock_data['demeaned_log_return_rank'] = stock_data.groupby('date')['demeaned_log_return'].rank(ascending=False)
    stock_data['normalized_detrended_vol_rank'] = stock_data.groupby('date')['normalized_detrended_vol'].rank(ascending=False)
    stock_data['realized_volatility_rank'] = stock_data.groupby('date')['realized_volatility'].rank(ascending=True)  # Lower volatility is better
    stock_data['SR_rank'] = stock_data['Short Reversal (%)'].rank(ascending=True)
    stock_data['SR_rank'] = (stock_data['SR_rank'] - stock_data['SR_rank'].min()) / (stock_data['SR_rank'].max() - stock_data['SR_rank'].min())

    stock_data['final_rank'] = stock_data[['momentum_rank', 'detrended_vol_rank', 'demeaned_log_return_rank','SR_rank']].mean(axis=1)
    stock_data['norm_final_rank'] = stock_data[['momentum_rank', 'normalized_detrended_vol_rank', 'realized_volatility_rank','SR_rank']].mean(axis=1)

    ranked_stocks = stock_data.sort_values(by=['date', 'final_rank'], ascending=[True, True])
    ranked_stocks_normalized = stock_data.sort_values(by=['date', 'norm_final_rank'], ascending=[True, True])
    
    return ranked_stocks, ranked_stocks_normalized

if __name__ == "__main__":
    tickers = get_sp500_tickers()
    
    #тут менять даты
    start_date = '2021-01-01'
    end_date = '2024-10-05'
    #последняя доступная дата для проверки шорт реверса
    specific_date = '2024-10-04'
    
    stock_data = download_data(tickers, start=start_date, end=end_date)
    short_reversal_analysis = analyze_short_reversals_on_date(stock_data, specific_date)
    
    stock_features = calculate_features(stock_data)
    stock_features = stock_features.merge(short_reversal_analysis,on = 'ticker', how = 'left').reset_index(drop=True)
    
    ranked_stocks, ranked_stocks_normalized = rank_stocks(stock_features)    
    latest_date = ranked_stocks['date'].max()
    
    top_long_stocks = ranked_stocks[ranked_stocks['date'] == latest_date].head(20)
    top_long_stocks.index = [latest_date] * len(top_long_stocks)
    #print(top_long_stocks[['ticker','realized_volatility_rank','detrended_vol', 'SR_rank', 'final_rank']])
    
    top_long_stocks_normalized = ranked_stocks_normalized[ranked_stocks_normalized['date'] == latest_date].head(20)
    top_long_stocks_normalized.index = [latest_date] * len(top_long_stocks)
    #print(top_long_stocks_normalized[['ticker', 'momentum','realized_volatility_rank','normalized_detrended_vol', 'norm_final_rank']])
    #top_long_stocks.to_csv(f'top_long_stocks_{latest_date}.csv', index=False)
