optimise the value of closness and/or make pair selection better

In [188]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller
from itertools import combinations
import tqdm
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
import warnings
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.sandbox.stats.runs import runstest_1samp
from scipy.stats import shapiro, anderson, kstest
import numpy as np
from statsmodels.stats.diagnostic import het_arch


data = pd.read_csv('data/daily_close.csv', index_col=[0])
X = data['ADANIENT']

# pair selection test

## test for stationarity (constant mean and variance)

| Test                              | Purpose                                         | Code                      |
| --------------------------------- | ----------------------------------------------- | ------------------------- |
| **Augmented Dickey-Fuller (ADF)** | Check for **unit root** (i.e. non-stationarity) | `adfuller(X)`             |
| **KPSS Test** (optional)          | Confirm stationarity from the other side        | `kpss(X, regression='c')` |

In [189]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    print("ADF p-value:", adfuller(X)[1])
    print("KPSS p-value:", kpss(X, regression='c')[1])

ADF p-value: 0.461900433557456
KPSS p-value: 0.01


## test for autocorrelation / independence

| Test               | Purpose                                 | Code                                           |
| ------------------ | --------------------------------------- | ---------------------------------------------- |
| **Ljung–Box Test** | Check if residuals are white noise      | `acorr_ljungbox(X, lags=[10], return_df=True)` |
| **Runs Test**      | Check for randomness above/below median | `runstest_1samp(X)`                            |


In [190]:
print("Ljung-Box p-value:", acorr_ljungbox(X, lags=[10], return_df=True)['lb_pvalue'].iloc[0])
print("Runs Test p-value:", runstest_1samp(X)[1])

Ljung-Box p-value: 0.0
Runs Test p-value: 2.812921911308517e-290


## test for normality of noise

| Test                        | Purpose                                  | Code                        |
| --------------------------- | ---------------------------------------- | --------------------------- |
| **Shapiro–Wilk Test**       | Tests normality (small samples)          | `shapiro(X)`                |
| **Anderson–Darling Test**   | More powerful than Shapiro               | `anderson(X, dist='norm')`  |
| **Kolmogorov–Smirnov Test** | Test against normal with est. parameters | `kstest((X - μ)/σ, 'norm')` |


In [191]:
print("Shapiro p-value:", shapiro(X)[1])
print("Anderson p-value:", anderson(X, dist='norm').significance_level[0])
mu, sigma = np.mean(X), np.std(X)
print("K-S p-value:", kstest((X - mu)/sigma, 'norm')[1])

Shapiro p-value: 1.954482567636484e-24
Anderson p-value: 15.0
K-S p-value: 2.7194915768252434e-18


## test for homoscedasticity (constant variance)

| Test          | Purpose                              | Code          |
| ------------- | ------------------------------------ | ------------- |
| **ARCH Test** | Checks if variance is time-dependent | `het_arch(X)` |

In [192]:
print("ARCH Test p-value:", het_arch(X)[1])

ARCH Test p-value: 2.604601729762402e-290


## filter
| Test                 | Desired Outcome                         |
| -------------------- | --------------------------------------- |
| ADF                  | **p < 0.05** (stationary)               |
| KPSS                 | **p > 0.05** (stationary)               |
| Ljung–Box            | **p > 0.05** (no autocorr)              |
| Runs Test            | **p > 0.05** (random signs)             |
| Shapiro/Anderson/K-S | **p > 0.05** (normality)                |
| ARCH                 | **p > 0.05** (no time-varying variance) |


In [193]:
def return_metric_given_spread(spread_X):
    X = spread_X
    p_values = []
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        p_values.append(("ADF", adfuller(X)[1], adfuller(X)[1] < 0.05))
        p_values.append(("KPSS", kpss(X, regression='c')[1], kpss(X, regression='c')[1] > 0.05))
        p_values.append(("Ljung-Box", acorr_ljungbox(X, lags=[10], return_df=True)['lb_pvalue'].iloc[0], acorr_ljungbox(X, lags=[10], return_df=True)['lb_pvalue'].iloc[0] > 0.05))
        p_values.append(("Runs Test", runstest_1samp(X)[1], runstest_1samp(X)[1] > 0.05))
        p_values.append(("Shapiro", shapiro(X)[1], shapiro(X)[1] > 0.05))
        p_values.append(("Anderson", anderson(X, dist='norm').significance_level[0], anderson(X, dist='norm').significance_level[0] > 0.05))
        mu, sigma = np.mean(X), np.std(X)
        p_values.append(("K-S", kstest((X - mu)/sigma, 'norm')[1], kstest((X - mu)/sigma, 'norm')[1] > 0.05))
        p_values.append(("ARCH Test", het_arch(X)[1], het_arch(X)[1] > 0.05))
    return p_values
    


In [194]:
def return_metric(ticker1, ticker2):
    S1 = data[:int(len(data)*0.7)][ticker1]
    S2 = data[:int(len(data)*0.7)][ticker2]
    X = sm.add_constant(S2)
    model = sm.OLS(S1, X).fit()
    spread_X = model.resid
    return return_metric_given_spread(spread_X)

In [195]:
def value(ticker1, ticker2):
    passes = 0;
    matrix = return_metric(ticker1, ticker2)
    for _, _, result in matrix:
        if matrix[0][2] == True:
            if result == True:
                passes+=1
    return passes

In [203]:
stock_pairs = list(combinations(data.columns, 2))
n = len(data.columns)
print(n*(n-1)/2)
len(stock_pairs)

1225.0


1225

In [197]:
value_matrix = pd.DataFrame(np.zeros((50, 50), dtype=int), columns=data.columns, index=data.columns)
list_of_pairs = []
for stock1, stock2 in tqdm.tqdm(stock_pairs):
    value_matrix.loc[stock1, stock2] = value(stock1, stock2)
    value_matrix.loc[stock2, stock1] = value_matrix.loc[stock1, stock2]
    if value_matrix.loc[stock1, stock2] > 0:
        list_of_pairs.append((stock1, stock2, value_matrix.loc[stock1, stock2]))

100%|██████████| 1225/1225 [01:31<00:00, 13.32it/s]


In [201]:
print([pair for pair in sorted(list_of_pairs, key=lambda x: x[2], reverse=True) if pair[2] >= 3])

[('ADANIPORTS', 'GRASIM', 4), ('ADANIPORTS', 'TATACONSUM', 4), ('ASIANPAINT', 'GRASIM', 4), ('ASIANPAINT', 'TATACONSUM', 4), ('BAJFINANCE', 'GRASIM', 4), ('BAJFINANCE', 'HINDALCO', 4), ('BAJFINANCE', 'TATAMOTORS', 4), ('BPCL', 'POWERGRID', 4), ('BHARTIARTL', 'POWERGRID', 4), ('BHARTIARTL', 'SUNPHARMA', 4), ('BHARTIARTL', 'TITAN', 4), ('EICHERMOT', 'ICICIBANK', 4), ('EICHERMOT', 'M&M', 4), ('HCLTECH', 'TATACONSUM', 4), ('HDFCBANK', 'JSWSTEEL', 4), ('HINDUNILVR', 'LT', 4), ('HINDUNILVR', 'NESTLEIND', 4), ('HINDUNILVR', 'TATAMOTORS', 4), ('HINDUNILVR', 'ULTRACEMCO', 4), ('NESTLEIND', 'SUNPHARMA', 4), ('SBILIFE', 'SBIN', 4), ('TCS', 'TATACONSUM', 4), ('ADANIPORTS', 'APOLLOHOSP', 3), ('ADANIPORTS', 'ASIANPAINT', 3), ('ADANIPORTS', 'BAJFINANCE', 3), ('ADANIPORTS', 'CIPLA', 3), ('ADANIPORTS', 'HINDALCO', 3), ('ADANIPORTS', 'JSWSTEEL', 3), ('APOLLOHOSP', 'GRASIM', 3), ('APOLLOHOSP', 'HINDALCO', 3), ('APOLLOHOSP', 'JSWSTEEL', 3), ('APOLLOHOSP', 'LTIM', 3), ('APOLLOHOSP', 'TCS', 3), ('ASIANPAINT


<div style="text-align: center">⁂</div>

[^1]: https://www.sbilife.co.in/en/learn/partners

[^2]: https://www.youtube.com/watch?v=Akfqq6NvqxY

[^3]: https://zerodha.com/varsity/chapter/the-pair-trade/

[^4]: https://economictimes.com/markets/stocks/news/market-trading-guide-ril-bpcl-among-5-stock-recommendations-for-monday/buy-reliance-industries-at-rs-3052/slideshow/113781544.cms

[^5]: https://www.business-standard.com/markets/news/rec-ntpc-powergrid-are-power-stocks-losing-steam-here-s-what-charts-say-125051400576_1.html

[^6]: https://en.wikipedia.org/wiki/SBI_Life_Insurance_Company

[^7]: https://www.bajajfinserv.in/bajaj-finserv-vs-bajaj-finance

[^8]: https://portfolioslab.com/tools/stock-comparison/BPCL.NS/RELIANCE.BO

[^9]: https://www.businesstoday.in/markets/company-stock/story/ntpc-vs-power-grid-shares-rise-outlook-target-price-349504-2022-10-11

[^10]: https://www.crisil.com/mnt/winshare/Ratings/RatingList/RatingDocs/SBILifeInsuranceCompanyLimited_May%2010_%202024_RR_342802.html

[^11]: https://upstox.com/news/market-news/stocks/how-bajaj-finance-and-bajaj-finserv-defied-market-slumps-to-deliver-consistent-growth/article-154913/

[^12]: https://www.ijfmr.com/papers/2024/2/15313.pdf

[^13]: https://adityatrading.in/posts/comparing-powergrid-and-ntpc-insights-for-investors/

[^14]: https://portfolioslab.com/tools/stock-comparison/INFY/TCS.NS

[^15]: https://enrichmoney.in/blog-article/pairs-trading

[^16]: https://www.moneycontrol.com/india/stockpricequote/lifehealth-insurance/sbilifeinsurancecompany/SLI03

[^17]: https://www.sbilife.co.in

[^18]: https://economictimes.indiatimes.com/sbi-life-insurance-company-ltd/stocks/companyid-2898.cms

[^19]: https://www.cnbctv18.com/market/investic-initiates-fast-pair-trade-long-sbi-life-short-icici-prudential-life-19539593.htm

[^20]: https://www.equitypandit.com/sbi-life-share-price/

[^21]: https://www.screener.in/company/SBILIFE/

[^22]: https://www.macroaxis.com/invest/pair-correlation/SBILIFE.NSE/MAXHEALTH.NSE/SBI-vs-Max

[^23]: https://www.business-standard.com/companies/news/tcs-partners-with-ai-firm-vianai-systems-founded-by-vishal-sikka-125041700941_1.html

[^24]: https://ticker.finology.in/company/SBILIFE

[^25]: https://www.macroaxis.com/invest/marketCorrelation/SBILIFE.NSE--SBI-Life-Insurance

[^26]: https://www.macroaxis.com/invest/pair-correlation/SBILIFE.NSE/21STCENMGM.NSE/SBI-vs-21st

[^27]: https://www.macroaxis.com/invest/pair-correlation/EEW.STU/A4XA.F/SBI-vs-American

[^28]: https://mvgrexams.com/video/Macro-Trends-and-Their-Impact-on-SBI-Life-Insurance-Company-Limited-(540719)

[^29]: https://www.tijorifinance.com/company/sbi-life-insurance-company-limited/

[^30]: https://www.smart-investing.in/main.php?Company=BAJAJ+FINANCE+LTD

[^31]: https://in.tradingview.com/symbols/BSE-SBILIFE/ideas/

[^32]: https://www.tradingview.com/symbols/NSE-SBILIFE/ideas/

[^33]: https://www.sbilife.co.in/en/individual-life-insurance/savings-plans

[^34]: https://in.investing.com/equities/sbi-life-insurance

SBILIFE and SBIN

BAJFINANCE and BAJAJFINSV

INFY and TCS

RELIANCE and BPCL

POWERGRID and NTPC

In [None]:

def dax_retail_state_handler(self, retail_state: RS) -> float:
        ask_res_5 = sum(retail_state.ask_volumes[:5])
        bid_res_5 = sum(retail_state.bid_volumes[:5])
        net_res = bid_res_5 - ask_res_5
        
        price_changes = [retail_state.ask_prices[i+1] - retail_state.ask_prices[i] 
                         for i in range(len(retail_state.ask_prices) - 1)]
        mean_change = sum(price_changes) / len(price_changes)
        variance = sum((x - mean_change) ** 2 for x in price_changes) / (len(price_changes) - 1)
        std_dev_of_ask_changes = variance ** 0.5
        
        
        price_changes_2 = [retail_state.bid_prices[i+1] - retail_state.bid_prices[i] 
                         for i in range(len(retail_state.bid_prices) - 1)]
        mean_change_2 = sum(price_changes_2) / len(price_changes_2)
        variance_2 = sum((x - mean_change_2) ** 2 for x in price_changes_2) / (len(price_changes_2) - 1)
        std_dev_of_bid_changes = variance_2 ** 0.5
        
        mid_price = (retail_state.best_bid_price + retail_state.best_ask_price)/2
        
        if net_res > 0:
            max_std_div = retail_state.ask_prices[4] - retail_state.ask_prices[0]
            if max_std_div == 0:
                max_std_div = 1e-6

            price_diff = retail_state.ask_prices[-1] - retail_state.ask_prices[0]
            self.valuation = mid_price + std_dev_of_ask_changes * (price_diff / max_std_div)
    
        elif net_res < 0:
            max_std_div = retail_state.bid_prices[4] - retail_state.bid_prices[0]
            if max_std_div == 0:
                max_std_div = 1e-6

            price_diff = retail_state.bid_prices[-1] - retail_state.bid_prices[0]
            self.valuation = mid_price - std_dev_of_bid_changes * (price_diff / max_std_div)
        
        elif net_res == 0:
            self.valuation = mid_price
        
        
        return self.valuation

In [None]:
class energy_math(Strategy):
    def __init__(self):
        self.valuation = 0
        self.last_traded_price = 0

    def dax_retail_state_handler(self, retail_state: RS) -> float:
        ask_res_5 = sum(retail_state.ask_volumes[:5])
        bid_res_5 = sum(retail_state.bid_volumes[:5])
        net_res = bid_res_5 - ask_res_5
        
        price_changes = [retail_state.ask_prices[i+1] - retail_state.ask_prices[i] 
                         for i in range(len(retail_state.ask_prices) - 1)]
        mean_change = sum(price_changes) / len(price_changes)
        variance = sum((x - mean_change) ** 2 for x in price_changes) / (len(price_changes) - 1)
        std_dev_of_ask_changes = variance ** 0.5
        
        
        price_changes_2 = [retail_state.bid_prices[i+1] - retail_state.bid_prices[i] 
                         for i in range(len(retail_state.bid_prices) - 1)]
        mean_change_2 = sum(price_changes_2) / len(price_changes_2)
        variance_2 = sum((x - mean_change_2) ** 2 for x in price_changes_2) / (len(price_changes_2) - 1)
        std_dev_of_bid_changes = variance_2 ** 0.5
        
        mid_price = (retail_state.best_bid_price + retail_state.best_ask_price)/2
        
        if net_res > 0:
            max_std_div = retail_state.ask_prices[4] - retail_state.ask_prices[0]
            if max_std_div == 0:
                max_std_div = 1e-6

            price_diff = retail_state.ask_prices[-1] - retail_state.ask_prices[0]
            self.valuation = mid_price + std_dev_of_ask_changes * (price_diff / max_std_div)
    
        elif net_res < 0:
            max_std_div = retail_state.bid_prices[4] - retail_state.bid_prices[0]
            if max_std_div == 0:
                max_std_div = 1e-6

            price_diff = retail_state.bid_prices[-1] - retail_state.bid_prices[0]
            self.valuation = mid_price - std_dev_of_bid_changes * (price_diff / max_std_div)
        
        elif net_res == 0:
            self.valuation = mid_price
        
        
        return self.valuation

    def dax_last_done_handler(self, last_done: LD) -> Tuple[float, int]:
        self.last_traded_price = last_done.price
        return self.valuation, 0 # dax_last_done_handler returns two values, first is the valuation

```math
\text{ask\_res}_5 = \sum_{i=0}^{4} V^a_i
```

```math
\text{bid\_res}_5 = \sum_{i=0}^{4} V^b_i
```

```math
\text{net\_res} = \text{bid\_res}_5 - \text{ask\_res}_5
```

```math
\Delta A_i = A_{i+1} - A_i
```

```math
\mu_a = \frac{1}{n-1} \sum_{i=0}^{n-2} \Delta A_i
```

```math
\sigma_a = \sqrt{ \frac{1}{n-2} \sum_{i=0}^{n-2} (\Delta A_i - \mu_a)^2 }
```

```math
\Delta B_i = B_{i+1} - B_i
```

```math
\mu_b = \frac{1}{n-1} \sum_{i=0}^{n-2} \Delta B_i
```

```math
\sigma_b = \sqrt{ \frac{1}{n-2} \sum_{i=0}^{n-2} (\Delta B_i - \mu_b)^2 }
```

```math
\text{mid\_price} = \frac{A_0 + B_0}{2}
```

```math
\text{If } \text{net\_res} > 0:
\quad
\text{valuation} = \text{mid\_price} + \sigma_a \cdot \left( \frac{A_{n-1} - A_0}{A_4 - A_0 + \varepsilon} \right)
```

```math
\text{If } \text{net\_res} < 0:
\quad
\text{valuation} = \text{mid\_price} - \sigma_b \cdot \left( \frac{B_{n-1} - B_0}{B_4 - B_0 + \varepsilon} \right)
```
