# Get VN Data


In [None]:
import pandas as pd
import yfinance as yf
from vnstock import Vnstock
from typing import List, Dict, Tuple
import numpy as np
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.stattools import adfuller
import os
import pandas as pd
from statsmodels.tsa.api import AutoReg
from typing import Dict
from scipy.stats import pearsonr
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.stattools import adfuller
from scipy.stats import pearsonr
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
import statsmodels.api as sm
import os
from vnstock import Vnstock  # Assuming this is the correct import for your data source

In [23]:
def get_stock_data(symbols, start_date, end_date, interval='1D'):
    """Download closing prices for several symbols into one wide DataFrame.

    For every symbol the history is requested from vnstock (source 'VCI'),
    the 'close' column is kept, renamed to the symbol and indexed by the
    'time' column of the returned history.

    Parameters:
    - symbols: Iterable of ticker symbols to download
    - start_date, end_date: Date range forwarded to ``quote.history``
    - interval: Bar interval forwarded to ``quote.history`` (default '1D')

    Returns:
    - DataFrame indexed by 'time' with one column per symbol, keeping only
      the rows where every symbol has a value. An empty DataFrame is
      returned when ``symbols`` is empty.
    """
    frames = []
    for symbol in symbols:
        # Fetch historical data for the symbol
        stock = Vnstock().stock(symbol=symbol, source='VCI')
        historical_data = stock.quote.history(
            start=start_date,
            end=end_date,
            interval=interval
        )
        # Keep only the close price, indexed by time and named after the symbol
        frames.append(historical_data.set_index('time')['close'].rename(symbol))

    if not frames:
        return pd.DataFrame()

    # Align all symbols once instead of re-concatenating the growing frame on
    # every iteration; rows with a missing price for any symbol are dropped.
    return pd.concat(frames, axis=1).dropna()

In [None]:
class DataHandler:
    """Load price data and maintain the stock clusters used by the strategy."""

    def __init__(self, futures, stocks, start_date, end_date, file_path, 
                 estimation_window=60, cluster_update_interval=5, 
                 futures_change_threshold=0.05, max_clusters=12):
        """Initialize the data handler for statistical arbitrage strategy.
        
        Parameters:
        - futures: Futures symbol (e.g., VN30F1M)
        - stocks: List of stock symbols
        - start_date, end_date: Date range for data
        - file_path: Path to store/load data
        - estimation_window: Window size for rolling analysis
        - cluster_update_interval: Days between clustering updates
        - futures_change_threshold: Threshold for futures price change to trigger re-clustering
        - max_clusters: Maximum number of clusters for K-means
        """
        self.futures = futures
        self.stocks = stocks
        self.start_date = start_date
        self.end_date = end_date
        self.file_path = file_path
        self.estimation_window = estimation_window
        self.cluster_update_interval = cluster_update_interval
        self.futures_change_threshold = futures_change_threshold
        self.max_clusters = max_clusters
        self.data = self.load_data()
        self.last_clusters = None
        self.last_cluster_day = None
        self.last_futures_price = None

    def load_data(self):
        """Load stock data from file or fetch if not available.

        When ``file_path`` exists it is read as CSV and must contain a
        'time' column. Otherwise prices are downloaded with the module-level
        ``get_stock_data`` and written to ``file_path`` for later runs.

        Returns:
        - DataFrame indexed by 'Date' (parsed from 'time') with rows that
          contain any missing value removed. The 'time' column is kept.
        """
        if os.path.exists(self.file_path):
            data = pd.read_csv(self.file_path)
        else:
            # get_stock_data is a module-level function, not a method.
            data = get_stock_data([self.futures] + list(self.stocks),
                                  self.start_date, self.end_date, '1D')
            # Make sure the index is written under the 'time' header so the
            # cached CSV can be read back by the branch above.
            data = data.rename_axis('time')
            parent = os.path.dirname(self.file_path)
            if parent:
                os.makedirs(parent, exist_ok=True)
            data.to_csv(self.file_path, index=True)
            # Expose 'time' as a column, matching the layout read from CSV.
            data = data.reset_index()
        data['Date'] = pd.to_datetime(data['time'])
        return data.set_index('Date').dropna()

    def compute_residuals(self, window_data):
        """Compute residuals from regressing each stock against the futures.

        Stocks that are not columns of ``window_data`` are skipped.

        Returns:
        - DataFrame with one residual column per regressed stock, indexed
          like ``window_data``, with rows containing NaN removed.
        """
        residuals = pd.DataFrame(index=window_data.index)
        present = [stock for stock in self.stocks if stock in window_data.columns]
        if not present:
            return residuals.dropna()
        # The regressor is identical for every stock, so build it once.
        X = sm.add_constant(window_data[self.futures])
        for stock in present:
            residuals[stock] = sm.OLS(window_data[stock], X).fit().resid
        return residuals.dropna()

    def cluster_stocks(self, window_data, current_day, futures_current_price):
        """Cluster stocks, updating only every cluster_update_interval days or if futures changes significantly.

        Parameters:
        - window_data: Price window used to compute the regression residuals
        - current_day: Timestamp of the current day (compared in calendar days)
        - futures_current_price: Current futures price

        Returns:
        - List of clusters, each a list of stock symbols. The previous
          clustering is returned unchanged while it is still considered fresh.
        """
        if self.last_clusters is not None and self.last_cluster_day is not None:
            days_since_last_cluster = (current_day - self.last_cluster_day).days
            futures_change = abs(futures_current_price - self.last_futures_price) / self.last_futures_price if self.last_futures_price else 0
            if days_since_last_cluster < self.cluster_update_interval and futures_change < self.futures_change_threshold:
                return self.last_clusters

        residuals = self.compute_residuals(window_data)
        # Only stocks that actually produced residuals take part in the
        # clustering; labels must be matched against these, in this order.
        members = list(residuals.columns)
        if residuals.empty or len(members) < 2:
            self.last_clusters = [self.stocks]
        else:
            X = residuals.T
            best_labels = None
            best_score = -1
            # silhouette_score needs fewer clusters than samples, hence the
            # upper bound of len(members) - 1.
            for k in range(2, min(self.max_clusters + 1, len(members))):
                labels = KMeans(n_clusters=k, random_state=0).fit(X).labels_
                score = silhouette_score(X, labels)
                if score > best_score:
                    best_score = score
                    best_labels = labels
            if best_labels is None:
                # No k could be scored (e.g. only two stocks): fall back to k=2.
                best_labels = KMeans(n_clusters=2, random_state=0).fit(X).labels_
            clusters = {}
            for stock, label in zip(members, best_labels):
                clusters.setdefault(int(label), []).append(stock)
            self.last_clusters = [clusters[label] for label in sorted(clusters)]
        self.last_cluster_day = current_day
        self.last_futures_price = futures_current_price
        return self.last_clusters

class StatArbStrategy:
    """Rolling search for stock baskets that are cointegrated with a futures contract.

    On every day after the first estimation window the strategy clusters the
    stocks, builds candidate baskets with Johansen tests, validates them and
    tracks the residual of each accepted ("active") combination.
    """

    def __init__(self, data_handler, min_trading_days=30, threshold=0.05,
                 max_stocks=8, confidence_level=1, adf_significance=0.05,
                 correlation_threshold=0.6, dynamic_threshold=True,
                 residual_threshold=0.1):
        """Initialize the statistical arbitrage strategy.
        
        Parameters:
        - data_handler: Instance of DataHandler for data management
        - min_trading_days: Minimum days a combination must be traded
        - threshold: Minimum beta threshold for inclusion
        - max_stocks: Maximum stocks in a combination
        - confidence_level: Confidence level for Johansen test (0=90%, 1=95%, 2=99%)
        - adf_significance: Significance level for ADF test
        - correlation_threshold: Threshold for residual similarity
        - dynamic_threshold: Whether to adjust correlation threshold dynamically
        - residual_threshold: Max allowed residual as fraction of futures value
        """
        self.data_handler = data_handler
        self.futures = data_handler.futures
        self.stocks = data_handler.stocks
        self.estimation_window = data_handler.estimation_window
        self.data = data_handler.data
        self.min_trading_days = min_trading_days
        self.threshold = threshold
        self.max_stocks = max_stocks
        self.confidence_level = confidence_level
        self.confidence_level_joh_final = min(2, confidence_level + 1)  # Derived
        self.adf_significance = adf_significance
        self.adf_significance_trading = min(0.1, 2 * adf_significance)  # Derived
        self.correlation_threshold = correlation_threshold
        self.dynamic_threshold = dynamic_threshold
        self.residual_threshold = residual_threshold
        self.active_combinations = []
        self.combination_id = 0
        self.results = []
        self.validation_cache = {}

    def get_pairwise_candidates(self, window_data, stocks_pool):
        """Perform pairwise Johansen tests on a pool of stocks.

        Returns:
        - Stocks whose trace statistic against the futures exceeds the
          critical value, ordered from strongest to weakest statistic.
        """
        candidates = []
        for stock in stocks_pool:
            try:
                result = coint_johansen(window_data[[self.futures, stock]], det_order=1, k_ar_diff=1)
                if result.lr1[0] > result.cvt[0, self.confidence_level]:
                    candidates.append((stock, result.lr1[0]))
            except Exception as e:
                print(f"Pairwise test failed for {stock}: {e}")
        candidates.sort(key=lambda x: x[1], reverse=True)
        return [stock for stock, _ in candidates]

    def build_combination_greedy(self, window_data, candidates):
        """Greedily build a cointegrated combination from candidates with early stopping.

        The first candidate seeds the basket; each further candidate is kept
        only if the enlarged basket stays cointegrated, improves the trace
        statistic by at least 5% and yields non-negative betas.
        """
        if not candidates:
            return []
        selected = [candidates[0]]
        best_trace_stat = coint_johansen(window_data[[self.futures, selected[0]]], det_order=1, k_ar_diff=1).lr1[0]
        for stock in candidates[1:]:
            if len(selected) >= self.max_stocks:
                break
            test_subset = selected + [stock]
            try:
                result = coint_johansen(window_data[[self.futures] + test_subset], det_order=1, k_ar_diff=1)
                if result.lr1[0] <= result.cvt[0, self.confidence_level]:
                    continue
                improvement = (result.lr1[0] - best_trace_stat) / best_trace_stat
                if improvement < 0.05:  # Early stopping if improvement < 5%
                    break
                evec = result.evec[:, 0]
                betas = -evec[1:] / evec[0]
                if not all(beta >= 0 for beta in betas):
                    continue
                selected.append(stock)
                best_trace_stat = result.lr1[0]
            except Exception as e:
                print(f"Combination test failed: {e}")
        return selected

    def validate_combination(self, window_data, selected):
        """Validate the combination with caching and residual constraints.

        Returns:
        - (params, adf_pvalue) where params is
          ``{'intercept': float, 'betas': {stock: beta}}`` for an accepted
          combination and ``None`` for a rejected one.
        """
        # The outcome depends on the window as well as on the stock set, so the
        # window bounds are part of the key; otherwise a verdict (or betas)
        # computed on an earlier window would be reused for every later day.
        window_id = (window_data.index[0], window_data.index[-1]) if len(window_data) else None
        comb_key = (window_id, frozenset(selected))
        if comb_key in self.validation_cache:
            return self.validation_cache[comb_key]
        try:
            # Johansen test with restricted intercept
            result = coint_johansen(window_data[[self.futures] + list(selected)], det_order=1, k_ar_diff=1)
            if result.lr1[0] <= result.cvt[0, self.confidence_level_joh_final]:
                self.validation_cache[comb_key] = (None, np.inf)
                return None, np.inf
            evec = result.evec[:, 0]
            betas = -evec[1:] / evec[0]
            if not all(beta >= 0 for beta in betas):
                self.validation_cache[comb_key] = (None, np.inf)
                return None, np.inf
            
            # Compute residuals with Johansen betas
            synthetic_portfolio = sum(window_data[s] * b for s, b in zip(selected, betas))
            residuals = window_data[self.futures] - synthetic_portfolio
            
            # Estimate the intercept (mean of residuals, negated to match cointegration equation)
            intercept = -residuals.mean()
            
            # Verify stationarity
            adf_pvalue = adfuller(residuals)[1]
            if adf_pvalue >= self.adf_significance:
                self.validation_cache[comb_key] = (None, adf_pvalue)
                return None, adf_pvalue
            
            # Check residual magnitude using 95th percentile
            futures_avg = window_data[self.futures].mean()
            if np.percentile(np.abs(residuals), 95) > self.residual_threshold * futures_avg:
                self.validation_cache[comb_key] = (None, adf_pvalue)
                return None, adf_pvalue
            
            # Filter betas by threshold
            selected_betas = {s: b for s, b in zip(selected, betas) if abs(b) > self.threshold}
            if not selected_betas:
                # Every leg was filtered out: there is nothing left to trade.
                self.validation_cache[comb_key] = (None, adf_pvalue)
                return None, adf_pvalue
            
            # Package betas and intercept together
            combination_params = {'intercept': intercept, 'betas': selected_betas}
            self.validation_cache[comb_key] = (combination_params, adf_pvalue)
            return combination_params, adf_pvalue
        except Exception as e:
            print(f"Validation failed for {selected}: {e}")
            self.validation_cache[comb_key] = (None, np.inf)
            return None, np.inf

    def is_similar(self, new_residuals, existing_residuals):
        """Check if two residual series are highly correlated.

        Series of different length are never considered similar.
        """
        if len(new_residuals) != len(existing_residuals):
            return False
        corr, _ = pearsonr(new_residuals, existing_residuals)
        return corr > self.correlation_threshold

    def adjust_correlation_threshold(self):
        """Dynamically adjust correlation threshold based on active combinations.

        With fewer than 10 active combinations the threshold is raised by
        0.05 (capped at 0.8); otherwise it is lowered by 0.05 (floored at 0.5).
        """
        if self.dynamic_threshold:
            if len(self.active_combinations) < 10:
                self.correlation_threshold = min(0.8, self.correlation_threshold + 0.05)
            else:
                self.correlation_threshold = max(0.5, self.correlation_threshold - 0.05)

    def _try_add_combination(self, estimation_data, candidates, current_day):
        """Build, validate and (if accepted) register a basket from candidates."""
        selected = self.build_combination_greedy(estimation_data, candidates)
        if not selected:
            return
        params, adf_pvalue = self.validate_combination(estimation_data, selected)
        if params:
            self.add_combination_if_not_similar(params, adf_pvalue, estimation_data, current_day)

    def _evaluate_active_combinations(self, day, current_day):
        """Record today's residual for every active combination and retire stale ones."""
        for comb in self.active_combinations[:]:
            if day < comb['start_day']:
                continue
            comb['trading_days'] += 1
            current_prices = self.data.iloc[day]
            synthetic_portfolio = sum(current_prices[s] * b for s, b in comb['params']['betas'].items())
            residual = current_prices[self.futures] - (comb['params']['intercept'] + synthetic_portfolio)
            comb['all_residuals'].append(residual)
            if comb['trading_days'] >= self.min_trading_days:
                # After the minimum holding period, drop the combination once
                # its recent residuals no longer pass the (looser) ADF test.
                recent_residuals = pd.Series(comb['all_residuals'][-self.estimation_window:])
                if adfuller(recent_residuals)[1] >= self.adf_significance_trading:
                    self.active_combinations.remove(comb)
                    continue
            row = {
                'Date': current_day,
                'Combination_ID': comb['id'],
                'Residual': residual,
                'Total_Combinations': len(self.active_combinations),
                'Num_Stocks': len(comb['params']['betas']),
                'Is_Estimation': False,
                'Intercept': comb['params']['intercept'],
                **{f'Beta_{s}': b for s, b in comb['params']['betas'].items()}
            }
            self.results.append(row)

    def run_strategy(self):
        """Execute the rolling statistical arbitrage strategy."""
        for day in range(self.estimation_window, len(self.data)):
            estimation_data = self.data.iloc[day - self.estimation_window:day]
            current_day = self.data.index[day]
            futures_current_price = self.data.iloc[day][self.futures]
            self.adjust_correlation_threshold()
            clusters = self.data_handler.cluster_stocks(estimation_data, current_day, futures_current_price)

            # The pairwise tests only depend on the window, so run them once
            # per cluster and reuse the ranking for both steps below.
            cluster_candidates = [self.get_pairwise_candidates(estimation_data, cluster)
                                  for cluster in clusters]

            # Within-cluster combinations
            for candidates in cluster_candidates:
                self._try_add_combination(estimation_data, candidates, current_day)

            # Top candidates across clusters (top 3 per cluster). Duplicates
            # are removed while keeping order, so the seed of the greedy search
            # does not depend on string-hash randomisation between runs.
            top_candidates = list(dict.fromkeys(
                stock for candidates in cluster_candidates for stock in candidates[:3]
            ))
            if top_candidates:
                self._try_add_combination(estimation_data, top_candidates, current_day)

            # Cross-cluster combinations
            all_candidates = self.get_pairwise_candidates(estimation_data, self.stocks)
            self._try_add_combination(estimation_data, all_candidates, current_day)

            # Evaluate active combinations
            self._evaluate_active_combinations(day, current_day)

    def add_combination_if_not_similar(self, params, new_adf_pvalue, estimation_data, current_day):
        """Add a new combination if it's not similar to existing ones.

        A similar combination that has traded for at least ``min_trading_days``
        is replaced when the new ADF p-value is less than half of its own; a
        similar combination that is younger than that blocks the new one.
        """
        synthetic_portfolio = sum(estimation_data[s] * b for s, b in params['betas'].items())
        residuals = estimation_data[self.futures] - (params['intercept'] + synthetic_portfolio)
        similar_found = False
        to_remove = []
        for comb in self.active_combinations:
            existing_residuals = pd.Series(comb['all_residuals'][-self.estimation_window:])
            if self.is_similar(residuals, existing_residuals):
                if comb['trading_days'] >= self.min_trading_days:
                    existing_adf_pvalue = adfuller(existing_residuals)[1]
                    if new_adf_pvalue < 0.5 * existing_adf_pvalue:
                        to_remove.append(comb)
                    # NOTE(review): when the mature combination is NOT beaten,
                    # similar_found stays False and the new, similar
                    # combination is still added - confirm this is intended.
                else:
                    similar_found = True
        for comb in to_remove:
            self.active_combinations.remove(comb)
        if not similar_found:
            self.combination_id += 1
            self.active_combinations.append({
                'id': self.combination_id,
                'params': params,
                'start_day': self.data.index.get_loc(current_day),
                'all_residuals': residuals.tolist(),
                'trading_days': 0
            })
            for i, res in enumerate(residuals):
                row = {
                    'Date': estimation_data.index[i],
                    'Combination_ID': self.combination_id,
                    'Residual': res,
                    'Total_Combinations': len(self.active_combinations),
                    'Num_Stocks': len(params['betas']),
                    'Is_Estimation': True,
                    'Intercept': params['intercept'],
                    **{f'Beta_{s}': b for s, b in params['betas'].items()}
                }
                self.results.append(row)
            print(f"\n=== New Combination {self.combination_id} at {current_day.date()} ===")
            # Use the configured futures symbol rather than a fixed name.
            print(f"{self.futures} = {params['intercept']:.3f} + " + " + ".join([f"{b:.3f}*{s}" for s, b in params['betas'].items()]))

    def get_results(self):
        """Return the results as a DataFrame.

        The rows are sorted by Combination_ID and Date. ``self.results`` is
        left as a list so the strategy can keep appending to it, and an empty
        run yields an empty frame instead of raising on the sort keys.
        """
        frame = pd.DataFrame(self.results)
        if frame.empty:
            return pd.DataFrame(columns=[
                'Date', 'Combination_ID', 'Residual', 'Total_Combinations',
                'Num_Stocks', 'Is_Estimation', 'Intercept'
            ])
        return frame.sort_values(by=['Combination_ID', 'Date'])

In [80]:
# Driver cell: configure the futures symbol and candidate symbols, build the
# data handler, run the rolling strategy and collect the result rows.
futures = 'VN30F1M'
stocks = ['ACB', 'BCM', 'BID', 'BVH', 'CTG', 'FPT', 'GAS', 'GVR', 'HDB', 'HPG', 'LPB', 'MBB', 'MSN', 'MWG',
          'PLX', 'SAB', 'SHB', 'SSI', 'STB', 'TCB', 'TPB', 'VCB', 'VHM', 'VIB', 'VIC', 'VJC', 'VNM', 'VRE',
          'VPB', 'FUEVFVND', 'FUESSVFL', 'E1VFVN30', 'FUEVN100']
# NOTE(review): the three variables below are never passed to DataHandler; the
# call uses literals instead, and two of them differ (end_date '2025-01-01',
# file_path 'stock_data.csv'). Confirm which values are intended.
start_date = '2024-01-01'
end_date = '2024-12-31'
file_path = 'data\\stock_data.csv'
data_handler = DataHandler(
    futures=futures,
    stocks=stocks, 
    start_date='2024-01-01',
    end_date='2025-01-01',
    file_path='stock_data.csv'
)

# run_strategy prints every newly accepted combination as it is found.
strategy = StatArbStrategy(data_handler)
strategy.run_strategy()
results_df = strategy.get_results()


=== New Combination 1 at 2024-04-03 ===
VN30F1M = 15.301 + 47.852*E1VFVN30 + 0.734*FUEVFVND + 0.930*ACB + 1.151*VJC + 1.465*VIB + 0.665*VCB

=== New Combination 2 at 2024-04-03 ===
VN30F1M = 105.392 + 41.458*E1VFVN30 + 1.776*FUEVFVND + 3.007*ACB + 1.700*VJC + 2.054*VIB + 1.531*VCB + 1.021*GVR

=== New Combination 3 at 2024-04-11 ===
VN30F1M = 25.967 + 8.528*E1VFVN30 + 40.259*FUEVN100 + 13.644*FUESSVFL + 1.669*MBB + 6.633*TPB

=== New Combination 4 at 2024-04-12 ===
VN30F1M = 50.015 + 25.961*E1VFVN30 + 25.804*FUEVN100 + 14.477*FUESSVFL + 0.298*STB

=== New Combination 5 at 2024-04-17 ===
VN30F1M = -92.187 + 5.952*STB + 45.117*E1VFVN30

=== New Combination 6 at 2024-05-07 ===
VN30F1M = 19.666 + 56.159*E1VFVN30 + 1.830*STB

=== New Combination 7 at 2024-06-12 ===
VN30F1M = 3.054 + 51.318*E1VFVN30 + 3.116*VIC

=== New Combination 8 at 2024-06-18 ===
VN30F1M = 44.750 + 59.481*E1VFVN30 + 0.150*VPB

=== New Combination 9 at 2024-07-09 ===
VN30F1M = 118.605 + 36.221*E1VFVN30 + 9.531*MBB + 32.

In [81]:
# Save the strategy results without the index column.
# NOTE(review): the '\\' separator makes this path Windows-specific.
results_df.to_csv('draft\\results.csv', index=False)

## Generating Trading Signal

In [110]:
class SignalGenerator:
    """Fit a rolling Ornstein-Uhlenbeck model to each residual column.

    ``residuals`` is a DataFrame with one column per series; the fitted
    parameters are stored in ``ou_params`` after ``apply_ou_fitting``.
    """

    # Number of observations per year used to annualise the mean-reversion rate.
    TRADING_DAYS_PER_YEAR = 252

    def __init__(self, residuals: pd.DataFrame, ou_window: int = 60, fallback_days: int = 5):
        """Store the residuals and the fitting configuration.

        Parameters:
        - residuals: DataFrame of residual series (one column each)
        - ou_window: Number of trailing observations used for each fit
        - fallback_days: Maximum age in days of the last valid fit that may be
          reused to compute an s-score when the current fit is rejected
        """
        self.residuals = residuals
        self.ou_window = ou_window
        self.fallback_days = fallback_days
        self.ou_params = None
        self.last_valid_params = {col: None for col in residuals.columns}
        self.ou_cache = {}  # Cache for OU parameters

    @staticmethod
    def _nan_params() -> Dict[str, float]:
        """Return a fresh all-NaN parameter dict (callers may mutate it)."""
        return {'kappa': np.nan, 'm': np.nan, 'sigma': np.nan, 's_score': np.nan}

    def fit_ou_process(self, series: pd.Series, date: pd.Timestamp) -> Dict[str, float]:
        """Fit an Ornstein-Uhlenbeck process to the series and compute the s-score.

        An AR(1) model ``x_t = a + b * x_{t-1} + e_t`` is fitted to the last
        ``ou_window`` observations. The fit is rejected (all-NaN result) when
        the window is incomplete, when ``b`` is not in (0, 1) or when its
        p-value is 0.10 or higher.

        Returns:
        - dict with keys 'kappa', 'm', 'sigma' and 's_score'
        """
        cache_key = (series.name, date)
        if cache_key in self.ou_cache:
            return self.ou_cache[cache_key]
        if len(series) < self.ou_window:
            return self._nan_params()
        # .iloc keeps the slice positional regardless of the index type.
        series_window = series.iloc[-self.ou_window:].dropna().to_numpy()
        if len(series_window) < self.ou_window:
            return self._nan_params()
        try:
            model = AutoReg(series_window, lags=1).fit()
            a, b = model.params
            p_value_b = model.pvalues[1]
            if p_value_b >= 0.10 or b <= 0 or b >= 1:
                return self._nan_params()
            # b = exp(-kappa * dt) with dt = 1/252, so the annualised rate
            # scales linearly with the number of periods (not with its root).
            kappa = -np.log(b) * self.TRADING_DAYS_PER_YEAR  # Annualized mean reversion rate
            m = a / (1 - b)  # Long-term mean
            sigma = np.sqrt(model.sigma2 * 2 * kappa / (1 - b**2))  # Volatility
            latest = series.iloc[-1]
            # kappa cancels here, so the s-score does not depend on the
            # annualisation factor.
            sigma_eq = sigma / np.sqrt(2 * kappa) if kappa > 0 else np.inf
            s_score = (latest - m) / sigma_eq if sigma_eq != 0 else 0
            params = {'kappa': kappa, 'm': m, 'sigma': sigma, 's_score': s_score}
            self.ou_cache[cache_key] = params
            return params
        except (ValueError, np.linalg.LinAlgError):
            return self._nan_params()

    def apply_ou_fitting(self):
        """Apply OU fitting across all residuals and dates.

        Fills ``self.ou_params``, a DataFrame indexed like ``residuals`` with
        MultiIndex columns (series, parameter). Rows before ``ou_window``
        stay NaN.
        """
        columns = pd.MultiIndex.from_product([self.residuals.columns, ['kappa', 'm', 'sigma', 's_score']])
        self.ou_params = pd.DataFrame(index=self.residuals.index, columns=columns)
        for t in range(self.ou_window, len(self.residuals)):
            date = self.residuals.index[t]
            for stock in self.residuals.columns:
                series = self.residuals[stock].iloc[:t + 1]
                params = self.fit_ou_process(series, date)
                last_valid = self.last_valid_params[stock]
                if not np.isnan(params['kappa']):
                    self.last_valid_params[stock] = {'params': params, 'date': date}
                elif last_valid and (date - last_valid['date']).days <= self.fallback_days:
                    # Current fit rejected: score today's value against the
                    # most recent valid parameters while they are fresh enough.
                    last_params = last_valid['params']
                    latest = series.iloc[-1]
                    m, kappa, sigma = last_params['m'], last_params['kappa'], last_params['sigma']
                    sigma_eq = sigma / np.sqrt(2 * kappa) if kappa > 0 else np.inf
                    params['s_score'] = (latest - m) / sigma_eq if sigma_eq != 0 else 0
                for param, value in params.items():
                    self.ou_params.loc[date, (stock, param)] = value

def get_allocation_tier(s_score: float, prev_allocation: float, prev_s_score: float, is_decreasing_trend: bool) -> float:
    """
    Map the current s-score (and its recent direction) to an allocation level.

    The rules are checked from the most extreme s-score downwards; when none
    of them fires the previous allocation is carried over unchanged.

    Args:
        s_score (float): Current s-score
        prev_allocation (float): Allocation held before this step
        prev_s_score (float): s-score of the previous step
        is_decreasing_trend (bool): True while the s-score is in a sustained decline

    Returns:
        float: Allocation percentage between 0.0 and 1.25
    """
    # Stretched too far: cut the loss.
    if s_score > 2.0:
        return 0.0

    # Rising s-score: scale the position up in two steps.
    if s_score > 1.5 and s_score > prev_s_score:
        return 1.25
    if s_score > 1.25 and s_score > prev_s_score:
        return 1.0

    # High enough to open (or keep) the base position.
    if s_score > 1.0:
        return 0.75

    # Far on the other side: leave the trade completely.
    if s_score < -1.5:
        return 0.0

    # A one-off dip while holding a position: take profit gradually, but only
    # inside the 0.5..1.2 band and never below 0.4.
    dipped = s_score < prev_s_score and prev_allocation > 0 and not is_decreasing_trend
    if dipped and 0.5 <= s_score <= 1.2:
        return max(0.4, prev_allocation - 0.2)

    # Sustained decline or nothing noteworthy: keep what we have.
    return prev_allocation

def process_results_df(results_df: pd.DataFrame, stocks: list, ou_window: int = 60, lockup_days: int = 2,
                       futures: str = 'VN30F1M') -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Process results to generate s-scores, positions, and trading log with a lockup period.

    Args:
        results_df (pd.DataFrame): DataFrame with Date, Combination_ID, Residual, and Beta columns
        stocks (list): List of stock symbols
        ou_window (int): Window size for OU process fitting
        lockup_days (int): Minimum holding period for combinations
        futures (str): Futures symbol used to name the futures position/log columns

    Returns:
        Tuple containing:
            - results_df (pd.DataFrame): Updated with s_score and Allocation
            - positions_df (pd.DataFrame): Positions for the futures and each stock, one row per date
            - trading_log (pd.DataFrame): Day-over-day position changes and buy/sell/hold labels
    """
    # Sort by date as well so the row order is deterministic.
    results_df = results_df.sort_values(['Combination_ID', 'Date'])
    residuals_pivot = results_df.pivot(index='Date', columns='Combination_ID', values='Residual')

    # Fit OU process
    signal_gen = SignalGenerator(residuals_pivot, ou_window=ou_window)
    signal_gen.apply_ou_fitting()
    ou_params = signal_gen.ou_params

    # Generate allocation percentages with lockup and trend logic
    allocation_percentages = pd.DataFrame(index=ou_params.index, columns=residuals_pivot.columns, dtype=float).fillna(0.0)
    lockup_tracker = {comb_id: None for comb_id in residuals_pivot.columns}  # Entry date for lockup
    trend_tracker = {comb_id: False for comb_id in residuals_pivot.columns}  # Decreasing trend flag

    for comb_id in allocation_percentages.columns:
        s_scores = ou_params[(comb_id, 's_score')]
        prev_allocation = 0.0
        prev_s_score = np.nan
        prev_decreasing = False  # Whether the previous valid step was a decline
        for i, date in enumerate(s_scores.index):
            allocation = 0.0
            if i >= ou_window:
                s_score = s_scores[date]
                if not (pd.isna(s_score) or pd.isna(residuals_pivot.loc[date, comb_id])):
                    is_decreasing = (not pd.isna(prev_s_score)) and s_score < prev_s_score
                    # A "continuous" decline means this step and the previous
                    # one both fell. (AND-ing with the stored flag, which
                    # starts False, could never switch the flag on.)
                    trend_tracker[comb_id] = is_decreasing and prev_decreasing
                    prev_decreasing = is_decreasing
                    intended_allocation = get_allocation_tier(s_score, prev_allocation, prev_s_score, trend_tracker[comb_id])

                    # Enforce lockup period
                    if intended_allocation > 0:
                        if prev_allocation == 0:
                            lockup_tracker[comb_id] = date  # Start lockup
                        allocation = intended_allocation  # Take the intended position
                    elif intended_allocation == 0 and prev_allocation > 0:
                        if lockup_days > 0 and lockup_tracker[comb_id] and (date - lockup_tracker[comb_id]).days < lockup_days:
                            allocation = prev_allocation  # Hold due to lockup
                        else:
                            allocation = 0.0
                            lockup_tracker[comb_id] = None  # Reset lockup
                    else:
                        allocation = 0.0  # No position if intended_allocation == 0 and prev_allocation == 0

                    # Update prev_s_score for next iteration
                    prev_s_score = s_score
            allocation_percentages.loc[date, comb_id] = allocation
            prev_allocation = allocation

    # Update results_df
    results_df['s_score'] = results_df.apply(
        lambda row: ou_params.loc[row['Date'], (row['Combination_ID'], 's_score')]
        if row['Date'] in ou_params.index else np.nan, axis=1
    )
    results_df['Allocation'] = results_df.apply(
        lambda row: allocation_percentages.loc[row['Date'], row['Combination_ID']]
        if row['Date'] in allocation_percentages.index else 0.0, axis=1
    )

    # Compute positions_df with scaled allocations. The dates are sorted
    # chronologically because the trading log below is built with diff(),
    # which is only meaningful between consecutive dates.
    dates = pd.Index(results_df['Date'].unique()).sort_values()
    positions_df = pd.DataFrame(index=dates, dtype=float).fillna(0.0)
    trading_log = pd.DataFrame(index=dates, dtype=float)

    held_rows = results_df[results_df['Allocation'] > 0]
    futures_col = f'Position_{futures}'

    for date in dates:
        day_allocations = allocation_percentages.loc[date]
        active_combs = day_allocations[day_allocations > 0]
        num_active = len(active_combs)
        if num_active == 0:
            scale_factor = 1.0
            total_short = 0.0
        else:
            # Each combination gets an equal portfolio weight, scaled by intended allocation
            scale_factor = 1.0 / num_active
            total_short = (active_combs * scale_factor).sum()

        positions_df.loc[date, futures_col] = -total_short if total_short > 0 else 0.0
        # The active rows of the day are the same for every stock.
        active_rows = held_rows[held_rows['Date'] == date]
        for stock in stocks:
            beta_col = f'Beta_{stock}'
            if beta_col in results_df.columns:
                positions_df.loc[date, f'Position_{stock}'] = (active_rows[beta_col] * active_rows['Allocation'] * scale_factor).sum()
            else:
                positions_df.loc[date, f'Position_{stock}'] = 0.0

    # Generate trading_log with portfolio allocation changes
    assets = [futures] + stocks
    for asset in assets:
        pos_col = f'Position_{asset}'
        trading_log[f'Delta_{asset}'] = positions_df[pos_col].diff().fillna(0.0)
        trading_log[f'Action_{asset}'] = np.where(trading_log[f'Delta_{asset}'] > 0, 'buy',
                                                  np.where(trading_log[f'Delta_{asset}'] < 0, 'sell', 'hold'))

    return results_df, positions_df, trading_log

In [111]:
# Turn the strategy residuals into s-scores, positions and a trading log
# (results_df is rebound to the version with s_score/Allocation columns).
results_df, positions_df, trading_log = process_results_df(results_df, stocks=stocks)

In [113]:
# Save the per-date positions; NOTE(review): '\\' makes the path Windows-specific.
positions_df.to_csv('signal\\positions.csv')

In [114]:
# Save the results enriched with s_score and Allocation (index included).
results_df.to_csv('signal\\result.csv')

In [112]:
# Save the per-date position deltas and buy/sell/hold actions.
trading_log.to_csv('signal\\trading_log.csv')

# Portfolio Management
