# Get VN Data


In [None]:
import pandas as pd
import yfinance as yf
from vnstock import Vnstock
from typing import List, Dict, Tuple
import numpy as np
from statsmodels.tsa.vector_ar.vecm import coint_johansen
from statsmodels.tsa.stattools import adfuller
import os
import pandas as pd
from statsmodels.tsa.api import AutoReg
from typing import Dict
from scipy.stats import pearsonr

In [None]:
def get_stock_data(symbols, start_date, end_date, interval='1D'):
    """Download close prices for several symbols and align them on time.

    Args:
        symbols: Iterable of ticker symbols to request.
        start_date: Start of the history window, passed through to the provider.
        end_date: End of the history window, passed through to the provider.
        interval: Bar interval passed through to the provider (default ``'1D'``).

    Returns:
        DataFrame indexed by the provider's ``time`` values with one
        close-price column per symbol. Rows with a missing value in any
        column are dropped after every symbol is merged in, so only
        timestamps shared by all symbols survive.
    """
    combined = pd.DataFrame()
    for ticker in symbols:
        # One request per symbol against the 'VCI' source.
        quote_history = Vnstock().stock(symbol=ticker, source='VCI').quote.history(
            start=start_date,
            end=end_date,
            interval=interval,
        )
        # Keep only the close, keyed by time, under the ticker's own name.
        ticker_close = (
            quote_history[['close', 'time']]
            .set_index('time')
            .rename(columns={'close': ticker})
        )
        combined = pd.concat([combined, ticker_close], axis=1).dropna()
    return combined

In [None]:
class StatArbStrategy:
    """Rolling search for cointegrated stock baskets that track ``etf``.

    For every day after the first ``estimation_window`` rows the strategy:

    1. screens each stock against the ETF with a pairwise Johansen test,
    2. greedily grows a basket from the surviving stocks,
    3. validates the basket with a stricter Johansen threshold and an ADF
       test on the residual,
    4. tracks the residual of every active basket and drops a basket once
       its residual no longer passes the ADF test.

    Every residual observation (estimation window and live days) is appended
    to ``self.results`` as a dict; ``get_results`` turns them into a frame.
    """

    def __init__(self, etf, stocks, start_date, end_date, file_path,
                 estimation_window=60, min_trading_days=30, threshold=0.05,
                 max_stocks=6, confidence_level=1, confidence_level_joh_final=2,
                 adf_significance=0.005, adf_significance_trading=0.05,
                 correlation_threshold=0.8):
        """Store the parameters and load the price data.

        Args:
            etf: Column name of the instrument to replicate.
            stocks: List of candidate stock column names.
            start_date: First date requested when data has to be fetched.
            end_date: Last date requested when data has to be fetched.
            file_path: CSV cache; read if it exists, written otherwise.
            estimation_window: Number of rows used to estimate a basket.
            min_trading_days: Live days before the trading ADF check starts.
            threshold: Betas with absolute value at or below this are dropped.
            max_stocks: Maximum number of stocks in a basket.
            confidence_level: Column index into the Johansen critical-value
                table (statsmodels orders the columns 90%, 95%, 99%) used
                while screening and building.
            confidence_level_joh_final: Same, for the final validation.
            adf_significance: Residual ADF p-value must be below this to
                accept a new basket.
            adf_significance_trading: Residual ADF p-value at or above this
                retires an active basket.
            correlation_threshold: Residual correlation above which a new
                basket is treated as a duplicate of an active one.
        """
        self.etf = etf
        self.stocks = stocks
        self.start_date = start_date
        self.end_date = end_date
        self.file_path = file_path
        self.estimation_window = estimation_window
        self.min_trading_days = min_trading_days
        self.threshold = threshold
        self.max_stocks = max_stocks
        self.confidence_level = confidence_level
        self.confidence_level_joh_final = confidence_level_joh_final
        self.adf_significance = adf_significance
        self.adf_significance_trading = adf_significance_trading
        self.correlation_threshold = correlation_threshold
        self.data = self.load_data()
        self.active_combinations = []
        self.combination_id = 0
        self.results = []

    def load_data(self):
        """Load prices from ``file_path``, fetching and caching them if absent.

        Returns:
            DataFrame indexed by ``Date`` (parsed from the ``time`` column),
            which also keeps the original ``time`` column.
        """
        if os.path.exists(self.file_path):
            data = pd.read_csv(self.file_path)
        else:
            data = get_stock_data([self.etf] + self.stocks, self.start_date, self.end_date, '1D')
            # The fetched frame carries the timestamps as its index. Name the
            # index explicitly so both the cache file and the in-memory frame
            # expose the 'time' column that is parsed below.
            data = data.rename_axis('time')
            cache_dir = os.path.dirname(self.file_path)
            if cache_dir:
                os.makedirs(cache_dir, exist_ok=True)
            data.to_csv(self.file_path, index=True)
            data = data.reset_index()
        data['Date'] = pd.to_datetime(data['time'])
        return data.set_index('Date')

    def get_pairwise_candidates(self, window_data):
        """Return stocks that pass a pairwise Johansen test against the ETF.

        The result is ordered by trace statistic, strongest first. A stock
        whose test raises is reported on stdout and skipped.
        """
        candidates = []
        for stock in self.stocks:
            try:
                result = coint_johansen(window_data[[self.etf, stock]], det_order=0, k_ar_diff=1)
                if result.lr1[0] > result.cvt[0, self.confidence_level]:
                    candidates.append((stock, result.lr1[0]))
            except Exception as e:
                print(f"Pairwise test failed for {stock}: {e}")
        candidates.sort(key=lambda x: x[1], reverse=True)
        return [stock for stock, _ in candidates]

    def build_combination(self, window_data, filtered_stocks):
        """Greedily grow a basket starting from the best pairwise candidate.

        A stock is added only when the enlarged system still rejects "no
        cointegration", every implied beta is non-negative, and the trace
        statistic improves on the best one seen so far.

        Returns:
            List of selected stock names (empty if there are no candidates).
        """
        if not filtered_stocks:
            return []
        selected = [filtered_stocks[0]]
        best_trace_stat = coint_johansen(window_data[[self.etf, selected[0]]], det_order=0, k_ar_diff=1).lr1[0]

        for stock in filtered_stocks[1:]:
            if len(selected) >= self.max_stocks:
                break
            if stock in selected:
                continue
            test_subset = selected + [stock]
            try:
                # NOTE(review): k_ar_diff=0 here, while every other Johansen
                # call in this class uses k_ar_diff=1 -- confirm this is
                # intentional; left unchanged to keep results identical.
                result = coint_johansen(window_data[[self.etf] + test_subset], det_order=0, k_ar_diff=0)
                if result.lr1[0] <= result.cvt[0, self.confidence_level]:
                    continue
                evec = result.evec[:, 0]
                # Normalise the first eigenvector on the ETF coefficient.
                betas = -evec[1:] / evec[0]
                if not all(beta >= 0 for beta in betas) or result.lr1[0] <= best_trace_stat:
                    continue
                selected.append(stock)
                best_trace_stat = result.lr1[0]
            except Exception as e:
                print(f"Combination test failed: {e}")
        return selected

    def validate_combination(self, window_data, selected):
        """Validate a basket with the final Johansen level and an ADF test.

        Returns:
            ``(betas, adf_pvalue)`` where ``betas`` maps stock -> beta for
            betas above ``threshold``. ``betas`` is ``None`` when the basket
            is rejected; the p-value is ``np.inf`` when no ADF test was run.
        """
        if not selected:
            return None, np.inf
        try:
            result = coint_johansen(window_data[[self.etf] + selected], det_order=0, k_ar_diff=1)
            if result.lr1[0] <= result.cvt[0, self.confidence_level_joh_final]:
                return None, np.inf
            evec = result.evec[:, 0]
            betas = -evec[1:] / evec[0]
            if not all(beta >= 0 for beta in betas):
                return None, np.inf
            selected_betas = {s: b for s, b in zip(selected, betas) if abs(b) > self.threshold}
            residuals = window_data[self.etf] - sum(window_data[s] * b for s, b in selected_betas.items())
            adf_pvalue = adfuller(residuals)[1]
            if adf_pvalue >= self.adf_significance:
                return None, adf_pvalue
            return selected_betas, adf_pvalue
        except Exception as e:
            print(f"Final validation failed: {e}")
            return None, np.inf

    def is_similar(self, new_residuals, existing_residuals):
        """Return whether two equal-length residual series correlate above the threshold.

        Series of different length are never considered similar.
        """
        if len(new_residuals) != len(existing_residuals):
            return False
        corr, _ = pearsonr(new_residuals, existing_residuals)
        return corr > self.correlation_threshold

    def run_strategy(self):
        """Run the rolling estimation / tracking loop over all available days."""
        for day in range(self.estimation_window, len(self.data)):
            estimation_data = self.data.iloc[day - self.estimation_window:day]

            filtered_stocks = self.get_pairwise_candidates(estimation_data)
            selected = self.build_combination(estimation_data, filtered_stocks)
            selected_betas, new_adf_pvalue = self.validate_combination(estimation_data, selected)

            if selected_betas:
                self._consider_new_combination(day, estimation_data, selected_betas, new_adf_pvalue)

            self._update_active_combinations(day)

    def _consider_new_combination(self, day, estimation_data, selected_betas, new_adf_pvalue):
        """Register a validated basket unless a similar, better one is already active."""
        new_residuals = estimation_data[self.etf] - sum(
            estimation_data[s] * b for s, b in selected_betas.items()
        )

        # Only the first similar active basket is compared; it is either
        # replaced by the new basket or it blocks the new basket.
        for comb in self.active_combinations:
            recent = pd.Series(comb['all_residuals'][-self.estimation_window:])
            if not self.is_similar(new_residuals, recent):
                continue
            if new_adf_pvalue < adfuller(recent)[1]:
                self.active_combinations.remove(comb)
                break
            return

        self.combination_id += 1
        self.active_combinations.append({
            'id': self.combination_id,
            'betas': selected_betas,
            'start_day': day,
            'all_residuals': new_residuals.tolist(),
            'trading_days': 0
        })
        for date, res in zip(estimation_data.index, new_residuals):
            self.results.append({
                'Date': date,
                'Combination_ID': self.combination_id,
                'Residual': res,
                'Total_Combinations': len(self.active_combinations),
                'Num_Stocks': len(selected_betas),
                'Is_Estimation': True,
                **{f'Beta_{s}': b for s, b in selected_betas.items()}
            })
        current_day = self.data.index[day]
        print(f"\n=== New Combination {self.combination_id} at {current_day.date()} ===")
        print(f"{self.etf} = " + " + ".join([f"{b:.3f}*{s}" for s, b in selected_betas.items()]))

    def _update_active_combinations(self, day):
        """Append today's residual to each active basket and retire stale ones."""
        current_day = self.data.index[day]
        current_prices = self.data.iloc[day]
        # Iterate over a copy because baskets may be removed inside the loop.
        for comb in self.active_combinations[:]:
            if day < comb['start_day']:
                continue
            comb['trading_days'] += 1
            residual = current_prices[self.etf] - sum(
                current_prices[s] * b for s, b in comb['betas'].items()
            )
            comb['all_residuals'].append(residual)

            if comb['trading_days'] >= self.min_trading_days:
                all_residuals_series = pd.Series(comb['all_residuals'])
                if adfuller(all_residuals_series, autolag='AIC')[1] >= self.adf_significance_trading:
                    self.active_combinations.remove(comb)
                    continue

            self.results.append({
                'Date': current_day,
                'Combination_ID': comb['id'],
                'Residual': residual,
                'Total_Combinations': len(self.active_combinations),
                'Num_Stocks': len(comb['betas']),
                'Is_Estimation': False,
                **{f'Beta_{s}': b for s, b in comb['betas'].items()}
            })

    def get_results(self):
        """Return the collected rows as a DataFrame sorted by combination and date.

        ``self.results`` is left untouched (it stays a list of row dicts), so
        ``run_strategy`` can still append to it afterwards. An empty frame is
        returned when nothing was recorded.
        """
        results = pd.DataFrame(self.results)
        if results.empty:
            return results
        return results.sort_values(by=['Combination_ID', 'Date'])

In [122]:
# Instrument to replicate and the candidate symbols screened against it.
etf = 'VN30F1M'
stocks = ['ACB', 'BCM', 'BID', 'BVH', 'CTG', 'FPT', 'GAS', 'GVR', 'HDB', 'HPG', 'LPB', 'MBB', 'MSN', 'MWG',
            'PLX', 'SAB', 'SHB', 'SSI', 'STB', 'TCB', 'TPB', 'VCB', 'VHM', 'VIB', 'VIC', 'VJC', 'VNM', 'VRE',
            'VPB', 'FUEVFVND', 'FUESSVFL', 'E1VFVN30', 'FUEVN100']
start_date = '2024-01-01'
end_date = '2024-12-31'
# Build the cache path with os.path.join so it resolves on every OS; a
# literal backslash is only a separator on Windows.
file_path = os.path.join('data', 'stock_data.csv')

strategy = StatArbStrategy(etf, stocks, start_date, end_date, file_path)
strategy.run_strategy()
results_df = strategy.get_results()
print("DONE")



=== New Combination 1 at 2024-04-02 ===
VN30F1M = 50.942*E1VFVN30 + 3.319*MBB

=== New Combination 2 at 2024-04-23 ===
VN30F1M = 47.353*E1VFVN30 + 10.305*FUESSVFL

=== New Combination 3 at 2024-04-24 ===
VN30F1M = 42.153*E1VFVN30 + 13.461*FUESSVFL + 0.561*GVR

=== New Combination 4 at 2024-05-02 ===
VN30F1M = 38.228*E1VFVN30 + 12.636*FUESSVFL + 0.620*VIC + 1.374*VCB + 0.897*HDB

=== New Combination 5 at 2024-05-06 ===
VN30F1M = 32.356*E1VFVN30 + 15.745*FUEVN100 + 0.611*VIC + 8.328*ACB + 0.348*VJC + 1.046*VCB

=== New Combination 6 at 2024-05-07 ===
VN30F1M = 33.143*E1VFVN30 + 16.065*FUEVN100 + 0.277*VIC + 6.302*ACB + 0.910*VCB + 2.999*HDB

=== New Combination 7 at 2024-05-10 ===
VN30F1M = 37.655*E1VFVN30 + 6.688*ACB + 14.571*FUEVN100 + 0.655*VCB + 2.484*HDB

=== New Combination 8 at 2024-05-17 ===
VN30F1M = 49.461*E1VFVN30 + 0.534*VIC + 12.338*FUEVN100

=== New Combination 9 at 2024-05-20 ===
VN30F1M = 60.181*E1VFVN30 + 0.561*VIC

=== New Combination 10 at 2024-05-22 ===
VN30F1M = 50.

In [123]:
# Persist the raw strategy output; os.path.join keeps the path portable
# (a literal backslash is only a separator on Windows).
results_df.to_csv(os.path.join('draft', 'results.csv'), index=False)

In [151]:
class SignalGenerator:
    """Fit a rolling AR(1)/Ornstein-Uhlenbeck model to residual series.

    ``residuals`` holds one column per series and one row per date. After
    ``apply_ou_fitting`` the attribute ``ou_params`` is a frame with the same
    index and a two-level column index ``(series, parameter)`` where the
    parameter is one of ``kappa``, ``m``, ``sigma``, ``s_score``.
    """

    # Observations per year used to annualise the mean-reversion speed.
    TRADING_DAYS_PER_YEAR = 252

    def __init__(self, residuals: pd.DataFrame, ou_window: int = 60, fallback_days: int = 5):
        self.residuals = residuals
        self.ou_window = ou_window  # Number of trailing observations per fit
        self.fallback_days = fallback_days  # Max calendar days to reuse the last valid fit
        self.ou_params = None
        self.signals = None
        # Per series: {'params': ..., 'date': ...} of the latest successful fit.
        self.last_valid_params = {col: None for col in residuals.columns}

    @staticmethod
    def _nan_params() -> Dict[str, float]:
        """Return the all-NaN parameter dict used when no fit is available."""
        return {'kappa': np.nan, 'm': np.nan, 'sigma': np.nan, 's_score': np.nan}

    def fit_ou_process(self, series: pd.Series, date: pd.Timestamp) -> Dict[str, float]:
        """Fit an AR(1) model to the last ``ou_window`` points of ``series``.

        Args:
            series: Residual history up to and including the current date.
            date: Current date (unused by the fit itself).

        Returns:
            Dict with ``kappa``, ``m``, ``sigma`` and ``s_score``. All values
            are NaN when the window is incomplete, the fit fails, or the slope
            ``b`` is not in (0, 1) with a p-value below 0.10.
        """
        if len(series) < self.ou_window:
            return self._nan_params()
        series_window = series.iloc[-self.ou_window:].dropna().to_numpy()
        if len(series_window) < self.ou_window:
            return self._nan_params()
        try:
            model = AutoReg(series_window, lags=1).fit()
            a, b = model.params
            p_value_b = model.pvalues[1]
            # Accept only a significant, mean-reverting slope: 0 < b < 1 and p < 0.10.
            if p_value_b >= 0.10 or b <= 0 or b >= 1:
                return self._nan_params()
            # b = exp(-kappa * dt) with dt = 1/252, hence kappa = -ln(b) * 252.
            kappa = -np.log(b) * self.TRADING_DAYS_PER_YEAR
            m = a / (1 - b)
            sigma = np.sqrt(model.sigma2 * 2 * kappa / (1 - b**2))
            latest = series.iloc[-1]
            # The equilibrium std does not depend on the annualisation factor.
            sigma_eq = sigma / np.sqrt(2 * kappa) if kappa > 0 else np.inf
            s_score = (latest - m) / sigma_eq if sigma_eq != 0 else 0
            return {'kappa': kappa, 'm': m, 'sigma': sigma, 's_score': s_score}
        except (ValueError, np.linalg.LinAlgError):
            return self._nan_params()

    def apply_ou_fitting(self):
        """Fit every series on every date from row ``ou_window`` onwards.

        When a fit is invalid but a valid fit for the same series exists no
        more than ``fallback_days`` calendar days back, the s-score is
        recomputed from those earlier parameters (the other parameters stay
        NaN for that date).
        """
        columns = pd.MultiIndex.from_product([self.residuals.columns, ['kappa', 'm', 'sigma', 's_score']])
        # Float dtype keeps the parameters numeric instead of generic objects.
        self.ou_params = pd.DataFrame(index=self.residuals.index, columns=columns, dtype=float)
        for t in range(self.ou_window, len(self.residuals)):
            date = self.residuals.index[t]
            for stock in self.residuals.columns:
                series = self.residuals[stock].iloc[:t + 1]
                params = self.fit_ou_process(series, date)
                if not np.isnan(params['kappa']):
                    self.last_valid_params[stock] = {'params': params, 'date': date}
                else:
                    last_valid = self.last_valid_params[stock]
                    if last_valid and (date - last_valid['date']).days <= self.fallback_days:
                        last_params = last_valid['params']
                        latest = series.iloc[-1]
                        m = last_params['m']
                        sigma_eq = last_params['sigma'] / np.sqrt(2 * last_params['kappa']) if last_params['kappa'] > 0 else np.inf
                        params['s_score'] = (latest - m) / sigma_eq if sigma_eq != 0 else 0
                for param, value in params.items():
                    self.ou_params.loc[date, (stock, param)] = value

    def count_valid_s_scores(self, verbose: bool = True) -> Tuple[Dict[str, int], int]:
        """Count non-NaN s-scores per series and overall.

        Args:
            verbose: Print the per-series and total counts when true.

        Returns:
            ``(counts, total)`` where ``counts`` maps each series to its
            number of valid s-scores plus a ``'Total'`` entry, and ``total``
            is that same overall number.

        Raises:
            ValueError: If ``apply_ou_fitting`` has not been run yet.
        """
        if self.ou_params is None:
            raise ValueError("Run apply_ou_fitting() before counting s-scores.")
        # The first ou_window rows are never fitted.
        total_possible = max(len(self.ou_params) - self.ou_window, 0)
        s_score_counts = {}
        total_score = 0
        for stock in self.residuals.columns:
            valid_s_scores = self.ou_params[(stock, 's_score')].count()
            s_score_counts[stock] = valid_s_scores
            total_score += valid_s_scores
            if verbose:
                print(f"Valid s-scores for {stock}: {valid_s_scores} out of {total_possible}")
        # Count cell by cell; dropping whole rows first would discard every
        # date on which at least one series has no score.
        total_valid_s_scores = self.ou_params.xs('s_score', level=1, axis=1).count().sum()
        s_score_counts['Total'] = total_valid_s_scores
        if verbose:
            print(f"Total valid s-scores: {total_valid_s_scores} out of {total_possible * len(self.residuals.columns)}")
        return s_score_counts, total_score

In [154]:
# Reshape the long results into a wide table: one row per Date, one column
# per Combination_ID, each cell holding that combination's Residual.
residuals_pivot = results_df.pivot(
    index='Date',
    columns='Combination_ID',
    values='Residual',
)

In [185]:
# Build the signal generator on the wide residual table (60-observation
# window), run the rolling fit, and keep a handle on the parameter table.
signal_gen = SignalGenerator(residuals=residuals_pivot, ou_window=60)
signal_gen.apply_ou_fitting()
ou_params = signal_gen.ou_params

In [157]:
# Persist the fitted parameters; os.path.join keeps the path portable
# (a literal backslash is only a separator on Windows).
ou_params.to_csv(os.path.join('draft', 'ou_params.csv'))

In [159]:
# s-score levels: open a trade above the upper level, close it below the lower one.
upper_threshold = 1.25
lower_threshold = -0.5
# Idea (not implemented): keep the short open until the s-score reverses
# strongly past the lower threshold (e.g. -1.5).
# Idea (not implemented): graded sizing, e.g. 1 -> small short, 1.25 -> bigger short.

# One column per combination, one row per date; filled in below.
trade_states = pd.DataFrame(index=ou_params.index, columns=residuals_pivot.columns)

# Walk each combination's s-scores in date order as a two-state machine
# (0: flat, 1: in trade).
for comb_id in trade_states.columns:
    s_scores = ou_params[(comb_id, 's_score')]
    trade_state = 0
    for date, s_score in s_scores.items():
        if pd.isna(s_score) or pd.isna(residuals_pivot.loc[date, comb_id]):
            # No score or no residual: the combination is not active today.
            trade_state = 0
        elif trade_state == 0 and s_score > upper_threshold:
            trade_state = 1
        elif trade_state == 1 and s_score < lower_threshold:
            trade_state = 0
        trade_states.loc[date, comb_id] = trade_state

In [160]:
# Look up, for every (Date, Combination_ID) row, the matching s-score and
# trade state. Rows whose date is missing from the lookup table get NaN
# (s_score) or 0 (Trade_State).
results_df['s_score'] = results_df.apply(
    lambda row: (
        np.nan
        if row['Date'] not in ou_params.index
        else ou_params.loc[row['Date'], (row['Combination_ID'], 's_score')]
    ),
    axis=1,
)
results_df['Trade_State'] = results_df.apply(
    lambda row: (
        0
        if row['Date'] not in trade_states.index
        else trade_states.loc[row['Date'], row['Combination_ID']]
    ),
    axis=1,
)

In [161]:
results_df

Unnamed: 0,Date,Combination_ID,Residual,Total_Combinations,Num_Stocks,Is_Estimation,Beta_E1VFVN30,Beta_MBB,Beta_FUESSVFL,Beta_GVR,...,Beta_VHM,Beta_TPB,Beta_TCB,Beta_FPT,Beta_BCM,Beta_SSI,Beta_GAS,Beta_STB,s_score,Trade_State
0,2024-01-02,1,85.372056,1,2,True,50.941836,3.318821,,,...,,,,,,,,,,0
1,2024-01-03,1,91.191657,1,2,True,50.941836,3.318821,,,...,,,,,,,,,,0
2,2024-01-04,1,83.458534,1,2,True,50.941836,3.318821,,,...,,,,,,,,,,0
3,2024-01-05,1,87.754788,1,2,True,50.941836,3.318821,,,...,,,,,,,,,,0
4,2024-01-08,1,80.643497,1,2,True,50.941836,3.318821,,,...,,,,,,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3094,2024-12-25,41,-54.174682,6,1,True,,,,,...,,,,,,,,,,0
3095,2024-12-26,41,-53.574682,6,1,True,,,,,...,,,,,,,,,,0
3096,2024-12-27,41,-62.050770,6,1,True,,,,,...,,,,,,,,,,0
3102,2024-12-30,41,-64.566259,6,1,False,,,,,...,,,,,,,,,,0


In [162]:
# Aggregate the per-combination trade states into one net position per date.
# `stocks` is the candidate list defined with the strategy inputs.
dates = results_df['Date'].unique()
positions_df = pd.DataFrame(index=dates)

# Trade_State is 0/1; cast once so the sums below are plain numeric reductions.
trade_state_flags = results_df['Trade_State'].astype(float)

# Short one unit of the ETF for every combination currently in a trade.
positions_df['Position_VN30F1M'] = -trade_state_flags.groupby(results_df['Date']).sum()
for stock in stocks:
    beta_col = f'Beta_{stock}'
    if beta_col in results_df.columns:
        # Sum of beta over the combinations in a trade on that date. Rows
        # without a beta for this stock are NaN and are skipped by sum().
        positions_df[f'Position_{stock}'] = (
            (trade_state_flags * results_df[beta_col]).groupby(results_df['Date']).sum()
        )
    else:
        positions_df[f'Position_{stock}'] = 0  # Stock never entered any combination



In [164]:
# Persist the results including s_score and Trade_State; os.path.join keeps
# the path portable (a literal backslash is only a separator on Windows).
results_df.to_csv(os.path.join('draft', 'results.csv'), index=False)

In [None]:
class SignalGenerator:
    """Fit a rolling AR(1)/Ornstein-Uhlenbeck model to residual series.

    ``residuals`` holds one column per series and one row per date. After
    ``apply_ou_fitting`` the attribute ``ou_params`` is a frame with the same
    index and a two-level column index ``(series, parameter)`` where the
    parameter is one of ``kappa``, ``m``, ``sigma``, ``s_score``.
    """

    # Observations per year used to annualise the mean-reversion speed.
    TRADING_DAYS_PER_YEAR = 252

    def __init__(self, residuals: pd.DataFrame, ou_window: int = 60, fallback_days: int = 5):
        self.residuals = residuals
        self.ou_window = ou_window  # Number of trailing observations per fit
        self.fallback_days = fallback_days  # Max calendar days to reuse the last valid fit
        self.ou_params = None
        # Per series: {'params': ..., 'date': ...} of the latest successful fit.
        self.last_valid_params = {col: None for col in residuals.columns}

    @staticmethod
    def _nan_params() -> Dict[str, float]:
        """Return the all-NaN parameter dict used when no fit is available."""
        return {'kappa': np.nan, 'm': np.nan, 'sigma': np.nan, 's_score': np.nan}

    def fit_ou_process(self, series: pd.Series, date: pd.Timestamp) -> Dict[str, float]:
        """Fit an AR(1) model to the last ``ou_window`` points of ``series``.

        Args:
            series: Residual history up to and including the current date.
            date: Current date (unused by the fit itself).

        Returns:
            Dict with ``kappa``, ``m``, ``sigma`` and ``s_score``. All values
            are NaN when the window is incomplete, the fit fails, or the slope
            ``b`` is not in (0, 1) with a p-value below 0.10.
        """
        if len(series) < self.ou_window:
            return self._nan_params()
        series_window = series.iloc[-self.ou_window:].dropna().to_numpy()
        if len(series_window) < self.ou_window:
            return self._nan_params()
        try:
            model = AutoReg(series_window, lags=1).fit()
            a, b = model.params
            p_value_b = model.pvalues[1]
            # Accept only a significant, mean-reverting slope: 0 < b < 1 and p < 0.10.
            if p_value_b >= 0.10 or b <= 0 or b >= 1:
                return self._nan_params()
            # b = exp(-kappa * dt) with dt = 1/252, hence kappa = -ln(b) * 252.
            kappa = -np.log(b) * self.TRADING_DAYS_PER_YEAR
            m = a / (1 - b)
            sigma = np.sqrt(model.sigma2 * 2 * kappa / (1 - b**2))
            latest = series.iloc[-1]
            # The equilibrium std does not depend on the annualisation factor.
            sigma_eq = sigma / np.sqrt(2 * kappa) if kappa > 0 else np.inf
            s_score = (latest - m) / sigma_eq if sigma_eq != 0 else 0
            return {'kappa': kappa, 'm': m, 'sigma': sigma, 's_score': s_score}
        except (ValueError, np.linalg.LinAlgError):
            return self._nan_params()

    def apply_ou_fitting(self):
        """Fit every series on every date from row ``ou_window`` onwards.

        When a fit is invalid but a valid fit for the same series exists no
        more than ``fallback_days`` calendar days back, the s-score is
        recomputed from those earlier parameters (the other parameters stay
        NaN for that date).
        """
        columns = pd.MultiIndex.from_product([self.residuals.columns, ['kappa', 'm', 'sigma', 's_score']])
        # Float dtype keeps the parameters numeric instead of generic objects.
        self.ou_params = pd.DataFrame(index=self.residuals.index, columns=columns, dtype=float)
        for t in range(self.ou_window, len(self.residuals)):
            date = self.residuals.index[t]
            for stock in self.residuals.columns:
                series = self.residuals[stock].iloc[:t + 1]
                params = self.fit_ou_process(series, date)
                if not np.isnan(params['kappa']):
                    self.last_valid_params[stock] = {'params': params, 'date': date}
                else:
                    last_valid = self.last_valid_params[stock]
                    if last_valid and (date - last_valid['date']).days <= self.fallback_days:
                        last_params = last_valid['params']
                        latest = series.iloc[-1]
                        m = last_params['m']
                        sigma_eq = last_params['sigma'] / np.sqrt(2 * last_params['kappa']) if last_params['kappa'] > 0 else np.inf
                        params['s_score'] = (latest - m) / sigma_eq if sigma_eq != 0 else 0
                for param, value in params.items():
                    self.ou_params.loc[date, (stock, param)] = value

def get_allocation_tier(s_score: float, prev_allocation: float, prev_s_score: float = None) -> float:
    """Map an s-score to an allocation fraction, holding while the score falls.

    Args:
        s_score: Current s-score.
        prev_allocation: Allocation chosen on the previous step.
        prev_s_score: Previous s-score, or ``None`` if there is none.

    Returns:
        Allocation fraction between 0.0 and 1.0:

        * the previous allocation, when a position is open and the s-score
          is lower than on the previous step;
        * 0.5 above 2.0, 1.0 above 1.5, 0.8 above 1.0, 0.6 above 0.5;
        * 0.0 below -1.5, 0.2 below -1.0, 0.4 below -0.5;
        * 0.0 for scores in [-0.5, 0.5].
    """
    # Hold the open position while the s-score keeps declining.
    if prev_s_score is not None and s_score < prev_s_score and prev_allocation > 0:
        return prev_allocation
    # Extreme deviation: reduce to 50%.
    if s_score > 2.0:
        return 0.5
    # Entry tiers.
    if s_score > 1.5:
        return 1.0
    if s_score > 1.0:
        return 0.8
    if s_score > 0.5:
        return 0.6
    # Exit tiers. The most negative bound must be tested first, otherwise
    # the -0.5 check captures every score below it and the deeper tiers
    # can never be reached.
    if s_score < -1.5:
        return 0.0
    if s_score < -1.0:
        return 0.2
    if s_score < -0.5:
        return 0.4
    # Neutral band: no position.
    return 0.0

def process_results_df(results_df: pd.DataFrame, stocks: list, ou_window: int = 60) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Derive s-scores, allocations and net positions from strategy results.

    Args:
        results_df: Long-format results with at least ``Date``,
            ``Combination_ID``, ``Residual`` and ``Beta_<stock>`` columns.
            The caller's frame is not modified.
        stocks: Stock names for which a ``Position_<stock>`` column is built.
        ou_window: Window length passed to ``SignalGenerator``.

    Returns:
        ``(results, positions)``. ``results`` is a copy of ``results_df``
        sorted by ``Combination_ID`` with ``s_score`` and (unscaled)
        ``Allocation`` columns added. ``positions`` has one row per date with
        ``Position_VN30F1M`` (negative total allocation) and one
        ``Position_<stock>`` column per entry in ``stocks``. On each date the
        allocations are scaled down so their sum does not exceed 1.0.
    """
    # sort_values returns a new frame, so the additions below stay local.
    results_df = results_df.sort_values('Combination_ID')

    residuals_pivot = results_df.pivot(index='Date', columns='Combination_ID', values='Residual')

    signal_gen = SignalGenerator(residuals_pivot, ou_window=ou_window)
    signal_gen.apply_ou_fitting()
    ou_params = signal_gen.ou_params

    # Every cell is overwritten in the loop; the float dtype keeps the later
    # comparisons and sums numeric.
    allocation_percentages = pd.DataFrame(0.0, index=ou_params.index, columns=residuals_pivot.columns)
    for comb_id in allocation_percentages.columns:
        s_scores = ou_params[(comb_id, 's_score')]
        prev_allocation = 0.0
        prev_s_score = None
        for date, s_score in s_scores.items():
            if pd.isna(s_score) or pd.isna(residuals_pivot.loc[date, comb_id]):
                allocation = 0.0  # Combination not active on this date
            else:
                allocation = get_allocation_tier(s_score, prev_allocation, prev_s_score)
            allocation_percentages.loc[date, comb_id] = allocation
            prev_allocation = allocation
            prev_s_score = s_score

    results_df['s_score'] = results_df.apply(
        lambda row: ou_params.loc[row['Date'], (row['Combination_ID'], 's_score')]
        if row['Date'] in ou_params.index else np.nan, axis=1
    )
    results_df['Allocation'] = results_df.apply(
        lambda row: allocation_percentages.loc[row['Date'], row['Combination_ID']]
        if row['Date'] in allocation_percentages.index else 0.0, axis=1
    )

    dates = results_df['Date'].unique()
    # dict.fromkeys drops repeated stock names while keeping their order.
    position_columns = ['Position_VN30F1M'] + list(
        dict.fromkeys(f'Position_{stock}' for stock in stocks)
    )
    position_rows = []
    for date in dates:
        day_allocations = allocation_percentages.loc[date]
        active_combs = day_allocations[day_allocations > 0]
        scale_factor = 1
        if len(active_combs) == 0:
            total_short = 0.0
        else:
            total_intended_short = active_combs.sum()
            # Cap the summed allocation at 1.0 by scaling every leg equally.
            scale_factor = min(1.0 / total_intended_short, 1.0) if total_intended_short > 0 else 1.0
            total_short = (active_combs * scale_factor).sum()

        row = {'Position_VN30F1M': -total_short if total_short > 0 else 0.0}
        # The rows with an open allocation are the same for every stock, so
        # select them once per date.
        active_rows = results_df[(results_df['Date'] == date) & (results_df['Allocation'] > 0)]
        for stock in stocks:
            beta_col = f'Beta_{stock}'
            if beta_col in results_df.columns:
                row[f'Position_{stock}'] = (active_rows[beta_col] * active_rows['Allocation'] * scale_factor).sum()
            else:
                row[f'Position_{stock}'] = 0.0
        position_rows.append(row)

    # Build the frame in one step instead of enlarging it cell by cell.
    positions_df = pd.DataFrame(position_rows, index=dates, columns=position_columns)

    return results_df, positions_df

In [180]:
# Example usage
# Run the full post-processing pipeline on the strategy output.
updated_results_df, positions_df = process_results_df(results_df=results_df, stocks=stocks)

In [182]:
# Persist the per-date positions; os.path.join keeps the path portable
# (a literal backslash is only a separator on Windows).
positions_df.to_csv(os.path.join('data', 'positions.csv'))

In [183]:
# Write the enriched results next to the notebook, index column included.
updated_results_df.to_csv('result.csv', index=True)