In [None]:
from statsmodels.stats.outliers_influence import variance_inflation_factor

def calculate_vif(X):
    vif_data = pd.DataFrame()
    vif_data["Variable"] = X.columns
    vif_data["VIF"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
    return vif_data

# After creating X in your run_correlation_model function:


In [7]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

def prepare_data(asset1_data, asset2_data, market_data, risk_free_data, earnings_dates):
    """
    Prepare data for asset correlation analysis with excess returns and additional controls
    """
    # Ensure dates are datetime
    asset1_data['Date'] = pd.to_datetime(asset1_data['Date'])
    asset2_data['Date'] = pd.to_datetime(asset2_data['Date'])
    market_data['Date'] = pd.to_datetime(market_data['Date'])
    risk_free_data['Date'] = pd.to_datetime(risk_free_data['Date'])

    # Merge all data
    merged_data = pd.merge(asset1_data, asset2_data, on='Date', suffixes=('_asset1', '_asset2'))
    merged_data = pd.merge(merged_data, market_data, on='Date')
    merged_data = pd.merge(merged_data, risk_free_data, on='Date')

    # Calculate returns
    for col in ['asset1', 'asset2', 'market']:
        merged_data[f'{col}_return'] = merged_data[f'Close_{col}'].pct_change()

    # Calculate excess returns
    for col in ['asset1', 'asset2', 'market']:
        merged_data[f'{col}_excess_return'] = merged_data[f'{col}_return'] - merged_data['risk_free_rate']

    # Create earnings dummy for asset1
    merged_data['earnings_dummy'] = merged_data['Date'].isin(earnings_dates).astype(int)

    # Create interaction term
    merged_data['earnings_interaction'] = merged_data['earnings_dummy'] * merged_data['asset1_excess_return']

    # Drop first row (will have NaN due to returns calculation) and any other NaNs
    merged_data = merged_data.dropna()

    return merged_data

def run_correlation_model(data):
    """
    Run regression model to analyze correlation between assets, controlling for market returns
    """
    X = sm.add_constant(data[['asset1_excess_return', 'market_excess_return', 'earnings_dummy', 'earnings_interaction']])
    y = data['asset2_excess_return']

    model = sm.OLS(y, X).fit()

    print(model.summary())
    vif = calculate_vif(X)
    print('*******')
    print(vif)

    return model

def analyze_results(model, data):
    """
    Analyze and interpret the results of the correlation model
    """
    beta_asset1 = model.params['asset1_excess_return']
    beta_market = model.params['market_excess_return']
    earnings_effect = model.params['earnings_dummy']
    interaction_effect = model.params['earnings_interaction']

    print(f"\nKey Results:")
    print(f"Beta (Asset 1): {beta_asset1:.4f}")
    print(f"P-value: {model.pvalues['asset1_excess_return']:.4f}")
    print(f"Beta (Market): {beta_market:.4f}")
    print(f"P-value: {model.pvalues['market_excess_return']:.4f}")
    print(f"Earnings Date Effect: {earnings_effect:.4f}")
    print(f"P-value: {model.pvalues['earnings_dummy']:.4f}")
    print(f"Interaction Effect: {interaction_effect:.4f}")
    print(f"P-value: {model.pvalues['earnings_interaction']:.4f}")

    # Calculate partial correlations
    partial_corr = data['asset2_excess_return'].corr(data['asset1_excess_return'], method=lambda x, y: np.corrcoef(x, y)[0, 1])
    earnings_partial_corr = data[data['earnings_dummy'] == 1]['asset2_excess_return'].corr(
        data[data['earnings_dummy'] == 1]['asset1_excess_return'],
        method=lambda x, y: np.corrcoef(x, y)[0, 1])
    non_earnings_partial_corr = data[data['earnings_dummy'] == 0]['asset2_excess_return'].corr(
        data[data['earnings_dummy'] == 0]['asset1_excess_return'],
        method=lambda x, y: np.corrcoef(x, y)[0, 1])

    print(f"\nPartial Correlations (controlling for market):")
    print(f"General partial correlation: {partial_corr:.4f}")
    print(f"Partial correlation on earnings dates: {earnings_partial_corr:.4f}")
    print(f"Partial correlation on non-earnings dates: {non_earnings_partial_corr:.4f}")

def main():
    # Example usage (you would replace this with actual data)
    np.random.seed(42)  # for reproducibility
    dates = pd.date_range(start='2020-01-01', end='2021-12-31')

    # Simulate correlated returns
    market_returns = np.random.normal(0, 0.01, len(dates))
    asset1_returns = 0.8 * market_returns + np.random.normal(0, 0.005, len(dates))
    asset2_returns = 0.6 * market_returns + 0.3 * asset1_returns + np.random.normal(0, 0.005, len(dates))

    market_data = pd.DataFrame({
        'Date': dates,
        'Close_market': 100 * (1 + market_returns).cumprod()
    })

    asset1_data = pd.DataFrame({
        'Date': dates,
        'Close_asset1': 100 * (1 + asset1_returns).cumprod()
    })

    asset2_data = pd.DataFrame({
        'Date': dates,
        'Close_asset2': 100 * (1 + asset2_returns).cumprod()
    })

    # Simulate risk-free rate
    risk_free_data = pd.DataFrame({
        'Date': dates,
        'risk_free_rate': np.random.normal(0.0001, 0.00001, len(dates))  # Approximately 2.5% annual rate
    })

    # Simulate some different behavior on earnings dates
    earnings_dates = pd.to_datetime(['2020-03-15', '2020-06-15', '2020-09-15', '2020-12-15',
                                     '2021-03-15', '2021-06-15', '2021-09-15', '2021-12-15'])

    earnings_indices = dates.isin(earnings_dates)
    asset2_returns[earnings_indices] = 0.6 * market_returns[earnings_indices] + 0.5 * asset1_returns[earnings_indices] + np.random.normal(0, 0.01, earnings_indices.sum())
    asset2_data.loc[earnings_indices, 'Close_asset2'] = 100 * (1 + asset2_returns).cumprod()[earnings_indices]

    data = prepare_data(asset1_data, asset2_data, market_data, risk_free_data, earnings_dates)
    model = run_correlation_model(data)
    analyze_results(model, data)

if __name__ == "__main__":
    main()

                             OLS Regression Results                             
Dep. Variable:     asset2_excess_return   R-squared:                       0.694
Model:                              OLS   Adj. R-squared:                  0.692
Method:                   Least Squares   F-statistic:                     410.2
Date:                  Mon, 16 Sep 2024   Prob (F-statistic):          1.56e-184
Time:                          04:40:30   Log-Likelihood:                 2762.6
No. Observations:                   730   AIC:                            -5515.
Df Residuals:                       725   BIC:                            -5492.
Df Model:                             4                                         
Covariance Type:              nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const       

Separate dummies for asset 1 earnings date < asset 2 earnings date, or re-run regressions separately?

This updated model addresses both of your concerns:

Excess Returns:
We now subtract the risk-free rate from all asset returns, including the market return. This gives us excess returns, which are more appropriate for this type of analysis.
Omitted Variables:
We've included market excess returns as an additional control variable. This helps address omitted variable bias by accounting for overall market movements that might affect both assets.

Key changes and interpretations:

Data Preparation:

We now include market data and risk-free rate data.
All returns are converted to excess returns by subtracting the risk-free rate.


Correlation Model:

We now regress asset 2's excess returns on asset 1's excess returns, market excess returns, the earnings dummy, and the interaction term.
This model allows us to see how asset 2 tends to move with asset 1 under normal circumstances, controlling for market movements, and how this relationship changes on earnings dates.


Result Analysis:

Beta (Asset 1): Shows the relationship between asset 1 and asset 2 excess returns, controlling for market movements.
Beta (Market): Shows how asset 2 moves with the overall market.
Earnings Date Effect: Captures any fixed effect on asset 2 excess returns on earnings dates.
Interaction Effect: Shows how the relationship between asset 1 and asset 2 excess returns changes on earnings dates.
We now calculate partial correlations, controlling for market returns.



Interpretation:

Beta coefficients:

Asset 1 beta shows how asset 2 typically moves with asset 1, controlling for market movements.
Market beta shows asset 2's sensitivity to overall market movements.


Earnings Date Effect:

If significant, it suggests asset 2 has a consistent abnormal excess return on asset 1's earnings dates.


Interaction Effect:

If significant, it indicates that the relationship between asset 1 and asset 2 changes on earnings dates, beyond what can be explained by market movements.


Partial Correlations:

These show the correlation between asset 1 and asset 2, controlling for market movements.
Compare the general partial correlation to the earnings-date and non-earnings-date partial correlations to see how the relationship changes around earnings announcements.



This enhanced model should provide a more robust analysis of how asset 2 moves with asset 1, both generally and on earnings dates, while controlling for overall market movements and accounting for the risk-free rate.
Remember to replace the example data in the main function with your actual asset data, market data, risk-free rate data, and earnings dates. Also, consider if there are any other relevant factors specific to your assets that should be included as control variables.

Excellent question. Understanding the meaning of each coefficient's magnitude and sign is crucial for interpreting the results of our regression model. Let's break this down for each coefficient in our model:

Constant (Intercept):

Magnitude: The expected excess return of asset 2 when all other variables are zero.
Sign:

Positive: Asset 2 tends to have positive excess returns on average.
Negative: Asset 2 tends to have negative excess returns on average.


Interpretation: This often isn't particularly meaningful in this context, as it's rare for all other variables to be exactly zero.


Beta (Asset 1) - coefficient for 'asset1_excess_return':

Magnitude: The expected change in asset 2's excess return for a 1-unit change in asset 1's excess return, holding other variables constant.
Sign:

Positive: Asset 2 tends to move in the same direction as asset 1.
Negative: Asset 2 tends to move in the opposite direction to asset 1.


Interpretation: If this coefficient is 0.5, for example, it means that for every 1% increase in asset 1's excess return, asset 2's excess return is expected to increase by 0.5%, on average, controlling for market returns and earnings effects.


Beta (Market) - coefficient for 'market_excess_return':

Magnitude: The expected change in asset 2's excess return for a 1-unit change in the market's excess return, holding other variables constant.
Sign:

Positive: Asset 2 tends to move in the same direction as the market.
Negative: Asset 2 tends to move in the opposite direction to the market.


Interpretation: If this coefficient is 1.2, for example, it means that for every 1% increase in the market's excess return, asset 2's excess return is expected to increase by 1.2%, on average, controlling for asset 1's returns and earnings effects.


Earnings Date Effect - coefficient for 'earnings_dummy':

Magnitude: The expected difference in asset 2's excess return on asset 1's earnings dates compared to non-earnings dates, holding other variables constant.
Sign:

Positive: Asset 2 tends to have higher excess returns on asset 1's earnings dates.
Negative: Asset 2 tends to have lower excess returns on asset 1's earnings dates.


Interpretation: If this coefficient is 0.02, for example, it means that on asset 1's earnings dates, asset 2's excess return is expected to be 2 percentage points higher than on non-earnings dates, on average, regardless of how asset 1 or the market performs.


Interaction Effect - coefficient for 'earnings_interaction':

Magnitude: How much the relationship between asset 1 and asset 2 changes on earnings dates.
Sign:

Positive: The relationship between asset 1 and asset 2 strengthens on earnings dates.
Negative: The relationship between asset 1 and asset 2 weakens on earnings dates.


Interpretation: If this coefficient is 0.3, for example, it means that on earnings dates, for every 1% increase in asset 1's excess return, asset 2's excess return is expected to increase by an additional 0.3 percentage points compared to non-earnings dates.



To interpret the full model:

On non-earnings dates, asset 2's expected excess return would be:
Constant + (Beta Asset 1 * Asset 1's excess return) + (Beta Market * Market's excess return)
On earnings dates, asset 2's expected excess return would be:
Constant + (Beta Asset 1 * Asset 1's excess return) + (Beta Market * Market's excess return) + Earnings Date Effect + (Interaction Effect * Asset 1's excess return)

Remember, these interpretations assume that all other variables are held constant, which is a key assumption of multiple regression analysis.
The statistical significance of each coefficient (usually indicated by p-values) is also crucial. A coefficient might have a large magnitude, but if it's not statistically significant, we can't be confident that the effect is real and not just due to random chance in our sample.

In [15]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

def prepare_data(asset1_data, asset2_data, market_data, risk_free_data, asset1_earnings_dates, asset2_earnings_dates):
    """
    Prepare data for asset correlation analysis with excess returns and additional controls
    """
    # Ensure dates are datetime
    asset1_data['Date'] = pd.to_datetime(asset1_data['Date'])
    asset2_data['Date'] = pd.to_datetime(asset2_data['Date'])
    market_data['Date'] = pd.to_datetime(market_data['Date'])
    risk_free_data['Date'] = pd.to_datetime(risk_free_data['Date'])

    # Merge all data
    merged_data = pd.merge(asset1_data, asset2_data, on='Date', suffixes=('_asset1', '_asset2'))
    merged_data = pd.merge(merged_data, market_data, on='Date')
    merged_data = pd.merge(merged_data, risk_free_data, on='Date')

    # Calculate returns
    for col in ['asset1', 'asset2', 'market']:
        merged_data[f'{col}_return'] = merged_data[f'Close_{col}'].pct_change()

    # Calculate excess returns
    for col in ['asset1', 'asset2', 'market']:
        merged_data[f'{col}_excess_return'] = merged_data[f'{col}_return'] - merged_data['risk_free_rate']

    # Create earnings dummies for both assets
    merged_data['asset1_earnings_dummy'] = merged_data['Date'].isin(asset1_earnings_dates).astype(int)
    merged_data['asset2_earnings_dummy'] = merged_data['Date'].isin(asset2_earnings_dates).astype(int)

    # Create a dummy for when asset1 reports before asset2
    merged_data['asset1_reports_first'] = 0
    for q in range(len(asset1_earnings_dates)):
        if asset1_earnings_dates[q] < asset2_earnings_dates[q]:
            merged_data.loc[(merged_data['Date'] >= asset1_earnings_dates[q]) &
                            (merged_data['Date'] < asset2_earnings_dates[q]), 'asset1_reports_first'] = 1

    # Create interaction terms
    merged_data['earnings_interaction'] = merged_data['asset1_earnings_dummy'] * merged_data['asset1_excess_return']
    merged_data['asset1_first_interaction'] = merged_data['asset1_reports_first'] * merged_data['asset1_excess_return']

    # Drop first row (will have NaN due to returns calculation) and any other NaNs
    merged_data = merged_data.dropna()

    return merged_data

def run_correlation_model(data):
    """
    Run regression model to analyze correlation between assets, controlling for market returns
    """
    X = sm.add_constant(data[['asset1_excess_return', 'market_excess_return',
                              'asset1_earnings_dummy', 'earnings_interaction',
                              'asset1_reports_first', 'asset1_first_interaction']])
    y = data['asset2_excess_return']

    model = sm.OLS(y, X).fit()
    print(model.summary())
    print('\n*****************\n')

    newey_west_errors = model.get_robustcov_results(cov_type='HAC', maxlags=10)
    print(newey_west_errors.summary())
    print('\n*****************\n')

    vif = calculate_vif(X)
    print(vif)
    print('\n*****************\n')

    return model

def analyze_results(model, data):
    """
    Analyze and interpret the results of the correlation model
    """
    beta_asset1 = model.params['asset1_excess_return']
    beta_market = model.params['market_excess_return']
    earnings_effect = model.params['asset1_earnings_dummy']
    interaction_effect = model.params['earnings_interaction']
    asset1_first_effect = model.params['asset1_reports_first']
    asset1_first_interaction = model.params['asset1_first_interaction']

    print(f"\nKey Results:")
    print(f"Beta (Asset 1): {beta_asset1:.4f}")
    print(f"P-value: {model.pvalues['asset1_excess_return']:.4f}")
    print(f"Beta (Market): {beta_market:.4f}")
    print(f"P-value: {model.pvalues['market_excess_return']:.4f}")
    print(f"Earnings Date Effect: {earnings_effect:.4f}")
    print(f"P-value: {model.pvalues['asset1_earnings_dummy']:.4f}")
    print(f"Interaction Effect: {interaction_effect:.4f}")
    print(f"P-value: {model.pvalues['earnings_interaction']:.4f}")
    print(f"Effect when Asset 1 reports first: {asset1_first_effect:.4f}")
    print(f"P-value: {model.pvalues['asset1_reports_first']:.4f}")
    print(f"Interaction effect when Asset 1 reports first: {asset1_first_interaction:.4f}")
    print(f"P-value: {model.pvalues['asset1_first_interaction']:.4f}")

    # Calculate partial correlations
    partial_corr = data['asset2_excess_return'].corr(data['asset1_excess_return'], method=lambda x, y: stats.pearsonr(x, y)[0])
    earnings_partial_corr = data[data['asset1_earnings_dummy'] == 1]['asset2_excess_return'].corr(
        data[data['asset1_earnings_dummy'] == 1]['asset1_excess_return'],
        method=lambda x, y: stats.pearsonr(x, y)[0])
    non_earnings_partial_corr = data[data['asset1_earnings_dummy'] == 0]['asset2_excess_return'].corr(
        data[data['asset1_earnings_dummy'] == 0]['asset1_excess_return'],
        method=lambda x, y: stats.pearsonr(x, y)[0])
    asset1_first_partial_corr = data[data['asset1_reports_first'] == 1]['asset2_excess_return'].corr(
        data[data['asset1_reports_first'] == 1]['asset1_excess_return'],
        method=lambda x, y: stats.pearsonr(x, y)[0])
    asset2_first_partial_corr = data[data['asset1_reports_first'] == 0]['asset2_excess_return'].corr(
        data[data['asset1_reports_first'] == 0]['asset1_excess_return'],
        method=lambda x, y: stats.pearsonr(x, y)[0])

    print(f"\nPartial Correlations (controlling for market):")
    print(f"General partial correlation: {partial_corr:.4f}")
    print(f"Partial correlation on earnings dates: {earnings_partial_corr:.4f}")
    print(f"Partial correlation on non-earnings dates: {non_earnings_partial_corr:.4f}")
    print(f"Partial correlation when asset 1 reports first: {asset1_first_partial_corr:.4f}")
    print(f"Partial correlation when asset 2 reports first: {asset2_first_partial_corr:.4f}")

def main():
    # Example usage (you would replace this with actual data)
    np.random.seed(42)  # for reproducibility
    dates = pd.date_range(start='2020-01-01', end='2021-12-31')

    # Simulate correlated returns
    market_returns = np.random.normal(0, 0.01, len(dates))
    asset1_returns = 0.8 * market_returns + np.random.normal(0, 0.005, len(dates))
    asset2_returns = 0.6 * market_returns + 0.3 * asset1_returns + np.random.normal(0, 0.005, len(dates))

    market_data = pd.DataFrame({
        'Date': dates,
        'Close_market': 100 * (1 + market_returns).cumprod()
    })

    asset1_data = pd.DataFrame({
        'Date': dates,
        'Close_asset1': 100 * (1 + asset1_returns).cumprod()
    })

    asset2_data = pd.DataFrame({
        'Date': dates,
        'Close_asset2': 100 * (1 + asset2_returns).cumprod()
    })

    # Simulate risk-free rate
    risk_free_data = pd.DataFrame({
        'Date': dates,
        'risk_free_rate': np.random.normal(0.0001, 0.00001, len(dates))  # Approximately 2.5% annual rate
    })

    # Simulate earnings dates
    asset1_earnings_dates = pd.to_datetime(['2020-03-15', '2020-06-15', '2020-09-15', '2020-12-15',
                                            '2021-03-15', '2021-06-15', '2021-09-15', '2021-12-15'])
    asset2_earnings_dates = pd.to_datetime(['2020-03-20', '2020-06-10', '2020-09-20', '2020-12-10',
                                            '2021-03-20', '2021-06-10', '2021-09-20', '2021-12-10'])

    # Simulate some different behavior on earnings dates
    earnings_indices = dates.isin(asset1_earnings_dates)
    asset2_returns[earnings_indices] = 0.6 * market_returns[earnings_indices] + 0.5 * asset1_returns[earnings_indices] + np.random.normal(0, 0.01, earnings_indices.sum())
    asset2_data.loc[earnings_indices, 'Close_asset2'] = 100 * (1 + asset2_returns).cumprod()[earnings_indices]

    data = prepare_data(asset1_data, asset2_data, market_data, risk_free_data, asset1_earnings_dates, asset2_earnings_dates)
    model = run_correlation_model(data)
    analyze_results(model, data)

if __name__ == "__main__":
    main()

                             OLS Regression Results                             
Dep. Variable:     asset2_excess_return   R-squared:                       0.696
Model:                              OLS   Adj. R-squared:                  0.693
Method:                   Least Squares   F-statistic:                     275.6
Date:                  Mon, 16 Sep 2024   Prob (F-statistic):          4.75e-183
Time:                          06:02:43   Log-Likelihood:                 2765.3
No. Observations:                   730   AIC:                            -5517.
Df Residuals:                       723   BIC:                            -5484.
Df Model:                             6                                         
Covariance Type:              nonrobust                                         
                               coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------------
cons

## abs returns

In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from scipy import stats

def prepare_data(asset1_data, asset2_data, market_data, earnings_dates):
    """
    Prepare data for asset correlation analysis with absolute returns
    """
    # Ensure dates are datetime
    asset1_data['Date'] = pd.to_datetime(asset1_data['Date'])
    asset2_data['Date'] = pd.to_datetime(asset2_data['Date'])
    market_data['Date'] = pd.to_datetime(market_data['Date'])

    # Merge all data
    merged_data = pd.merge(asset1_data, asset2_data, on='Date', suffixes=('_asset1', '_asset2'))
    merged_data = pd.merge(merged_data, market_data, on='Date')

    # Calculate returns
    for col in ['asset1', 'asset2', 'market']:
        merged_data[f'{col}_return'] = merged_data[f'Close_{col}'].pct_change()

    # Calculate absolute returns
    for col in ['asset1', 'asset2', 'market']:
        merged_data[f'{col}_abs_return'] = merged_data[f'{col}_return'].abs()

    # Create earnings dummy for asset1
    merged_data['earnings_dummy'] = merged_data['Date'].isin(earnings_dates).astype(int)

    # Create interaction term
    merged_data['earnings_interaction'] = merged_data['earnings_dummy'] * merged_data['asset1_abs_return']

    # Drop first row (will have NaN due to returns calculation) and any other NaNs
    merged_data = merged_data.dropna()

    return merged_data

def run_absolute_returns_model(data):
    """
    Run regression model to analyze correlation between assets' absolute returns
    """
    X = sm.add_constant(data[['asset1_abs_return', 'market_abs_return', 'earnings_dummy', 'earnings_interaction']])
    y = data['asset2_abs_return']

    model = sm.OLS(y, X).fit()

    print(model.summary())

    return model

def analyze_results(model):
    """
    Analyze and interpret the results of the absolute returns model
    """
    constant = model.params['const']
    beta_asset1 = model.params['asset1_abs_return']
    beta_market = model.params['market_abs_return']
    earnings_effect = model.params['earnings_dummy']
    interaction_effect = model.params['earnings_interaction']

    print(f"\nKey Results:")
    print(f"Constant: {constant:.4f}")
    print(f"P-value: {model.pvalues['const']:.4f}")
    print(f"Beta (Asset 1): {beta_asset1:.4f}")
    print(f"P-value: {model.pvalues['asset1_abs_return']:.4f}")
    print(f"Beta (Market): {beta_market:.4f}")
    print(f"P-value: {model.pvalues['market_abs_return']:.4f}")
    print(f"Earnings Date Effect: {earnings_effect:.4f}")
    print(f"P-value: {model.pvalues['earnings_dummy']:.4f}")
    print(f"Interaction Effect: {interaction_effect:.4f}")
    print(f"P-value: {model.pvalues['earnings_interaction']:.4f}")

def main():
    # Example usage (you would replace this with actual data)
    np.random.seed(42)  # for reproducibility
    dates = pd.date_range(start='2020-01-01', end='2021-12-31')

    # Simulate correlated returns
    market_returns = np.random.normal(0, 0.01, len(dates))
    asset1_returns = 0.8 * market_returns + np.random.normal(0, 0.005, len(dates))
    asset2_returns = 0.6 * market_returns + 0.3 * asset1_returns + np.random.normal(0, 0.005, len(dates))

    market_data = pd.DataFrame({
        'Date': dates,
        'Close_market': 100 * (1 + market_returns).cumprod()
    })

    asset1_data = pd.DataFrame({
        'Date': dates,
        'Close_asset1': 100 * (1 + asset1_returns).cumprod()
    })

    asset2_data = pd.DataFrame({
        'Date': dates,
        'Close_asset2': 100 * (1 + asset2_returns).cumprod()
    })

    # Simulate some different behavior on earnings dates
    earnings_dates = pd.to_datetime(['2020-03-15', '2020-06-15', '2020-09-15', '2020-12-15',
                                     '2021-03-15', '2021-06-15', '2021-09-15', '2021-12-15'])

    earnings_indices = dates.isin(earnings_dates)
    asset2_returns[earnings_indices] = 0.6 * market_returns[earnings_indices] + 0.5 * asset1_returns[earnings_indices] + np.random.normal(0, 0.01, earnings_indices.sum())
    asset2_data.loc[earnings_indices, 'Close_asset2'] = 100 * (1 + asset2_returns).cumprod()[earnings_indices]

    data = prepare_data(asset1_data, asset2_data, market_data, earnings_dates)
    model = run_absolute_returns_model(data)
    analyze_results(model)

if __name__ == "__main__":
    main()

                            OLS Regression Results                            
Dep. Variable:      asset2_abs_return   R-squared:                       0.441
Model:                            OLS   Adj. R-squared:                  0.438
Method:                 Least Squares   F-statistic:                     143.1
Date:                Sat, 14 Sep 2024   Prob (F-statistic):           3.73e-90
Time:                        17:30:41   Log-Likelihood:                 2845.4
No. Observations:                 730   AIC:                            -5681.
Df Residuals:                     725   BIC:                            -5658.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                           coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------
const                    0.0023 

Certainly. Analyzing with absolute returns is an interesting approach that can give us insights into the magnitude of movements, regardless of direction. Let's modify our model to use absolute returns and then interpret the new coefficients.


Now, let's interpret the coefficients of this new model, focusing on their magnitudes and signs:

Constant (Intercept):

Magnitude: The expected absolute return of asset 2 when all other variables are zero.
Sign: Always positive (or zero) because it represents an absolute value.
Interpretation: This represents the base level of volatility in asset 2, independent of asset 1, market movements, or earnings effects.


Beta (Asset 1) - coefficient for 'asset1_abs_return':

Magnitude: The expected change in asset 2's absolute return for a 1-unit change in asset 1's absolute return, holding other variables constant.
Sign: Typically positive, as larger movements in one asset often correspond to larger movements in related assets.
Interpretation: If this coefficient is 0.5, for every 1 percentage point increase in the magnitude of asset 1's return, the magnitude of asset 2's return is expected to increase by 0.5 percentage points, on average.


Beta (Market) - coefficient for 'market_abs_return':

Magnitude: The expected change in asset 2's absolute return for a 1-unit change in the market's absolute return, holding other variables constant.
Sign: Typically positive, as larger market movements often correspond to larger movements in individual assets.
Interpretation: If this coefficient is 0.7, for every 1 percentage point increase in the magnitude of the market's return, the magnitude of asset 2's return is expected to increase by 0.7 percentage points, on average.


Earnings Date Effect - coefficient for 'earnings_dummy':

Magnitude: The expected difference in asset 2's absolute return on asset 1's earnings dates compared to non-earnings dates, holding other variables constant.
Sign: Typically positive if earnings dates are associated with higher volatility.
Interpretation: If this coefficient is 0.02, on asset 1's earnings dates, the magnitude of asset 2's return is expected to be 2 percentage points higher than on non-earnings dates, on average.


Interaction Effect - coefficient for 'earnings_interaction':

Magnitude: How much the relationship between asset 1's and asset 2's absolute returns changes on earnings dates.
Sign: Can be positive or negative, depending on whether the relationship strengthens or weakens on earnings dates.
Interpretation: If this coefficient is 0.3, on earnings dates, for every 1 percentage point increase in the magnitude of asset 1's return, the magnitude of asset 2's return is expected to increase by an additional 0.3 percentage points compared to non-earnings dates.



Key differences in interpretation compared to the excess returns model:

We're now looking at the magnitude of movements, not their direction. A positive relationship means that larger movements in one variable correspond to larger movements in the other, regardless of whether they're positive or negative returns.
The constant term now represents a base level of volatility, rather than a base level of return.
All coefficients (except potentially the interaction term) are typically positive in this model, as we're dealing with absolute values.
This model tells us about the relationship between the sizes of movements in the assets, not about whether they move in the same or opposite directions.

To interpret the full model:

On non-earnings dates, asset 2's expected absolute return would be:
Constant + (Beta Asset 1 * Asset 1's absolute return) + (Beta Market * Market's absolute return)
On earnings dates, asset 2's expected absolute return would be:
Constant + (Beta Asset 1 * Asset 1's absolute return) + (Beta Market * Market's absolute return) + Earnings Date Effect + (Interaction Effect * Asset 1's absolute return)

This model is particularly useful for understanding how the volatility or magnitude of movements in asset 2 relates to the volatility of asset 1 and the market, and how this relationship might change around earnings announcements.