In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Load and preprocess data
def load_and_prepare_data(file_path):
    """Read the CSV at ``file_path`` and add a one-row lag of ``sentiment_score``.

    The ``date`` column is parsed as dates, and the strings ``#N/A``, ``N/A``
    and ``NaN`` are read as missing values.

    ``shift(1)`` lags by row position, so the lag is only "previous
    observation" when the rows are in chronological order. If ``date`` parsed
    to a datetime dtype and its non-missing values are not already ascending,
    the rows are sorted by ``date`` (stable sort, original index labels kept)
    before the lag is computed. Files that are already in order are left
    untouched.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV; passed straight to ``pandas.read_csv``. The file
        must provide ``date``, ``sentiment_score``, ``yield`` and ``binary``
        columns.

    Returns
    -------
    pandas.DataFrame
        The loaded data with an added ``sentiment_lag1`` column, without the
        rows where ``sentiment_lag1``, ``yield`` or ``binary`` is missing.
    """
    df = pd.read_csv(file_path,
                     parse_dates=['date'],
                     na_values=['#N/A', 'N/A', 'NaN'])

    # Only reorder when the dates are real datetimes; sorting unparsed date
    # strings would order them lexicographically and could scramble the rows.
    dates = df['date']
    if (pd.api.types.is_datetime64_any_dtype(dates)
            and not dates.dropna().is_monotonic_increasing):
        df = df.sort_values('date', kind='mergesort')

    # Create lagged sentiment score (t-1): value from the preceding row.
    df['sentiment_lag1'] = df['sentiment_score'].shift(1)

    # Drop rows that cannot enter the regression (includes the first row,
    # whose lag is undefined).
    df = df.dropna(subset=['sentiment_lag1', 'yield', 'binary'])

    return df

# Run regression for multiple assets
def run_regressions(df, dependent_vars):
    """Fit one OLS model per dependent variable and print each summary.

    Every column named in ``dependent_vars`` is regressed on
    ``sentiment_lag1``, ``yield`` and ``binary`` plus the constant added by
    ``sm.add_constant``. Rows where the dependent variable or any regressor
    is missing are left out of that asset's fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the three regressor columns and every column listed in
        ``dependent_vars``.
    dependent_vars : iterable of str
        Column names to use, one at a time, as the dependent variable.

    Returns
    -------
    dict
        Maps each name in ``dependent_vars`` to its fitted statsmodels
        results object.
    """
    regressor_cols = ['sentiment_lag1', 'yield', 'binary']
    fitted_models = {}

    for asset in dependent_vars:
        # Design matrix: the regressors with an intercept column added.
        design = sm.add_constant(df[regressor_cols])
        target = df[asset]

        # A row is usable only if the target and every regressor are present.
        usable = target.notna() & ~design.isna().any(axis=1)

        fit = sm.OLS(target.loc[usable], design.loc[usable]).fit()
        fitted_models[asset] = fit

        # Report this asset's fit, followed by a separator rule.
        print(f"Regression Results for {asset}:")
        print(fit.summary())
        print("\n" + "="*80 + "\n")

    return fitted_models

# Main execution
if __name__ == "__main__":
    # Read and prepare the dataset for this run.
    file_path = 'data/VC0.csv'
    df = load_and_prepare_data(file_path)

    # Columns used, one at a time, as the dependent variable
    # (described in the notebook as index and ETF series).
    dependent_vars = [
        'S&P500',
        'NASDAQ',
        'SPY',
        'QQQ',
        'Tech_ETF',
        'Financials_ETF',
        'Energy_ETF',
        'VIX',
    ]

    # Fit and print one regression per column; keep the fitted models.
    regression_results = run_regressions(df, dependent_vars)

Regression Results for S&P500:
                            OLS Regression Results                            
Dep. Variable:                 S&P500   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.2088
Date:                Wed, 12 Mar 2025   Prob (F-statistic):              0.890
Time:                        18:21:12   Log-Likelihood:                -379.30
No. Observations:                 242   AIC:                             766.6
Df Residuals:                     238   BIC:                             780.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const        

In [2]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Load and preprocess data
def load_and_prepare_data(file_path):
    """Read the CSV at ``file_path`` and add a one-row lag of ``sentiment_score``.

    The ``date`` column is parsed as dates, and the strings ``#N/A``, ``N/A``
    and ``NaN`` are read as missing values.

    ``shift(1)`` lags by row position, so the lag is only "previous
    observation" when the rows are in chronological order. If ``date`` parsed
    to a datetime dtype and its non-missing values are not already ascending,
    the rows are sorted by ``date`` (stable sort, original index labels kept)
    before the lag is computed. Files that are already in order are left
    untouched.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV; passed straight to ``pandas.read_csv``. The file
        must provide ``date``, ``sentiment_score``, ``yield`` and ``binary``
        columns.

    Returns
    -------
    pandas.DataFrame
        The loaded data with an added ``sentiment_lag1`` column, without the
        rows where ``sentiment_lag1``, ``yield`` or ``binary`` is missing.
    """
    df = pd.read_csv(file_path,
                     parse_dates=['date'],
                     na_values=['#N/A', 'N/A', 'NaN'])

    # Only reorder when the dates are real datetimes; sorting unparsed date
    # strings would order them lexicographically and could scramble the rows.
    dates = df['date']
    if (pd.api.types.is_datetime64_any_dtype(dates)
            and not dates.dropna().is_monotonic_increasing):
        df = df.sort_values('date', kind='mergesort')

    # Create lagged sentiment score (t-1): value from the preceding row.
    df['sentiment_lag1'] = df['sentiment_score'].shift(1)

    # Drop rows that cannot enter the regression (includes the first row,
    # whose lag is undefined).
    df = df.dropna(subset=['sentiment_lag1', 'yield', 'binary'])

    return df

# Run regression for multiple assets
def run_regressions(df, dependent_vars):
    """Fit one OLS model per dependent variable and print each summary.

    Every column named in ``dependent_vars`` is regressed on
    ``sentiment_lag1``, ``yield`` and ``binary`` plus the constant added by
    ``sm.add_constant``. Rows where the dependent variable or any regressor
    is missing are left out of that asset's fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the three regressor columns and every column listed in
        ``dependent_vars``.
    dependent_vars : iterable of str
        Column names to use, one at a time, as the dependent variable.

    Returns
    -------
    dict
        Maps each name in ``dependent_vars`` to its fitted statsmodels
        results object.
    """
    regressor_cols = ['sentiment_lag1', 'yield', 'binary']
    fitted_models = {}

    for asset in dependent_vars:
        # Design matrix: the regressors with an intercept column added.
        design = sm.add_constant(df[regressor_cols])
        target = df[asset]

        # A row is usable only if the target and every regressor are present.
        usable = target.notna() & ~design.isna().any(axis=1)

        fit = sm.OLS(target.loc[usable], design.loc[usable]).fit()
        fitted_models[asset] = fit

        # Report this asset's fit, followed by a separator rule.
        print(f"Regression Results for {asset}:")
        print(fit.summary())
        print("\n" + "="*80 + "\n")

    return fitted_models

# Main execution
if __name__ == "__main__":
    # Read and prepare the dataset for this run.
    file_path = 'data/VC1.csv'
    df = load_and_prepare_data(file_path)

    # Columns used, one at a time, as the dependent variable
    # (described in the notebook as index and ETF series).
    dependent_vars = [
        'S&P500',
        'NASDAQ',
        'SPY',
        'QQQ',
        'Tech_ETF',
        'Financials_ETF',
        'Energy_ETF',
        'VIX',
    ]

    # Fit and print one regression per column; keep the fitted models.
    regression_results = run_regressions(df, dependent_vars)

Regression Results for S&P500:
                            OLS Regression Results                            
Dep. Variable:                 S&P500   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.2417
Date:                Wed, 12 Mar 2025   Prob (F-statistic):              0.867
Time:                        18:21:22   Log-Likelihood:                -377.99
No. Observations:                 241   AIC:                             764.0
Df Residuals:                     237   BIC:                             777.9
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const        

In [3]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Load and preprocess data
def load_and_prepare_data(file_path):
    """Read the CSV at ``file_path`` and add a one-row lag of ``sentiment_score``.

    The ``date`` column is parsed as dates, and the strings ``#N/A``, ``N/A``
    and ``NaN`` are read as missing values.

    ``shift(1)`` lags by row position, so the lag is only "previous
    observation" when the rows are in chronological order. If ``date`` parsed
    to a datetime dtype and its non-missing values are not already ascending,
    the rows are sorted by ``date`` (stable sort, original index labels kept)
    before the lag is computed. Files that are already in order are left
    untouched.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV; passed straight to ``pandas.read_csv``. The file
        must provide ``date``, ``sentiment_score``, ``yield`` and ``binary``
        columns.

    Returns
    -------
    pandas.DataFrame
        The loaded data with an added ``sentiment_lag1`` column, without the
        rows where ``sentiment_lag1``, ``yield`` or ``binary`` is missing.
    """
    df = pd.read_csv(file_path,
                     parse_dates=['date'],
                     na_values=['#N/A', 'N/A', 'NaN'])

    # Only reorder when the dates are real datetimes; sorting unparsed date
    # strings would order them lexicographically and could scramble the rows.
    dates = df['date']
    if (pd.api.types.is_datetime64_any_dtype(dates)
            and not dates.dropna().is_monotonic_increasing):
        df = df.sort_values('date', kind='mergesort')

    # Create lagged sentiment score (t-1): value from the preceding row.
    df['sentiment_lag1'] = df['sentiment_score'].shift(1)

    # Drop rows that cannot enter the regression (includes the first row,
    # whose lag is undefined).
    df = df.dropna(subset=['sentiment_lag1', 'yield', 'binary'])

    return df

# Run regression for multiple assets
def run_regressions(df, dependent_vars):
    """Fit one OLS model per dependent variable and print each summary.

    Every column named in ``dependent_vars`` is regressed on
    ``sentiment_lag1``, ``yield`` and ``binary`` plus the constant added by
    ``sm.add_constant``. Rows where the dependent variable or any regressor
    is missing are left out of that asset's fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the three regressor columns and every column listed in
        ``dependent_vars``.
    dependent_vars : iterable of str
        Column names to use, one at a time, as the dependent variable.

    Returns
    -------
    dict
        Maps each name in ``dependent_vars`` to its fitted statsmodels
        results object.
    """
    regressor_cols = ['sentiment_lag1', 'yield', 'binary']
    fitted_models = {}

    for asset in dependent_vars:
        # Design matrix: the regressors with an intercept column added.
        design = sm.add_constant(df[regressor_cols])
        target = df[asset]

        # A row is usable only if the target and every regressor are present.
        usable = target.notna() & ~design.isna().any(axis=1)

        fit = sm.OLS(target.loc[usable], design.loc[usable]).fit()
        fitted_models[asset] = fit

        # Report this asset's fit, followed by a separator rule.
        print(f"Regression Results for {asset}:")
        print(fit.summary())
        print("\n" + "="*80 + "\n")

    return fitted_models

# Main execution
if __name__ == "__main__":
    # Read and prepare the dataset for this run.
    file_path = 'data/FC0.csv'
    df = load_and_prepare_data(file_path)

    # Columns used, one at a time, as the dependent variable
    # (described in the notebook as index and ETF series).
    dependent_vars = [
        'S&P500',
        'NASDAQ',
        'SPY',
        'QQQ',
        'Tech_ETF',
        'Financials_ETF',
        'Energy_ETF',
        'VIX',
    ]

    # Fit and print one regression per column; keep the fitted models.
    regression_results = run_regressions(df, dependent_vars)

Regression Results for S&P500:
                            OLS Regression Results                            
Dep. Variable:                 S&P500   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                 -0.010
Method:                 Least Squares   F-statistic:                    0.2094
Date:                Wed, 12 Mar 2025   Prob (F-statistic):              0.890
Time:                        18:21:28   Log-Likelihood:                -379.30
No. Observations:                 242   AIC:                             766.6
Df Residuals:                     238   BIC:                             780.6
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const        

In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

# Load and preprocess data
def load_and_prepare_data(file_path):
    """Read the CSV at ``file_path`` and add a one-row lag of ``sentiment_score``.

    The ``date`` column is parsed as dates, and the strings ``#N/A``, ``N/A``
    and ``NaN`` are read as missing values.

    ``shift(1)`` lags by row position, so the lag is only "previous
    observation" when the rows are in chronological order. If ``date`` parsed
    to a datetime dtype and its non-missing values are not already ascending,
    the rows are sorted by ``date`` (stable sort, original index labels kept)
    before the lag is computed. Files that are already in order are left
    untouched.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV; passed straight to ``pandas.read_csv``. The file
        must provide ``date``, ``sentiment_score``, ``yield`` and ``binary``
        columns.

    Returns
    -------
    pandas.DataFrame
        The loaded data with an added ``sentiment_lag1`` column, without the
        rows where ``sentiment_lag1``, ``yield`` or ``binary`` is missing.
    """
    df = pd.read_csv(file_path,
                     parse_dates=['date'],
                     na_values=['#N/A', 'N/A', 'NaN'])

    # Only reorder when the dates are real datetimes; sorting unparsed date
    # strings would order them lexicographically and could scramble the rows.
    dates = df['date']
    if (pd.api.types.is_datetime64_any_dtype(dates)
            and not dates.dropna().is_monotonic_increasing):
        df = df.sort_values('date', kind='mergesort')

    # Create lagged sentiment score (t-1): value from the preceding row.
    df['sentiment_lag1'] = df['sentiment_score'].shift(1)

    # Drop rows that cannot enter the regression (includes the first row,
    # whose lag is undefined).
    df = df.dropna(subset=['sentiment_lag1', 'yield', 'binary'])

    return df

# Run regression for multiple assets
def run_regressions(df, dependent_vars):
    """Fit one OLS model per dependent variable and print each summary.

    Every column named in ``dependent_vars`` is regressed on
    ``sentiment_lag1``, ``yield`` and ``binary`` plus the constant added by
    ``sm.add_constant``. Rows where the dependent variable or any regressor
    is missing are left out of that asset's fit.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the three regressor columns and every column listed in
        ``dependent_vars``.
    dependent_vars : iterable of str
        Column names to use, one at a time, as the dependent variable.

    Returns
    -------
    dict
        Maps each name in ``dependent_vars`` to its fitted statsmodels
        results object.
    """
    regressor_cols = ['sentiment_lag1', 'yield', 'binary']
    fitted_models = {}

    for asset in dependent_vars:
        # Design matrix: the regressors with an intercept column added.
        design = sm.add_constant(df[regressor_cols])
        target = df[asset]

        # A row is usable only if the target and every regressor are present.
        usable = target.notna() & ~design.isna().any(axis=1)

        fit = sm.OLS(target.loc[usable], design.loc[usable]).fit()
        fitted_models[asset] = fit

        # Report this asset's fit, followed by a separator rule.
        print(f"Regression Results for {asset}:")
        print(fit.summary())
        print("\n" + "="*80 + "\n")

    return fitted_models

# Main execution
if __name__ == "__main__":
    # Read and prepare the dataset for this run.
    file_path = 'data/FC1.csv'
    df = load_and_prepare_data(file_path)

    # Columns used, one at a time, as the dependent variable
    # (described in the notebook as index and ETF series).
    dependent_vars = [
        'S&P500',
        'NASDAQ',
        'SPY',
        'QQQ',
        'Tech_ETF',
        'Financials_ETF',
        'Energy_ETF',
        'VIX',
    ]

    # Fit and print one regression per column; keep the fitted models.
    regression_results = run_regressions(df, dependent_vars)

Regression Results for S&P500:
                            OLS Regression Results                            
Dep. Variable:                 S&P500   R-squared:                       0.006
Model:                            OLS   Adj. R-squared:                 -0.007
Method:                 Least Squares   F-statistic:                    0.4523
Date:                Wed, 12 Mar 2025   Prob (F-statistic):              0.716
Time:                        18:21:35   Log-Likelihood:                -378.93
No. Observations:                 242   AIC:                             765.9
Df Residuals:                     238   BIC:                             779.8
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const        