In [1]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from sklearn.preprocessing import MinMaxScaler

# Step 1: Load and merge datasets
def _read_close_csv(csv_path):
    """Read one cleaned CSV, parsing its 'Date' column and using it as the index."""
    return pd.read_csv(csv_path, parse_dates=['Date'], index_col='Date')


tesla_data = _read_close_csv('Clean_data/Cleaned_Tesla_Close.csv')
sp500_data = _read_close_csv('Clean_data/Cleaned_SP500_Close.csv')
ixic_data = _read_close_csv('Clean_data/Cleaned_IXIC_Close.csv')

# Align the three frames on their shared index. Tesla's columns keep their
# names; clashing S&P 500 / NASDAQ columns gain '_sp500' / '_ixic' suffixes.
merged_data = (
    tesla_data
    .merge(sp500_data, left_index=True, right_index=True, suffixes=('', '_sp500'))
    .merge(ixic_data, left_index=True, right_index=True, suffixes=('', '_ixic'))
)

# Step 2: Preprocess data
# Keep only the three closing-price columns used by the model.
feature_columns = ['Close', 'Close_sp500', 'Close_ixic']
data = merged_data[feature_columns]

# Rescale every column with MinMaxScaler, then wrap the resulting array in a
# DataFrame that carries over the original column labels and index.
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(data)
normalized_data = pd.DataFrame(scaled_values, index=data.index, columns=data.columns)

# Step 3: Fit the linear model behind the ANOVA
# Response: Tesla close; explanatory terms: S&P 500 close and NASDAQ close.
formula = 'Close ~ Close_sp500 + Close_ixic'
model = ols(formula, data=normalized_data).fit()

# Step 4: Build the Type II ANOVA table for the fitted model and display it
anova_table = sm.stats.anova_lm(model, typ=2)
print("\n--- ANOVA Results ---", anova_table, sep="\n")

# Step 5: Interpretation
# For each index term, report whether its p-value in the ANOVA table falls
# below the chosen significance level.
print("\n--- Interpretation ---")
SIGNIFICANCE_LEVEL = 0.05
MESSAGE = "{} {} a statistically significant impact on Tesla stock prices (p-value = {:.4g})."

for term, label in (('Close_sp500', 'S&P 500'), ('Close_ixic', 'NASDAQ')):
    # .loc[row, column] reads the cell in a single lookup instead of chained
    # indexing through the column Series.
    p_value = anova_table.loc[term, 'PR(>F)']
    verdict = "has" if p_value < SIGNIFICANCE_LEVEL else "does not have"
    # '.4g' keeps four significant digits and switches to scientific notation
    # for tiny values, so a p-value like 1.86e-06 is no longer printed as the
    # misleading "0.0000" that the fixed-point '.4f' format produced.
    print(MESSAGE.format(label, verdict, p_value))



--- ANOVA Results ---
                sum_sq      df           F        PR(>F)
Close_sp500   0.576258     1.0   22.921611  1.858033e-06
Close_ixic    6.543858     1.0  260.292599  5.003956e-54
Residual     36.855812  1466.0         NaN           NaN

--- Interpretation ---
S&P 500 has a statistically significant impact on Tesla stock prices (p-value = 0.0000).
NASDAQ has a statistically significant impact on Tesla stock prices (p-value = 0.0000).
