In [4]:
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import MinMaxScaler

# Regress Tesla's closing price on the S&P 500 and NASDAQ closing prices
# (all three min-max scaled) and report which index coefficients are
# significant at the 5% level.

# --- Load -------------------------------------------------------------------
# Every CSV is read the same way: 'Date' is parsed and becomes the index.
csv_options = {'parse_dates': ['Date'], 'index_col': 'Date'}
tesla_data = pd.read_csv('Clean_data/Cleaned_Tesla_Close.csv', **csv_options)
sp500_data = pd.read_csv('Clean_data/Cleaned_SP500_Close.csv', **csv_options)
ixic_data = pd.read_csv('Clean_data/Cleaned_IXIC_Close.csv', **csv_options)

# --- Merge ------------------------------------------------------------------
# Index-on-index merges with pandas' default join type. Tesla's columns keep
# their names; clashing columns from the index files get a suffix, which is
# where 'Close_sp500' and 'Close_ixic' below come from.
merged_data = pd.merge(
    tesla_data,
    sp500_data,
    left_index=True,
    right_index=True,
    suffixes=('', '_sp500'),
)
merged_data = pd.merge(
    merged_data,
    ixic_data,
    left_index=True,
    right_index=True,
    suffixes=('', '_ixic'),
)

# --- Preprocess -------------------------------------------------------------
# Keep only the three closing-price columns used by the regression.
data = merged_data.loc[:, ['Close', 'Close_sp500', 'Close_ixic']]

# Min-max scale each column, then re-wrap the resulting array so the column
# labels and the date index are preserved.
scaler = MinMaxScaler()
normalized_data = pd.DataFrame(
    scaler.fit_transform(data),
    columns=data.columns,
    index=data.index,
)

# --- Model ------------------------------------------------------------------
# Response: Tesla close. Regressors: the two index closes plus an intercept
# column added by statsmodels.
Y = normalized_data['Close']
X = sm.add_constant(normalized_data[['Close_sp500', 'Close_ixic']])

model = sm.OLS(endog=Y, exog=X).fit()

# --- Report -----------------------------------------------------------------
print(model.summary())

# For each regressor: (message if p < 0.05, message otherwise).
# Dict order fixes the print order: S&P 500 first, then NASDAQ.
significance_messages = {
    'Close_sp500': (
        "S&P 500 stock prices have a significant impact on Tesla stock prices.",
        "S&P 500 stock prices do not have a significant impact on Tesla stock prices.",
    ),
    'Close_ixic': (
        "NASDAQ stock prices have a significant impact on Tesla stock prices.",
        "NASDAQ stock prices do not have a significant impact on Tesla stock prices.",
    ),
}
for regressor, (significant_msg, not_significant_msg) in significance_messages.items():
    print(significant_msg if model.pvalues[regressor] < 0.05 else not_significant_msg)


                            OLS Regression Results                            
Dep. Variable:                  Close   R-squared:                       0.614
Model:                            OLS   Adj. R-squared:                  0.614
Method:                 Least Squares   F-statistic:                     1166.
Date:                Sun, 17 Nov 2024   Prob (F-statistic):          8.10e-304
Time:                        16:42:21   Log-Likelihood:                 622.45
No. Observations:                1469   AIC:                            -1239.
Df Residuals:                    1466   BIC:                            -1223.
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const           0.0462      0.010      4.575      