In [9]:
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# List of stock symbols you want to analyze
stock_symbols = ['AAPL', 'MSFT', 'GOOGL', 'TSLA']

results = []

for symbol in stock_symbols:
    # Fetch historical stock data
    stock_data = yf.download(symbol, start="2022-01-01", end="2023-11-01")

    # Calculate MACD manually
    stock_data['EMA_12'] = stock_data['Close'].ewm(span=12, adjust=False).mean()
    stock_data['EMA_26'] = stock_data['Close'].ewm(span=26, adjust=False).mean()
    stock_data['MACD'] = stock_data['EMA_12'] - stock_data['EMA_26']
    stock_data['Signal'] = stock_data['MACD'].ewm(span=9, adjust=False).mean()
    stock_data['Histogram'] = stock_data['MACD'] - stock_data['Signal']

    # Feature engineering
    stock_data['Next_Day_Price'] = stock_data['Close'].shift(-1)
    stock_data['Next_Day_Price'].fillna(stock_data['Next_Day_Price'].mean(), inplace=True)


    X = stock_data[['MACD']]  # Use 'MACD' as a feature
    y = stock_data['Next_Day_Price']

    # Create and train the linear regression model
    model = LinearRegression()
    model.fit(X, y)

    # Calculate R-squared
    r_squared = r2_score(y, model.predict(X))

    results.append({'Stock Symbol': symbol, 'R-squared': r_squared})

# Sort the results based on R-squared values
sorted_results = sorted(results, key=lambda x: x['R-squared'], reverse=True)

# Display the stocks with the highest R-squared values
for result in sorted_results:
    print(f"{result['Stock Symbol']} R^2: {result['R-squared']}")


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
AAPL R^2: 0.2080137795711393
MSFT R^2: 0.12436646439357735
TSLA R^2: 0.07591790538186327
GOOGL R^2: 0.06898581110383628
