In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import yfinance as yf
import datetime

from sklearn.metrics import mean_squared_error, r2_score

# --- Configuration -----------------------------------------------------------
# Ticker and date window for the historical data.
# Note: yfinance treats `end` as exclusive, so 2021-12-31 itself is not fetched.
stock_symbol = 'AAPL'
start_date = '2020-01-01'
end_date = '2021-12-31'

# --- Load --------------------------------------------------------------------
# Fetch daily historical prices from Yahoo Finance (network call).
df = yf.download(stock_symbol, start=start_date, end=end_date)

# A download can come back empty (e.g. unknown ticker or a network failure).
# Fail here with a clear message instead of letting train_test_split raise a
# confusing "n_samples=0" error further down.
if df.empty:
    raise ValueError(
        f'No price data returned for {stock_symbol!r} '
        f'between {start_date} and {end_date}'
    )

# --- Prepare -----------------------------------------------------------------
# Store each trading day as a proleptic Gregorian ordinal (an integer) in a
# 'Date' column, then swap the DatetimeIndex for a plain positional index.
df['Date'] = [day.toordinal() for day in pd.to_datetime(df.index)]
df.reset_index(drop=True, inplace=True)

# Feature matrix: one column (the date ordinal), shape (n_samples, 1).
X = df[['Date']].values

# Target: closing price as a 1-D vector. Recent yfinance releases return
# MultiIndex (field, ticker) columns even for a single ticker, which makes
# df['Close'] a one-column DataFrame; flattening keeps y 1-D either way so that
# predictions come back as scalars and the ':.2f' formatting below works.
y = np.ravel(df['Close'].values)

# --- Split -------------------------------------------------------------------
# NOTE(review): train_test_split shuffles by default, so test days are
# interleaved with training days. For a time series this measures
# interpolation, not forecasting, and the reported R-squared will look better
# than a chronological hold-out (shuffle=False) would. Left unchanged here so
# the metrics stay comparable with previously recorded runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Fit ---------------------------------------------------------------------
# Ordinary least squares: closing price as a linear function of the date.
model = LinearRegression()
model.fit(X_train, y_train)

# --- Evaluate ----------------------------------------------------------------
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')

# --- Extrapolate -------------------------------------------------------------
# NOTE(review): this date lies two years beyond the end of the training window;
# the value is a straight-line extrapolation of the fitted trend, not a
# validated forecast.
future_date = datetime.date(2024, 1, 1)
future_date_ordinal = future_date.toordinal()
future_price = model.predict([[future_date_ordinal]])

print(f'Predicted stock price on {future_date}: {future_price[0]:.2f}')

[*********************100%%**********************]  1 of 1 completed
Mean Squared Error: 95.40524615606847
R-squared: 0.9144121377255523
Predicted stock price on 2024-01-01: 259.37
