This notebook aims to use simple linear models to predict the returns of a specific stock given historical data.

## Get initial data

In [24]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
import plotly.graph_objects as go
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [25]:
GetFacebookInformation = yf.Ticker("META")


# put the data into a pandas dataframe
df = GetFacebookInformation.history(period="1mo")
# plot the data
fig = go.Figure(go.Scatter(x=df.index, y=df['High'], mode='markers', name='Close'))
fig.update_layout(
    title='Facebook Stock Price',
    xaxis_title='Date',
    yaxis_title='Highest Price (USD)',
    xaxis_rangeslider_visible=False
)

fig.show()

print(df.head())



                                 Open        High         Low       Close  \
Date                                                                        
2025-02-24 00:00:00-05:00  685.670024  686.659135  661.861189  667.536133   
2025-02-25 00:00:00-05:00  665.378031  667.406256  641.289475  656.915588   
2025-02-26 00:00:00-05:00  659.063697  682.402919  657.415139  673.101196   
2025-02-27 00:00:00-05:00  681.843410  688.037911  656.985520  657.654907   
2025-02-28 00:00:00-05:00  657.455076  669.034801  642.028797  667.606079   

                             Volume  Dividends  Stock Splits  
Date                                                          
2025-02-24 00:00:00-05:00  15677000        0.0           0.0  
2025-02-25 00:00:00-05:00  20579700        0.0           0.0  
2025-02-26 00:00:00-05:00  14488700        0.0           0.0  
2025-02-27 00:00:00-05:00  12500000        0.0           0.0  
2025-02-28 00:00:00-05:00  17534200        0.0           0.0  


## Perform linear regression

In [26]:

# splitting data into training and testing sets (80-20 split)
X = df.index.astype('int64').values.reshape(-1, 1)
y = df['High'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Coefficients: {model.coef_}")
print(f"Intercept: {model.intercept_}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2}")


Coefficients: [-4.58602256e-14]
Intercept: 80502.46300413097
Mean Squared Error (MSE): 96.07626297699703
R² Score: 0.9351395318423101


## Predict results

In [None]:
df['Predicted_High'] = model.predict(X)
fig = go.Figure()

#  actual high prices
fig.add_trace(go.Scatter(
    x=df.index,
    y=df['High'],
    mode='lines',
    name='Actual High'
))

# predicted high prices
fig.add_trace(go.Scatter(
    x=df.index,
    y=df['Predicted_High'],
    mode='lines',
    name='Predicted High'
))

fig.update_layout(
    title='Linear Regression: Actual vs Predicted High',
    xaxis_title='Date',
    yaxis_title='Stock High Price',
    legend_title='Legend'
)

fig.show()

In [None]:
last_day = df.index[-1]
next_day = last_day + pd.Timedelta(days=1)

X_next = np.array([next_day.value]).reshape(-1, 1)

# Predict the next day's highest price
predicted_high = model.predict(X_next)
print(f"Predicted high for {next_day.date()}: {predicted_high[0]}")


Predicted high for 2025-03-22: 585.7000482190051
