In [31]:
import yfinance as yf
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [40]:
# Query parameters for the price download.
symbol = 'AAPL'
start_date = '2023-11-01'
end_date = '2023-11-23'

# Fetch the price history for `symbol` over the requested window.
# NOTE(review): yfinance documents `end` as exclusive - confirm for the installed version.
stock_data = yf.download(
    symbol,
    start=start_date,
    end=end_date,
)

stock_data


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2023-11-01,171.0,174.229996,170.119995,173.970001,173.741104,56934900
2023-11-02,175.520004,177.779999,175.460007,177.570007,177.33638,77334800
2023-11-03,174.240005,176.820007,173.350006,176.649994,176.417572,79763700
2023-11-06,176.380005,179.429993,176.210007,179.229996,178.994186,63841300
2023-11-07,179.179993,182.440002,178.970001,181.820007,181.58078,70530000
2023-11-08,182.350006,183.449997,181.589996,182.889999,182.649368,49340300
2023-11-09,182.960007,184.119995,181.809998,182.410004,182.169998,53763500
2023-11-10,183.970001,186.570007,183.529999,186.399994,186.399994,66133400
2023-11-13,185.820007,186.029999,184.210007,184.800003,184.800003,43627500
2023-11-14,187.699997,188.110001,186.300003,187.440002,187.440002,60108400


In [41]:
# Day-over-day change in the closing price. The first row has no previous
# close to compare against, so its delta is NaN.
closing_prices = stock_data["Close"]
stock_data["delta"] = closing_prices.diff(periods=1)
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,delta
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-11-01,171.0,174.229996,170.119995,173.970001,173.741104,56934900,
2023-11-02,175.520004,177.779999,175.460007,177.570007,177.33638,77334800,3.600006
2023-11-03,174.240005,176.820007,173.350006,176.649994,176.417572,79763700,-0.920013
2023-11-06,176.380005,179.429993,176.210007,179.229996,178.994186,63841300,2.580002
2023-11-07,179.179993,182.440002,178.970001,181.820007,181.58078,70530000,2.590012
2023-11-08,182.350006,183.449997,181.589996,182.889999,182.649368,49340300,1.069992
2023-11-09,182.960007,184.119995,181.809998,182.410004,182.169998,53763500,-0.479996
2023-11-10,183.970001,186.570007,183.529999,186.399994,186.399994,66133400,3.98999
2023-11-13,185.820007,186.029999,184.210007,184.800003,184.800003,43627500,-1.599991
2023-11-14,187.699997,188.110001,186.300003,187.440002,187.440002,60108400,2.639999


In [43]:
# Discard every row that contains a missing value. In the data shown above
# this removes the first row, whose "delta" is NaN.
stock_data = stock_data.dropna(axis=0, how="any")
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,delta
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-11-02,175.520004,177.779999,175.460007,177.570007,177.33638,77334800,3.600006
2023-11-03,174.240005,176.820007,173.350006,176.649994,176.417572,79763700,-0.920013
2023-11-06,176.380005,179.429993,176.210007,179.229996,178.994186,63841300,2.580002
2023-11-07,179.179993,182.440002,178.970001,181.820007,181.58078,70530000,2.590012
2023-11-08,182.350006,183.449997,181.589996,182.889999,182.649368,49340300,1.069992
2023-11-09,182.960007,184.119995,181.809998,182.410004,182.169998,53763500,-0.479996
2023-11-10,183.970001,186.570007,183.529999,186.399994,186.399994,66133400,3.98999
2023-11-13,185.820007,186.029999,184.210007,184.800003,184.800003,43627500,-1.599991
2023-11-14,187.699997,188.110001,186.300003,187.440002,187.440002,60108400,2.639999
2023-11-15,187.850006,189.5,187.779999,188.009995,188.009995,53790500,0.569992


In [44]:
# Add a "delta OC" column that shows the change from Open to Close within each day.
#
# The column is built with .assign(), which returns a new DataFrame, instead of
# being set in place: `stock_data` at this point is the result of a row-filtering
# dropna(), and setting a column directly on such a frame can emit
# SettingWithCopyWarning on pandas versions without Copy-on-Write.
# The dict form is needed because the column name contains a space.
stock_data = stock_data.assign(
    **{"delta OC": stock_data["Close"] - stock_data["Open"]}
)
stock_data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,delta,delta OC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-11-02,175.520004,177.779999,175.460007,177.570007,177.33638,77334800,3.600006,2.050003
2023-11-03,174.240005,176.820007,173.350006,176.649994,176.417572,79763700,-0.920013,2.409988
2023-11-06,176.380005,179.429993,176.210007,179.229996,178.994186,63841300,2.580002,2.849991
2023-11-07,179.179993,182.440002,178.970001,181.820007,181.58078,70530000,2.590012,2.640015
2023-11-08,182.350006,183.449997,181.589996,182.889999,182.649368,49340300,1.069992,0.539993
2023-11-09,182.960007,184.119995,181.809998,182.410004,182.169998,53763500,-0.479996,-0.550003
2023-11-10,183.970001,186.570007,183.529999,186.399994,186.399994,66133400,3.98999,2.429993
2023-11-13,185.820007,186.029999,184.210007,184.800003,184.800003,43627500,-1.599991,-1.020004
2023-11-14,187.699997,188.110001,186.300003,187.440002,187.440002,60108400,2.639999,-0.259995
2023-11-15,187.850006,189.5,187.779999,188.009995,188.009995,53790500,0.569992,0.159988


In [45]:
# Assemble the regression inputs: price/volume columns as features and the
# day-over-day close change as the target. Features and target are taken
# from the same rows of stock_data.
feature_columns = ["Open", "High", "Low", "Volume"]
X_train = stock_data.loc[:, feature_columns]
y_train = stock_data.loc[:, "delta"]

X_train, y_train

(                  Open        High         Low    Volume
 Date                                                    
 2023-11-02  175.520004  177.779999  175.460007  77334800
 2023-11-03  174.240005  176.820007  173.350006  79763700
 2023-11-06  176.380005  179.429993  176.210007  63841300
 2023-11-07  179.179993  182.440002  178.970001  70530000
 2023-11-08  182.350006  183.449997  181.589996  49340300
 2023-11-09  182.960007  184.119995  181.809998  53763500
 2023-11-10  183.970001  186.570007  183.529999  66133400
 2023-11-13  185.820007  186.029999  184.210007  43627500
 2023-11-14  187.699997  188.110001  186.300003  60108400
 2023-11-15  187.850006  189.500000  187.779999  53790500
 2023-11-16  189.570007  190.960007  188.649994  54412900
 2023-11-17  190.250000  190.380005  188.570007  50922700
 2023-11-20  189.889999  191.910004  189.880005  46505100
 2023-11-21  191.410004  191.520004  189.740005  38134500
 2023-11-22  191.490005  192.929993  190.830002  39617700,
 Date
 2023-1

In [46]:
# Fit a linear regression on the training features.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the same rows the model was fitted on (in-sample predictions).
predicted_deltas = model.predict(X_train)

# R^2 of those in-sample predictions against the actual deltas.
r_squared = r2_score(y_true=y_train, y_pred=predicted_deltas)

# Actual vs. predicted deltas side by side, indexed like y_train.
result_df = pd.DataFrame(dict(Actual=y_train, Predicted=predicted_deltas))

result_df

Unnamed: 0_level_0,Actual,Predicted
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-11-02,3.600006,2.900144
2023-11-03,-0.920013,1.514104
2023-11-06,2.580002,1.36737
2023-11-07,2.590012,2.444749
2023-11-08,1.069992,0.121529
2023-11-09,-0.479996,0.027075
2023-11-10,3.98999,2.494899
2023-11-13,-1.599991,-1.194619
2023-11-14,2.639999,1.161608
2023-11-15,0.569992,2.600176


In [57]:
# Predict the delta for the most recent row of stock_data.
# iloc[-1:] (a slice, not a scalar index) keeps the selection a one-row
# DataFrame, which is the 2-D input shape model.predict expects.
# NOTE(review): X_train is built from this same stock_data, so this looks like
# an in-sample prediction rather than a forecast for an unseen day - confirm intent.
X_pred = stock_data.iloc[-1:][["Open", "High", "Low", "Volume"]]

# predict returns an array with one value per input row; take the single entry.
delta_pred = model.predict(X_pred)[0]
delta_pred

0.7867864182640361

In [47]:
# Overlay the actual and the model-predicted deltas on a shared date axis.
fig = go.Figure()

true_trace = go.Scatter(x=stock_data.index, y=y_train, mode='lines', name='True')
predicted_trace = go.Scatter(
    x=stock_data.index,
    y=predicted_deltas,
    mode='lines',
    name='Predicted',
    line=dict(dash='dash'),
)
for trace in (true_trace, predicted_trace):
    fig.add_trace(trace)

# One layout call covering size, font colour and the title (with the R-squared value).
fig.update_layout(
    height=800,
    font_color="Black",
    title=f"Regression for {symbol} (R-squared: {r_squared:.4f})",
)