# Fetch Stock Price Data by `yfinanace`

In [35]:
import yfinance as yf

# Download historical data for Apple (AAPL)

data = yf.download('AAPL', start='2020-01-01', end='2024-12-31')

data.head()



YF.download() has changed argument auto_adjust default to True

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-02,72.620827,72.681274,71.373203,71.627077,135480400
2020-01-03,71.91481,72.676439,71.68995,71.84711,146322800
2020-01-06,72.487846,72.526533,70.783248,71.034709,118387200
2020-01-07,72.14695,72.753831,71.926922,72.497537,108872000
2020-01-08,73.30751,73.609745,71.849533,71.849533,132079200


In [36]:
data.shape

(1257, 5)

# Prepare Features and Target

- Target:
    - Open, High, Low and Volumn
- Features:
    - Next Day

In [37]:
data.isnull().sum()

Price   Ticker
Close   AAPL      0
High    AAPL      0
Low     AAPL      0
Open    AAPL      0
Volume  AAPL      0
dtype: int64

In [38]:
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1257 entries, 2020-01-02 to 2024-12-30
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   (Close, AAPL)   1257 non-null   float64
 1   (High, AAPL)    1257 non-null   float64
 2   (Low, AAPL)     1257 non-null   float64
 3   (Open, AAPL)    1257 non-null   float64
 4   (Volume, AAPL)  1257 non-null   int64  
dtypes: float64(4), int64(1)
memory usage: 58.9 KB


In [39]:
data['Target'] = data['Close'].shift(-1)  # next day's Close
data.dropna(inplace=True)

X = data[['Open', 'High', 'Low', 'Volume']]
y = data['Target']


In [40]:
X.shape, y.shape

((1256, 4), (1256,))

# Train-Test Split

In [41]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Model

In [42]:
from sklearn.linear_model import LinearRegression

model = LinearRegression(
    fit_intercept=True,
    copy_X=True,
    n_jobs=None
)
model.fit(X_train, y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


# Predict and Evaluate

In [43]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

predictions = model.predict(X_test)

mse = mean_squared_error(y_test, predictions)
mae = mean_absolute_error(y_test, predictions)

print(f'MSE: {mse:.2f}')
print(f'MAE: {mae:.2f}')


MSE: 7.39
MAE: 2.03


# Visualization Prediction vs Actual

In [44]:
import plotly.graph_objects as go
index = list(range(len(y_test)))

fig = go.Figure()

# Actual
fig.add_trace(go.Scatter(
    x=index,
    y=y_test.values,
    mode='lines',
    name='Actual',
    line=dict(color='blue')
))

# Predicted
fig.add_trace(go.Scatter(
    x=index,
    y=predictions,
    mode='lines',
    name='Predicted',
    line=dict(color='orange')
))

fig.update_layout(
    title='Actual vs Predicted Closing Prices',
    xaxis_title='Time',
    yaxis_title='Price',
    hovermode='x unified',
    template='plotly_white',
    width=900,
    height=500
)

fig.show()
