In [19]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import yfinance as yf
import mplfinance as mpf
import joblib

In [2]:
start_date='2014-01-01'
end_date='2024-12-21'

stock='GOOG'

data=yf.download(stock,start_date,end_date)

[*********************100%%**********************]  1 of 1 completed


In [5]:
data['Open-Close']=data['Open']-data['Close']
data['High-Low']=data['High']-data['Low']
data=data.dropna()

In [6]:
X=data[['Open-Close','High-Low']]
X.tail()

Unnamed: 0_level_0,Open-Close,High-Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-06-26,-1.169998,1.939987
2024-06-27,-1.214996,2.050003
2024-06-28,2.300003,3.255005
2024-07-01,-0.01001,2.610001
2024-07-02,-3.139999,3.889999


In [9]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [10]:
# Prepare features and target
data['Target'] = data['Close'].shift(-1)  # Next day's closing price
data = data.dropna()

features = ['Open', 'High', 'Low', 'Close', 'Volume']
X = data[features]
y = data['Target']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
model = LinearRegression()
model.fit(X_train, y_train)


In [12]:
pred=model.predict(X_test)

In [13]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, pred)
r2 = r2_score(y_test, pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

Mean Squared Error: 3.0757075608965363
R-squared Score: 0.9981456005326805


In [14]:
last_data = data[features].iloc[-1].values.reshape(1, -1)
next_day_prediction = model.predict(last_data)
print(f"Predicted price for next day: {next_day_prediction[0]}")

Predicted price for next day: 184.55023065088318




In [16]:
last_5_days = data.tail(5)
last_5_days_features = last_5_days[features]
last_5_days_predictions = model.predict(last_5_days_features)

comparison_df = pd.DataFrame({
    'Date': last_5_days.index.date,  # Convert to date to avoid dimensionality issues
    'Actual Price': last_5_days['Close'].values,  # Use .values to ensure 1D array
    'Predicted Price': last_5_days_predictions
})
print(comparison_df)

         Date  Actual Price  Predicted Price
0  2024-06-25    185.580002       185.366839
1  2024-06-26    185.369995       185.416678
2  2024-06-27    186.860001       186.913067
3  2024-06-28    183.419998       183.825327
4  2024-07-01    184.490005       184.550231


In [20]:
joblib.dump(model, 'stock_prediction_model.pkl')

['stock_prediction_model.pkl']