In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import yfinance as yf
import mplfinance as mpf
import joblib

In [2]:
start_date='2014-01-01'
end_date='2024-12-21'

stock='GOOG'

data=yf.download(stock,start_date,end_date)

[*********************100%%**********************]  1 of 1 completed


In [3]:
data['Open-Close']=data['Open']-data['Close']
data['High-Low']=data['High']-data['Low']
data=data.dropna()

In [4]:
X=data[['Open-Close','High-Low']]
X.tail()

Unnamed: 0_level_0,Open-Close,High-Low
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-12-16,-3.794998,6.529999
2024-12-17,1.410004,6.190002
2024-12-18,6.680008,9.410004
2024-12-19,3.580002,5.080002
2024-12-20,-5.950012,7.764999


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [6]:
# Prepare features and target
data['Target'] = data['Close'].shift(-1)  # Next day's closing price
data = data.dropna()

features = ['Open', 'High', 'Low', 'Close', 'Volume']
X = data[features]
y = data['Target']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
model = LinearRegression()
model.fit(X_train, y_train)


In [8]:
pred=model.predict(X_test)

In [9]:
from sklearn.metrics import mean_squared_error, r2_score
mse = mean_squared_error(y_test, pred)
r2 = r2_score(y_test, pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

Mean Squared Error: 2.697094956125411
R-squared Score: 0.9987349529238009


In [10]:
last_data = data[features].iloc[-1].values.reshape(1, -1)
next_day_prediction = model.predict(last_data)
print(f"Predicted price for next day: {next_day_prediction[0]}")

Predicted price for next day: 190.02538309576158




In [11]:
last_5_days = data.tail(5)
last_5_days_features = last_5_days[features]
last_5_days_predictions = model.predict(last_5_days_features)

comparison_df = pd.DataFrame({
    'Date': last_5_days.index.date,  # Convert to date to avoid dimensionality issues
    'Actual Price': last_5_days['Close'].values,  # Use .values to ensure 1D array
    'Predicted Price': last_5_days_predictions
})
print(comparison_df)

         Date  Actual Price  Predicted Price
0  2024-12-13    191.380005       191.677979
1  2024-12-16    198.160004       198.158108
2  2024-12-17    197.119995       197.430899
3  2024-12-18    190.149994       190.488850
4  2024-12-19    189.699997       190.025383


In [12]:
joblib.dump(model, 'stock_prediction_model.pkl')

['stock_prediction_model.pkl']