**IMPORTING LIBRARIES**

![](http://www.mundocuentas.com/img-servicios/yahoo.png)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# LOADING DATA

In [None]:
# Location of the Yahoo stock price CSV inside the Kaggle input directory.
DATA_PATH = "/kaggle/input/time-series-forecasting-with-yahoo-stock-price/yahoo_stock.csv"

# Read the raw CSV into a DataFrame and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# Count the missing values in each column (isna is the canonical name for isnull).
df.isna().sum()

In [None]:
# Dimensions of the loaded dataset as a (rows, columns) tuple.
df.shape

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the
# columns that describe() summarizes by default.
df.describe()

# DATA VISUALIZATION

In [None]:
# Overlay kernel density estimates of the four price columns so their
# distributions can be compared on one shared price axis.
sns.kdeplot(data=df[['High', 'Low', 'Open', 'Close']])
# Label the figure like the other plots in this notebook so it stands alone.
plt.xlabel('Price')
plt.ylabel('Density')
plt.title('Density of Yahoo Stock High, Low, Open and Close Prices')
# Render explicitly so the Axes repr is not echoed as the cell output.
plt.show()

In [None]:
# Histogram of the opening price with a KDE curve overlaid.
plt.figure(figsize=(12,7))
sns.histplot(data=df, x='Open', kde=True)
# Label the figure consistently with the closing-price histogram.
plt.xlabel('Opening Price')
plt.ylabel('Frequency')
plt.title('Distribution of Yahoo Stock Opening Price')
# Render explicitly so the Axes repr is not echoed as the cell output.
plt.show()

# DISTRIBUTION OF YAHOO STOCK CLOSING PRICE

In [None]:
# Histogram of the closing price (20 bins) with a KDE curve overlaid.
plt.figure(figsize=(12,7))
sns.histplot(data=df, x='Close', bins=20, kde=True)
plt.xlabel('Closing Price')  # fixed typo: was 'Closeing Price'
plt.ylabel('Frequency')
plt.title('Distribution of Yahoo Stock Closing Price')
plt.show()

# DISTRIBUTION OF YAHOO STOCK CLOSING PRICE BY YEAR

In [None]:
# Derive the calendar year from the 'Date' column; it is stored on df as a
# new 'Year' column and used as the grouping variable for the box plot.
df['Year'] = pd.to_datetime(df['Date']).dt.year

# One box per year showing the spread of closing prices within that year.
plt.figure(figsize=(12,7))
sns.boxplot(data=df, x='Year', y='Close')
plt.xlabel('Year')
plt.ylabel('Closing Price')  # fixed typo: was 'Closeing Price'
plt.title('Distribution of Yahoo Stock Closing Price by Year')

plt.show()

# Split the Data Into Features (X) and Target Variable (y)

In [None]:
# Columns used as model inputs, and the column the model should predict.
feature_columns = ['High', 'Low', 'Open', 'Volume']
target_column = 'Close'

# Extract both as plain NumPy arrays for scikit-learn.
x = df[feature_columns].values
y = df[target_column].values

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
# NOTE(review): train_test_split shuffles rows by default, so the test set is
# not a contiguous time period - confirm that is intended for this stock series.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit an ordinary least-squares regression on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(x_train, y_train)
# Display the fitted estimator as the cell output.
model

In [None]:
# Predict closing prices for the held-out test features.
y_pred = model.predict(x_test)

# Visualizing the Linear Regression Predictions for 'Close' Values

In [None]:
# Activate the dark theme BEFORE creating the figure. Style settings are read
# when a figure is created, so switching the style after plt.figure() leaves
# that figure's background at the previous (light) setting while the text
# turns white. The style stays active for the cells that follow.
plt.style.use('dark_background')
plt.figure(figsize=(12, 7))
plt.title('Linear Regression', color="green")

# Each test row as (predicted, actual). Labels are attached to the artists
# themselves so the legend entries cannot be matched to the wrong artist.
plt.scatter(y_pred, y_test, color="#FFC0CB", label="Predicted_Close")
# Actual values plotted against themselves: where a perfect prediction would fall.
plt.scatter(y_test, y_test, color="red", label="Actual_Close")

# Reference diagonal y = x, drawn once between the extremes of the actual
# values. This is the perfect-prediction line, not a fitted regression line.
diagonal = [min(y_test), max(y_test)]
plt.plot(diagonal, diagonal, color="yellow", label="Perfect Prediction (y = x)")

plt.legend(loc="lower right", facecolor='green', labelcolor='white')

plt.xlabel('Predicted Close Price')
plt.ylabel('Actual Close Price')

plt.show()

In [None]:
# Actual and predicted close prices drawn in test-set order.
# NOTE: the white 'Predicted' line is only visible while the dark_background
# style activated in an earlier cell is in effect.
plt.figure(figsize=(12, 7))
plt.plot(y_test, color='red', label='Actual')
plt.plot(y_pred, color='white', label='Predicted')
# The x-axis is the position within the test set. The split that produced
# y_test shuffles rows, so this is a sample index, not chronological time.
plt.xlabel('Test Sample Index')
plt.ylabel('Close Price')
plt.title('Linear Regression Actual vs Predicted(Close Price)')  # fixed typo: 'Regresion'
plt.legend()

plt.show()

# Model Evaluation

In [None]:
# Score the held-out predictions with three regression metrics.
mse = mean_squared_error(y_test, y_pred)  # average squared residual
rmse = np.sqrt(mse)                       # same error, in the units of the target
r2 = r2_score(y_test, y_pred)             # coefficient of determination

# Report the metrics in a fixed order: MSE, RMSE, then R-squared.
for metric_label, metric_value in (
    ("Mean Squared Error(MSE):", mse),
    ("Root Mean Squared Error(RMSE):", rmse),
    ("R-Squared Score:", r2),
):
    print(metric_label, metric_value)

# Saving the Prediction Data as a CSV File

In [None]:
# Wrap the predicted values in a single-column DataFrame named 'Predicted'.
prediction_df = pd.DataFrame(y_pred, columns=['Predicted'])

# Write the predictions to disk without the row index, then confirm.
output_path = 'Prediction.csv'
prediction_df.to_csv(output_path, index=False)

print("CSV file 'Prediction.csv' successfully saved!!!")

# THANK YOU, GIVE ME FEEDBACK