# In [None]:
# msft_notebook.ipynb (Notebook code)

# Section 1: Import libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Section 2: Load data
# Download MSFT price history from Yahoo Finance for the requested date range.
data = yf.download('MSFT', start='2015-01-01', end='2024-12-31')

# Depending on the installed yfinance release, a single-ticker download may come
# back with two-level (field, ticker) column labels. Collapse them to the field
# names so that data['Close'] is a plain Series in the sections that follow.
if isinstance(data.columns, pd.MultiIndex):
    data.columns = data.columns.get_level_values(0)

# Take an explicit copy of the column subset: new columns are assigned to this
# frame later, and pandas can otherwise flag those writes as assignments on a
# slice of the downloaded frame (SettingWithCopyWarning).
data = data[['Open', 'High', 'Low', 'Close', 'Volume']].copy()

# Keep the index values available as an ordinary column as well.
data['Date'] = data.index

# Section 3: Feature Engineering
# Derive two rolling means of Close (10-row and 50-row windows) and the next
# row's Close as the prediction target, then discard every row that contains a
# missing value (the rolling warm-up rows and the final row with no next Close).
closing = data['Close']
data = data.assign(
    MA10=closing.rolling(window=10).mean(),
    MA50=closing.rolling(window=50).mean(),
    Target=closing.shift(periods=-1),
).dropna()

# Section 4: Train-test split
# Predictors: the current Close and its two rolling means.
# Label: the next row's Close (the 'Target' column).
features = ['Close', 'MA10', 'MA50']
X = data[features]
y = data['Target']

# Split chronologically instead of at random. The rows are time-ordered and the
# features/target of neighbouring rows overlap (rolling windows, next-row
# target), so a shuffled split would place test rows in between training rows
# and leak future information into the fit. With shuffle=False the first 80% of
# rows are used for training and the last 20% for testing, which also keeps
# y_test in date order for plotting. random_state is omitted because it only
# affects shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# Section 5: Train model (Linear Regression)
# Build the estimator and fit it on the training split in a single expression;
# fit() returns the fitted estimator, so lr_model is the trained model.
lr_model = LinearRegression().fit(X_train, y_train)

# Section 6: Evaluation
# Predict on the held-out rows and report three error/fit metrics.
y_pred = lr_model.predict(X_test)

print("Linear Regression")

# Each entry pairs the printed label with a callable taking (actual, predicted).
# RMSE is the square root of the mean squared error.
metric_table = (
    ("MAE:", mean_absolute_error),
    ("RMSE:", lambda actual, predicted: np.sqrt(mean_squared_error(actual, predicted))),
    ("R2 Score:", r2_score),
)
for label, metric in metric_table:
    print(label, metric(y_test, y_pred))

# Section 7: Save model (optional)
# Persist the fitted linear model to 'model.pkl' in the working directory.
import joblib

joblib.dump(value=lr_model, filename='model.pkl')

# Section 8: Visualization
# Draw the held-out target values and the model's predictions on one set of
# axes, in the order the test rows appear.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(y_test.values, label='Actual')
ax.plot(y_pred, label='Predicted')
ax.set_title('MSFT Stock Price Prediction')
ax.legend()
plt.show()
