# Overview
- Goal: Predict future stock prices from historical data using a simple regression model.
- Concepts Covered: Time series forecasting, feature extraction, regression.

# Tools/Packages
Make sure you have the following Python packages installed:
```bash
pip install pandas numpy matplotlib scikit-learn yfinance
```

## Step 1: Import Libraries
Start by importing the necessary libraries:

In [1]:
# Standard imports
from datetime import datetime

# Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

## Step 2: Fetch Stock Data
Use the `yfinance` library to fetch historical stock data. For example, let's use Nvidia (NVDA):

In [None]:
# Fetch daily NVDA price history from Yahoo Finance for the requested window.
stock_data = yf.download("NVDA", start="2020-01-01", end="2024-12-31")

# yfinance typically logs a failed download and hands back an empty frame
# instead of raising, so stop here with a clear message rather than failing
# later in an unrelated cell.
if stock_data.empty:
    raise ValueError(
        "No data returned for NVDA; check the ticker, date range, and network connection."
    )

# Newer yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker. Keep only the field level so that stock_data['Close'] is a
# Series, which is what the later cells assume.
if isinstance(stock_data.columns, pd.MultiIndex):
    stock_data.columns = stock_data.columns.get_level_values(0)

# Preview the first rows to confirm the download looks sane.
print(stock_data.head())

## Step 3: Data Preparation
Prepare the data for prediction analysis:

In [45]:
# Use the 'Close' price for predictions.
# Expose the date index as a column and derive a numeric feature from it:
# the number of calendar days elapsed since the first row.
stock_data['Date'] = pd.to_datetime(stock_data.index)
stock_data['Day'] = (stock_data['Date'] - stock_data['Date'].min()).dt.days

# Features and target
X = stock_data[['Day']].to_numpy()  # Independent variable: Days (one column)
y = stock_data['Close'].to_numpy()  # Dependent variable: Close prices

# Split into training and testing datasets.
# shuffle=False keeps the rows in chronological order: the first 80% is used
# for training and the most recent 20% for testing. A shuffled split would
# scatter test days between training days, so the score would reflect
# interpolation between known prices rather than forecasting unseen ones.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

## Step 4: Train the Model
Fit a simple linear regression model:

In [None]:
# Build the linear regressor and fit it on the training split in one step
# (fit returns the fitted estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Report the coefficient of determination on the held-out split
print(f"Model R^2 Score {model.score(X_test, y_test):.2f}")

## Step 5: Make Predictions
Predict stock prices using the model:

In [None]:
# Make Predictions
forecast_horizon = 365  # number of calendar days to project past the last observation

# Read the last observation from the frame itself rather than from X, so this
# cell does not depend on X's dtype or column layout.
last_day = int(stock_data['Day'].iloc[-1])
last_date = stock_data['Date'].iloc[-1]

# Day offsets last_day+1 .. last_day+forecast_horizon as a single-column matrix,
# which is the shape the model was fitted on.
future_days = np.arange(last_day + 1, last_day + 1 + forecast_horizon).reshape(-1, 1)
predicted_prices = model.predict(future_days)

# Convert future_days to actual dates. The first forecast is for the day AFTER
# the last historical date, so the date range must start one day later too;
# starting at last_date would plot every prediction one day early.
future_dates = pd.date_range(
    start=last_date + pd.Timedelta(days=1), periods=forecast_horizon
)

# Plot predictions with dates on the x-axis
plt.figure(figsize=(12, 6))

# Plot historical prices
plt.plot(stock_data['Date'], stock_data['Close'], label="Historical Prices")

# Plot predicted prices
plt.plot(future_dates, predicted_prices, label="Predicted Prices", linestyle="--")

# Add labels, title, and legend
plt.xlabel("Date")
plt.ylabel("Stock Price (USD)")
plt.title("Stock Price Prediction")
plt.legend()

# Rotate x-axis labels for better readability
plt.xticks(rotation=45)
plt.tight_layout()

# Show the plot
plt.show()

In [None]:
## Step 6: Feature Engineering
# Add technical indicators as new features.
# NOTE: each rolling window ends on the current row, so MA20/MA50 already
# contain the same day's Close that the model is asked to predict. Shift the
# features by one day before relying on this model for real forecasting.
stock_data['MA20'] = stock_data['Close'].rolling(window=20).mean()  # 20-day moving average
stock_data['MA50'] = stock_data['Close'].rolling(window=50).mean()  # 50-day moving average

# Add price momentum: daily percentage change and its 20-day rolling std
stock_data['Returns'] = stock_data['Close'].pct_change()
stock_data['Volatility'] = stock_data['Returns'].rolling(window=20).std()

# Remove NaN values from the new features (the rolling windows leave the
# first rows undefined until enough history has accumulated)
stock_data = stock_data.dropna()

# Update features for prediction
X = stock_data[['Day', 'MA20', 'MA50', 'Volatility']].to_numpy()
y = stock_data['Close'].to_numpy()

# Split the enhanced dataset chronologically: the earliest 80% of rows train
# the model and the latest 20% test it. Shuffling would mix future rows into
# the training set and overstate how well the model forecasts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

## Step 7: Model Evaluation
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

# Refit a fresh linear model on the current training split
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows
y_pred = model.predict(X_test)

# Error metrics: MAE and RMSE are in price units; MAPE is the mean absolute
# error relative to the actual price, expressed as a percentage
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mape = 100 * np.mean(np.abs((y_test - y_pred) / y_test))

print(f"Mean Absolute Error: ${mae:.2f}")
print(f"Root Mean Squared Error: ${rmse:.2f}")
print(f"Mean Absolute Percentage Error: {mape:.2f}%")

In [None]:
#Step 7