# Stock Market Analysis and Forecasting

This notebook demonstrates how to use the stock forecasting pipeline for analyzing and predicting stock prices.

In [None]:
# Import necessary libraries
import os
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Add project root to path for imports
sys.path.append('..')

# Import project modules
from src.data.data_loader import StockDataLoader
from src.data.feature_engineering import FeatureEngineer
from src.models.model_training import ModelTrainer
from src.models.prediction import StockPredictor
from src.visualization.visualize import StockVisualizer
from src.pipeline.pipeline import StockForecastingPipeline

# Set plotting style
# Global plot appearance for the notebook: a matplotlib style sheet plus a
# seaborn plotting context. Both calls change process-wide settings, so every
# figure drawn by later cells picks them up.
# NOTE(review): the 'seaborn-v0_8-*' style names exist only in matplotlib >= 3.6;
# older releases name this style 'seaborn-darkgrid' — confirm the pinned version.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_context("talk")

# Ensure all plots are displayed in the notebook
# This is an IPython line magic, not Python syntax, so the cell only runs
# under IPython/Jupyter.
%matplotlib inline

## 1. Load and Explore Stock Data

First, let's load historical stock data for a few companies and explore it.

In [None]:
# Symbols to fetch and the period to cover (dates are 'YYYY-MM-DD' strings)
tickers = ['AAPL', 'MSFT', 'GOOGL']
start_date, end_date = '2022-01-01', '2023-12-31'

# Loader configured with ../data/raw as its output directory
data_loader = StockDataLoader(output_dir='../data/raw')

# Fetch every ticker in one call; the result is indexed by ticker symbol below
stock_data = data_loader.download_multiple_stocks(tickers, start_date, end_date)

# Preview the first rows of the Apple data
stock_data['AAPL'].head()

In [None]:
# One visualizer instance, reused by the plotting cells further down
visualizer = StockVisualizer(output_dir='../visualization')

# Draw Apple's price history; save=False is passed so the figure is only
# shown in the notebook (presumably nothing is written to output_dir)
aapl_history = stock_data['AAPL']
fig, axes = visualizer.plot_stock_price(aapl_history, 'AAPL', save=False)
plt.show()

## 2. Feature Engineering

Now, let's engineer features that will be useful for predicting stock prices.

In [None]:
# Feature engineer configured with the raw-data and feature directories
feature_engineer = FeatureEngineer(input_dir='../data/raw', output_dir='../data/features')

# Work on a copy so the downloaded frame in stock_data is left untouched
apple_data = stock_data['AAPL'].copy()

# Each stage gets its own name so intermediate results stay inspectable:
# time-based features -> technical indicators -> target variables
apple_timed = feature_engineer.add_time_features(apple_data)
apple_with_indicators = feature_engineer.add_technical_indicators(apple_timed)
apple_features = feature_engineer.add_target_variables(
    apple_with_indicators,
    forecast_horizon=5,  # targets are built for a 5-day horizon
)

# Preview the engineered feature table
apple_features.head()

In [None]:
# Indicator columns to draw (expected to exist in apple_features after the
# technical-indicator step — verify against the feature engineer's output)
indicators = [
    'SMA_20',
    'SMA_50',
    'BB_upper',
    'BB_lower',
]

fig, ax = visualizer.plot_technical_indicators(
    apple_features,
    'AAPL',
    indicators=indicators,
    save=False,
)
plt.show()

## 3. Model Training

Let's train a model to predict future stock prices.

In [None]:
# Initialize model trainer
model_trainer = ModelTrainer(data_dir='../data/features', models_dir='../models')

# Define target column (5-day future price)
# NOTE(review): presumably this is the column produced by
# add_target_variables(..., forecast_horizon=5) — keep the two in sync.
target_col = 'future_price_5d'

# Prepare data for training: 20% of the rows are held out for testing and
# time_series_split=True is requested for the split. The call also returns the
# fitted scaler and the list of feature columns, both reused when predicting.
X_train, X_test, y_train, y_test, scaler, feature_cols = model_trainer.prepare_data(
    apple_features, target_col, test_size=0.2, time_series_split=True
)

# Train a random forest model
model = model_trainer.train_model(X_train, y_train, model_type='random_forest', n_estimators=100)

# Evaluate the model on the held-out split
metrics = model_trainer.evaluate_model(model, X_test, y_test)

# The line break must be written as an escape: a raw newline inside a
# single-quoted f-string is an unterminated literal and stops the cell parsing.
print(f"Model evaluation metrics:\n{metrics}")

## 4. Making Predictions

Now that we have a trained model, let's use it to make predictions.

In [None]:
# Predictor configured with the model and prediction directories
predictor = StockPredictor(models_dir='../models', output_dir='../data/predictions')

# Bundle the trained model with everything needed to reuse it: the fitted
# scaler, the feature column list, and identifying metadata
model_package = {
    'model': model,
    'scaler': scaler,
    'feature_cols': feature_cols,
    'ticker': 'AAPL',
    'target_col': target_col,
    'model_type': 'random_forest'
}

# Predict over the full engineered feature table
# NOTE(review): this includes the rows the model was trained on — confirm that
# in-sample predictions are intended for this plot.
predictions = predictor.predict(model_package, apple_features)

# Pair each prediction with the closing price, sharing the feature table's index
result_df = pd.DataFrame(index=apple_features.index).assign(
    actual_price=apple_features['Close'],
    predicted_price=predictions,
)

# Plot predicted against actual prices
fig, ax = visualizer.plot_predictions(result_df, 'AAPL', days_ahead=5, save=False)
plt.show()

## 5. Feature Importance

Let's examine which features are most important for our model.

In [None]:
# Show the 15 highest-ranked features for the packaged model
fig, ax = visualizer.plot_feature_importance(
    model_package,
    top_n=15,
    save=False,
)
plt.show()

## 6. Running the Complete Pipeline

Now, let's use the complete pipeline to process multiple stocks.

In [None]:
# Initialize the pipeline for three tickers over 2022-2023, with a 5-day
# forecast horizon and a random forest model
pipeline = StockForecastingPipeline(
    tickers=['AAPL', 'MSFT', 'GOOGL'],
    start_date='2022-01-01',
    end_date='2023-12-31',
    forecast_horizon=5,
    model_type='random_forest'
)

# The full run takes some time, so it is opt-in: set this flag to True to
# execute it. A flag keeps the code below syntax-checked; the commented-out
# version had a line that escaped its comment and broke the cell.
RUN_FULL_PIPELINE = False

if RUN_FULL_PIPELINE:
    # Run the pipeline
    results = pipeline.run_pipeline()

    # After running the pipeline, generate and print a performance report
    report = pipeline.generate_report(results['predictions'])
    print(f"Performance Report:\n{report}")