In [26]:
import torch
import os
import json
import utils
import sys
import model.net as net  # Ensure this matches your model definition
import numpy as np
import pandas as pd
from pandas.tseries.offsets import BDay 

from scipy import stats
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.dates as mdates
# matplotlib.use("TkAgg")  # Use a GUI-compatible backend
matplotlib.use('Agg')  # replace TkAgg as Agg

## Prediction Graph

In [22]:
# Load trained model
current_dir = os.getcwd()

model_dir = os.path.join("experiments", "base_model")
params = utils.Params(os.path.join(model_dir, "params.json"))
params.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = net.Net(params).to(params.device)
utils.load_checkpoint(os.path.join(model_dir, "best.pth.tar"), model) # test a bunch of model
model.eval()

# Load dataset. The CSV is read once; `full_data` is kept as a second name for
# the same frame because later code refers to it. Nothing below mutates the
# frame in place, so sharing it is safe.
data_path = os.path.join("..", "data", "stock", "amzn_stock_wsenti.csv") #  load data here
data = pd.read_csv(data_path, parse_dates=['Date'])
data['Date'] = pd.to_datetime(data['Date'])
data.set_index("Date", inplace=True)
full_data = data


forecast_start = "2025-02-18"  # date started to predict
train_window = 30
num_covariates = 9
future_steps = 5  # predict number of days

# Get the conditioning window: the last `train_window` rows strictly BEFORE
# `forecast_start`. A label slice (`.loc[:forecast_start]`) is inclusive and
# would put the forecast day's own close/return into the inputs used to
# predict that very day (look-ahead leakage).
# Assumes the CSV rows are in chronological order.
history = full_data.loc[full_data.index < pd.Timestamp(forecast_start)]
# `.ffill()` returns a new frame, so the source data is never modified and no
# chained-assignment warning is raised (unlike `fillna(..., inplace=True)` on a slice).
last_30_days = history.iloc[-train_window:].ffill()
if len(last_30_days) < train_window:
    raise ValueError(
        f"Need {train_window} rows before {forecast_start}, found {len(last_30_days)}"
    )
price_data = last_30_days[['High', 'Low', 'Open', 'Close', 'Volume', 'Sentiment_Score', 'Daily Return']]
# Generate covariates (same as before)
def gen_covariates(times, price_data, num_covariates):
    """Build the z-scored covariate matrix for a window of trading days.

    Columns written (so ``num_covariates`` must be at least 9):
        0: weekday of each timestamp
        1: month of each timestamp
        2: Close lagged 5 rows
        3: Volume lagged 5 rows
        4: intraday return (Close - Open) / Open, lagged 5 rows
        5: Close minus its 5-row rolling mean
        6: MACD (EMA12 - EMA26 of Close), lagged 2 rows
        7: (High - Low) / Close
        8: Sentiment_Score lagged 5 rows

    Args:
        times: sequence of datetime-like objects exposing ``weekday()`` and
            ``month``; its length sets the number of rows.
        price_data: DataFrame with columns 'Open', 'High', 'Low', 'Close',
            'Volume' and 'Sentiment_Score', one row per entry of ``times``.
        num_covariates: number of columns in the returned matrix.

    Returns:
        ``np.ndarray`` of shape ``(len(times), num_covariates)``. Warm-up rows
        of lagged/rolling features and constant columns are filled with 0.

    NOTE(review): this changes the values fed to the model for every
    lagged/rolling column. If the training pipeline z-scored with NaN
    propagation (which zeroes those columns entirely), confirm that training
    and inference still agree.
    """
    def _standardize(values):
        # Lagged/rolling series start with NaN. The default nan_policy
        # ('propagate') makes the mean NaN, so the WHOLE column becomes NaN
        # and is then zeroed by nan_to_num. 'omit' z-scores the valid entries
        # and leaves only the warm-up rows as NaN.
        return stats.zscore(np.asarray(values, dtype=float), nan_policy='omit')

    close = price_data['Close']
    covariates = np.zeros((len(times), num_covariates))

    # Calendar features
    covariates[:, 0] = _standardize([t.weekday() for t in times])
    covariates[:, 1] = _standardize([t.month for t in times])

    # Lagged price / volume features
    covariates[:, 2] = _standardize(close.shift(5).values)
    covariates[:, 3] = _standardize(price_data['Volume'].shift(5).values)
    intraday_return = (close - price_data['Open']) / price_data['Open']
    covariates[:, 4] = _standardize(intraday_return.shift(5).values)

    # Deviation from the 5-row moving average
    ma5 = close.rolling(window=5).mean()
    covariates[:, 5] = _standardize((close - ma5).values)

    # MACD, lagged 2 rows
    exp1 = close.ewm(span=12, adjust=False).mean()
    exp2 = close.ewm(span=26, adjust=False).mean()
    macd = exp1 - exp2
    covariates[:, 6] = _standardize(macd.shift(2).values)

    # Daily range relative to close
    volatility = (price_data['High'] - price_data['Low']) / close
    covariates[:, 7] = _standardize(volatility.values)

    covariates[:, 8] = _standardize(price_data['Sentiment_Score'].shift(5).values)

    # Remaining NaNs (warm-up rows, zero-variance columns) become 0.
    return np.nan_to_num(covariates)

covariates = gen_covariates(last_30_days.index, price_data, num_covariates)

# Prepare initial input tensor, built as (batch=1, time, features) and then
# permuted to (time, batch, features). Feature 0 is the daily return (row 0 is
# left at 0), features 1..num_covariates are the covariates.
x_input = np.zeros((1, train_window, 1 + num_covariates), dtype='float32')
x_input[0, 1:, 0] = last_30_days['Daily Return'].values[1:]
x_input[0, :, 1:1 + num_covariates] = covariates[-train_window:, :]
new_input_tensor = torch.tensor(x_input, dtype=torch.float32).permute(1, 0, 2).to(params.device)

# Generate future trading days
start_date = pd.to_datetime(forecast_start)
# Use business days offset to generate trading days.
# BDay only skips weekends; exchange holidays are not excluded.
future_trading_days = pd.date_range(start=start_date, periods=future_steps, freq=BDay())

# Predict for trading days
batch_size = new_input_tensor.shape[1]
hidden = model.init_hidden(batch_size)
cell = model.init_cell(batch_size)
idx = torch.zeros(1, batch_size, dtype=torch.long, device=params.device)
predictions = []

# NOTE(review): only the last time step is fed to the model, starting from
# freshly initialised hidden/cell state; the earlier rows of the window never
# reach the network. Confirm this is the intended conditioning scheme.
# NOTE(review): np.roll(shift=-1) wraps the oldest row to the end, so from the
# second step on the covariates fed to the model are those of the oldest day
# in the window (only feature 0 is overwritten with the prediction).
with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(future_steps):
        # Non-in-place unsqueeze: add the leading time dimension to a fresh view.
        mu, sigma, hidden, cell = model(new_input_tensor[-1].unsqueeze(0), idx, hidden, cell)
        next_value = float(mu.cpu().detach().numpy().squeeze())
        predictions.append(next_value)
        new_input = np.roll(new_input_tensor.cpu().numpy(), shift=-1, axis=0)
        new_input[-1, 0, 0] = next_value
        new_input_tensor = torch.tensor(new_input, dtype=torch.float32).to(params.device)

# Convert returns to stock prices by compounding from the last observed close
last_price = last_30_days['Close'].iloc[-1]
predicted_prices = [last_price]
for ret in predictions:
    next_price = predicted_prices[-1] * (1 + ret)
    predicted_prices.append(next_price)
predicted_prices = predicted_prices[1:]  # Remove the initial price

# Plot predictions
plt.figure(figsize=(12, 6))
plt.plot(future_trading_days, predicted_prices, marker='o', linestyle='-', label="Predicted Prices")
plt.axvline(x=pd.to_datetime(forecast_start), color='red', linestyle='--', label='Prediction Start')
plt.title("Predicted Stock Prices (Trading Days Only)")
plt.xlabel("Date")
plt.ylabel("Stock Price")
plt.xticks(rotation=45)
plt.legend()
plt.grid()
plt.tight_layout()  # keep the rotated date labels inside the saved image

figures_dir = os.path.join(model_dir, "figures")
os.makedirs(figures_dir, exist_ok=True)
plot_path = os.path.join(figures_dir, "amzn_predict_trading_days.png")
plt.savefig(plot_path)
plt.close()  # release the figure so repeated runs do not accumulate open figures
print(f"Plot saved to {plot_path}")

  checkpoint = torch.load(checkpoint, map_location='cpu')
  last_30_days.fillna(method='ffill', inplace=True)


Plot saved to experiments/base_model/figures/amzn_predict_trading_days.png


## Comparison Graph

In [42]:
# Setup paths
capstone_dir = os.path.join(os.path.expanduser("~"), "CAPSTONE-stockreturn")
model_dir = os.path.join(capstone_dir, "DeepAR", "experiments", "cvs_base_model")
data_path = os.path.join(capstone_dir, "data", "stock", "cvs_stock_wsenti.csv")

# Load the dataset
data = pd.read_csv(data_path, parse_dates=['Date'])
data['Date'] = pd.to_datetime(data['Date'])
data.set_index("Date", inplace=True)

# Parameters for prediction
forecast_start = "2025-02-18"  # date started to predict
end_date = "2025-02-24"
train_window = 30
num_covariates = 9
future_steps = 5  # predict number of days

# Load model (assuming utils and net modules are in the same directory)
# NOTE(review): `utils` and `net` are already imported in the first cell, so
# this path entry only affects imports performed after this point.
sys.path.append(os.path.join(capstone_dir, "DeepAR"))

params = utils.Params(os.path.join(model_dir, "params.json"))
params.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = net.Net(params).to(params.device)
utils.load_checkpoint(os.path.join(model_dir, "epoch_14.pth.tar"), model)
model.eval()

# Get the conditioning window: the last `train_window` rows strictly BEFORE
# `forecast_start`. A label slice (`.loc[:forecast_start]`) is inclusive and
# would put the forecast day's own close/return into the inputs, which would
# contaminate the comparison against actual prices on that same day.
# Assumes the CSV rows are in chronological order.
history = data.loc[data.index < pd.Timestamp(forecast_start)]
# `.ffill()` returns a new frame, so `data` is never modified and no
# chained-assignment warning is raised (unlike `fillna(..., inplace=True)` on a slice).
last_30_days = history.iloc[-train_window:].ffill()
if len(last_30_days) < train_window:
    raise ValueError(
        f"Need {train_window} rows before {forecast_start}, found {len(last_30_days)}"
    )
price_data = last_30_days[['High', 'Low', 'Open', 'Close', 'Volume', 'Sentiment_Score', 'Daily Return']]

# Generate covariates
def gen_covariates(times, price_data, num_covariates):
    """Build the z-scored covariate matrix for a window of trading days.

    Columns written (so ``num_covariates`` must be at least 9):
        0: weekday of each timestamp
        1: month of each timestamp
        2: Close lagged 5 rows
        3: Volume lagged 5 rows
        4: intraday return (Close - Open) / Open, lagged 5 rows
        5: Close minus its 5-row rolling mean
        6: MACD (EMA12 - EMA26 of Close), lagged 2 rows
        7: (High - Low) / Close
        8: Sentiment_Score lagged 5 rows

    Args:
        times: sequence of datetime-like objects exposing ``weekday()`` and
            ``month``; its length sets the number of rows.
        price_data: DataFrame with columns 'Open', 'High', 'Low', 'Close',
            'Volume' and 'Sentiment_Score', one row per entry of ``times``.
        num_covariates: number of columns in the returned matrix.

    Returns:
        ``np.ndarray`` of shape ``(len(times), num_covariates)``. Warm-up rows
        of lagged/rolling features and constant columns are filled with 0.

    NOTE(review): this changes the values fed to the model for every
    lagged/rolling column. If the training pipeline z-scored with NaN
    propagation (which zeroes those columns entirely), confirm that training
    and inference still agree.
    """
    def _standardize(values):
        # Lagged/rolling series start with NaN. The default nan_policy
        # ('propagate') makes the mean NaN, so the WHOLE column becomes NaN
        # and is then zeroed by nan_to_num. 'omit' z-scores the valid entries
        # and leaves only the warm-up rows as NaN.
        return stats.zscore(np.asarray(values, dtype=float), nan_policy='omit')

    close = price_data['Close']
    covariates = np.zeros((len(times), num_covariates))

    # Calendar features
    covariates[:, 0] = _standardize([t.weekday() for t in times])
    covariates[:, 1] = _standardize([t.month for t in times])

    # Lagged price / volume features
    covariates[:, 2] = _standardize(close.shift(5).values)
    covariates[:, 3] = _standardize(price_data['Volume'].shift(5).values)
    intraday_return = (close - price_data['Open']) / price_data['Open']
    covariates[:, 4] = _standardize(intraday_return.shift(5).values)

    # Deviation from the 5-row moving average
    ma5 = close.rolling(window=5).mean()
    covariates[:, 5] = _standardize((close - ma5).values)

    # MACD, lagged 2 rows
    exp1 = close.ewm(span=12, adjust=False).mean()
    exp2 = close.ewm(span=26, adjust=False).mean()
    macd = exp1 - exp2
    covariates[:, 6] = _standardize(macd.shift(2).values)

    # Daily range relative to close
    volatility = (price_data['High'] - price_data['Low']) / close
    covariates[:, 7] = _standardize(volatility.values)

    covariates[:, 8] = _standardize(price_data['Sentiment_Score'].shift(5).values)

    # Remaining NaNs (warm-up rows, zero-variance columns) become 0.
    return np.nan_to_num(covariates)

covariates = gen_covariates(last_30_days.index, price_data, num_covariates)

# Prepare initial input tensor, built as (batch=1, time, features) and then
# permuted to (time, batch, features). Feature 0 is the daily return (row 0 is
# left at 0), features 1..num_covariates are the covariates.
x_input = np.zeros((1, train_window, 1 + num_covariates), dtype='float32')
x_input[0, 1:, 0] = last_30_days['Daily Return'].values[1:]
x_input[0, :, 1:1 + num_covariates] = covariates[-train_window:, :]
new_input_tensor = torch.tensor(x_input, dtype=torch.float32).permute(1, 0, 2).to(params.device)

# Generate future trading days.
# BDay only skips weekends; exchange holidays are not excluded.
start_date = pd.to_datetime(forecast_start)
future_trading_days = pd.date_range(start=start_date, periods=future_steps, freq=BDay())

# Predict for trading days
batch_size = new_input_tensor.shape[1]
hidden = model.init_hidden(batch_size)
cell = model.init_cell(batch_size)
idx = torch.zeros(1, batch_size, dtype=torch.long, device=params.device)
predictions = []

# NOTE(review): only the last time step is fed to the model, starting from
# freshly initialised hidden/cell state; the earlier rows of the window never
# reach the network. Confirm this is the intended conditioning scheme.
# NOTE(review): np.roll(shift=-1) wraps the oldest row to the end, so from the
# second step on the covariates fed to the model are those of the oldest day
# in the window (only feature 0 is overwritten with the prediction).
with torch.no_grad():  # inference only: no autograd graph needed
    for _ in range(future_steps):
        mu, sigma, hidden, cell = model(new_input_tensor[-1].unsqueeze(0), idx, hidden, cell)
        # Store a plain Python float rather than a 0-d array.
        next_value = float(mu.cpu().detach().numpy().squeeze())
        predictions.append(next_value)
        new_input = np.roll(new_input_tensor.cpu().numpy(), shift=-1, axis=0)
        new_input[-1, 0, 0] = next_value
        new_input_tensor = torch.tensor(new_input, dtype=torch.float32).to(params.device)

# Convert returns to stock prices by compounding from the last observed close
last_price = last_30_days['Close'].iloc[-1]
predicted_prices = [last_price]
for ret in predictions:
    next_price = predicted_prices[-1] * (1 + ret)
    predicted_prices.append(next_price)
predicted_prices = predicted_prices[1:]  # Remove the initial price

# Actual closes inside the comparison window [forecast_start, end_date]
actual_data = data.reset_index()
on_or_after_start = actual_data["Date"] >= forecast_start
on_or_before_end = actual_data["Date"] <= end_date
actual_data = actual_data[on_or_after_start & on_or_before_end]

# One figure holding both series; it stays the current pyplot figure so that
# later code can keep drawing on it.
fig, ax = plt.subplots(figsize=(12, 6))

# Forecast line
ax.plot(
    future_trading_days,
    predicted_prices,
    marker='o',
    linestyle='-',
    color='blue',
    label="Predicted Prices",
)

# Observed closes
ax.plot(
    actual_data["Date"],
    actual_data["Close"],
    marker='x',
    linestyle='--',
    color='green',
    label="Actual Prices",
)

# Mark where the forecast begins
ax.axvline(x=pd.to_datetime(forecast_start), color='red', linestyle='--', label='Prediction Start')

# Titles, labels and one date tick per day
ax.set_title("CVS Stock: Predicted vs Actual Prices")
ax.set_xlabel("Date")
ax.set_ylabel("Stock Price")
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m-%d'))
ax.xaxis.set_major_locator(mdates.DayLocator())
plt.xticks(rotation=45)
ax.legend()
ax.grid(True)
fig.tight_layout()

# Write the figure next to the model artifacts
figures_dir = os.path.join(model_dir, "figures")
os.makedirs(figures_dir, exist_ok=True)
plot_path = os.path.join(figures_dir, "cvs_prediction_vs_actual.png")
fig.savefig(plot_path)
print(f"Comparison plot saved to {plot_path}")


# with evaluation metrics
# Make sure both sides use exactly the same date format by comparing the
# dates as 'YYYY-MM-DD' strings.
actual_data_dates_str = [d.strftime('%Y-%m-%d') for d in actual_data["Date"]]
future_days_str = [d.strftime('%Y-%m-%d') for d in future_trading_days]

# Row position of the first occurrence of each actual date.
actual_pos_by_date = {}
for actual_pos, date_str in enumerate(actual_data_dates_str):
    actual_pos_by_date.setdefault(date_str, actual_pos)

# Pair every forecast step with the actual close on the same date, if any.
# `matching_indices` holds the forecast step (0-based) of each matched pair.
matching_actual = []
matching_indices = []

for step, pred_date_str in enumerate(future_days_str):
    actual_pos = actual_pos_by_date.get(pred_date_str)
    if actual_pos is not None:
        matching_actual.append(actual_data["Close"].iloc[actual_pos])
        matching_indices.append(step)

if not matching_indices:
    raise ValueError("No actual prices overlap the forecast dates; cannot compute metrics.")

# Convert to numpy arrays for the metric computations
matching_actual = np.array(matching_actual)
predicted_prices = np.array(predicted_prices)
matched_steps = np.array(matching_indices)
# Keep only the predictions that have an actual value on the same date, so the
# two arrays are aligned element by element (a forecast date missing from the
# data, e.g. a market holiday, would otherwise mis-pair or break the arithmetic).
matched_predicted = predicted_prices[matched_steps]

# Basic metrics
abs_errors = np.abs(matched_predicted - matching_actual)
mae = np.mean(abs_errors)
mape = np.mean(np.abs((matching_actual - matched_predicted) / matching_actual)) * 100
rmse = np.sqrt(np.mean((matched_predicted - matching_actual)**2))

# Direction accuracy (needs at least two matched points to form a difference)
if len(matching_actual) > 1:
    actual_direction = np.diff(matching_actual) > 0
    predicted_direction = np.diff(matched_predicted) > 0
    direction_accuracy = np.mean(actual_direction == predicted_direction) * 100
else:
    direction_accuracy = float("nan")

# Short-term vs long-term accuracy, selected by forecast step so the labels
# (days 1-2 and days 4+) stay correct even when some dates are unmatched.
short_term_mask = matched_steps < 2
long_term_mask = matched_steps >= 3
short_term_mae = np.mean(abs_errors[short_term_mask]) if short_term_mask.any() else float("nan")
long_term_mae = np.mean(abs_errors[long_term_mask]) if long_term_mask.any() else float("nan")

# Add a text box with metrics to the plot
metrics_text = (
    f"Metrics:\n"
    f"MAE: ${mae:.2f}\n"
    f"MAPE: {mape:.2f}%\n"
    f"RMSE: ${rmse:.2f}\n"
    f"Direction Accuracy: {direction_accuracy:.1f}%\n"
    f"Short-term MAE (1-2d): ${short_term_mae:.2f}\n"
    f"Long-term MAE (4-5d): ${long_term_mae:.2f}"
)

# Position the text box in the upper right corner with some padding.
# This draws on the current pyplot figure, i.e. the comparison plot created above.
plt.annotate(
    metrics_text,
    xy=(0.97, 0.97),
    xycoords='axes fraction',
    fontsize=9,
    ha='right',
    va='top',
    bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.8)
)

# Legend and grid are already set on this figure; only the layout needs refreshing.
plt.tight_layout()

# Save the figure with the added metrics
figures_dir = os.path.join(model_dir, "figures")
os.makedirs(figures_dir, exist_ok=True)
plot_path = os.path.join(figures_dir, "cvs_prediction_vs_actual_with_metrics.png")
plt.savefig(plot_path)
plt.close()  # release the figure so repeated runs do not accumulate open figures
print(f"Comparison plot with metrics saved to {plot_path}")

  checkpoint = torch.load(checkpoint, map_location='cpu')
  last_30_days.fillna(method='ffill', inplace=True)


Comparison plot saved to /home/yic075/CAPSTONE-stockreturn/DeepAR/experiments/cvs_base_model/figures/cvs_prediction_vs_actual.png
Comparison plot with metrics saved to /home/yic075/CAPSTONE-stockreturn/DeepAR/experiments/cvs_base_model/figures/cvs_prediction_vs_actual_with_metrics.png


## Actual Price Graph

In [21]:
import pandas as pd
import matplotlib.pyplot as plt

# Locate and read the AMZN price file
current_dir = os.getcwd()
capstone_dir = os.path.join(os.path.expanduser("~"), "CAPSTONE-stockreturn")
file_path = os.path.join(capstone_dir, "data", "stock", "amzn_stock_wsenti.csv")
data = pd.read_csv(file_path, parse_dates=["Date"])

# Keep only the rows whose date lies in [start_date, end_date]
start_date = "2025-02-18"
end_date = "2025-02-24"
on_or_after_start = data["Date"] >= start_date
on_or_before_end = data["Date"] <= end_date
filtered_data = data[on_or_after_start & on_or_before_end]

# Draw the closing price for the selected days
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(
    filtered_data["Date"],
    filtered_data["Close"],
    marker='o',
    linestyle='-',
    label="Close Price",
)
ax.set_xlabel("Date")
ax.set_ylabel("Close Price")
ax.set_title(f"AMZN Stock Close Price from {start_date} to {end_date}")
# One tick per plotted date, so only the selected range is labelled
plt.xticks(filtered_data["Date"], rotation=45)
ax.legend()
ax.grid()

# Write the figure to disk rather than displaying it
figures_dir = os.path.join(capstone_dir, "DeepAR", "experiments", "base_model", "figures")
os.makedirs(figures_dir, exist_ok=True)
plot_path = os.path.join(figures_dir, "amzn_stock_plot.png")
fig.savefig(plot_path)
print(f"Plot saved to {plot_path}")

Plot saved to /home/yic075/CAPSTONE-stockreturn/DeepAR/experiments/base_model/figures/amzn_stock_plot.png
