In [1]:
from itertools import islice

from matplotlib import pyplot as plt
import matplotlib.dates as mdates

import torch
from gluonts.evaluation import make_evaluation_predictions, Evaluator
from gluonts.dataset.repository.datasets import get_dataset

from gluonts.dataset.pandas import PandasDataset
from gluonts.dataset.common import ListDataset
from gluonts.model.forecast import SampleForecast

import pandas as pd
import numpy as np

from lag_llama.gluon.estimator import LagLlamaEstimator
import yfinance as yf
from ts_rag import TimeSeriesRAG

from scipy.stats import skew, kurtosis
import faiss
from typing import List, Tuple, Dict



In [None]:
# Report whether PyTorch can see a CUDA device before the model cells below are run.
print(torch.cuda.is_available())

In [2]:
# Minimal FAISS smoke test: index a few random vectors and look up the closest ones.
N_VECTORS = 20   # number of synthetic vectors stored in the index
VECTOR_DIM = 8   # length of each vector; data, index and query must all agree on it
TOP_K = 5        # neighbours requested per query

# Seeded generator so the cell prints the same numbers on every Restart & Run All.
rng = np.random.default_rng(42)

time_series_data = rng.random((N_VECTORS, VECTOR_DIM)).astype('float32')
print(time_series_data)

index = faiss.IndexFlatL2(VECTOR_DIM)
print(index)
index.add(time_series_data)

query = rng.random((1, VECTOR_DIM)).astype('float32')
# D receives the distances reported by the index, I the row positions of the matches.
D, I = index.search(query, TOP_K)

print('Jarak:', D)
print('Indeks:', I)

# Use the returned row positions to pull the matching vectors out of the data.
similar_time_series = time_series_data[I[0]]
print(similar_time_series)

[[0.00369009 0.48747525 0.36218342 0.28627118 0.4686708  0.47406274
  0.33767748 0.15505007]
 [0.08535599 0.6126571  0.9311839  0.0643617  0.21316479 0.6362434
  0.37396628 0.12177262]
 [0.15464704 0.00990624 0.33698612 0.41654465 0.14822309 0.53762555
  0.40876317 0.7903428 ]
 [0.40824002 0.07783359 0.03861525 0.57236826 0.7004606  0.8714922
  0.89963615 0.11670821]
 [0.6481953  0.45665488 0.9394476  0.93626153 0.579651   0.06387179
  0.27538157 0.7074047 ]
 [0.48537177 0.6829692  0.6686738  0.6175575  0.5389405  0.70927894
  0.7924568  0.29609832]
 [0.4092778  0.23745726 0.26349208 0.6909923  0.27169982 0.99399626
  0.20828663 0.9134733 ]
 [0.77403045 0.43002063 0.9089085  0.18681653 0.3867646  0.18769099
  0.03825889 0.8326541 ]
 [0.0308917  0.07205846 0.8901135  0.3127538  0.59862953 0.30868712
  0.31308222 0.40041345]
 [0.6990329  0.3040496  0.35295743 0.7534351  0.62928313 0.9106741
  0.32662997 0.08992064]
 [0.6108108  0.076092   0.01940372 0.80075675 0.7271038  0.29510275
  0.3

In [3]:
def prepare_stock_data(symbol: list, start_date: str) -> pd.DataFrame:
    """Download closing prices and convert them to smoothed relative changes.

    Args:
        symbol: Ticker symbols handed to ``yf.Tickers``.
        start_date: Value forwarded as ``start`` to the history request.

    Returns:
        The percentage change of the 5-row rolling mean of the daily,
        forward-filled ``Close`` data, with rows containing NaN dropped.
    """
    price_history = yf.Tickers(symbol).history(period="max", start=start_date)

    # Resample the closes to a daily frequency, carrying the last known value forward.
    daily_close = price_history.Close.resample('1d').ffill()

    # Smooth first, then take the relative change of the smoothed values.
    smoothed = daily_close.rolling(5).mean()
    return smoothed.pct_change().dropna()

def create_gluonts_dataset(dataset):
    """Build a ``PandasDataset`` from a DataFrame after casting columns to float32.

    The input is copied first, so the caller's frame is not modified. Columns
    whose dtype is ``object`` or a string dtype are left as they are; every
    other column is cast to ``float32``.

    Args:
        dataset: DataFrame whose columns are passed on to ``PandasDataset``.

    Returns:
        ``PandasDataset`` constructed from ``dict(frame)``, i.e. one entry per
        column of the converted copy.
    """
    frame = dataset.copy()

    for name in frame.columns:
        column = frame[name]
        # Skip textual columns; only the remaining ones are converted.
        if column.dtype == "object" or pd.api.types.is_string_dtype(column):
            continue
        frame[name] = column.astype("float32")

    return PandasDataset(dict(frame))

def evaluate_predictions(actual, predictions, prediction_length):
    """Score point forecasts against the tail of the observed values.

    Args:
        actual: Observed values (array-like). Only the last
            ``prediction_length`` entries are compared.
        predictions: Forecast values (array-like) that broadcast against
            that tail.
        prediction_length: Number of trailing observations to evaluate.

    Returns:
        dict with the keys ``"MSE"``, ``"RMSE"``, ``"MAE"`` and ``"MAPE"``.
        MAPE is expressed in percent and is averaged only over positions
        where the observed value is non-zero; it is NaN when every observed
        value in the window is zero.
    """
    # Accept lists / Series as well as arrays.
    actual = np.asarray(actual)[-prediction_length:]
    predictions = np.asarray(predictions)

    errors = actual - predictions
    mse = np.mean(errors ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(errors))

    # A zero observation would make the percentage error infinite and poison
    # the mean, so those positions are left out of MAPE.
    reference = np.broadcast_to(actual, errors.shape)
    nonzero = reference != 0
    if nonzero.any():
        mape = np.mean(np.abs(errors[nonzero] / reference[nonzero])) * 100
    else:
        mape = float("nan")

    return {"MSE": mse, "RMSE": rmse, "MAE": mae, "MAPE": mape}

In [4]:
def get_lag_llama_predictions(dataset, prediction_length, device, context_length=32, use_rope_scaling=False, num_samples=100, ckpt_path="./lag-llama-model/lag-llama.ckpt"):
    """Run a Lag-Llama checkpoint over ``dataset`` and collect the forecasts.

    Args:
        dataset: Dataset passed to ``make_evaluation_predictions``.
        prediction_length: Forecast horizon given to the estimator.
        device: Device used both as ``map_location`` when reading the
            checkpoint and as the estimator's ``device``.
        context_length: Context length given to the estimator.
        use_rope_scaling: When true, linear RoPE scaling is enabled with a
            factor derived from the requested and the checkpoint's context
            lengths.
        num_samples: ``num_samples`` passed to ``make_evaluation_predictions``.
        ckpt_path: Location of the Lag-Llama checkpoint file. Defaults to the
            path this notebook has always used.

    Returns:
        ``(forecasts, tss)``: two lists materialised from the iterators
        returned by ``make_evaluation_predictions``.
    """
    # The checkpoint is opened here only to read the hyper-parameters the model
    # was built with; the estimator loads the weights itself via ckpt_path.
    # weights_only=False keeps today's full-unpickle behaviour explicit, since
    # PyTorch has announced that the default will flip to True.
    # NOTE(security): full unpickling can execute arbitrary code -- only load
    # checkpoints obtained from a trusted source.
    ckpt = torch.load(ckpt_path, map_location=device, weights_only=False)
    estimator_args = ckpt["hyper_parameters"]["model_kwargs"]

    rope_scaling_arguments = None
    if use_rope_scaling:
        # Never scale below 1.0; stretch only when the requested window
        # exceeds the context length stored in the checkpoint.
        rope_scaling_arguments = {
            "type": "linear",
            "factor": max(1.0, (context_length + prediction_length) / estimator_args["context_length"]),
        }

    estimator = LagLlamaEstimator(
        ckpt_path=ckpt_path,
        prediction_length=prediction_length,
        context_length=context_length, # Lag-Llama was trained with a context length of 32, but can work with any context length

        # architecture settings copied from the checkpoint
        input_size=estimator_args["input_size"],
        n_layer=estimator_args["n_layer"],
        n_embd_per_head=estimator_args["n_embd_per_head"],
        n_head=estimator_args["n_head"],
        scaling=estimator_args["scaling"],
        time_feat=estimator_args["time_feat"],
        rope_scaling=rope_scaling_arguments,

        batch_size=1,
        # Fixed at 100 regardless of num_samples (unchanged from the original cell).
        num_parallel_samples=100,
        device=device,
    )

    lightning_module = estimator.create_lightning_module()
    transformation = estimator.create_transformation()
    predictor = estimator.create_predictor(transformation, lightning_module)

    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset,
        predictor=predictor,
        num_samples=num_samples
    )
    # Materialise both iterators so callers can index and re-iterate the results.
    forecasts = list(forecast_it)
    tss = list(ts_it)

    return forecasts, tss

In [5]:
def get_lag_llama_rag_predictions(
    dataset,
    prediction_length,
    device,
    context_length=32,
    use_rope_scaling=False,
    num_samples=100
):
    """Get Lag-Llama predictions augmented with RAG.

    Base forecasts come from ``get_lag_llama_predictions``. For every series,
    similar sequences are retrieved from a fresh ``TimeSeriesRAG`` index, the
    forecast is augmented with them, and the series is then added to the
    index together with its *base* (un-augmented) samples.

    Args:
        dataset: Dataset forwarded to ``get_lag_llama_predictions``.
        prediction_length: Forecast horizon.
        device: Device forwarded to ``get_lag_llama_predictions``.
        context_length: Context length forwarded to the base predictor.
        use_rope_scaling: Forwarded to the base predictor.
        num_samples: Forwarded to the base predictor.

    Returns:
        ``(augmented_forecasts, tss)``: a list with one new ``SampleForecast``
        per base forecast, and the list of target series returned by the base
        predictor. The base forecast objects are not modified.
    """
    # The index starts empty and grows by one entry per processed series.
    rag = TimeSeriesRAG(
        embedding_dim=128,
        num_neighbors=5,
        similarity_threshold=0.7
    )

    base_forecasts, tss = get_lag_llama_predictions(
        dataset,
        prediction_length,
        device,
        context_length,
        use_rope_scaling,
        num_samples
    )

    augmented_forecasts = []

    for forecast, ts in zip(base_forecasts, tss):
        # Column 0 of the series frame holds the target values.
        series_values = ts[0].values
        similar_seqs = rag.retrieve_similar(series_values)

        # NOTE(review): the whole forecast object (not forecast.samples) is
        # passed here, exactly as before -- confirm against TimeSeriesRAG.
        augmented_samples = rag.augment_prediction(forecast, similar_seqs)

        # Build a separate forecast object instead of overwriting the samples
        # of the base forecast: the base forecast stays intact and the new
        # object starts without any cached statistics.
        # Assumes augment_prediction returns an array-like laid out like
        # forecast.samples -- TODO confirm.
        augmented_forecasts.append(
            SampleForecast(
                samples=np.asarray(augmented_samples),
                start_date=forecast.start_date,
                item_id=forecast.item_id,
                info=forecast.info,
            )
        )

        # Store the base samples for later lookups; forecast was not mutated
        # above, so these are the model's own draws.
        rag.add_to_index(
            series_values,
            {
                'historical_prediction': forecast.samples,
                'metadata': {'item_id': forecast.item_id}
            }
        )

    return augmented_forecasts, tss


In [6]:
def predict_stock_with_rag(symbol, start_date, prediction_length=5, context_length=20, device="cuda:0"):
    """Forecast the prepared series for ``symbol`` and score the forecasts.

    Args:
        symbol: Ticker symbols passed to ``prepare_stock_data``.
        start_date: Start date passed to ``prepare_stock_data``.
        prediction_length: Forecast horizon; also the length of the trailing
            window of each series used as ground truth.
        context_length: Context length passed to the predictor.
        device: Device passed to the predictor.

    Returns:
        ``(predictions, metrics)``: ``predictions`` is an array with one row
        of mean forecasts per series; ``metrics`` is the dict produced by
        ``evaluate_predictions``, pooled over all series.

    Raises:
        ValueError: If no forecast was produced.
    """
    df = prepare_stock_data(symbol, start_date)
    dataset = create_gluonts_dataset(df)

    # Get predictions using Lag-Llama augmented with RAG
    forecasts, tss = get_lag_llama_rag_predictions(dataset, prediction_length, device, context_length, num_samples=100)
    if not forecasts:
        raise ValueError("no forecasts were produced for the requested symbols")

    predictions = np.array([forecast.mean for forecast in forecasts])

    # Ground truth is taken from the series returned next to each forecast
    # (column 0 holds the target) instead of a 'target' column of df, which
    # does not exist: the prepared frame is keyed by ticker symbol.
    actual = np.concatenate(
        [np.asarray(ts[0].values)[-prediction_length:] for ts in tss]
    )

    # Flatten both sides so every series contributes to the pooled metrics;
    # passing the full length makes evaluate_predictions keep every value.
    metrics = evaluate_predictions(actual, predictions.ravel(), actual.size)
    return predictions, metrics


In [7]:
# Prepare both tickers, then split by row position: the first 70% of the rows
# become the training set and the remainder the test set.
df = prepare_stock_data(['AAPL', 'MSFT'], '2013-01-01')
split_row = int(0.7*len(df))
train_dataset = create_gluonts_dataset(df.iloc[:split_row])
test_dataset = create_gluonts_dataset(df.iloc[split_row:])

[*********************100%***********************]  2 of 2 completed


In [None]:
prediction_length = 60
context_length = 32
# Use the first GPU when one is present; otherwise fall back to the CPU rather
# than failing on "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Release cached GPU memory left over from earlier cells before loading the model.
    torch.cuda.empty_cache()

forecasts_rag, tss = get_lag_llama_rag_predictions(test_dataset, prediction_length, device, context_length)

In [8]:
prediction_length = 60
context_length = 32
# Use the first GPU when one is present; otherwise fall back to the CPU rather
# than failing on "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Despite the "_it" suffix, both names are bound to lists (the function
# materialises its iterators), so later cells can index them.
forecast_it, tss_it = get_lag_llama_predictions(test_dataset, prediction_length, device, context_length)

  ckpt = torch.load("./lag-llama-model/lag-llama.ckpt", map_location=device) # Uses GPU since in this Colab we use a GPU.
/home/abizard/miniconda3/envs/finetuning/lib/python3.12/site-packages/lightning/fabric/utilities/cloud_io.py:56: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded

In [None]:
# Build a retrieval index from the forecasts in forecast_it / tss_it.
rag = TimeSeriesRAG(embedding_dim=32, num_neighbors=5, similarity_threshold=0.7)

for forecast, ts in zip(forecast_it, tss_it):
    series_values = ts[0].values
    entry = {
        'historical_prediction': forecast.samples,
        'metadata': {'item_id': forecast.item_id},
    }
    rag.add_to_index(series_values, entry)
print(f"Index built with {rag.index.ntotal} sequences")

# Try a retrieval, querying with the last series handled by the loop above.
similar_seqs = rag.retrieve_similar(series_values)
print(similar_seqs)
print("Retrieved sequences:", len(similar_seqs))

Index built with 2 sequences
Current index size: 2
Query embedding shape: (32,)
Query embedding dtype: float32
Query embedding sample: [-0.23121846 -0.2272958  -2.9947195   2.649545   -0.23393995 -0.23103146
 -0.22871274 -0.23155153  0.26563936 -0.18134485  0.28666496  0.5742782
  0.06282052  0.28124228  0.16755196  0.01591028  0.2561621   0.
  0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.
  0.          0.        ]
Searching for 2 neighbors
Raw distances: [[0.         0.33531463]]
Raw indices: [[1 0]]
Distance: 0.0, Similarity: 1.0
Distance: 0.33531463146209717, Similarity: 0.7488871734334658
[({'historical_prediction': array([[-0.00129487,  0.00127371,  0.00111405, ...,  0.00099258,
        -0.00328832,  0.00028685],
       [-0.00155724,  0.00595453,  0.00863928, ...,  0.00161305,
         0.00486501,  0.00572697],
       [-0.00522363,  0.00145362,  0.00121425, ...,  0.00443372,
         0.00722506,  0.0

In [None]:
# Show the `samples` array of the second forecast (index 1) in forecast_it.
print(forecast_it[1].samples)

In [None]:
# Show the `samples` array of the first forecast (index 0) in forecast_it.
print(forecast_it[0].samples)

In [None]:
symbol = ["AAPL"]
start_date = "2013-01-01"
prediction_length = 60
context_length = 32
# Use the first GPU when one is present; otherwise fall back to the CPU rather
# than failing on "cuda:0".
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

predictions, metrics = predict_stock_with_rag(symbol, start_date, prediction_length, context_length, device)
print(f"Prediksi untuk {symbol}:")
# The horizon in the message follows prediction_length instead of a fixed "5".
print(f"Prediksi harga saham {prediction_length} hari ke depan: {predictions}")
print("\nMetrik Evaluasi:")
for metric, value in metrics.items():
    print(f"{metric}: {value:.4f}")
