In [25]:
import pandas as pd
import numpy as np
from collections import defaultdict, Counter
from typing import Dict

In [3]:
df = pd.read_csv("data/indicators/VNI_020114_141125_indicators.csv")

In [4]:
df = df.sort_values(by="Date").reset_index(drop=True)

In [6]:
theta = 0.003

def label_trend(change_pct):
    if change_pct > theta:
        return "UP"
    elif change_pct < -theta:
        return "DOWN"
    else:
        return "FLAT"
    
df["Trend"] = df["Change_Pct"].shift(-1).apply(label_trend)

In [7]:
def rsi_bucket(x):
    if pd.isna(x):
        return None
    if x < 30:
        return "LOW"
    elif x > 70:
        return "HIGH"
    else:
        return "MID"
    
df["RSI_14_disc"] = df["RSI_14"].apply(rsi_bucket)

In [8]:
def macd_hist_bucket(x, eps=0.05):
    if pd.isna(x):
        return None
    if x > eps:
        return "POS"
    elif x < -eps:
        return "NEG"
    else:
        return "NEU"

df["MACD_Hist_disc"] = df["MACD_Hist"].apply(macd_hist_bucket)

In [15]:
df["OBV_change"] = df["OBV"].diff()

def obv_bucket(x):
    if pd.isna(x):
        return None
    if x > 0:
        return "UP"
    elif x < 0:
        return "DOWN"
    else:
        return "FLAT"
df["OBV_disc"] = df["OBV_change"].apply(obv_bucket)

In [10]:
def adx_bucket(x):
    if pd.isna(x):
        return None
    if x < 20:
        return "WEAK"
    elif x < 40:
        return "MODERATE"
    else:
        return "STRONG"

df["ADX_disc"] = df["ADX"].apply(adx_bucket)

In [11]:
def stoch_state(k, d):
    if pd.isna(k) or pd.isna(d):
        return None
    if k > 80 and d > 80:
        return "OVERBOUGHT"
    elif k < 20 and d < 20:
        return "OVERSOLD"
    else:
        return "NEUTRAL"

df["STOCH_disc"] = [
    stoch_state(k, d) for k, d in zip(df["STOCH_K"], df["STOCH_D"])
]

In [12]:
df["MA_Rel"] = (df["Close"] - df["SMA_20"]) / df["SMA_20"]

def ma_rel_bucket(x, eps=0.01):
    if pd.isna(x):
        return None
    if x > eps:
        return "ABOVE"
    elif x < -eps:
        return "BELOW"
    else:
        return "NEAR"

df["MA_Rel_disc"] = df["MA_Rel"].apply(ma_rel_bucket)


In [13]:
df["DI_Spread"] = df["DI_Plus"] - df["DI_Minus"]

def di_spread_bucket(x, eps=0.5):
    if pd.isna(x):
        return None
    if x > eps:
        return "BULL"
    elif x < -eps:
        return "BEAR"
    else:
        return "NEUTRAL"

df["DI_Spread_disc"] = df["DI_Spread"].apply(di_spread_bucket)

In [16]:
discrete_cols = [
    "Trend", "RSI_14_disc", "MACD_Hist_disc", 
    "OBV_disc", "ADX_disc", "STOCH_disc",
    "MA_Rel_disc", "DI_Spread_disc"
]
df_disc = df.dropna(subset=discrete_cols).reset_index(drop=True)

In [None]:
from dbn import DynamicBayesianNetwork
dbn = DynamicBayesianNetwork(name="VNI")
nodes = [
    "Trend",
    "RSI_14_disc",
    "MACD_Hist_disc",
    "OBV_disc",
    "ADX_disc",
    "STOCH_disc",
    "MA_Rel_disc",
    "DI_Spread_disc"
]

for node in nodes:
    dbn.add_node(node)
    
for indicator in nodes:
    if indicator == "Trend":
        continue
    dbn.add_intra_edge("Trend", indicator)

dbn.add_inter_edge("Trend", "Trend")

In [23]:
def learn_cpt_for_node_from_df(
    dbn: DynamicBayesianNetwork,
    df: pd.DataFrame,
    node: str,
    node_to_col: Dict[str, str],
) -> Dict[tuple, Dict[str, float]]:
    parents = dbn.get_parents(node)
    cpt_counts = defaultdict(Counter)
    
    if len(parents) == 0:
        for _, row in df.iterrows():
            val = row[node_to_col[node]]
            cpt_counts[()][val] += 1
    else:
        n = len(df)
        for t in range(n):
            row_t = df.iloc[t]
            parent_vals = []
            skip = False
            
            for parent_name, offset in parents:
                t_parent = t + offset
                if t_parent < 0 or t_parent >= n:
                    skip = True
                    break
                row_parent = df.iloc[t_parent]
                parent_vals.append(row_parent[node_to_col[parent_name]])
                
            if skip:
                continue
            
            parent_vals = tuple(parent_vals)
            child_val = row_t[node_to_col[node]]
            cpt_counts[parent_vals][child_val] += 1
            
    cpt = {}
    for parent_vals, counter in cpt_counts.items():
        total = sum(counter.values())
        if total == 0:
            continue
        cpt[parent_vals] = {
            val: count / total
            for val, count in counter.items()
        }
    
    if len(parents) == 0 and () not in cpt:
        cpt[()] = {
        }
        
    return cpt
        

In [21]:
# ============================================================================
# TRAIN-TEST SPLIT
# ============================================================================
# Split data: 80% training, 20% testing
# Use temporal split (not random) to respect time series nature
# ============================================================================

train_size = int(0.8 * len(df_disc))
df_train = df_disc.iloc[:train_size].reset_index(drop=True)
df_test = df_disc.iloc[train_size:].reset_index(drop=True)

print(f"Total samples: {len(df_disc)}")
print(f"Training samples: {len(df_train)} ({len(df_train)/len(df_disc)*100:.1f}%)")
print(f"Testing samples: {len(df_test)} ({len(df_test)/len(df_disc)*100:.1f}%)")
print(f"\nTraining period: {df_train['Date'].min()} to {df_train['Date'].max()}")
print(f"Testing period: {df_test['Date'].min()} to {df_test['Date'].max()}")


Total samples: 2928
Training samples: 2342 (80.0%)
Testing samples: 586 (20.0%)

Training period: 2014-02-27 to 2023-07-12
Testing period: 2023-07-13 to 2025-11-14


In [26]:
# ============================================================================
# LEARN CPTs FROM TRAINING DATA
# ============================================================================
# For each node, learn its Conditional Probability Table from training data
# This captures the statistical relationships between indicators and trends
# ============================================================================

# Mapping from node names to DataFrame column names
node_to_col = {
    "Trend": "Trend",
    "RSI_14_disc": "RSI_14_disc",
    "MACD_Hist_disc": "MACD_Hist_disc",
    "OBV_disc": "OBV_disc",
    "ADX_disc": "ADX_disc",
    "STOCH_disc": "STOCH_disc",
    "MA_Rel_disc": "MA_Rel_disc",
    "DI_Spread_disc": "DI_Spread_disc"
}

print("Learning CPTs from training data...\n")

for node in nodes:
    cpt = learn_cpt_for_node_from_df(dbn, df_train, node, node_to_col)
    dbn.set_cpt(node, cpt)
    
    # Display CPT statistics
    parents = dbn.get_parents(node)
    num_entries = len(cpt)
    
    if len(parents) == 0:
        print(f"✓ {node} (root node)")
        print(f"  Prior probabilities: {cpt.get((), {})}")
    else:
        parent_names = [f"{p[0]}(t{p[1]:+d})" for p in parents]
        print(f"✓ {node} <- {', '.join(parent_names)}")
        print(f"  CPT entries: {num_entries}")
        
        # Show a sample entry
        if num_entries > 0:
            sample_key = list(cpt.keys())[0]
            print(f"  Sample: parents={sample_key} -> {cpt[sample_key]}")
    print()

print("="*80)
print("Training completed! All CPTs learned from data.")
print("="*80)


Learning CPTs from training data...

✓ Trend <- Trend(t-1)
  CPT entries: 3
  Sample: parents=('UP',) -> {'DOWN': 0.4312210200927357, 'UP': 0.5625965996908809, 'FLAT': 0.0061823802163833074}

✓ RSI_14_disc <- Trend(t+0)
  CPT entries: 3
  Sample: parents=('UP',) -> {'HIGH': 0.12123552123552124, 'MID': 0.8347490347490347, 'LOW': 0.044015444015444015}

✓ MACD_Hist_disc <- Trend(t+0)
  CPT entries: 3
  Sample: parents=('UP',) -> {'NEG': 0.43706563706563706, 'NEU': 0.011583011583011582, 'POS': 0.5513513513513514}

✓ OBV_disc <- Trend(t+0)
  CPT entries: 3
  Sample: parents=('UP',) -> {'DOWN': 0.43474903474903476, 'UP': 0.5644787644787644, 'FLAT': 0.0007722007722007722}

✓ ADX_disc <- Trend(t+0)
  CPT entries: 3
  Sample: parents=('UP',) -> {'STRONG': 0.13281853281853281, 'MODERATE': 0.5683397683397683, 'WEAK': 0.29884169884169887}

✓ STOCH_disc <- Trend(t+0)
  CPT entries: 3
  Sample: parents=('UP',) -> {'OVERBOUGHT': 0.32664092664092664, 'NEUTRAL': 0.5722007722007721, 'OVERSOLD': 0.101158

# Model Testing and Evaluation


In [27]:
# ============================================================================
# PREDICTION FUNCTION
# ============================================================================
# Predict the Trend at time t given all indicator values
# ============================================================================

def predict_trend(
    dbn: DynamicBayesianNetwork,
    df: pd.DataFrame,
    t: int,
    node_to_col: Dict[str, str]
) -> str:
    """
    Predict the trend at time t using DBN inference.
    
    Args:
        dbn: Trained Dynamic Bayesian Network
        df: DataFrame with data
        t: Time index to predict
        node_to_col: Mapping from node names to column names
        
    Returns:
        Predicted trend value (UP, DOWN, or FLAT)
    """
    # Build evidence from current indicators + previous trend
    evidence = {}
    
    # Add current time indicators as evidence
    for indicator in ["RSI_14_disc", "MACD_Hist_disc", "OBV_disc", 
                      "ADX_disc", "STOCH_disc", "MA_Rel_disc", "DI_Spread_disc"]:
        evidence[(indicator, t)] = df.iloc[t][node_to_col[indicator]]
    
    # Add previous trend if available
    if t > 0:
        evidence[("Trend", t-1)] = df.iloc[t-1][node_to_col["Trend"]]
    
    # Infer trend distribution
    try:
        trend_dist = dbn.infer_node("Trend", t, evidence)
        
        # Return most probable trend
        if not trend_dist:
            return "FLAT"  # Default fallback
        
        predicted_trend = max(trend_dist.items(), key=lambda x: x[1])[0]
        return predicted_trend
    
    except (ValueError, KeyError) as e:
        # Handle unseen combinations with majority class
        return "UP"  # Default to most common trend

print("Prediction function defined.")


Prediction function defined.


In [28]:
# ============================================================================
# TEST THE MODEL ON TEST SET
# ============================================================================
# Make predictions for each day in the test set and compare with actual trends
# ============================================================================

print("Testing model on test set...\n")

predictions = []
actuals = []

for i in range(len(df_test)):
    # Get actual trend
    actual = df_test.iloc[i][node_to_col["Trend"]]
    
    # Make prediction
    predicted = predict_trend(dbn, df_test, i, node_to_col)
    
    predictions.append(predicted)
    actuals.append(actual)

print(f"Predictions made: {len(predictions)}")
print(f"✓ Testing completed!")


Testing model on test set...

Predictions made: 586
✓ Testing completed!


In [None]:
# ============================================================================
# EVALUATION METRICS
# ============================================================================
# Calculate accuracy, precision, recall, and F1-score for each trend class
# Custom implementation to avoid sklearn numpy compatibility issues
# ============================================================================

def calculate_metrics(actuals, predictions):
    """Calculate classification metrics without sklearn."""
    
    # Get unique classes
    classes = sorted(list(set(actuals + predictions)))
    
    # Calculate confusion matrix
    confusion_matrix = {}
    for actual_class in classes:
        confusion_matrix[actual_class] = {}
        for pred_class in classes:
            confusion_matrix[actual_class][pred_class] = 0
    
    # Fill confusion matrix
    for actual, pred in zip(actuals, predictions):
        confusion_matrix[actual][pred] += 1
    
    # Calculate metrics for each class
    metrics = {}
    for cls in classes:
        # True Positives: predicted this class and actually this class
        tp = confusion_matrix[cls][cls]
        
        # False Positives: predicted this class but actually another
        fp = sum(confusion_matrix[other][cls] for other in classes if other != cls)
        
        # False Negatives: actually this class but predicted another
        fn = sum(confusion_matrix[cls][other] for other in classes if other != cls)
        
        # True Negatives: predicted another and actually another
        tn = sum(
            confusion_matrix[other1][other2] 
            for other1 in classes if other1 != cls
            for other2 in classes if other2 != cls
        )
        
        # Calculate precision, recall, f1
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
        
        metrics[cls] = {
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'support': tp + fn
        }
    
    # Overall accuracy
    correct = sum(1 for a, p in zip(actuals, predictions) if a == p)
    accuracy = correct / len(actuals) if len(actuals) > 0 else 0
    
    return accuracy, confusion_matrix, metrics, classes


# Calculate all metrics
accuracy, cm, metrics, classes = calculate_metrics(actuals, predictions)

print("="*80)
print("MODEL EVALUATION RESULTS")
print("="*80)
print(f"\nOverall Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)\n")

# Classification report
print("Classification Report:")
print("-" * 80)
print(f"{'Class':>12}  {'Precision':>10}  {'Recall':>10}  {'F1-Score':>10}  {'Support':>10}")
print("-" * 80)

for cls in classes:
    m = metrics[cls]
    print(f"{cls:>12}  {m['precision']:>10.4f}  {m['recall']:>10.4f}  "
          f"{m['f1']:>10.4f}  {m['support']:>10}")

# Macro averages
avg_precision = sum(m['precision'] for m in metrics.values()) / len(metrics)
avg_recall = sum(m['recall'] for m in metrics.values()) / len(metrics)
avg_f1 = sum(m['f1'] for m in metrics.values()) / len(metrics)
total_support = sum(m['support'] for m in metrics.values())

print("-" * 80)
print(f"{'Macro Avg':>12}  {avg_precision:>10.4f}  {avg_recall:>10.4f}  "
      f"{avg_f1:>10.4f}  {total_support:>10}")

# Confusion Matrix
print("\n\nConfusion Matrix:")
print("-" * 80)
print(f"{'Actual \\ Pred':>12}", end="")
for pred_class in classes:
    print(f"{pred_class:>12}", end="")
print()
print("-" * 80)

for actual_class in classes:
    print(f"{actual_class:>12}", end="")
    for pred_class in classes:
        print(f"{cm[actual_class][pred_class]:>12}", end="")
    print()

print("\n" + "="*80)


MODEL EVALUATION RESULTS

Overall Accuracy: 0.5700 (57.00%)

Classification Report:
--------------------------------------------------------------------------------
       Class   Precision      Recall    F1-Score     Support
--------------------------------------------------------------------------------
        DOWN      0.6667      0.0162      0.0316         247
        FLAT      0.0000      0.0000      0.0000           7
          UP      0.5690      0.9940      0.7237         332
--------------------------------------------------------------------------------
   Macro Avg      0.4119      0.3367      0.2518         586


Confusion Matrix:
--------------------------------------------------------------------------------
Actual \ Pred        DOWN        FLAT          UP
--------------------------------------------------------------------------------
        DOWN           4           0         243
        FLAT           0           0           7
          UP           2           0  

In [33]:
# ============================================================================
# VISUALIZE PREDICTIONS VS ACTUALS (Interactive with Plotly)
# ============================================================================
# Plot predicted vs actual trends over time in the test set
# Uses Plotly for interactive visualization (no numpy compatibility issues)
# ============================================================================

import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Create mapping for visualization
trend_map = {"DOWN": -1, "FLAT": 0, "UP": 1}
actuals_numeric = [trend_map.get(t, 0) for t in actuals]
predictions_numeric = [trend_map.get(t, 0) for t in predictions]

# Get test dates
test_dates = pd.to_datetime(df_test['Date'].values[:len(actuals)])

# Create figure with two subplots
fig = make_subplots(
    rows=2, cols=1,
    shared_xaxes=True,
    vertical_spacing=0.08,
    subplot_titles=('DBN Predictions vs Actual Trends on Test Set', 
                    'Prediction Accuracy'),
    row_heights=[0.65, 0.35]
)

# Plot 1: Actual vs Predicted trends
fig.add_trace(
    go.Scatter(
        x=test_dates,
        y=actuals_numeric,
        name='Actual',
        mode='lines+markers',
        line=dict(color='#2E86AB', width=2),
        marker=dict(size=6, symbol='circle'),
        hovertemplate='<b>Date</b>: %{x|%Y-%m-%d}<br>' +
                      '<b>Actual</b>: %{text}<br>' +
                      '<extra></extra>',
        text=[k for k, v in trend_map.items() if v in actuals_numeric]
    ),
    row=1, col=1
)

fig.add_trace(
    go.Scatter(
        x=test_dates,
        y=predictions_numeric,
        name='Predicted',
        mode='lines+markers',
        line=dict(color='#FFA500', width=2, dash='dot'),
        marker=dict(size=5, symbol='square'),
        hovertemplate='<b>Date</b>: %{x|%Y-%m-%d}<br>' +
                      '<b>Predicted</b>: %{text}<br>' +
                      '<extra></extra>',
        text=[k for k, v in trend_map.items() if v in predictions_numeric]
    ),
    row=1, col=1
)

# Add horizontal line at 0
fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5, row=1, col=1)

# Plot 2: Match/Mismatch indicators
matches = [1 if a == p else 0 for a, p in zip(actuals, predictions)]
colors_bar = ['#28a745' if m == 1 else '#dc3545' for m in matches]

fig.add_trace(
    go.Bar(
        x=test_dates,
        y=matches,
        marker_color=colors_bar,
        showlegend=False,
        hovertemplate='<b>Date</b>: %{x|%Y-%m-%d}<br>' +
                      '<b>Match</b>: %{text}<br>' +
                      '<extra></extra>',
        text=['Correct' if m == 1 else 'Wrong' for m in matches]
    ),
    row=2, col=1
)

# Update layout
fig.update_layout(
    height=800,
    hovermode='x unified',
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ),
    plot_bgcolor='white',
    paper_bgcolor='white'
)

# Update y-axes
fig.update_yaxes(
    title_text="Trend Direction",
    ticktext=['DOWN', 'FLAT', 'UP'],
    tickvals=[-1, 0, 1],
    showgrid=True,
    gridwidth=1,
    gridcolor='lightgray',
    row=1, col=1
)

fig.update_yaxes(
    title_text="Correct",
    ticktext=['Wrong', 'Correct'],
    tickvals=[0, 1],
    showgrid=True,
    gridwidth=1,
    gridcolor='lightgray',
    row=2, col=1
)

# Update x-axes
fig.update_xaxes(
    title_text="Date",
    showgrid=True,
    gridwidth=1,
    gridcolor='lightgray',
    row=2, col=1
)

# Show the interactive plot
fig.show()

print(f"\n✓ Interactive visualization completed!")
print(f"Green bars = Correct predictions ({sum(matches)} out of {len(matches)})")
print(f"Red bars = Incorrect predictions ({len(matches) - sum(matches)} out of {len(matches)})")
print(f"Accuracy: {sum(matches)/len(matches)*100:.2f}%")



✓ Interactive visualization completed!
Green bars = Correct predictions (334 out of 586)
Red bars = Incorrect predictions (252 out of 586)
Accuracy: 57.00%


In [34]:
# ============================================================================
# PREDICTION CONFIDENCE ANALYSIS
# ============================================================================
# Show probability distributions for predictions with confidence levels
# ============================================================================

def get_prediction_with_confidence(
    dbn: DynamicBayesianNetwork,
    df: pd.DataFrame,
    t: int,
    node_to_col: Dict[str, str]
) -> tuple:
    """Get prediction with full probability distribution."""
    evidence = {}
    
    for indicator in ["RSI_14_disc", "MACD_Hist_disc", "OBV_disc", 
                      "ADX_disc", "STOCH_disc", "MA_Rel_disc", "DI_Spread_disc"]:
        evidence[(indicator, t)] = df.iloc[t][node_to_col[indicator]]
    
    if t > 0:
        evidence[("Trend", t-1)] = df.iloc[t-1][node_to_col["Trend"]]
    
    try:
        trend_dist = dbn.infer_node("Trend", t, evidence)
        if not trend_dist:
            return "FLAT", 0.33, {"UP": 0.33, "DOWN": 0.33, "FLAT": 0.34}
        
        predicted = max(trend_dist.items(), key=lambda x: x[1])[0]
        confidence = trend_dist[predicted]
        return predicted, confidence, trend_dist
    
    except (ValueError, KeyError):
        return "UP", 0.33, {"UP": 0.33, "DOWN": 0.33, "FLAT": 0.34}

# Sample predictions with confidence
print("="*80)
print("SAMPLE PREDICTIONS WITH CONFIDENCE LEVELS")
print("="*80)

sample_indices = [0, 10, 20, 30, 40] if len(df_test) > 40 else list(range(min(5, len(df_test))))

for i in sample_indices:
    date = df_test.iloc[i]['Date']
    actual = df_test.iloc[i][node_to_col["Trend"]]
    predicted, confidence, distribution = get_prediction_with_confidence(dbn, df_test, i, node_to_col)
    
    match = "✓" if predicted == actual else "✗"
    
    print(f"\nDate: {date}")
    print(f"  Actual: {actual:6s} | Predicted: {predicted:6s} {match}")
    print(f"  Confidence: {confidence:.2%}")
    print(f"  Full distribution: ", end="")
    for trend, prob in sorted(distribution.items()):
        print(f"{trend}={prob:.2%}  ", end="")
    print()

print("\n" + "="*80)


SAMPLE PREDICTIONS WITH CONFIDENCE LEVELS

Date: 2023-07-13
  Actual: UP     | Predicted: UP     ✓
  Confidence: 33.00%
  Full distribution: DOWN=33.00%  FLAT=34.00%  UP=33.00%  

Date: 2023-07-27
  Actual: UP     | Predicted: UP     ✓
  Confidence: 54.26%
  Full distribution: DOWN=45.26%  FLAT=0.48%  UP=54.26%  

Date: 2023-08-10
  Actual: UP     | Predicted: UP     ✓
  Confidence: 54.26%
  Full distribution: DOWN=45.26%  FLAT=0.48%  UP=54.26%  

Date: 2023-08-24
  Actual: DOWN   | Predicted: UP     ✗
  Confidence: 56.26%
  Full distribution: DOWN=43.12%  FLAT=0.62%  UP=56.26%  

Date: 2023-09-11
  Actual: UP     | Predicted: UP     ✓
  Confidence: 54.26%
  Full distribution: DOWN=45.26%  FLAT=0.48%  UP=54.26%  



In [35]:
# ============================================================================
# SAVE PREDICTIONS TO FILE
# ============================================================================
# Save test results with predictions and confidence for further analysis
# ============================================================================

# Create results dataframe
results_df = df_test.copy()
results_df['Predicted_Trend'] = predictions
results_df['Correct'] = [1 if a == p else 0 for a, p in zip(actuals, predictions)]

# Add confidence scores
confidences = []
for i in range(len(df_test)):
    _, confidence, _ = get_prediction_with_confidence(dbn, df_test, i, node_to_col)
    confidences.append(confidence)

results_df['Prediction_Confidence'] = confidences

# Save to CSV
output_path = "data/predictions/VNI_DBN_predictions.csv"
import os
os.makedirs("data/predictions", exist_ok=True)
results_df.to_csv(output_path, index=False, encoding='utf-8-sig')

print(f"✓ Predictions saved to: {output_path}")
print(f"\nSummary:")
print(f"  Total predictions: {len(results_df)}")
print(f"  Correct predictions: {results_df['Correct'].sum()}")
print(f"  Accuracy: {results_df['Correct'].mean():.2%}")
print(f"  Average confidence: {results_df['Prediction_Confidence'].mean():.2%}")


✓ Predictions saved to: data/predictions/VNI_DBN_predictions.csv

Summary:
  Total predictions: 586
  Correct predictions: 334
  Accuracy: 57.00%
  Average confidence: 55.43%
