In [None]:
import os
import re
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from ncps.torch import LTC
from ncps.wirings import AutoNCP
import random
import matplotlib.pyplot as plt
import warnings
from sklearn.metrics import root_mean_squared_error, mean_absolute_percentage_error
warnings.filterwarnings('ignore')
import sys
import json

In [None]:
def custom_sort_key_dirs(filename):
    """Sort key for directory names containing a ``<number>_<number>`` pair.

    Returns the first such pair found in ``filename`` as a tuple of ints, so
    names order numerically rather than lexicographically. Names without a
    pair get ``(inf, inf)`` and therefore sort last.
    """
    found = re.search(r"(\d+)_(\d+)", filename)
    if found is None:
        return (float('inf'), float('inf'))
    major, minor = found.groups()
    return (int(major), int(minor))

def custom_sort_key_files(filename):
    """Sort key for file names: the first run of digits as an int.

    Names that contain no digits map to ``inf`` so they sort after every
    numbered file.
    """
    found = re.search(r"(\d+)", filename)
    return int(found.group(1)) if found else float('inf')

def load_data_from_txt_folder(folder_path):
    """Load the first column of every ``.txt`` file in a folder.

    Files are visited in the numeric order given by ``custom_sort_key_files``.
    Each file is parsed as a header-less, whitespace-separated table and only
    its first column is kept.

    Args:
        folder_path: directory containing the per-timestep ``.txt`` files.

    Returns:
        list of ``numpy`` arrays, one per file, each reshaped to ``(n_rows, 1)``.
    """
    txt_files = sorted(
        (name for name in os.listdir(folder_path) if name.endswith('.txt')),
        key=custom_sort_key_files,
    )
    data = []
    for name in txt_files:
        # sep=r'\s+' replaces the deprecated delim_whitespace=True and matches
        # how load_timestep_data parses the same kind of file.
        df = pd.read_csv(os.path.join(folder_path, name), header=None, sep=r'\s+')
        data.append(df.iloc[:, 0].values.reshape(-1, 1))
    return data

class BitrateTimeSeriesDataset(Dataset):
    """Sliding-window dataset over a list of per-timestep bitrate frames.

    Sample ``k`` pairs the ``input_steps`` frames preceding timestep
    ``k + input_steps`` (oldest first) with the flattened frame at that
    timestep. Every input frame gets an extra column holding the row index
    (used as the connection id), so ``x`` has shape
    ``(input_steps, num_connections, 2)`` and ``y`` has ``num_connections``
    entries.

    Args:
        data: sequence of 2-D arrays with one row per connection and a single
            bitrate column (the shape produced by ``load_data_from_txt_folder``).
        input_steps: number of past frames fed to the model per sample.
        num_connections: number of rows per frame. When ``None`` it is read
            from the first frame, falling back to 82 for empty ``data``.
    """

    def __init__(self, data, input_steps=2, num_connections=None):
        self.input_steps = input_steps
        self.data = data
        self.samples = []

        if num_connections is None:
            # Infer the id range from the data instead of assuming 82 rows.
            num_connections = np.asarray(data[0]).shape[0] if len(data) > 0 else 82
        connection_ids = np.arange(num_connections).reshape(-1, 1)

        for i in range(input_steps, len(data)):
            # Frames i-input_steps .. i-1, each extended with the id column.
            window = [
                np.concatenate([data[i - j], connection_ids], axis=1)
                for j in range(input_steps, 0, -1)
            ]
            x = np.stack(window, axis=0)
            y = data[i].flatten()
            self.samples.append((x, y))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        x, y = self.samples[idx]
        x_t = torch.tensor(x, dtype=torch.float32)
        y_t = torch.tensor(y, dtype=torch.float32)
        return x_t, y_t


class TrafficPredictionNetwork(nn.Module):
    """LTC-based network producing one prediction per connection.

    Every input element carries a bitrate (feature 0) and a connection id
    (feature 1). The id is embedded, concatenated with the bitrate, and the
    ``(seq_len, num_links)`` grid is flattened into a single sequence that is
    fed through the LTC layer followed by three linear layers.

    Args:
        embedding_dim: size of the connection-id embedding vectors.
        ltc_units: number of LTC units (also the LTC output width used here).
        dense_units: two-element sequence with the widths of the first and
            second linear layers.
        output_size: number of distinct connection ids; sizes the embedding
            table, so every id in the input must be ``< output_size``.
    """

    def __init__(self, embedding_dim, ltc_units, dense_units, output_size):
        super().__init__()

        # Size the table from output_size rather than a literal so it always
        # matches the number of connections the caller declares.
        self.embedding = nn.Embedding(output_size, embedding_dim)
        # Per-element LTC input: one bitrate scalar plus the id embedding.
        self.ltc_input_size = 1 + embedding_dim

        self.ltc = LTC(input_size=self.ltc_input_size, units=ltc_units, return_sequences=True)

        self.dense1 = nn.Linear(ltc_units, dense_units[0])
        self.dense2 = nn.Linear(dense_units[0], dense_units[1])
        self.dense3 = nn.Linear(dense_units[1], 1)
        self.tanh = nn.Tanh()
        # Not used in forward(); kept so the module's attributes stay unchanged.
        self.linear = nn.Identity()

    def forward(self, x, hx=None):
        """Run one window through the network.

        Args:
            x: 4-D tensor ``(batch, seq_len, num_links, 2)`` whose last axis
                is ``[bitrate, connection_id]``.
            hx: optional LTC hidden state passed straight to the LTC layer.

        Returns:
            ``(out, hx)`` where ``out`` is ``(batch, num_links)`` — the
            outputs belonging to the last input step — and ``hx`` is the
            hidden state returned by the LTC layer.
        """
        bitrate = x[..., 0]
        conn_ids = x[..., 1].long()

        embedded_ids = self.embedding(conn_ids)
        inputs = torch.cat([bitrate.unsqueeze(-1), embedded_ids], dim=-1)

        # Flatten (seq_len, num_links) into one sequence axis for the LTC.
        batch_size, seq_len, num_links, feat_dim = inputs.shape
        inputs = inputs.view(batch_size, seq_len * num_links, feat_dim)

        ltc_out, hx = self.ltc(inputs, hx)
        ltc_out = self.tanh(ltc_out)

        d1 = self.dense1(ltc_out)
        d2 = self.dense2(d1)
        out = self.dense3(d2)
        # Restore the (seq_len, num_links) grid and keep only the last step.
        out = out.view(batch_size, seq_len, num_links)
        out = out[:, -1, :]

        return out, hx

def train_model(data_folder, input_steps=2, embedding_dim=16, ltc_units=64,
                dense_units=[20,10], lr=1e-3, batch_size=32, epochs=5,
                train_end=6000):
    """Train a TrafficPredictionNetwork on the first frames of one data folder.

    Args:
        data_folder: folder passed to ``load_data_from_txt_folder``.
        input_steps: number of past frames per training sample.
        embedding_dim: connection-id embedding size.
        ltc_units: number of LTC units.
        dense_units: widths of the two hidden linear layers (read, never
            mutated, so the list default is safe).
        lr: Adam learning rate.
        batch_size: mini-batch size.
        epochs: number of passes over the training windows.
        train_end: exclusive index of the last frame used for training
            (frames ``[0, train_end)``).

    Returns:
        ``(model, all_data)`` — the trained model and the *full* list of
        loaded frames (not only the training slice).

    Raises:
        ValueError: if the training slice yields no samples.
    """
    all_data = load_data_from_txt_folder(data_folder)
    train_data = all_data[:train_end]
    dataset = BitrateTimeSeriesDataset(train_data, input_steps)
    # Batches are drawn in chronological order (no shuffling).
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    if len(loader) == 0:
        # Otherwise the per-epoch average below divides by zero.
        raise ValueError(
            f"No training samples: {len(train_data)} frames loaded from "
            f"{data_folder!r} with input_steps={input_steps}"
        )

    model = TrafficPredictionNetwork(embedding_dim=embedding_dim, ltc_units=ltc_units,
                                     dense_units=dense_units, output_size=82)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    model.train()
    for ep in range(epochs):
        total_loss = 0.0
        for x_batch, y_batch in loader:
            optimizer.zero_grad()
            preds, _ = model(x_batch)
            loss = criterion(preds, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f"Epoch {ep+1}/{epochs}: Loss = {total_loss/len(loader):.6f}")

    return model, all_data


def evaluate_model_continuous(model, full_data, test_start=6000, test_end=7100,
                              eval_start=7000, eval_end=7100, tolerance=0.15,
                              input_steps=2):
    """Step the model through a test range and score a sub-window of it.

    The model is called once per timestep ``t`` in
    ``[test_start + input_steps, test_end)`` on the ``input_steps`` frames
    preceding ``t``; the hidden state returned by one call is fed into the
    next. Only timesteps with ``eval_start <= t <= eval_end`` contribute to
    the returned metrics.

    Args:
        model: callable ``model(x, h) -> (pred, h)`` exposing ``eval()``.
        full_data: sequence of per-timestep frames, one row per connection
            and a single bitrate column.
        test_start: index from which the warm-up/test range starts.
        test_end: exclusive end of the stepped range.
        eval_start: first timestep (inclusive) that is scored.
        eval_end: last timestep (inclusive) that is scored, if it is reached.
        tolerance: relative error bound for the "within tolerance" count.
        input_steps: number of past frames given to the model per call.

    Returns:
        ``(avg_loss, tol_pct, predictions)``: mean MSE over the timesteps
        actually scored, percentage of scored values within tolerance, and
        the list of per-timestep prediction lists. Both numbers are ``nan``
        when no timestep falls inside the scoring window.
    """
    model.eval()
    mse = nn.MSELoss()
    h = None
    predictions = []
    total_loss = 0.0
    within_tol = 0
    total_eval = 0
    evaluated_steps = 0

    for t in range(test_start + input_steps, test_end):
        window = []
        for j in range(input_steps, 0, -1):
            bitrate = np.asarray(full_data[t - j])
            # Row index doubles as the connection id; derived from the frame
            # so the function is not tied to a fixed number of connections.
            connection_ids = np.arange(bitrate.shape[0]).reshape(-1, 1)
            window.append(np.concatenate([bitrate, connection_ids], axis=1))
        x_t = torch.tensor(np.stack(window, axis=0), dtype=torch.float32).unsqueeze(0)
        y_true = torch.tensor(np.asarray(full_data[t]).flatten(), dtype=torch.float32)

        with torch.no_grad():
            pred, h = model(x_t, h)
            if h is not None:
                h = h.detach()

        if eval_start <= t <= eval_end:
            pred_flat = pred.view(-1)
            pred_list = pred_flat.tolist()
            total_loss += mse(pred_flat, y_true).item()
            predictions.append(pred_list)
            evaluated_steps += 1
            for p_val, r_val in zip(pred_list, y_true.tolist()):
                if abs(p_val - r_val) <= tolerance * abs(r_val):
                    within_tol += 1
                total_eval += 1

    # Average over the timesteps that were really scored: test_end is
    # exclusive, so the loop may stop before eval_end and the nominal window
    # length (eval_end - eval_start + 1) would understate the mean loss.
    avg_loss = total_loss / evaluated_steps if evaluated_steps else float('nan')
    tol_pct = 100 * within_tol / total_eval if total_eval else float('nan')
    return avg_loss, tol_pct, predictions

In [None]:
def check_for_streaks(predictions, real_values, tolerance=0.15, streak_length=5):
    """Find, per node, the first run of consecutive in-tolerance predictions.

    A prediction is in tolerance when ``|pred - real| <= tolerance * |real|``.
    For each node (column) the search stops at the first run of
    ``streak_length`` consecutive in-tolerance timesteps.

    Args:
        predictions: list of per-timestep lists of predicted values.
        real_values: list of per-timestep lists of true values, aligned with
            ``predictions``.
        tolerance: relative error bound.
        streak_length: number of consecutive in-tolerance timesteps required.

    Returns:
        ``(streak_info, count)`` where ``streak_info`` maps node index to the
        timestep at which its first qualifying streak starts, and ``count``
        is ``len(streak_info)``. Empty ``predictions`` yields ``({}, 0)``.

    Raises:
        ValueError: if ``streak_length`` is smaller than 1.
    """
    if streak_length < 1:
        raise ValueError("streak_length must be at least 1")

    streak_info = {}
    if not predictions:
        # Nothing to scan; avoids indexing predictions[0] on an empty list.
        return streak_info, 0

    num_nodes = len(predictions[0])
    for node in range(num_nodes):
        streak = 0
        for t in range(len(predictions)):
            real = real_values[t][node]
            if abs(predictions[t][node] - real) <= tolerance * abs(real):
                streak += 1
                if streak == streak_length:
                    # Report where the streak began, not where it completed.
                    streak_info[node] = t - (streak_length - 1)
                    break
            else:
                streak = 0
    return streak_info, len(streak_info)

def plot_predictions(predictions, real_values=None, node=None):
    """Plot predicted (and optionally real) bitrates over time.

    The left panel shows a single connection — ``node`` if given, otherwise a
    randomly chosen one — and the right panel shows the per-timestep average
    over all connections.

    Args:
        predictions: list of per-timestep lists of predicted values.
        real_values: optional list of per-timestep lists of true values.
        node: index of the connection to plot; random when ``None``.
    """
    num_nodes = len(predictions[0])
    if node is None:
        node_idx = random.randrange(num_nodes)
    else:
        node_idx = node

    node_preds = [step[node_idx] for step in predictions]
    avg_preds = [sum(step) / num_nodes for step in predictions]

    fig, (ax_node, ax_avg) = plt.subplots(1, 2, figsize=(14, 6))

    # Left panel: the selected connection.
    ax_node.plot(node_preds, '-o', label='Predicted')
    if real_values is not None:
        ax_node.plot([step[node_idx] for step in real_values], '-x', label='Real')
    ax_node.set_title(f'Connection {node_idx} Bitrate')
    ax_node.set_xlabel('Timestep')
    ax_node.set_ylabel('Bitrate')
    ax_node.legend()

    # Right panel: mean over all connections.
    ax_avg.plot(avg_preds, '-o', label='Avg Predicted')
    if real_values is not None:
        ax_avg.plot([sum(step) / num_nodes for step in real_values], '-x', label='Avg Real')
    ax_avg.set_title('Average Bitrate Across All Connections')
    ax_avg.set_xlabel('Timestep')
    ax_avg.set_ylabel('Average Bitrate')
    ax_avg.legend()

    fig.tight_layout()
    plt.show()

In [None]:
def get_all_test_data_for_lnn(root_folder, input_steps=2):
    """Load every ``processed_7000_*`` sub-folder of ``root_folder``.

    Sub-folders are visited in the numeric order given by
    ``custom_sort_key_dirs`` and each is read with
    ``load_data_from_txt_folder``. ``input_steps`` is accepted but not used
    by this function.

    Returns:
        list with one entry per sub-folder, each the list of frames loaded
        from it.
    """
    candidates = sorted(
        (entry for entry in os.listdir(root_folder) if entry.startswith("processed_7000_")),
        key=custom_sort_key_dirs,
    )
    loaded = []
    for entry in candidates:
        data_path = os.path.join(root_folder, entry)
        print(f"Loading data from {data_path}")
        loaded.append(load_data_from_txt_folder(data_path))
    return loaded



def evaluate_lnn_on_all_sets(model, all_test_data, name_of_test, tolerance=0.15,
                             input_steps=None, eval_start=6999, eval_end=7050):
    """Run the model over every test sequence and collect predictions and metrics.

    For each sequence the model is stepped through timesteps
    ``eval_start .. eval_end - 1``; every call receives the ``input_steps``
    frames preceding the timestep, and the hidden state is carried from one
    call to the next (reset per sequence). Predictions and true values of all
    sequences are written to ``<name_of_test>_results.json``.

    Args:
        model: callable ``model(x, h) -> (pred, h)``.
        all_test_data: list of sequences, each a list of per-timestep frames.
        name_of_test: prefix of the JSON file that is written.
        tolerance: relative error bound for tolerance and streak statistics.
        input_steps: number of past frames per model call. When ``None`` the
            module-level ``input_steps`` is used, which is what this function
            read implicitly before the parameter existed.
        eval_start: first timestep that is predicted.
        eval_end: exclusive end of the predicted range.

    Returns:
        ``(all_results, summary_results)``: per-sequence dicts with
        ``"predictions"``/``"real_values"``, and per-sequence metric dicts.

    Raises:
        NameError: if ``input_steps`` is not given and no module-level
            ``input_steps`` exists.
    """
    if input_steps is None:
        # Keep honouring the notebook-level variable, but fail with an
        # explanatory message instead of a bare NameError deep in the loop.
        try:
            input_steps = globals()["input_steps"]
        except KeyError:
            raise NameError(
                "input_steps was not passed and no module-level 'input_steps' is defined"
            ) from None

    all_results = []
    summary_results = []

    for data_sequence in all_test_data:
        predictions, real_values = [], []
        h = None
        for t in range(eval_start, eval_end):
            window = []
            for j in range(input_steps, 0, -1):
                bitrate = np.asarray(data_sequence[t - j])
                # Row index doubles as the connection id.
                connection_ids = np.arange(bitrate.shape[0]).reshape(-1, 1)
                window.append(np.concatenate([bitrate, connection_ids], axis=1))
            x_t = torch.tensor(np.stack(window, axis=0), dtype=torch.float32).unsqueeze(0)
            y_true = torch.tensor(np.array(data_sequence[t])[:, 0], dtype=torch.float32)

            with torch.no_grad():
                pred, h = model(x_t, h)
                if h is not None:
                    h = h.detach()

            predictions.append(pred.view(-1).tolist())
            real_values.append(y_true.tolist())

        streak_info, num_streaks = check_for_streaks(predictions, real_values, tolerance)
        streak_nodes = set(streak_info.keys())
        streak_timesteps = list(streak_info.values())
        num_nodes = len(predictions[0])

        # Element-wise error metrics, one value per (timestep, node).
        rmse_per_timestep = []
        mape_per_timestep = []
        for timestep in range(len(predictions)):
            rmse_node = []
            mape_node = []
            for node in range(num_nodes):
                real = [real_values[timestep][node]]
                predicted = [predictions[timestep][node]]
                rmse_node.append(float(root_mean_squared_error(real, predicted)))
                mape_node.append(float(mean_absolute_percentage_error(real, predicted)))
            rmse_per_timestep.append(rmse_node)
            mape_per_timestep.append(mape_node)

        total_within_tol = sum(
            abs(p - r) <= tolerance * abs(r)
            for pred_t, real_t in zip(predictions, real_values)
            for p, r in zip(pred_t, real_t)
        )
        total_preds = len(predictions) * num_nodes

        results_per_test = {
            "number_of_streak_nodes": len(streak_nodes),
            "average_streak_timestep": float(np.mean(streak_timesteps)) if streak_timesteps else None,
            "percentage_within_tolerance": (total_within_tol / total_preds) * 100,
            "percentage_nodes_with_streak": (len(streak_nodes) / num_nodes) * 100,
            "rmse_per_timestep": rmse_per_timestep,
            "mape_per_timestep": mape_per_timestep
        }

        preds_and_real = {
            "predictions": predictions,
            "real_values": real_values
        }
        print(results_per_test)
        summary_results.append(results_per_test)
        all_results.append(preds_and_real)

    # Only raw predictions/targets are persisted; metrics can be recomputed.
    with open(name_of_test + '_results.json', 'w') as f:
        json.dump(all_results, f, indent=4)

    return all_results, summary_results


In [None]:
def analyze_lnn_json_results(json_file_path, tolerance=0.15):
    """Recompute and plot metrics from a saved predictions JSON file.

    The file is expected to hold a list of dicts with ``"predictions"`` and
    ``"real_values"`` (lists of per-timestep lists). For each entry the
    per-timestep mean absolute error (computed as ``sqrt((real - pred)^2)``)
    and mean MAPE over nodes are computed, plus tolerance and streak
    statistics; the averages over all entries are printed and plotted.

    Args:
        json_file_path: path of the JSON file to read.
        tolerance: relative error bound for tolerance and streak statistics.

    Returns:
        None. Output is printed text and a matplotlib figure.
    """
    with open(json_file_path, 'r') as f:
        all_results = json.load(f)

    if not all_results:
        # Nothing to average or plot; the code below needs at least one entry.
        print("→ No results to analyze.")
        return

    all_metrics = []
    all_streak_timesteps = []

    for result in all_results:
        predictions = result["predictions"]
        real_values = result["real_values"]
        num_nodes = len(predictions[0])
        rmse_per_timestep = []
        mape_per_timestep = []
        for t in range(len(predictions)):
            rmse_timestep = []
            mape_timestep = []
            for node in range(num_nodes):
                real = real_values[t][node]
                pred = predictions[t][node]
                rmse = np.sqrt((real - pred) ** 2)
                mape = mean_absolute_percentage_error([real], [pred])
                rmse_timestep.append(float(rmse))
                mape_timestep.append(float(mape))
            # Collapse the node axis: one mean value per timestep.
            rmse_per_timestep.append(np.mean(rmse_timestep))
            mape_per_timestep.append(np.mean(mape_timestep))

        streak_info, _ = check_for_streaks(predictions, real_values, tolerance)
        streak_timesteps = list(streak_info.values())

        total_within_tolerance = sum(
            abs(p - r) <= tolerance * abs(r)
            for pred_t, real_t in zip(predictions, real_values)
            for p, r in zip(pred_t, real_t)
        )
        total_preds = len(predictions) * num_nodes
        pct_within_tol = (total_within_tolerance / total_preds) * 100
        pct_nodes_with_streak = (len(streak_info) / num_nodes) * 100
        avg_streak_start = np.mean(streak_timesteps) if streak_timesteps else None

        all_metrics.append({
            "rmse": rmse_per_timestep,
            "mape": mape_per_timestep,
            "pct_within_tol": pct_within_tol,
            "pct_nodes_with_streak": pct_nodes_with_streak,
            "avg_streak_start": avg_streak_start
        })
        all_streak_timesteps.extend(streak_timesteps)

    # Average the per-timestep curves across result sets (axis 0 = result set).
    avg_rmse = np.mean([m["rmse"] for m in all_metrics], axis=0)
    avg_mape = np.mean([m["mape"] for m in all_metrics], axis=0)
    avg_within_tol = np.mean([m["pct_within_tol"] for m in all_metrics])
    avg_nodes_with_streak = np.mean([m["pct_nodes_with_streak"] for m in all_metrics])
    avg_streak_time = np.mean(all_streak_timesteps) if all_streak_timesteps else None

    print(f"→ Avg % within tolerance: {avg_within_tol:.2f}%")
    print(f"→ Avg % of nodes with streak: {avg_nodes_with_streak:.2f}%")
    # Compare against None explicitly: an average start of 0.0 is a valid
    # result and must not be reported as "no streaks".
    if avg_streak_time is not None:
        print(f"→ Avg streak start timestep: {avg_streak_time:.2f}")
    else:
        print("→ No streaks found.")

    timesteps = list(range(len(avg_rmse)))
    plt.figure(figsize=(14, 6))
    plt.subplot(1, 2, 1)
    plt.plot(timesteps, avg_rmse, label='Average RMSE', marker='o')
    plt.title('Average RMSE per Timestep')
    plt.xlabel('Timestep')
    plt.ylabel('RMSE')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(timesteps, avg_mape, label='Average MAPE', marker='o')
    plt.title('Average MAPE per Timestep')
    plt.xlabel('Timestep')
    plt.ylabel('MAPE')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [None]:
def load_timestep_data(file_path):
    """Read one whitespace-separated timestep file and return its bitrates.

    The file is parsed without a header; its columns are named ``bitrate``
    and ``connection_id`` and only the ``bitrate`` column is returned, as a
    plain Python list.
    """
    frame = pd.read_csv(
        file_path,
        sep=r'\s+',
        header=None,
        names=["bitrate", "connection_id"],
    )
    bitrate_column = frame['bitrate']
    return bitrate_column.tolist()

In [None]:
# --- Experiment configuration -------------------------------------------------
data_folder = 'Zbiory danych/polaczenie_bitrate_7000/normalized/processed_7000_0'  
root_folder = 'Zbiory danych/polaczenie_bitrate_7000/normalized/'
test_name = "LNN-Podstawowy"
history = 5  # number of past timesteps fed to the model per sample
seed = 42

# Seed every RNG the notebook touches so that Restart & Run All reproduces the
# same weight initialisation (torch) and the same random node in the plots
# (random); numpy is seeded as well for completeness.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

model, all_data = train_model(data_folder, input_steps=history, embedding_dim=16, ltc_units=64, dense_units=[20,10], lr=1e-3, batch_size=32, epochs=5)


In [None]:
# test_end is exclusive, so the last timestep the model is stepped to is 7049.
# eval_end is set to that timestep (not 7050) so the averaged loss is divided
# by the number of timesteps that are actually scored.
eval_first, eval_last = 6999, 7049

avg_loss, tol_pct, preds = evaluate_model_continuous(
    model, all_data,
    test_start=6000, test_end=7050,
    eval_start=eval_first, eval_end=eval_last,
    input_steps=history,
    tolerance=0.15
)
# Report the window that was really evaluated rather than a fixed label.
print(f"Eval Loss ({eval_first}-{eval_last}): {avg_loss:.6f}")
print(f"Tolerance %: {tol_pct:.2f}%")



In [None]:
# preds[k] is the prediction for timestep 6999 + k (51 timesteps: 6999..7049),
# so the ground truth must start at 6999 as well; starting at 7000 shifts every
# comparison by one step and leaves real_vals one element short.
real_vals = [arr.flatten().tolist() for arr in all_data[6999:7050]]
assert len(real_vals) == len(preds), (
    f"predictions ({len(preds)}) and real values ({len(real_vals)}) are misaligned"
)
streaks, num_streaks = check_for_streaks(preds, real_vals)
print(f"Streaks found in {num_streaks} nodes: {streaks}")
plot_predictions(preds, real_vals)
print(f"Evaluation from timestep 6999-7049 - Average Loss: {avg_loss:.4f}")

In [None]:
# `input_steps` was never defined at notebook level (NameError on a fresh
# kernel). Bind it to the configured history length; evaluate_lnn_on_all_sets
# reads this module-level name as its window size.
input_steps = history
all_test_data = get_all_test_data_for_lnn(root_folder, input_steps=input_steps)

In [None]:

# Evaluate the trained model on every loaded test set. The callee also writes
# the raw predictions and targets to "<test_name>_results.json".
all_results, summary = evaluate_lnn_on_all_sets(
    model, all_test_data, test_name, tolerance=0.15,
)

In [None]:
# Aggregate the per-test-set summaries into notebook-level averages.
overall_within_tolerance = np.mean([r["percentage_within_tolerance"] for r in summary])
overall_streak_percentage = np.mean([r["percentage_nodes_with_streak"] for r in summary])

# Test sets without any streak report None; average only over the others and
# keep None when there is nothing to average (np.mean([]) would give nan).
streak_starts = [
    r["average_streak_timestep"] for r in summary
    if r["average_streak_timestep"] is not None
]
avg_streak_start = float(np.mean(streak_starts)) if streak_starts else None

print(f"Avg % within tolerance: {overall_within_tolerance:.2f}%")
print(f"Avg % of nodes with streak: {overall_streak_percentage:.2f}%")
if avg_streak_start is not None:
    print(f"Avg streak start timestep: {avg_streak_start:.2f}")
else:
    print("No streaks found.")
print(f"Saved results to: {test_name}_results.json")

In [None]:
# Recompute and plot the metrics from the saved JSON file.
# NOTE(review): tolerance is 0.10 here but 0.15 in the evaluation cells above —
# confirm the stricter bound is intentional.
analyze_lnn_json_results(f"{test_name}_results.json", tolerance=0.10)