In [1]:
import os
import numpy as np
import pandas as pd
import torch
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv
import torch.nn.functional as F
import re
import matplotlib.pyplot as plt
import random
import json
import networkx as nx
from sklearn.metrics import root_mean_squared_error, mean_absolute_percentage_error
from torch.nn import MSELoss
import warnings
warnings.filterwarnings('ignore')

# WCZYTYWANIE DANYCH

In [None]:
def load_timestep_data(file_path):
    """Read one timestep file and return its bitrate column as a Python list.

    The file is parsed as whitespace-separated text without a header row; its
    columns are labelled ``bitrate`` and ``connection_id`` and only the
    ``bitrate`` values are returned, in file order.
    """
    column_names = ["bitrate", "connection_id"]
    frame = pd.read_csv(file_path, sep=r'\s+', header=None, names=column_names)
    return frame["bitrate"].tolist()

In [None]:
def custom_sort_key_files(filename):
    """Sort key for data files: the first run of digits in ``filename`` as an int.

    Names that contain no digits map to ``float('inf')`` so they sort last.
    """
    found = re.search(r"(\d+)", filename)
    return int(found.group(1)) if found else float('inf')

def custom_sort_key_dirs(filename):
    """Sort key for dataset folders named like ``<prefix><main>_<sub>``.

    Returns ``(main, sub)`` as integers taken from the first ``digits_digits``
    pattern in ``filename``; names without such a pattern map to
    ``(inf, inf)`` so they sort last.
    """
    found = re.search(r"(\d+)_(\d+)", filename)
    if found is None:
        return (float('inf'), float('inf'))
    major, minor = (int(part) for part in found.groups())
    return (major, minor)

def get_all_test_data(root_folder, adjacency_matrix, hisotry, avg_length, metric_length):
    """Build the test portion of every ``processed_7000_*`` dataset under ``root_folder``.

    Parameters
    ----------
    root_folder : str
        Directory whose ``processed_7000_*`` entries are loaded, ordered by
        ``custom_sort_key_dirs``.
    adjacency_matrix : numpy.ndarray
        Forwarded unchanged to ``load_data_with_targets``.
    hisotry : int
        Short-history length (forwarded as ``history_length``).
    avg_length : int
        Long-history window length (forwarded as ``metric_length``).
    metric_length : str
        NOTE: despite its name, this argument carries the metric *type*
        ('average', 'median' or 'std'); the parameter names are kept as they
        were for backward compatibility.

    Returns
    -------
    list[list]
        One list of graph samples per dataset, each starting at sample index
        ``6000 - avg_length`` (the first sample whose target is file 6000).
    """
    # Previously the body read the *global* ``metric_type`` and silently
    # ignored this argument; use what the caller actually passed in.
    metric_kind = metric_length

    subfolders = sorted(
        (f for f in os.listdir(root_folder) if f.startswith("processed_7000_")),
        key=custom_sort_key_dirs,
    )

    all_data = []
    for subfolder in subfolders:
        data_folder = os.path.join(root_folder, subfolder)
        graph_data_list = load_data_with_targets(
            data_folder, adjacency_matrix, hisotry, avg_length, metric_kind
        )
        all_data.append(graph_data_list[6000 - avg_length:])
    return all_data
        
def load_data_with_targets(data_folder, adjacency_matrix, history_length, metric_length, metric_type):
    """Turn a folder of per-timestep files into a list of graph samples.

    Files are ordered by the first integer in their name. For every window
    start ``i`` one sample is built from:

    * the ``history_length`` timesteps directly before the target
      (short history),
    * the ``metric_length`` timesteps ``i .. i + metric_length - 1``
      (long history, summarised by ``metric_type``),
    * the target timestep ``i + metric_length``.

    Returns the samples in chronological order.
    """
    files = sorted(os.listdir(data_folder))
    files.sort(key=custom_sort_key_files)
    bitrate_data = [load_timestep_data(os.path.join(data_folder, file)) for file in files]

    # When history_length > metric_length the short window would start at a
    # negative index for early i (which slices from the END of the list and
    # yields an empty history). Start at the first i where it fits instead.
    # For history_length <= metric_length this is 0 and the iteration range is
    # exactly the original one.
    first_start = max(0, history_length - metric_length)

    graph_data_list = []
    for i in range(first_start, len(bitrate_data) - metric_length):
        window_end = i + metric_length
        short_history = bitrate_data[window_end - history_length:window_end]
        long_history = bitrate_data[i:window_end]
        target_bitrate = bitrate_data[window_end]
        graph_data_list.append(
            create_graph_data_with_targets(
                short_history, long_history, target_bitrate, adjacency_matrix, metric_type
            )
        )
    return graph_data_list




import numpy as np

def compute_metric(values, metric_type):
    """Reduce ``values`` along axis 1 with the statistic named by ``metric_type``.

    Supported names are 'average', 'median' and 'std'. The reduced axis is
    kept (``keepdims=True``), so a 2-D input of shape (rows, cols) yields
    (rows, 1).

    Raises:
        ValueError: if ``metric_type`` is not one of the supported names.
    """
    reducers = (('average', np.mean), ('median', np.median), ('std', np.std))
    for name, reducer in reducers:
        if metric_type == name:
            return reducer(values, axis=1, keepdims=True)
    raise ValueError(f"Unknown metric_type: {metric_type}")



def create_graph_data_with_targets(short_history, long_history, target_bitrate, adjacency_matrix, metric_type):
    """Assemble one ``Data`` sample from history windows and a target timestep.

    * ``edge_index`` / ``edge_weight`` come from the non-zero entries of
      ``adjacency_matrix``.
    * ``x`` is the transposed short history (one column per timestep) with one
      extra column holding ``compute_metric`` of the transposed long history.
    * ``y`` is ``target_bitrate`` reshaped to a single column.
    """
    nonzero_positions = np.nonzero(adjacency_matrix)
    edge_index = torch.tensor(np.array(nonzero_positions), dtype=torch.long)
    edge_weight = torch.tensor(adjacency_matrix[nonzero_positions], dtype=torch.float)

    # Transpose so each row collects one entry's values across the window.
    recent = np.array(short_history).T
    summary = compute_metric(np.array(long_history).T, metric_type)
    features = np.hstack([recent, summary])

    return Data(
        x=torch.tensor(features, dtype=torch.float),
        edge_index=edge_index,
        edge_weight=edge_weight,
        y=torch.tensor(target_bitrate, dtype=torch.float).view(-1, 1),
    )



In [None]:
def load_adjacency_matrix(file_path):
    """Load a tab-separated numeric matrix from ``file_path`` as a NumPy array."""
    return np.loadtxt(file_path, delimiter='\t')

# MODEL

In [None]:
class GNNModel(torch.nn.Module):
    """Graph convolution followed by a GRU and a linear read-out.

    The convolution expects ``input_dim * history_length + 1`` input features
    per node: the short-history columns plus the single metric column that
    ``create_graph_data_with_targets`` appends.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, num_gru_layers=3, history_length=2):
        super().__init__()
        self.history_length = history_length
        conv_in_features = input_dim * history_length + 1
        self.conv1 = GCNConv(conv_in_features, hidden_dim)
        self.gru = torch.nn.GRU(hidden_dim, hidden_dim, num_layers=num_gru_layers)
        self.fc = torch.nn.Linear(hidden_dim, output_dim)

    def forward(self, data, h):
        """Run one step and return ``(prediction, new_hidden_state)``.

        Only ``data.x`` and ``data.edge_index`` are passed to the convolution.
        The convolved features get a leading dimension of size 1 before the
        GRU, which is removed again before the linear layer.
        """
        node_embedding = F.relu(self.conv1(data.x, data.edge_index))
        gru_out, h = self.gru(node_embedding.unsqueeze(0), h)
        return self.fc(gru_out.squeeze(0)), h

In [None]:
def detach_hidden_state(hidden_state):
    """Return ``hidden_state`` with every tensor detached from the autograd graph.

    Tensors are detached directly; tuples and lists are rebuilt (same container
    type) with their elements processed recursively; anything else is returned
    unchanged.
    """
    if isinstance(hidden_state, torch.Tensor):
        return hidden_state.detach()
    if not isinstance(hidden_state, (tuple, list)):
        return hidden_state
    container = type(hidden_state)
    return container(map(detach_hidden_state, hidden_state))

# TESTOWANIE, METRYKI

In [None]:
def test_model_with_tolerance(model, test_data_list, starting_index=999, tolerance=0.15, criterion=MSELoss(), detach_interval=8):
    model.eval()
    total_loss = 0
    within_tolerance_count = 0
    total_predictions = 0
    predictions = []
    current_index = 0
    h = None
    step_counter = 0

    with torch.no_grad():
        for data in test_data_list:
            if h is None:
                h = torch.zeros(model.gru.num_layers, data.x.size(0), model.gru.hidden_size).to(data.x.device)
            elif step_counter % detach_interval == 0:
                h = detach_hidden_state(h)

            out, h = model(data, h)
            loss = criterion(out, data.y)
            total_loss += loss.item()
            predictions.append(out.view(-1).tolist())
            real_values = data.y.view(-1).tolist()

            if current_index >= starting_index:
                for real, pred in zip(real_values, out.view(-1).tolist()):
                    if abs(pred - real) <= tolerance * abs(real):
                        within_tolerance_count += 1
                    total_predictions += 1

            current_index += 1
            step_counter += 1

    average_loss = total_loss / len(test_data_list)
    within_tolerance_percentage = (within_tolerance_count / total_predictions) * 100
    return average_loss, within_tolerance_percentage, predictions



In [None]:
def plot_predictions(predictions,real_values=None, node = None):
    """Plot predictions for one node and the across-node average, side by side.

    ``predictions`` (and ``real_values`` when given) are sequences of per-step
    lists. If ``node`` is None a node index is drawn at random. Real values
    are overlaid on both panels only when ``real_values`` is truthy.
    """
    num_nodes = len(predictions[0])
    random_node = node if node is not None else random.randint(0, num_nodes - 1)

    node_predictions = [step[random_node] for step in predictions]
    avg_predictions = [sum(step) / num_nodes for step in predictions]

    # Each panel: (subplot position, predicted series, predicted label,
    #              real-series builder, real label, title, y label)
    panels = (
        (1, node_predictions, 'Predicted',
         lambda: [step[random_node] for step in real_values], 'Real',
         f'Predictions for Node {random_node}', 'Bitrate'),
        (2, avg_predictions, 'Average Predicted',
         lambda: [sum(step) / num_nodes for step in real_values], 'Average Real',
         'Average Predictions Across All Nodes', 'Average Bitrate'),
    )

    plt.figure(figsize=(14, 6))
    for position, predicted, predicted_label, build_real, real_label, title, ylabel in panels:
        plt.subplot(1, 2, position)
        plt.plot(predicted, label=predicted_label, marker='o')
        if real_values:
            plt.plot(build_real(), label=real_label, marker='x')
        plt.title(title)
        plt.xlabel('Timestep')
        plt.ylabel(ylabel)
        plt.legend()

    plt.tight_layout()
    plt.show()



In [None]:
def check_for_streaks(predictions, real_values, tolerance=0.15, window=(999, 1050), streak_length=5):
    """Find, per node, the first run of consecutive in-tolerance predictions.

    Parameters
    ----------
    predictions, real_values : sequence of sequences
        One inner sequence per timestep, indexed by node.
    tolerance : float
        A prediction is a hit when ``|pred - real| <= tolerance * |real|``.
    window : tuple[int, int]
        ``(start, stop)`` slice applied to both inputs before the search.
        Defaults to the previously hard-coded ``999:1050``.
    streak_length : int
        Number of consecutive hits that constitutes a streak (previously the
        literal 5).

    Returns
    -------
    (streak_info, num_streaks)
        ``streak_info`` maps node -> index (relative to the window start) of
        the first timestep of its first streak; ``num_streaks`` is the number
        of such nodes. An empty window yields ``({}, 0)`` instead of raising
        IndexError.
    """
    start, stop = window
    real_values = real_values[start:stop]
    predictions = predictions[start:stop]

    streak_info = {}
    if len(predictions) == 0:
        return streak_info, 0

    num_nodes = len(predictions[0])
    for node in range(num_nodes):
        streak = 0
        for timestep, pred_row in enumerate(predictions):
            pred = pred_row[node]
            real = real_values[timestep][node]
            if abs(pred - real) <= tolerance * abs(real):
                streak += 1
                if streak == streak_length:
                    # Report where the streak began, not where it completed.
                    streak_info[node] = timestep - (streak_length - 1)
                    break
            else:
                streak = 0

    return streak_info, len(streak_info)

In [None]:
def analyze_predictions(predictions, real_values, error_nodes_list, tolerance=0.15, window=(1000, 1050)):
    """Build a plain-text report of per-node prediction accuracy inside a window.

    Parameters
    ----------
    predictions, real_values : sequence of sequences
        One inner sequence per timestep, indexed by node.
    error_nodes_list : iterable
        Only these nodes appear in the per-node table and in the top/bottom-5
        summary. The grouped averages and the overall average use all nodes.
    tolerance : float
        A prediction is a hit when ``|pred - real| <= tolerance * |real|``.
    window : tuple[int, int]
        ``(start, stop)`` slice applied to both inputs (defaults to the
        previously hard-coded ``1000:1050``).

    Returns
    -------
    str
        The per-node table, the groups-of-10 table and the top/bottom summary
        joined by newlines.

    Raises
    ------
    IndexError
        If the window contains no timesteps.
    """
    start_step, stop_step = window
    real_values = real_values[start_step:stop_step]
    predictions = predictions[start_step:stop_step]
    if len(predictions) == 0:
        raise IndexError(f"no predictions inside window {start_step}:{stop_step}")

    num_steps = len(predictions)
    num_nodes = len(predictions[0])

    streak_info = {}
    percentage_within_tolerance = {}
    for node in range(num_nodes):
        streak = 0
        within_tolerance_count = 0
        for timestep in range(num_steps):
            pred = predictions[timestep][node]
            real = real_values[timestep][node]
            if abs(pred - real) <= tolerance * abs(real):
                streak += 1
                within_tolerance_count += 1
                # Remember only the first 5-long streak of each node.
                if streak == 5 and node not in streak_info:
                    streak_info[node] = timestep - 4
            else:
                streak = 0
        percentage_within_tolerance[node] = (within_tolerance_count / num_steps) * 100

    # Best-to-worst ranking restricted to the requested nodes. (An earlier,
    # unfiltered sort was computed and immediately overwritten; it is gone.)
    selected_nodes = set(error_nodes_list)
    sorted_nodes = sorted(
        ((node, percentage) for node, percentage in percentage_within_tolerance.items()
         if node in selected_nodes),
        key=lambda item: item[1],
        reverse=True
    )

    table = [
        f"{'Node':<10} {'Streak Start':<15} {'% Within Tolerance':<20}",
        "=" * 45,
    ]
    for node, percentage in sorted_nodes:
        streak_start = streak_info.get(node, "None")
        table.append(f"{node:<10} {streak_start:<15} {percentage:<20.2f}%")

    group_table = [
        "\n\nGrouped Node Performance (Groups of 10):",
        f"{'Group':<15} {'Average % Within Tolerance':<30}",
        "=" * 45,
    ]
    for start in range(0, num_nodes, 10):
        group_nodes = list(range(start, min(start + 10, num_nodes)))
        group_percents = [percentage_within_tolerance[n] for n in group_nodes]
        avg_percent = sum(group_percents) / len(group_percents) if group_percents else 0
        # Label with the real last node of the group; the final group may hold
        # fewer than 10 nodes and used to be mislabelled as start..start+9.
        group_table.append(f"{start}-{group_nodes[-1]:<12} {avg_percent:<30.2f}%")

    all_avg = sum(percentage_within_tolerance.values()) / num_nodes if num_nodes else 0
    group_table.append("=" * 45)
    group_table.append(f"{'Overall Avg':<15} {all_avg:<30.2f}%")

    summary = ["\nTop 5 Nodes:"]
    summary.extend(f"Node {node:<3}: {perc:.2f}%" for node, perc in sorted_nodes[:5])
    summary.append("\nBottom 5 Nodes:")
    summary.extend(f"Node {node:<3}: {perc:.2f}%" for node, perc in sorted_nodes[-5:])

    return "\n".join(table + group_table + summary)


def extract_and_sort_sub_numbers(root_folder):
    """Return the ``sub`` numbers of the sub-directories of ``root_folder``.

    Directories are ordered with ``custom_sort_key_dirs``; for each name that
    contains a ``<digits>_<digits>`` pattern the second number is collected.
    Names without the pattern are skipped.
    """
    directories = [
        entry for entry in os.listdir(root_folder)
        if os.path.isdir(os.path.join(root_folder, entry))
    ]
    directories.sort(key=custom_sort_key_dirs)

    matches = (re.search(r"(\d+)_(\d+)", name) for name in directories)
    return [int(found.group(2)) for found in matches if found]

# Testowanie i metryki dla wszystkich testów

In [None]:
def test_all_sets_with_extended_run(model, all_test_data, name_of_test,criterion, optimizer, tolerance=0.15, incremental= 50, detach=8, metric_length= 100):
    """Run the incremental evaluation on every test set, starting each from the same weights.

    For each test set the model is reset to the weights it had on entry, then
    ``evaluate_and_update_with_tolerance_metrics`` is run with
    ``eval_window=(999, 1050)``. Streaks are searched with
    ``check_for_streaks`` and per-node RMSE / MAPE are computed for timesteps
    1000..1049.

    Parameters
    ----------
    model : torch.nn.Module
        Model to evaluate and update; its entry weights are restored before
        every test set.
    all_test_data : list[list]
        One list of samples per test set.
    name_of_test : str
        Predictions and real values of all sets are written to
        ``name_of_test + '_results.json'``.
    criterion, optimizer
        Forwarded to the incremental evaluation. NOTE: ``optimizer`` must have
        been built over ``model.parameters()``, otherwise the updates do not
        touch ``model``. Only the model weights are reset between sets; the
        optimizer object is reused as-is.
    tolerance : float
        Relative tolerance, used for both the tolerance percentage and the
        streak search.
    incremental, detach, metric_length
        Forwarded as ``incremental_step``, ``detach_interval`` and
        ``metric_length``.

    Returns
    -------
    (all_results, overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep)
    """
    import copy

    all_results = []
    test_restults_all = []
    all_streak_timesteps = []

    initial_model_state = copy.deepcopy(model.state_dict())

    for test_data_list in all_test_data:
        model.load_state_dict(copy.deepcopy(initial_model_state))

        # Pass the caller's tolerance through; it used to be overridden by a
        # literal 0.15 here while the streak search below honoured it.
        _, within_tolerance_percentage, predictions = evaluate_and_update_with_tolerance_metrics(
            model, test_data_list, optimizer, criterion,
            incremental_step=incremental, tolerance=tolerance,
            eval_window=(999, 1050), detach_interval=detach, metric_length=metric_length)
        real_values = [data.y.view(-1).tolist() for data in test_data_list]

        streak_info, _ = check_for_streaks(predictions, real_values, tolerance)
        streak_nodes = set(streak_info.keys())
        streak_timesteps = list(streak_info.values())

        # Each metric call receives a single (real, predicted) pair, so the
        # result is one value per node and timestep.
        rmse_per_timestep = []
        mape_per_timestep = []
        for timestep in range(1000, 1050):
            rmse_node = []
            mape_node = []
            for node in range(len(predictions[0])):
                rmse_node.append(root_mean_squared_error([real_values[timestep][node]], [predictions[timestep][node]]))
                mape_node.append(mean_absolute_percentage_error([real_values[timestep][node]], [predictions[timestep][node]]))
            rmse_per_timestep.append(rmse_node)
            mape_per_timestep.append(mape_node)

        num_streak_nodes = len(streak_nodes)
        test_restults_all.append({
            "number_of_streak_nodes": num_streak_nodes,
            "average_streak_timestep": np.mean(streak_timesteps) if streak_timesteps else None,
            "percentage_within_tolerance": within_tolerance_percentage,
            "percentage_nodes_with_streak": (num_streak_nodes / len(predictions[0])) * 100,
            "rmse_per_timestep": rmse_per_timestep,
            "mape_per_timestep": mape_per_timestep
        })
        all_results.append({
            "predictions": predictions,
            "real_values": real_values
        })
        all_streak_timesteps.extend(streak_timesteps)

    with open(name_of_test +'_results.json', 'w') as json_file:
        json.dump(all_results, json_file, indent=4)

    overall_percentage_within_tolerance = np.mean([result["percentage_within_tolerance"] for result in test_restults_all])
    overall_avg_rmse_per_timestep = np.mean([result["rmse_per_timestep"] for result in test_restults_all], axis=0)
    overall_avg_mape_per_timestep = np.mean([result["mape_per_timestep"] for result in test_restults_all], axis=0)
    overall_percentage_nodes_with_streak = np.mean([result["percentage_nodes_with_streak"] for result in test_restults_all])
    avg_streak_timestep = np.mean(all_streak_timesteps) if all_streak_timesteps else None

    # None cannot be formatted with ':.2f'; print a placeholder when no node
    # produced a streak.
    streak_text = f"{avg_streak_timestep:.2f}" if avg_streak_timestep is not None else "N/A"

    print(f"Overall Percentage of Nodes with Streak: {overall_percentage_nodes_with_streak:.2f}%")
    print(f"Overall Percentage of Predictions within Tolerance: {overall_percentage_within_tolerance:.2f}%")
    print(f"Overall Average Timestep at which Streak Happened: {streak_text}")

    return all_results, overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep

def plot_average_metrics(overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep):
    """Plot averaged RMSE (left) and MAPE (right) against timesteps 1000..1049."""
    timesteps = range(1000, 1050)

    # (series, legend label, title, y-axis label) for each panel, left to right.
    panels = (
        (overall_avg_rmse_per_timestep, 'Average RMSE', 'Average RMSE per Timestep', 'RMSE'),
        (overall_avg_mape_per_timestep, 'Average MAPE', 'Average MAPE per Timestep', 'MAPE'),
    )

    plt.figure(figsize=(14, 6))
    for position, (series, label, title, ylabel) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(timesteps, series, label=label, marker='o')
        plt.title(title)
        plt.xlabel('Timestep')
        plt.ylabel(ylabel)
        plt.legend()

    plt.tight_layout()
    plt.show()

def calculate_metrics_from_json(json_file_path, tolerance=0.15):
    """Recompute tolerance, streak, RMSE and MAPE metrics from a saved results file.

    The file must contain a list of objects with ``"predictions"`` and
    ``"real_values"`` (one inner list per timestep, indexed by node).

    Per test set:
    * the within-tolerance percentage is computed over ALL stored timesteps,
    * streaks come from ``check_for_streaks`` (its own default window),
    * RMSE / MAPE are averaged over nodes for timesteps 1000..1049.

    Returns
    -------
    (all_metrics, overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep)
        ``all_metrics`` holds one dict per test set; the two arrays are the
        per-timestep means across test sets.
    """
    with open(json_file_path, 'r') as json_file:
        all_results = json.load(json_file)

    all_metrics = []
    all_streak_timesteps = []

    for test_result in all_results:
        predictions = test_result["predictions"]
        real_values = test_result["real_values"]

        total_within_tolerance = 0
        total_predictions = 0
        for real_list, pred_list in zip(real_values, predictions):
            for real, pred in zip(real_list, pred_list):
                if abs(pred - real) <= tolerance * abs(real):
                    total_within_tolerance += 1
                total_predictions += 1

        streak_info, _ = check_for_streaks(predictions, real_values, tolerance)
        streak_nodes = set(streak_info.keys())
        streak_timesteps = list(streak_info.values())

        rmse_per_timestep = []
        mape_per_timestep = []
        for timestep in range(1000, 1050):
            rmse_timestep = []
            mape_timestep = []
            for node in range(len(predictions[0])):
                rmse_timestep.append(root_mean_squared_error([real_values[timestep][node]], [predictions[timestep][node]]))
                mape_timestep.append(mean_absolute_percentage_error([real_values[timestep][node]], [predictions[timestep][node]]))
            rmse_per_timestep.append(np.mean(rmse_timestep))
            mape_per_timestep.append(np.mean(mape_timestep))

        num_streak_nodes = len(streak_nodes)
        all_metrics.append({
            "number_of_streak_nodes": num_streak_nodes,
            "average_streak_timestep": np.mean(streak_timesteps) if streak_timesteps else None,
            # Guard against a result set without any stored values.
            "percentage_within_tolerance": (
                (total_within_tolerance / total_predictions) * 100 if total_predictions > 0 else 0
            ),
            "percentage_nodes_with_streak": (num_streak_nodes / len(predictions[0])) * 100,
            "rmse_per_timestep": rmse_per_timestep,
            "mape_per_timestep": mape_per_timestep
        })
        all_streak_timesteps.extend(streak_timesteps)

    overall_percentage_within_tolerance = np.mean([metric["percentage_within_tolerance"] for metric in all_metrics])
    overall_avg_rmse_per_timestep = np.mean([metric["rmse_per_timestep"] for metric in all_metrics], axis=0)
    overall_avg_mape_per_timestep = np.mean([metric["mape_per_timestep"] for metric in all_metrics], axis=0)
    overall_percentage_nodes_with_streak = np.mean([metric["percentage_nodes_with_streak"] for metric in all_metrics])
    avg_streak_timestep = np.mean(all_streak_timesteps) if all_streak_timesteps else None

    # None cannot be formatted with ':.2f'; print a placeholder when no node
    # produced a streak.
    streak_text = f"{avg_streak_timestep:.2f}" if avg_streak_timestep is not None else "N/A"

    print(f"Overall Percentage of Nodes with Streak: {overall_percentage_nodes_with_streak:.2f}%")
    print(f"Overall Percentage of Predictions within Tolerance: {overall_percentage_within_tolerance:.2f}%")
    print(f"Overall Average Timestep at which Streak Happened: {streak_text}")

    return all_metrics, overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep


In [None]:
def visualize_predictions_on_network(json_path, adjacency_matrix, test_index=0, tolerance=0.15):
    """Draw the network with nodes coloured by their share of in-tolerance predictions.

    Reads entry ``test_index`` from the results JSON at ``json_path``. For
    every node it computes, over all stored timesteps, the percentage of
    predictions within ``tolerance`` and the start of the first 5-long hit
    streak. Nodes are labelled with their id and, when one exists, the streak
    start. The graph is built from the positive entries of
    ``adjacency_matrix``.
    """
    import matplotlib.pyplot as plt
    import networkx as nx
    import json

    with open(json_path, 'r') as handle:
        result = json.load(handle)[test_index]
    predictions = result["predictions"]
    real_values = result["real_values"]
    num_nodes = len(predictions[0])
    num_steps = len(predictions)

    streak_info = {}
    percentage_within_tolerance = {}
    for node in range(num_nodes):
        run_length = 0
        hits = 0
        for timestep in range(num_steps):
            predicted = predictions[timestep][node]
            actual = real_values[timestep][node]
            if abs(predicted - actual) <= tolerance * abs(actual):
                run_length += 1
                hits += 1
                # Keep only the first streak of each node.
                if run_length == 5 and node not in streak_info:
                    streak_info[node] = timestep - 4
            else:
                run_length = 0
        percentage_within_tolerance[node] = (hits / num_steps) * 100

    # Undirected graph containing every pair with a positive matrix entry.
    G = nx.Graph()
    size = adjacency_matrix.shape[0]
    for i in range(size):
        for j in range(size):
            weight = adjacency_matrix[i][j]
            if weight > 0:
                G.add_edge(i, j, weight=weight)

    pos = nx.spring_layout(G, seed=42)
    node_colors = [percentage_within_tolerance.get(n, 0) for n in G.nodes()]
    labels = {}
    for n in G.nodes():
        labels[n] = f"{n}\n{streak_info[n]}" if n in streak_info else str(n)

    plt.figure(figsize=(12, 10))
    nx.draw_networkx_edges(G, pos, alpha=0.3)
    drawn_nodes = nx.draw_networkx_nodes(G, pos, node_color=node_colors, cmap='plasma', node_size=800)
    nx.draw_networkx_labels(G, pos, labels, font_size=9)
    colorbar = plt.colorbar(drawn_nodes)
    colorbar.set_label('% Within Tolerance')
    plt.title(f"Prediction Accuracy Visualization - Test {test_index}")
    plt.axis('off')
    plt.show()


# UCZENIE

In [None]:
def train(model, train_loader, optimizer, criterion):
    """Run one pass over ``train_loader``, taking one optimizer step per batch.

    The GRU hidden state starts as zeros (sized from the first batch) and is
    carried from batch to batch; it is detached before every subsequent batch
    so gradients never flow across batches.
    """
    model.train()
    hidden = None
    for batch in train_loader:
        optimizer.zero_grad()
        if hidden is not None:
            hidden = detach_hidden_state(hidden)
        else:
            hidden = torch.zeros(
                model.gru.num_layers, batch.x.size(0), model.gru.hidden_size
            ).to(batch.x.device)
        prediction, hidden = model(batch, hidden)
        batch_loss = criterion(prediction, batch.y)
        batch_loss.backward()
        optimizer.step()

In [None]:
def evaluate_and_update(model, test_data, optimizer, criterion, incremental_step=10):
    """Evaluate sample by sample, fine-tuning on each completed chunk of samples.

    Every sample is first scored with the current weights. After each
    ``incremental_step`` samples, ``train`` is run on exactly those samples
    and the buffer is cleared.

    Parameters
    ----------
    model : torch.nn.Module
        Must expose ``model.gru.num_layers`` / ``model.gru.hidden_size``.
    test_data : sequence
        Samples, wrapped in a non-shuffling ``DataLoader`` with batch size 1.
    optimizer, criterion
        Used by ``train`` for the incremental updates; ``criterion`` also
        produces the reported loss.
    incremental_step : int
        Number of evaluated samples between two updates.

    Returns
    -------
    float
        Mean evaluation loss (0.0 when ``test_data`` is empty).
    """
    model.eval()
    total_loss = 0
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    new_training_data = []

    h = None
    for i, data in enumerate(test_loader):
        # Scoring never backpropagates, so skip building autograd graphs.
        with torch.no_grad():
            if h is None:
                h = torch.zeros(model.gru.num_layers, data.x.size(0), model.gru.hidden_size).to(data.x.device)
            else:
                h = detach_hidden_state(h)
            out, h = model(data, h)
            total_loss += criterion(out, data.y).item()
        new_training_data.append(data)

        if (i + 1) % incremental_step == 0:
            print(f"Updating model at timestep {i+1} with {incremental_step} new timesteps")
            train(model, DataLoader(new_training_data, batch_size=1, shuffle=False), optimizer, criterion)
            # train() switches to train mode; go back to eval mode for scoring.
            model.eval()
            new_training_data = []

    num_batches = len(test_loader)
    avg_loss = total_loss / num_batches if num_batches else 0.0
    print(f"Average Loss after evaluation: {avg_loss}")
    return avg_loss


In [None]:
def evaluate_and_update_with_tolerance_metrics(model, test_data, optimizer, criterion, 
                                               incremental_step=20, tolerance=0.15, 
                                               eval_window=(1000, 1050), detach_interval=8, metric_length=100):
    """Incremental evaluation that also records predictions and a tolerance score.

    Every sample is scored with the current weights; after each
    ``incremental_step`` samples, ``train`` is run on exactly those samples.

    Parameters
    ----------
    model : torch.nn.Module
        Must expose ``model.gru.num_layers`` / ``model.gru.hidden_size``.
    test_data : sequence
        Samples, wrapped in a non-shuffling ``DataLoader`` with batch size 1.
    optimizer, criterion
        Used by ``train`` for the incremental updates; ``criterion`` also
        produces the reported loss.
    incremental_step : int
        Number of evaluated samples between two updates.
    tolerance : float
        A prediction is a hit when ``|pred - real| <= tolerance * |real|``.
    eval_window : tuple[int, int]
        Step indices counted for the tolerance score; BOTH ends are inclusive.
    detach_interval : int
        The hidden state is passed through ``detach_hidden_state`` every
        ``detach_interval`` steps.
    metric_length : int
        Accepted for call compatibility; not used by this function.

    Returns
    -------
    (avg_loss, within_tolerance_percentage, predictions)
        ``predictions`` holds one flat list per evaluated sample. Both numbers
        are 0 when there is nothing to average.
    """
    model.eval()
    total_loss = 0
    test_loader = DataLoader(test_data, batch_size=1, shuffle=False)

    new_training_data = []
    within_tolerance_count = 0
    total_predictions = 0
    predictions = []
    h = None
    window_start, window_end = eval_window[0], eval_window[1]

    for i, data in enumerate(test_loader):
        # Scoring never backpropagates, so skip building autograd graphs.
        with torch.no_grad():
            if h is None:
                h = torch.zeros(model.gru.num_layers, data.x.size(0), model.gru.hidden_size).to(data.x.device)
            elif i % detach_interval == 0:
                h = detach_hidden_state(h)
            out, h = model(data, h)
            total_loss += criterion(out, data.y).item()
        new_training_data.append(data)

        pred_list = out.view(-1).tolist()
        predictions.append(pred_list)

        if window_start <= i <= window_end:
            true_list = data.y.view(-1).tolist()
            for pred, real in zip(pred_list, true_list):
                if abs(pred - real) <= tolerance * abs(real):
                    within_tolerance_count += 1
                total_predictions += 1

        if (i + 1) % incremental_step == 0:
            train(model, DataLoader(new_training_data, batch_size=1, shuffle=False), optimizer, criterion)
            # train() switches to train mode; go back to eval mode for scoring.
            model.eval()
            new_training_data = []

    num_batches = len(test_loader)
    avg_loss = total_loss / num_batches if num_batches else 0
    within_tolerance_percentage = (
        (within_tolerance_count / total_predictions) * 100 if total_predictions > 0 else 0
    )

    return avg_loss, within_tolerance_percentage, predictions


In [None]:
def load_model(model_class, model_path, *model_args, **model_kwargs):
    """Instantiate ``model_class`` and load a saved state dict into it.

    The model is built as ``model_class(*model_args, **model_kwargs)``; the
    state dict at ``model_path`` is read with its tensors mapped to the CPU.
    """
    cpu = torch.device('cpu')
    model = model_class(*model_args, **model_kwargs)
    weights = torch.load(model_path, map_location=cpu)
    model.load_state_dict(weights)
    return model

# ZMIENNE, INICJALIZACJA

In [None]:
# Input locations: one dataset folder used for training/validation, the
# adjacency-matrix file, and the root folder that get_all_test_data scans for
# "processed_7000_*" sub-folders.
data_folder = 'Zbiory danych/polaczenie_bitrate_7000/normalized/processed_7000_0'  
file_path = 'Zbiory danych/matrix.net'  
root_folder = "Zbiory danych/polaczenie_bitrate_7000/normalized"
adjacency_matrix = load_adjacency_matrix(file_path) 



# Hyperparameters.
history_length = 2        # number of most recent timesteps used as raw per-row features
detach = 8                # passed as detach_interval to the evaluation routines
inc_step = 60             # passed as incremental_step: samples between two fine-tuning runs
metric_length = 200       # length of the long-history window summarised by metric_type
metric_type = 'average'   # one of the names accepted by compute_metric
hidden_dim = 200          # width of the GCN output, the GRU and the linear layer input
graph_data_list = load_data_with_targets(
                data_folder, adjacency_matrix, history_length, metric_length, metric_type
            )

# Prefix of every file written by this notebook (.pth checkpoints, results JSON).
name_of_test = "GNN-Ostatni-HPO"



# Sample i targets file position i + metric_length, so the first
# 6000 - metric_length samples are exactly those whose target lies before
# position 6000; the remainder (up to sample 7100, if that many exist) is
# used for validation.
train_data = graph_data_list[:6000 - metric_length]
val_data = graph_data_list[6000 - metric_length:7100]


# NOTE(review): train() carries the GRU hidden state from one batch to the
# next, while shuffle=True feeds the samples in random order — confirm this is
# intended (the evaluation loaders use shuffle=False).
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)

model = GNNModel(input_dim=1, hidden_dim=hidden_dim, output_dim=1,  num_gru_layers=1, history_length=history_length)
# This optimizer is bound to the parameters of `model` only.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()

In [None]:
# Load the test portion of every dataset folder under root_folder. The
# arguments are positional: they bind to get_all_test_data's parameters named
# hisotry, avg_length and metric_length, in that order.
all_tests = get_all_test_data(root_folder, adjacency_matrix, history_length, metric_length, metric_type)

In [None]:
# One pass over the training loader, then checkpoint the weights as they are
# before any incremental update.
train(model, train_loader, optimizer, criterion)

torch.save(model.state_dict(), name_of_test + "_before_incremental.pth")


In [None]:

# Incremental evaluation on the validation samples: the model is fine-tuned
# after every inc_step samples. eval_window is inclusive on both ends.
# metric_length is accepted by the callee but not used inside it.
avg_loss, within_tolerance_percentage, predictions = evaluate_and_update_with_tolerance_metrics(
    model, val_data, optimizer, criterion,
    incremental_step=inc_step,
    tolerance=0.15,
    eval_window=(1000, 1050),
    detach_interval=detach,
    metric_length=metric_length
)
# Checkpoint the weights after the incremental updates.
torch.save(model.state_dict(), name_of_test + "_after_incremental.pth")

print(f'Average Loss: {avg_loss}')
print(f'Percentage of Predictions within Tolerance: {within_tolerance_percentage:.2f}%')

# Ground truth in the same layout as `predictions`: one flat list per sample.
real_values = [data.y.view(-1).tolist() for data in val_data]

In [None]:
# First 5-long in-tolerance streak per node (default tolerance), with start
# indices relative to the window used inside check_for_streaks.
streak_info, num_streaks = check_for_streaks(predictions, real_values)
print(streak_info)
print(num_streaks)
# Mean streak start over the nodes that have one; infinity when none does.
avg_streak_start = (
                sum(streak_info.values()) / len(streak_info) if streak_info else float('inf')
            )
print(f"Average Streak Start: {avg_streak_start:.2f}")

In [None]:
# Second number of each "<digits>_<digits>" sub-directory name under root_folder.
numbers = extract_and_sort_sub_numbers(root_folder)

# The per-node table is restricted to numbers[1:], i.e. every collected number
# except the first one.
# NOTE(review): presumably these numbers identify the nodes of interest — confirm.
table = analyze_predictions(predictions, real_values, numbers[1:])
print(table)

In [None]:
# Predicted vs. real values for node 79 and the across-node average, for
# samples 1000..1049.
plot_predictions(predictions[1000:1050], real_values[1000:1050], 79)

In [None]:
# Rebuild a fresh model instance from the checkpoint saved before the
# incremental updates; the constructor arguments match those used for `model`.
model_path = name_of_test + "_before_incremental.pth"
model2 = load_model(
    GNNModel,
    model_path,
    input_dim=1,
    hidden_dim=hidden_dim,
    output_dim=1,
    num_gru_layers=1,
    history_length=history_length
)

In [None]:
# `optimizer` was created over model.parameters(), so stepping it can never
# change model2: the incremental updates inside the test run would be no-ops
# for the model actually under test. Build an optimizer that owns model2's
# parameters (same settings as the original one) and pass that instead.
optimizer_model2 = torch.optim.Adam(model2.parameters(), lr=0.001)
results = test_all_sets_with_extended_run(
    model2, all_tests, name_of_test, criterion, optimizer_model2,
    tolerance=0.15, incremental=inc_step, detach=detach, metric_length=metric_length,
)

In [None]:
# Recompute the metrics from the results file written by the test run and plot
# the per-timestep averages.
# NOTE(review): tolerance is 0.10 here while every other call in this notebook
# uses 0.15 — confirm the stricter value is intended.
json_file_path = name_of_test + '_results.json'
metrics, overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep = calculate_metrics_from_json(json_file_path, tolerance=0.10)
plot_average_metrics(overall_avg_rmse_per_timestep, overall_avg_mape_per_timestep)

In [None]:
def visualize_directed_predictions(adj_path, json_path, test_index=0, tolerance=0.15):
    """Draw the directed topology with edges coloured by prediction accuracy.

    Reads entry ``test_index`` of the results JSON at ``json_path`` and keeps
    timesteps 999..1049. For every prediction column it computes the share of
    predictions within ``tolerance`` and the start of the first 5-long hit
    streak. The tab-separated matrix at ``adj_path`` defines the directed
    edges; the k-th positive entry in row-major order is annotated and coloured
    with the metrics of prediction column k.

    NOTE(review): a KeyError is raised if the matrix has more positive entries
    than there are prediction columns — confirm the two always match.
    """

    with open(json_path, 'r') as f:
        all_results = json.load(f)
    result = all_results[test_index]
    # Prints how many timesteps are stored for this test set.
    print(len(result["predictions"]))
    predictions = result["predictions"][999:1050]
    real_values = result["real_values"][999:1050]

    # Per prediction column: percentage of hits and start of the first streak
    # (index relative to the sliced window), or None when there is no streak.
    connection_metrics = {}
    for conn_id in range(len(predictions[0])):
        streak = 0
        streak_step = None
        within_tol = 0
        for t in range(len(predictions)):
            pred = predictions[t][conn_id]
            real = real_values[t][conn_id]
            if abs(pred - real) <= tolerance * abs(real):
                streak += 1
                within_tol += 1
                if streak == 5 and streak_step is None:
                    streak_step = t - 4
            else:
                streak = 0
        percentage = (within_tol / len(predictions)) * 100
        connection_metrics[conn_id] = {
            "procent_w_granicach_tolerancji": percentage,
            "poczatek_tconv": streak_step
        }

    # Parse the adjacency matrix by hand: non-empty lines, tab-separated floats.
    with open(adj_path, 'r') as f:
        lines = [line.strip() for line in f if line.strip()]
    matrix = [list(map(float, line.split('\t'))) for line in lines]
    V = len(matrix)

    G = nx.DiGraph()
    straight_edges = []
    curved_edges = []
    straight_colors = []
    curved_colors = []
    edge_annotations = {}
    conn_id = 0

    # Walk the matrix in row-major order; each positive entry becomes an edge
    # and consumes the next prediction column id.
    for i in range(V):
        for j in range(V):
            if matrix[i][j] > 0:
                G.add_edge(i, j, weight=matrix[i][j])
                label = f"{conn_id}"
                percent = connection_metrics[conn_id]["procent_w_granicach_tolerancji"]
                streak = connection_metrics[conn_id]["poczatek_tconv"]
                annotation = f"{label}\nTconv:{streak}" if streak is not None else label
                edge_annotations[(i, j)] = annotation

                # If the reverse edge was already seen, draw this one curved so
                # the two directions do not overlap.
                if (j, i) in edge_annotations:
                    curved_edges.append((i, j))
                    curved_colors.append(percent / 100.0)
                else:
                    straight_edges.append((i, j))
                    straight_colors.append(percent / 100.0)

                conn_id += 1

    pos = nx.spring_layout(G, seed=42, k=1.5)
    # Swap and negate the layout coordinates of every node.
    pos = {k: (-v[1], -v[0]) for k, v in pos.items()}

    plt.figure(figsize=(22, 18))
    nx.draw_networkx_nodes(G, pos, node_color='lightblue', node_size=600)
    nx.draw_networkx_labels(G, pos, font_size=10)

    # Colours are fractions in [0, 1] mapped through the plasma colormap.
    nx.draw_networkx_edges(
        G, pos,
        edgelist=straight_edges,
        width=1.5,
        edge_color=straight_colors,
        edge_cmap=plt.cm.plasma,
        edge_vmin=0.0,
        edge_vmax=1.0,
        arrows=True
    )

    nx.draw_networkx_edges(
        G, pos,
        edgelist=curved_edges,
        width=1.5,
        edge_color=curved_colors,
        edge_cmap=plt.cm.plasma,
        edge_vmin=0.0,
        edge_vmax=1.0,
        arrows=True,
        connectionstyle="arc3,rad=0.25"
    )

    # Place each annotation at the edge midpoint, shifted up or down depending
    # on the edge direction so opposite edges get separate labels.
    for (i, j), text in edge_annotations.items():
        x = (pos[i][0] + pos[j][0]) / 2
        y = (pos[i][1] + pos[j][1]) / 2
        offset = 0.04 if (i < j) else -0.04
        plt.text(x, y + offset, text, fontsize=8, ha='center', va='center',
                 bbox=dict(facecolor='white', alpha=0.6, boxstyle='round,pad=0.2'))

    # Stand-alone colour scale in percent (0..100) for the colorbar.
    sm = plt.cm.ScalarMappable(cmap=plt.cm.plasma,
                               norm=plt.Normalize(vmin=0, vmax=100))
    sm.set_array([])
    cbar = plt.colorbar(sm)
    cbar.set_label('% Prognoz w Granicach Tolerancji')

    plt.title("Wizualizacja topologii z wynikami", fontsize=16)
    plt.axis('off')
    plt.show()


In [None]:
# Draw the directed topology for the first test set, using the default
# tolerance of visualize_directed_predictions.
visualize_directed_predictions(
    adj_path=file_path,
    json_path= name_of_test + "_results.json",
    test_index=0
)
