In [1]:
import math
import numpy as np
import wandb
import random
import torch
import torch_geometric
from torch_geometric.data import Data
import sys
import os
from tqdm import tqdm
import signal
import joblib
import argparse
import json
import os
import subprocess
from torch.utils.data import DataLoader, Dataset, Subset
from sklearn.preprocessing import StandardScaler

# Make the parent directory importable (presumably the project's 'scripts'
# directory -- confirm against the repo layout) so the project-local modules
# imported in the next cell can be resolved.
scripts_path = os.path.abspath('..')
if scripts_path not in sys.path:
    sys.path.append(scripts_path)

In [2]:
import training.help_functions as hf
from data_preprocessing.process_simulations_for_gnn import EdgeFeatures

import gnn_io as gio
import gnn_architecture as garch

In [3]:
# Directory that is searched for the `datalist_batch_<n>.pt` files read by the
# loading cell below. Relative to the notebook's working directory.
dataset_path = '../../data/train_data/sim_output_1pm_22_10_2024/'

In [None]:
# Load the pre-batched samples stored under `dataset_path` into one flat list.
#
# `max_batches` caps how many `datalist_batch_<n>.pt` files are read. It is 1,
# so only the first batch is loaded (the previous version of this cell reached
# the same result through an unconditional `break`). Set it to None to keep
# reading consecutive batch files until the first missing one.
max_batches = 1

datalist = []
batch_num = 1
try:
    while max_batches is None or batch_num <= max_batches:
        batch_file = os.path.join(dataset_path, f'datalist_batch_{batch_num}.pt')
        if not os.path.exists(batch_file):
            # Batch files are numbered consecutively; the first gap ends the scan.
            break
        print(f"Processing batch number: {batch_num}")
        # NOTE: torch.load unpickles the file, which can execute arbitrary code.
        # Only point this at batch files from a trusted source.
        batch_data = torch.load(batch_file, map_location='cpu')
        if isinstance(batch_data, list):
            datalist.extend(batch_data)
        else:
            # Report unexpected payloads instead of dropping them silently.
            print(f"Skipping {batch_file}: expected a list, got {type(batch_data).__name__}")
        batch_num += 1
    print(f"Loaded {len(datalist)} items into datalist")

except Exception as e:
    # Best-effort load: report the problem and keep whatever was read so far.
    print(f"An error occurred: {str(e)}")

In [5]:
# Scratch output directory and run settings for this exploratory notebook.
base_dir = "temp/"
params = dict(
    batch_size=8,
    unique_model_description="eda",
    # One entry per member of the EdgeFeatures enum, in declaration order.
    node_features=[member.name for member in EdgeFeatures],
)

In [11]:
# Pull one feature column of a single sample out as a NumPy array for plotting.
# NOTE(review): assumes the loaded batch holds at least 50 samples -- confirm.
sample_index, feature_col = 49, 5
x_highway = datalist[sample_index].x[:, feature_col].numpy()

In [None]:
import matplotlib.pyplot as plt

# Histogram of x_highway using ten unit-wide bins whose left edges run from
# -1 to 9; align='left' centres each bar on its bin's left edge.
fig, ax = plt.subplots()
ax.hist(x_highway, bins=range(-1, 11), edgecolor='black', align='left')
ax.set(xlabel='Value', ylabel='Frequency', title='Histogram of Series Data')
plt.show()

In [None]:
# Collapse the raw codes stored in column index 5 of `data.x` (0-based, i.e. the
# sixth column -- the same column read into `x_highway` above) into coarser
# classes, then swap that column for a one-hot block appended at the end of
# the feature matrix.
#
# NOTE: this mutates `data.x` in place and is NOT idempotent. After one run,
# column 5 holds a different feature, so re-running the cell on the same
# samples would encode the wrong column. Reload `datalist` before re-running.
HIGHWAY_COL = 5

# Raw code -> class index. Several raw codes share one class on purpose.
mapping = {
    -1: 0,
    0: 1,
    1: 2,
    2: 2,
    3: 3,
    4: 3,
    5: 4,
    6: 4,
    7: 5,
    8: 5,
    9: 6
}

# Width of the one-hot block, derived from the mapping so the two cannot drift.
NUM_CLASSES = max(mapping.values()) + 1

# Only the sample at index 5 is transformed here (slice 5:6).
for data in datalist[5:6]:
    # Name kept from the original cell in case later cells read it.
    fifth_feature = data.x[:, HIGHWAY_COL].numpy()

    # Fail loudly on codes the mapping does not cover (including NaN or
    # non-integer values) rather than letting a None leak into the indexing.
    unmapped = sorted(set(fifth_feature.tolist()).difference(mapping))
    if unmapped:
        raise ValueError(
            f"Column {HIGHWAY_COL} contains values with no mapping: {unmapped}"
        )

    # Explicit int64 lookup; unlike np.vectorize this also works for a sample
    # with zero rows.
    mapped_feature = np.array(
        [mapping[code] for code in fifth_feature.tolist()], dtype=np.int64
    )
    print(mapped_feature.shape)
    one_hot_encoded = np.eye(NUM_CLASSES)[mapped_feature]

    # Drop the original column and append the one-hot encoded version,
    # keeping the dtype of the existing feature matrix.
    data.x = torch.cat(
        (
            data.x[:, :HIGHWAY_COL],
            data.x[:, HIGHWAY_COL + 1:],
            torch.tensor(one_hot_encoded, dtype=data.x.dtype),
        ),
        dim=1,
    )