## **README**

**Input**

Training/prediction csv data from the rhino-grasshopper simulation

**Output**

Pytorch graph data object

## **Import Libraries**

In [1]:
# import libraries
import os
import pandas as pd
import torch                                 # pytorch
from torch_geometric.data import Data        # to work with graph data

## **Pipeline Settings**

In [2]:
# Define the mode at the start of your code
pipeline_mode = "training"  # Change to "prediction" when needed

In [3]:
# Csv Training Data
run_num = 'run_4'
file_path_training = rf'data\csv_training\{run_num}'

**No changes to input below for each run**

In [4]:
# Pytorch Training Path
filename_training = rf'data/torch_data_object_training/{run_num}.pt'

In [5]:
# Pytorch Prediction Path
filename_prediction = rf'data/torch_data_object_prediction/{run_num}.pt'

In [6]:
# Csv Files to Import
file_names = [ 
    'building_alum_vertex',
    'building_brick_vertex',
    'building_conc_vertex',
    'building_glass_vertex',
    'building_wood_vertex',
    'distance_alum',
    'distance_brick',
    'distance_conc',
    'distance_glass',
    'distance_wood',
    'label',  
    'sensor',
    'sensor_length',
    'vertex_length_alum',
    'vertex_length_brick',
    'vertex_length_conc',
    'vertex_length_glass',
    'vertex_length_wood',
]  

In [7]:
# Dictionary mapping DataFrame names to material names
df_material_map = {
    'building_conc_vertex_df': 'exterior_concrete_wall',
    'building_glass_vertex_df': 'exterior_glass',
    'building_wood_vertex_df': 'exterior_wood_wall',
    'building_brick_vertex_df': 'exterior_white_brick',
    'building_alum_vertex_df': 'exterior_alum_cladding'
}

In [8]:
# List to combine into building_df
append_list = (
    'building_alum_vertex_df_append',
    'building_brick_vertex_df_append',
    'building_conc_vertex_df_append',
    'building_glass_vertex_df_append',
    'building_wood_vertex_df_append',
)

In [9]:
# List of dfs to extract vertex length
df_vertex_names = [
    'vertex_length_alum_df',
    'vertex_length_brick_df',
    'vertex_length_conc_df',
    'vertex_length_glass_df',
    'vertex_length_wood_df'
]

In [10]:
# Material list
material_list = ['alum', 'brick','conc', 'glass', 'wood']  

## **Functions**

In [11]:
def print_df(dataframes, line_length=50):
    """
    Prints the head of each DataFrame in the list, separated by a line.

    Parameters:
    - dataframes (list of tuple): A list where each tuple contains a string (the name of the DataFrame) and a DataFrame.
    - line_length (int): The length of the separating line. Default is 50.
    """
    line = '-' * line_length
    for name, df in dataframes:
        print(name)
        print(df.head())
        print(line)

In [12]:
def check_nulls_in_dfs(dfs):
    """
    Checks for null values in each DataFrame within a dictionary and prints a message indicating the presence of null values.

    Args:
    - dfs (dict): A dictionary where each key is the name of a DataFrame and each value is the DataFrame object.
    """
    for df_name, df in dfs.items():
        # Check if there are any null values in the DataFrame
        if df.isnull().values.any():
            print(f'Null values found in \'{df_name}\' DataFrame.')
        else:
            print(f'No null values in \'{df_name}\' DataFrame.')

In [13]:
def format_and_insert_id_column(df, id_base_name):
    """
    Resets the index of the DataFrame, creates a new column with formatted IDs based on the new index,
    and inserts this new column as the first column of the DataFrame.
    
    Args:
    - df (pd.DataFrame): The DataFrame to operate on.
    - id_base_name (str): The base name for the new ID column (e.g., 'sensor_id', 'vertex_id').
    
    Returns:
    - pd.DataFrame: The modified DataFrame with the new ID column as the first column.
    """
    # Resetting the index so the specified ID column is no longer the index column, if it was.
    df.reset_index(drop=True, inplace=True)
    
    # Creating a new ID column with formatted values.
    df[id_base_name] = [f'{id_base_name}_{i + 1}' for i in df.index]
    
    # Inserting the new ID column as the first column.
    df.insert(0, id_base_name, df.pop(id_base_name))
    
    return df

In [14]:
def load_csv_files_as_dict(file_path_training, file_names):
    """
    Loads CSV files into pandas DataFrames and stores them in a dictionary with dynamically constructed keys.
    Skips empty CSV files.

    Args:
    - file_path_training (str): The directory path where the CSV files are stored.
    - file_names (list of str): List of file names without the '.csv' extension.

    Returns:
    - dict: A dictionary where each key is a dynamically constructed name based on the file name and
            each value is the corresponding DataFrame loaded from the CSV file.
    """
    # Dictionary to store the DataFrames, with keys as dynamically constructed names
    dataframes = {}

    # Loop over the list of file names
    for file_name in file_names:
        file_path = os.path.join(file_path_training, f'{file_name}.csv')
        # Check if the file is not empty (size > 0)
        if os.path.getsize(file_path) > 0:
            # Construct the DataFrame name and load the CSV file into a DataFrame
            df_name = f'{file_name}_df'
            dataframes[df_name] = pd.read_csv(file_path, header=None)
        else:
            print(f'Skipped empty file: {file_name}.csv')

    return dataframes

In [15]:
def append_material_properties(dfs, material_df, df_material_map):
    """
    Appends material properties to each row of specified building DataFrames based on a mapping dictionary,
    creating new DataFrames for the appended versions without modifying the originals.
    Skips empty DataFrames and those not found in the dfs dictionary.
    
    Args:
    - dfs (dict): Dictionary of DataFrames to be updated, where keys are DataFrame names.
    - material_df (pd.DataFrame): DataFrame containing material properties.
    - df_material_map (dict): Dictionary mapping DataFrame names to material names.
    
    Returns:
    - dict: The dfs dictionary with new DataFrames added that contain the original data
            with material properties appended. The new DataFrames have keys with an `_append` suffix.
    """
    for df_name, material_name in df_material_map.items():
        # Construct new DataFrame name with '_append' suffix
        new_df_name = f"{df_name}_append"
        
        # Check if the original DataFrame is present in dfs and not empty
        if df_name in dfs and not dfs[df_name].empty:
            # Find the row in material_df for the specified material
            material_row = material_df[material_df['material_name'] == material_name].drop('material_name', axis=1)
            # Replicate the material row to match the size of the building DataFrame
            repeated_material = pd.concat([material_row] * len(dfs[df_name]), ignore_index=True)
            # Create a new DataFrame by appending the material properties
            dfs[new_df_name] = pd.concat([dfs[df_name].reset_index(drop=True), repeated_material.reset_index(drop=True)], axis=1)
        else:
            print(f"Skipped empty or missing DataFrame: {df_name}")
    
    return dfs

In [16]:
def combine_dataframes_in_order(dfs, append_list):
    """
    Combines a list of DataFrames found in the dictionary 'dfs' into a single DataFrame,
    strictly following the order specified in 'append_list'. Skips any DataFrame names
    not found in the 'dfs' dictionary.

    Args:
    - dfs (dict): Dictionary containing DataFrames.
    - append_list (tuple): Tuple containing the names of the DataFrames to be combined in order.

    Returns:
    - pd.DataFrame: The combined DataFrame.
    """
    combined_df = pd.DataFrame()  # Initialize an empty DataFrame to start with

    for df_name in append_list:
        if df_name in dfs:  # Check if DataFrame name exists in the dictionary
            # If the combined DataFrame is empty, initialize it with the first DataFrame
            if combined_df.empty:
                combined_df = dfs[df_name].copy()
            else:
                # Concatenate the current DataFrame to the combined DataFrame
                combined_df = pd.concat([combined_df, dfs[df_name]], ignore_index=True)
        else:
            print(f"DataFrame name '{df_name}' not found in the dictionary. Skipping...")

    return combined_df

In [17]:
def extract_values_to_dict(dfs, df_names):
    """
    Extracts a single value from each specified DataFrame and stores it in a dictionary.

    Args:
    - dfs (dict): Dictionary containing the DataFrames.
    - df_names (list of str): List of the names of the DataFrames to extract values from.

    Returns:
    - dict: A dictionary with the DataFrame names as keys and their extracted values as values.
    """
    values_dict = {}

    for df_name in df_names:
        if df_name in dfs and not dfs[df_name].empty:
            # Assuming each DataFrame contains only one value, extract it
            value = dfs[df_name].iloc[0, 0]  # Extract the first value of the DataFrame
            values_dict[df_name] = value
        else:
            print(f"DataFrame '{df_name}' does not exist or is empty. Skipping...")

    return values_dict

In [18]:
def map_sensor_to_vertex(sensor_length, values_dict):
    """
    Creates mappings of sensor IDs to vertex material IDs based on values_dict.
    
    Args:
    - sensor_length (int): The number of sensors.
    - values_dict (dict): Dictionary with vertex lengths for each material.
    
    Returns:
    - dict: A dictionary of DataFrames, each representing sensor to vertex mappings for a material.
    """
    mapped_dfs = {}
    for material, length in values_dict.items():
        material_name = material.split('_df')[0]  # Extract material name from the key
        data = [(f'sensor_id_{sensor_id}', f'{material_name}_{i}') for sensor_id in range(1, sensor_length + 1) for i in range(1, length + 1)]
        mapped_df = pd.DataFrame(data, columns=['sensor_id', 'vertex_id'])
        mapped_dfs[material_name] = mapped_df
    
    return mapped_dfs

In [19]:
def append_distance_to_mapped_dfs(mapped_dfs, dfs, material_list):
    """
    Correctly appends distance values from distance DataFrames in dfs to each corresponding mapped DataFrame.
    Correctly references the material name in mapped_dfs and skips materials if their corresponding distance DataFrame cannot be found in dfs.
    
    Args:
    - mapped_dfs (dict): Dictionary of mapped DataFrames.
    - dfs (dict): Dictionary containing distance DataFrames.
    - material_list (list): List of materials to search distance data for.
    
    Returns:
    - dict: Updated dictionary of mapped DataFrames with distance values appended.
    """
    for material in material_list:
        distance_df_name = f'distance_{material}_df'
        mapped_df_name = f'vertex_length_{material}'  # Adjusted to match the naming convention in mapped_dfs

        # Check if both the distance DataFrame and the mapped DataFrame exist
        if distance_df_name in dfs and mapped_df_name in mapped_dfs:
            distance_df = dfs[distance_df_name]
            # Assuming the distance values are in the first column of distance_df
            mapped_df = mapped_dfs[mapped_df_name]
            # Ensure the distance DataFrame has enough rows to match the mapped DataFrame
            if len(distance_df) >= len(mapped_df):
                mapped_df['distance'] = distance_df.iloc[:len(mapped_df), 0].values
            else:
                print(f"Warning: Not enough distance values for {material}, distances not appended.")
            mapped_dfs[mapped_df_name] = mapped_df
        else:
            # Print a warning if the distance DataFrame is not found
            print(f"Warning: Distance DataFrame for '{material}' not found, skipping.")

    return mapped_dfs

## **Import Dataset**

In [20]:
# Call function to import csv and convert to dataframe, storing them in a dictionary
dfs = load_csv_files_as_dict(file_path_training, file_names)
# Access a specific DataFrame, e.g., sensor_df = dfs['sensor_df']

Skipped empty file: building_alum_vertex.csv
Skipped empty file: building_brick_vertex.csv
Skipped empty file: distance_alum.csv
Skipped empty file: distance_brick.csv
Skipped empty file: vertex_length_alum.csv
Skipped empty file: vertex_length_brick.csv


In [21]:
for df_name, df in dfs.items():
    print(f'DataFrame Name: {df_name}')
    print(df)
    print()  

DataFrame Name: building_conc_vertex_df
              0           1          2
0    360.978140  438.952231   0.000000
1    360.436820  438.007320   0.000000
2    360.436820  438.007320  17.788885
3    360.978140  438.952231  17.788885
4    370.925848  432.007495   0.000000
..          ...         ...        ...
527  405.335995  383.873168  42.000000
528  404.170548  384.540845   0.000000
529  404.170548  384.540845  42.000000
530  401.491986  379.946006   0.000000
531  401.491986  379.946006  42.000000

[532 rows x 3 columns]

DataFrame Name: building_glass_vertex_df
             0           1          2
0   360.978140  438.952231  17.788885
1   360.436820  438.007320  17.788885
2   360.436820  438.007320  23.566007
3   360.978140  438.952231  23.566007
4   370.925848  432.007495  17.788885
5   370.925848  432.007495  23.566007
6   380.706836  448.858343  17.788885
7   380.706836  448.858343  23.566007
8   378.543764  450.091695  17.788885
9   378.543764  450.091695  23.566007
10  380.

In [22]:
for df_name in dfs:
    print(f'DataFrame Name: {df_name}')

DataFrame Name: building_conc_vertex_df
DataFrame Name: building_glass_vertex_df
DataFrame Name: building_wood_vertex_df
DataFrame Name: distance_conc_df
DataFrame Name: distance_glass_df
DataFrame Name: distance_wood_df
DataFrame Name: label_df
DataFrame Name: sensor_df
DataFrame Name: sensor_length_df
DataFrame Name: vertex_length_conc_df
DataFrame Name: vertex_length_glass_df
DataFrame Name: vertex_length_wood_df


In [23]:
# Import Material Library and append to dfs dictionary of dataframes
material_df = pd.read_csv(r'data\material\material_library.csv')
dfs['material'] = material_df

In [24]:
material_df

Unnamed: 0,material_name,r_ref,g_ref,b_ref,spec,rough
0,exterior_concrete_wall,0.7,0.69,0.66,0.03,0.3
1,exterior_wood_wall,0.15,0.14,0.13,0.48,0.2
2,exterior_alum_cladding,0.63,0.63,0.61,0.05,0.1
3,exterior_white_brick,0.73,0.65,0.47,0.25,0.3
4,exterior_glass,0.7,0.7,0.7,0.0,0.0


## **Check - Null Values**

In [25]:
# Check for Null Values
check_nulls_in_dfs(dfs)

No null values in 'building_conc_vertex_df' DataFrame.
No null values in 'building_glass_vertex_df' DataFrame.
No null values in 'building_wood_vertex_df' DataFrame.
No null values in 'distance_conc_df' DataFrame.
No null values in 'distance_glass_df' DataFrame.
No null values in 'distance_wood_df' DataFrame.
No null values in 'label_df' DataFrame.
No null values in 'sensor_df' DataFrame.
No null values in 'sensor_length_df' DataFrame.
No null values in 'vertex_length_conc_df' DataFrame.
No null values in 'vertex_length_glass_df' DataFrame.
No null values in 'vertex_length_wood_df' DataFrame.
No null values in 'material' DataFrame.


## **Prepare Dataset - building_df**

In [26]:
dfs = append_material_properties(dfs, material_df, df_material_map)

Skipped empty or missing DataFrame: building_brick_vertex_df
Skipped empty or missing DataFrame: building_alum_vertex_df


In [27]:
# Create combined building_df
building_df = combine_dataframes_in_order(dfs, append_list)

DataFrame name 'building_alum_vertex_df_append' not found in the dictionary. Skipping...
DataFrame name 'building_brick_vertex_df_append' not found in the dictionary. Skipping...


## **Prepare Dataset - edge_df**

In [28]:
values_dict = extract_values_to_dict(dfs, df_vertex_names)

DataFrame 'vertex_length_alum_df' does not exist or is empty. Skipping...
DataFrame 'vertex_length_brick_df' does not exist or is empty. Skipping...


In [29]:
values_dict

{'vertex_length_conc_df': 532,
 'vertex_length_glass_df': 36,
 'vertex_length_wood_df': 454}

In [30]:
#extract out the length values and convert to numerical from both dataframes
sensor_length = int(dfs['sensor_length_df'].iloc[0,0])

In [31]:
sensor_length

57

In [32]:
# Map sensor length with each material vertex length, return dictionary of dfs
mapped_dfs = map_sensor_to_vertex(sensor_length, values_dict)

In [33]:
mapped_dfs

{'vertex_length_conc':           sensor_id               vertex_id
 0       sensor_id_1    vertex_length_conc_1
 1       sensor_id_1    vertex_length_conc_2
 2       sensor_id_1    vertex_length_conc_3
 3       sensor_id_1    vertex_length_conc_4
 4       sensor_id_1    vertex_length_conc_5
 ...             ...                     ...
 30319  sensor_id_57  vertex_length_conc_528
 30320  sensor_id_57  vertex_length_conc_529
 30321  sensor_id_57  vertex_length_conc_530
 30322  sensor_id_57  vertex_length_conc_531
 30323  sensor_id_57  vertex_length_conc_532
 
 [30324 rows x 2 columns],
 'vertex_length_glass':          sensor_id               vertex_id
 0      sensor_id_1   vertex_length_glass_1
 1      sensor_id_1   vertex_length_glass_2
 2      sensor_id_1   vertex_length_glass_3
 3      sensor_id_1   vertex_length_glass_4
 4      sensor_id_1   vertex_length_glass_5
 ...            ...                     ...
 2047  sensor_id_57  vertex_length_glass_32
 2048  sensor_id_57  vertex_length

In [34]:
final_dfs = append_distance_to_mapped_dfs(mapped_dfs, dfs, material_list)



In [35]:
# Sort the keys of final_dfs alphabetically
sorted_keys = sorted(final_dfs.keys())

In [36]:
# Concatenate the DataFrames in alphabetical order
edge_df = pd.concat([final_dfs[key] for key in sorted_keys], ignore_index=True)

In [37]:
edge_df

Unnamed: 0,sensor_id,vertex_id,distance
0,sensor_id_1,vertex_length_conc_1,6.025483
1,sensor_id_1,vertex_length_conc_2,6.025487
2,sensor_id_1,vertex_length_conc_3,11.801877
3,sensor_id_1,vertex_length_conc_4,11.801876
4,sensor_id_1,vertex_length_conc_5,13.591615
...,...,...,...
58249,sensor_id_57,vertex_length_wood_450,122.666893
58250,sensor_id_57,vertex_length_wood_451,125.486862
58251,sensor_id_57,vertex_length_wood_452,123.170420
58252,sensor_id_57,vertex_length_wood_453,131.470311


## **Graph Object - Preprocess Node Index and Naming**

In [38]:
sensor_df = format_and_insert_id_column(dfs['sensor_df'], 'sensor_id')
building_df = format_and_insert_id_column(building_df, 'vertex_id')
label_df = format_and_insert_id_column(dfs['label_df'], 'sensor_id')

## **Graph Object - Add Column Headers**

In [39]:
sensor_df.rename(columns={0: 'sensor_x_coordinate', 1: 'sensor_y_coordinate', 2: 'sensor_z_coordinate'}, inplace=True)

In [40]:
label_df.columns = ['sensor_id', 'hb_solar_radiation']

## **Graph Object - Midpoint Check**

In [41]:
dataframes = [
    ('sensor_df', sensor_df),
    ('building_df', building_df),
    ('label_df', label_df),
    ('edge_df', edge_df)
]

dataframes

[('sensor_df',
         sensor_id  sensor_x_coordinate  sensor_y_coordinate  \
  0    sensor_id_1           360.620728           438.529495   
  1    sensor_id_2           360.620728           438.529495   
  2    sensor_id_3           360.620728           438.529495   
  3    sensor_id_4           365.631683           434.920624   
  4    sensor_id_5           365.631683           434.920624   
  5    sensor_id_6           365.631683           434.920624   
  6    sensor_id_7           373.457581           436.170013   
  7    sensor_id_8           373.457581           436.170013   
  8    sensor_id_9           373.457581           436.170013   
  9   sensor_id_10           378.348083           444.595428   
  10  sensor_id_11           378.348083           444.595428   
  11  sensor_id_12           378.348083           444.595428   
  12  sensor_id_13           379.674835           449.561905   
  13  sensor_id_14           379.674835           449.561905   
  14  sensor_id_15       

## **Graph Object - Prepare Node Features**

Combine sensor and building information to create a unified node feature matrix. 

Combine sensor_df and building_df into a single dataframe, ensuring each has a unique identifier across sensors and vertices

In [42]:
# Add a column to distinguish between sensors and vertices
sensor_df['type'] = 'sensor'
building_df['type'] = 'vertex'

In [43]:
# Combine dataframes
all_nodes_df = pd.concat([sensor_df.assign(index=range(0, len(sensor_df))),
                          building_df.assign(index=range(len(sensor_df), len(sensor_df)+len(building_df)))])

In [44]:
# Prepare node features - example: using coordinates and a type flag (sensor=1, vertex=0)
all_nodes_df['type_flag'] = all_nodes_df['type'].apply(lambda x: 1 if x == 'sensor' else 0)
node_features = all_nodes_df[['sensor_x_coordinate', 'sensor_y_coordinate', 'sensor_z_coordinate', 'type_flag']].fillna(0).values
x = torch.tensor(node_features, dtype=torch.float)

# Print Check
all_nodes_df

Unnamed: 0,sensor_id,sensor_x_coordinate,sensor_y_coordinate,sensor_z_coordinate,type,index,vertex_id,0,1,2,r_ref,g_ref,b_ref,spec,rough,type_flag
0,sensor_id_1,360.620728,438.529495,6.0,sensor,0,,,,,,,,,,1
1,sensor_id_2,360.620728,438.529495,18.0,sensor,1,,,,,,,,,,1
2,sensor_id_3,360.620728,438.529495,30.0,sensor,2,,,,,,,,,,1
3,sensor_id_4,365.631683,434.920624,6.0,sensor,3,,,,,,,,,,1
4,sensor_id_5,365.631683,434.920624,18.0,sensor,4,,,,,,,,,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1017,,,,,vertex,1074,vertex_id_1018,278.823743,349.032461,48.0,0.15,0.14,0.13,0.48,0.2,0
1018,,,,,vertex,1075,vertex_id_1019,276.725943,350.247252,0.0,0.15,0.14,0.13,0.48,0.2,0
1019,,,,,vertex,1076,vertex_id_1020,276.725943,350.247252,48.0,0.15,0.14,0.13,0.48,0.2,0
1020,,,,,vertex,1077,vertex_id_1021,273.543410,344.818679,0.0,0.15,0.14,0.13,0.48,0.2,0


In [45]:
# Print the tensor to check its contents
print("Node Features Tensor:")
print(x)

Node Features Tensor:
tensor([[360.6207, 438.5295,   6.0000,   1.0000],
        [360.6207, 438.5295,  18.0000,   1.0000],
        [360.6207, 438.5295,  30.0000,   1.0000],
        ...,
        [  0.0000,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000],
        [  0.0000,   0.0000,   0.0000,   0.0000]])


## **Graph Object - Create Edge Index**

1. Map each sensor_id and vertex_id to a unique index.2. 
Use these mappings to create the edge_index tensor from edge_df.

Step 1: Prepare Mappings
First, we'll create dictionaries to map sensor_id and vertex_id to unique indices. We'll concatenate the indices of sensors and vertices to ensure uniqueness across the graph.

In [46]:
sensor_ids = sensor_df['sensor_id'].unique()
vertex_ids = building_df['vertex_id'].unique()

# Create a continuous index for sensors and vertices
sensor_index = {sensor_id: i for i, sensor_id in enumerate(sensor_ids)}
vertex_index = {vertex_id: i + len(sensor_index) for i, vertex_id in enumerate(vertex_ids)}

Step 2: Create Edge Index
Next, we'll use these mappings to create the edge_index tensor. Note that the vertex_id in edge_df appears to have a slight discrepancy (missing the "id_" part based on the example provided), so we'll adjust for that in our mapping.

In [47]:
# Adjust the vertex_id in edge_df to match the format in buildings_df
edge_df['adjusted_vertex_id'] = edge_df['vertex_id'].apply(lambda x: 'vertex_id_' + x.split('_')[-1])

# Map sensor_id and vertex_id to their respective indices
edge_index_list = edge_df.apply(lambda row: [sensor_index.get(row['sensor_id'], -1),
                                             vertex_index.get(row['adjusted_vertex_id'], -1)], axis=1)

# Filter out any edges that couldn't be mapped (-1 indicates a mapping failure)
filtered_edge_index_list = [pair for pair in edge_index_list if -1 not in pair]

# Convert to torch tensor
edge_index = torch.tensor(filtered_edge_index_list, dtype=torch.long).t().contiguous()


In [48]:
edge_index

tensor([[  0,   0,   0,  ...,  56,  56,  56],
        [ 57,  58,  59,  ..., 508, 509, 510]])

## **Graph Object - Edge Attributes**

This section extracts edge attributes from the edge_df and converts them into a torch tensor.

In [49]:
edge_attr = torch.tensor(edge_df[['distance']].values, dtype=torch.float)

In [50]:
edge_attr

tensor([[  6.0255],
        [  6.0255],
        [ 11.8019],
        ...,
        [123.1704],
        [131.4703],
        [129.2611]])

## **Graph Object - Target Labels**

This sections prepares the labels for sensors by:
1. Aligning with their respective indices
2. Converting to a torch tensor

In [51]:
# Ensure data type compatibility
label_df['hb_solar_radiation'] = label_df['hb_solar_radiation'].astype(float)

In [52]:
# Update labels for sensors with their radiation values
label_df['index'] = label_df['sensor_id'].map(sensor_index)

In [53]:
# Create torch tensor with compatible data type
labels = torch.zeros(len(label_df), dtype=torch.float)
labels[label_df['index']] = torch.tensor(label_df['hb_solar_radiation'].values, dtype=torch.float)

## **Graph Object - Creating & Saving the Pytorch Data Object**

In [54]:
if pipeline_mode == "training":
    # Creating the Data object for training
    data_training = Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=labels)
    print("Data Object for Training:")
    print(data_training)

    # Saving the training data
    torch.save(data_training, filename_training)
    print(f"Training data saved as {filename_training}")

elif pipeline_mode == "prediction":
    # Creating the Data object for prediction (without labels)
    data_predict = Data(x=x, edge_index=edge_index, edge_attr=edge_attr)
    print("Data Object for Prediction:")
    print(data_predict)

    # Saving the prediction data
    torch.save(data_predict, filename_prediction)
    print(f"Prediction data saved as {filename_prediction}")

Data Object for Training:
Data(x=[1079, 4], edge_index=[2, 58254], edge_attr=[58254, 1], y=[57])
Training data saved as data/torch_data_object_training/run_4.pt
