<a href="https://colab.research.google.com/github/harishk30/CamelsHetroGNN/blob/main/AstridLHTest.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
pip install torch_geometric



In [3]:
import numpy as np
import h5py

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
catalog = '/content/drive/MyDrive/groups_090.hdf5'
f = h5py.File(catalog, 'r')

In [6]:
M_star = f['Subhalo/SubhaloMassType'][:,4]*1e10
pos  = f['Subhalo/SubhaloPos'][:]/1e3
vel = f['Subhalo/SubhaloVel'][:]
met = f['Subhalo/SubhaloStarMetallicity'][:]

In [7]:
def load_simulation_data(file):
  with h5py.File(file, 'r') as f:
    positions = f['Subhalo/SubhaloPos'][:]/1e3
    vel = f['Subhalo/SubhaloVel'][:]
    met = f['Subhalo/SubhaloStarMetallicity'][:]
    mass = f['Subhalo/SubhaloMassType'][:,4]*1e10
  return positions, vel, met, mass

In [8]:
def apply_periodic_boundary_conditions(positions, box_size):
    # Wrap positions to the box size
    positions = positions % box_size
    return positions

In [9]:
def minimum_image_distance(pos1, pos2, box_size):
    # Calculate the minimum image distance between two points
    delta = np.abs(pos1 - pos2)
    delta = np.where(delta > 0.5 * box_size, box_size - delta, delta)
    return np.sqrt((delta ** 2).sum(axis=-1))

In [10]:
from scipy.spatial import KDTree

def distance(point1, point2):
    return np.linalg.norm(point1 - point2)

def create_edges_knn(points, k=6):
    edges = []
    edge_value = []

    # Create a KDTree for efficient nearest neighbor search
    point_tree = KDTree(points)

    for i in range(len(points)):
        # Query the k nearest neighbors for each point
        _, neighbors = point_tree.query(points[i], k=k+1)

        for j in neighbors[1:]:  # Skip the first neighbor because it's the point itself
            # Add an edge between the point and its neighbor
            edges.append([i, j])

            # Compute the distance between the points as the edge value
            edge_value.append(distance(points[i], points[j]))

    return [edges, edge_value]

In [11]:
from tqdm import tqdm
def min_distance(positions, box_size = 25):
    min_distance = np.inf
    max_distance = 0

    # Iterate over all pairs of galaxies
    for i in tqdm(range(len(positions))):
        for j in range(i + 1, len(positions)):
            dist = minimum_image_distance(positions[i], positions[j], box_size)
            if dist < min_distance:
                min_distance = dist
            if dist > max_distance:
                max_distance = dist

    # Print the results
    print(f"Minimum distance: {min_distance} Mpc/h")
    print(f"Maximum distance: {max_distance} Mpc/h")

In [12]:
def minimum_image_distance_vectorized(positions, box_size = 25):
    num_galaxies = positions.shape[0]

    # Compute pairwise differences in each dimension
    diff = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]

    # Apply periodic boundary conditions
    diff = np.abs(diff)
    diff = np.where(diff > 0.5 * box_size, box_size - diff, diff)

    # Compute the Euclidean distance
    dist = np.sqrt(np.sum(diff ** 2, axis=-1))

    return dist

In [13]:
from tqdm import tqdm
from scipy.spatial import cKDTree
def create_edges_knn_pbc(points, box_size = 25, k=6):
    tree = KDTree(points, boxsize=box_size)

    edges = []
    edge_values = []
    '''
    distances = minimum_image_distance_vectorized(points, box_size)
    # Mask the diagonal (self-distances which are zero)
    np.fill_diagonal(distances, np.inf)
    # Get the minimum and maximum distances
    min_distance = np.min(distances)
    max_distance = np.max(np.triu(distances, k=1))
    print(min_distance, max_distance)
    '''

    min_distance = np.inf
    max_distance = 0
    large_distance_count = 0

    for i in tqdm(range(len(points)), desc="Processing points"):
        distances, neighbors = tree.query(points[i], k=k+1)
        for j, tree_dist in zip(neighbors[1:], distances[1:]):
            if j != i and j < len(points):
                actual_distance = minimum_image_distance(points[i], points[j], box_size)
                edges.append([i, j])
                edge_values.append(actual_distance)
                min_distance = min(min_distance, actual_distance)
                max_distance = max(max_distance, actual_distance)
    print(min_distance, max_distance)
    return np.array(edges), np.array(edge_values)


In [14]:
def create_points(positions, masses, vel, met):
    point_features = []
    for i, pos in enumerate(positions):
        point_features.append(list(pos) + list(vel[i]) + [masses[i]] + [met[i]])
    return point_features

In [15]:
from torch_geometric.data import Data
import torch
def create_graph(file_path, k_val=6):
    positions, velocity, metallicities, masses = load_simulation_data(file_path)
    edges, edge_values = create_edges_knn_pbc(positions, 25, k_val)
    point_values = create_points(positions, masses, velocity, metallicities)

    edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
    point_values = torch.tensor(point_values, dtype=torch.float)
    edge_value = torch.tensor(edge_values, dtype=torch.float)

    return [point_values, edge_index, edge_value]

In [16]:
def turn_data(graph):
    graph_data = Data(x=graph[0], edge_index=graph[1], edge_attr=graph[2])
    return graph_data

In [None]:
graph = create_graph('/content/drive/MyDrive/groups_090.hdf5', 15000)

Processing points:  31%|███▏      | 6305/20082 [22:09<45:32,  5.04it/s]

In [1]:
turn_data(graph)

NameError: name 'turn_data' is not defined