In [1]:
import torch
print(torch.cuda.is_available())  # should be True
print(torch.cuda.get_device_name(0))  # should print your GPU


True
NVIDIA GeForce RTX 3050


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [3]:
import os
import joblib
import numpy as np
import networkx as nx
from glob import glob
from natsort import natsorted
from scipy.spatial import Delaunay
from skimage.measure import  label as l 
from skimage.measure import  regionprops 
import torch
from torch_geometric.data import Data
from sklearn.preprocessing import StandardScaler


# --- Compute Morphological Features ---
def compute_morph(contour, box):
    x1, y1, x2, y2 = map(int, box)
    pts = contour - np.array([x1, y1])
    h, w = y2 - y1 + 1, x2 - x1 + 1
    mask = np.zeros((h, w), dtype=np.uint8)
    rr = pts[:, 1].astype(int)
    cc = pts[:, 0].astype(int)
    mask[rr, cc] = 1
    lbl = l(mask)
    props = regionprops(lbl)
    if not props:
        return {'area': 0, 'perimeter': 0, 'eccentricity': 0,
                'solidity': 0, 'circularity': 0}
    r = props[0]
    return {
        'area': r.area,
        'perimeter': r.perimeter,
        'eccentricity': r.eccentricity,
        'solidity': r.solidity,
        'circularity': 4 * np.pi * r.area / (r.perimeter**2 + 1e-6)
    }


In [4]:

# --- Generate NetworkX Graph from .dat ---
def generate_graph_from_dat(dat_path):
    data = joblib.load(dat_path)
    nodes = []
    # print(type(data))
    for idx, nucleus in data.items(): 
        # print(idx)# Iterate over list indices
        contour = np.array(nucleus['contour'])
        centroid = tuple(nucleus['centroid'])
        ntype = nucleus['type']
        x_min, y_min = contour.min(axis=0)
        x_max, y_max = contour.max(axis=0)
        box = (x_min, y_min, x_max, y_max)
        morph = compute_morph(contour, box)
        nodes.append({'id': idx,  # Use index as node ID
                      'centroid': centroid,
                      'type': ntype,
                      **morph})

    G = nx.Graph()
    for node in nodes:
        G.add_node(node['id'],
                   x=node['centroid'][0],
                   y=node['centroid'][1],
                   type=node['type'],
                   area=node['area'],
                   perimeter=node['perimeter'],
                   eccentricity=node['eccentricity'],
                   solidity=node['solidity'],
                   circularity=node['circularity'])
    points = np.array([n['centroid'] for n in nodes])
    if len(points) >= 3:
        tri = Delaunay(points)
        edges = set()
        for simplex in tri.simplices:
            for i in range(3):
                u, v = sorted([simplex[i], simplex[(i+1) % 3]])
                edges.add((u, v))
                for u_idx, v_idx in edges:
                    p1, p2 = points[u_idx], points[v_idx]

                    # Inverse Euclidean distance
                    dist = np.linalg.norm(p1 - p2)
                    if dist > 100: continue
                    inv_dist = 1.0 / (dist + 1e-6)

                    # Morphological feature vectors for cosine similarity
                    morph_keys = ['area', 'perimeter', 'eccentricity', 'solidity', 'circularity']
                    vec_u = np.array([nodes[u_idx][k] for k in morph_keys])
                    vec_v = np.array([nodes[v_idx][k] for k in morph_keys])

                    # Cosine similarity
                    cos_sim = np.dot(vec_u, vec_v) / (np.linalg.norm(vec_u) * np.linalg.norm(vec_v) + 1e-6)
                    norm_cos_sim = (cos_sim + 1) / 2  # Normalize to [0, 1]

                    # Type-aware scaling
                    type_u, type_v = nodes[u_idx]['type'], nodes[v_idx]['type']
                    type_factor = 1.0 if type_u == type_v else 0.5

                    # Final weight (scalar)
                    weight = inv_dist * norm_cos_sim * type_factor

                    # Add edge with full attribute vector
                    G.add_edge(nodes[u_idx]['id'], nodes[v_idx]['id'],
                            weight=weight,
                            inv_dist=inv_dist,
                            cos_sim=norm_cos_sim,
                            type_factor=type_factor)

    return G


In [5]:
# --- Convert NetworkX Graph to PyG Data ---
def convert_nx_to_pyg(G, g_label):
    # Convert node label to integers
    G = nx.convert_node_labels_to_integers(G)
    
    # Extract node features
    node_features = []
    feature_names = ['x', 'y', 'type', 'area', 'perimeter', 'eccentricity', 'solidity', 'circularity']

    for node, data in G.nodes(data=True):
        features = []
        for f in feature_names:
            value = data.get(f, 0)
            try:
                features.append(float(value))
            except (ValueError, TypeError):
                print(f"Warning: Invalid value for {f} in node {node}: {value}. Using 0.")
                features.append(0.0)
        node_features.append(features)

    # Convert to numpy array
    node_features_np = np.array(node_features)

    # Normalize only morphological features (area, perimeter, ecc, solidity, circularity)
    scaler = StandardScaler()
    morph_features = node_features_np[:, 3:]  # columns 3 to 7
    morph_normalized = scaler.fit_transform(morph_features)

    # Combine x, y, type (unnormalized) + normalized morph features
    node_features_combined = np.hstack([
        node_features_np[:, :3],    # x, y, type
        morph_normalized            # normalized morph features
    ])

    # Final node feature tensor
    x = torch.tensor(node_features_combined, dtype=torch.float).to(device)
    if x.dim() != 2 or x.size(1) != len(feature_names):
        raise ValueError(f"Expected x to be 2D with shape [num_nodes, {len(feature_names)}], got shape {x.shape}")

    # Extract edge indices and multi-dimensional edge attributes
    edge_index = []
    edge_attr = []

    for u, v, data in G.edges(data=True):
        edge_index.append([u, v])

        # Extract all components of edge features
        inv_dist = float(data.get('inv_dist', 1.0))
        cos_sim = float(data.get('cos_sim', 1.0))
        type_factor = float(data.get('type_factor', 1.0))

        edge_attr.append([inv_dist, cos_sim, type_factor])

    edge_index = torch.tensor(edge_index, dtype=torch.long).T.contiguous().to(device)
    edge_attr = torch.tensor(edge_attr, dtype=torch.float).to(device)

    
    # Print edge feature diagnostics
    print(f"Edge index shape: {edge_index.shape}")
    print(f"Edge attr shape: {edge_attr.shape}")
    print("Edge attr stats:\n  inv_dist → min {:.4f}, max {:.4f}\n  cos_sim → min {:.4f}, max {:.4f}\n  type_factor → unique {}".format(
        edge_attr[:, 0].min().item(), edge_attr[:, 0].max().item(),
        edge_attr[:, 1].min().item(), edge_attr[:, 1].max().item(),
        torch.unique(edge_attr[:, 2]).tolist()
    ))

    # Create PyG Data object
    data = Data(
        x=x,
        edge_index=edge_index,
        edge_attr=edge_attr,
        y=torch.tensor([g_label], dtype=torch.long).to(device),
        original_node_indices=torch.arange(G.number_of_nodes(), dtype=torch.long).to(device)
    )
    print(data.x)
    # Add individual node features as separate fields
    # for i, feature_name in enumerate(feature_names):
    #     data[feature_name] = x[:, i]
    print(data.x)
    print(f"x tensor shape: {data.x.shape}")
    return data


In [6]:
ogg = generate_graph_from_dat('n_detected_pannuke/Invasive/1.dat')
new_graph = convert_nx_to_pyg(ogg, 3)

Edge index shape: torch.Size([2, 4155])
Edge attr shape: torch.Size([4155, 3])
Edge attr stats:
  inv_dist → min 0.0100, max 0.1169
  cos_sim → min 0.5067, max 1.0000
  type_factor → unique [0.5, 1.0]
tensor([[ 8.6904e+02,  2.6572e+01,  5.0000e+00,  ...,  5.9455e-01,
          9.8080e-01,  1.5266e+00],
        [ 4.1511e+02,  2.5697e+01,  3.0000e+00,  ...,  5.9455e-01,
          9.8080e-01,  1.5266e+00],
        [ 6.2268e+01,  3.0866e+01,  1.0000e+00,  ...,  5.7040e-01,
         -1.0249e+00, -8.4779e-01],
        ...,
        [ 1.8729e+03,  1.0926e+03,  1.0000e+00,  ...,  5.3539e-01,
         -2.0582e+00, -8.4779e-01],
        [ 1.9970e+03,  1.1034e+03,  1.0000e+00,  ...,  5.2812e-01,
         -3.5634e-01, -8.4778e-01],
        [ 1.8482e+03,  1.1113e+03,  1.0000e+00,  ...,  4.9510e-01,
         -8.8909e-02, -8.4779e-01]], device='cuda:0')
tensor([[ 8.6904e+02,  2.6572e+01,  5.0000e+00,  ...,  5.9455e-01,
          9.8080e-01,  1.5266e+00],
        [ 4.1511e+02,  2.5697e+01,  3.0000e+00,

In [9]:

# --- Generate and Save Graphs as .pt ---
def generate_and_save_graphs(dat_folder, graph_folder, g_label=0):
    os.makedirs(graph_folder, exist_ok=True)
    dats = natsorted(glob(os.path.join(dat_folder, "*.dat")))
    
    for dat_path in dats[:-2]:
        # Generate NetworkX graph
        
        G = generate_graph_from_dat(dat_path)
        print(G)
        # Convert to PyG Data object
        graph = convert_nx_to_pyg(G, g_label)
        
        # Save full graph as .pt
        base = os.path.splitext(os.path.basename(dat_path))[0]
        graph_path = os.path.join(graph_folder, f"{base}.pt")
        torch.save(graph, graph_path)
        print(f"Saved full graph to {graph_path}")
    
    print(f"Generated {len(dats)} graphs in {graph_folder}")



In [None]:
# import joblib

# # filemap_path = './n_detected_pannuke/Invasive/file_map.dat'
# filemap = joblib.load(filemap_path)

In [None]:
# for img in filemap: 
#     print(img)

In [None]:


# if __name__ == "__main__":
#     # Test visualization on a sample of images
#     subtype = 'Invasive'
#     print(f"Testing visualization for subtype: {subtype}")
    # generate_and_visualize_graphs(
    #     image_folder=f"./dataset/data/Photos/{subtype}/",
    #     dat_folder=f"./n_detected_pannuke/{subtype}/",
    #     graph_folder=f"./graphs_new_pannuke_visual/{subtype}/",
#         num_images=3
#     )
#     subtype = 'Normal'
#     print(f"Testing visualization for subtype: {subtype}")
#     generate_and_visualize_graphs(
#         image_folder=f"./dataset/data/Photos/{subtype}/",
#         dat_folder=f"./n_detected_pannuke/{subtype}/",
#         graph_folder=f"./graph_new/_pannuke_visual/{subtype}/",
#         num_images=3
#     )
#     subtype = 'InSitu'
#     print(f"Testing visualization for subtype: {subtype}")
#     generate_and_visualize_graphs(
#         image_folder=f"./dataset/data/Photos/{subtype}/",
#         dat_folder=f"./n_detected_pannuke/{subtype}/",
#         graph_folder=f"./graphs_new_pannuke_visual/{subtype}/",
#         num_images=3
#     )
#     subtype = 'Benign'
#     print(f"Testing visualization for subtype: {subtype}")
#     generate_and_visualize_graphs(
#         image_folder=f"./dataset/data/Photos/{subtype}/",
#         dat_folder=f"./n_detected_pannuke/{subtype}/",
#         graph_folder=f"./graphs_new_pannuke_visual/{subtype}/",
#         num_images=3
#     )


In [None]:
subtypes = {
    'Invasive': 3,
    'Benign': 1,
    'InSitu': 2,
    'Normal': 0
}

for fname, label in subtypes.items():
        print(f"Saving all graphs for subtype: {fname}")
        print(label)
        generate_and_save_graphs(
            dat_folder=f"./n_detected_pannuke/{fname}/",
            graph_folder=f"./graphs_new_pannuke/{fname}/",
            g_label=label
        )


Saving all graphs for subtype: Invasive
3
Graph with 787 nodes and 1896 edges
Edge index shape: torch.Size([2, 1896])
Edge attr shape: torch.Size([1896, 3])
Edge attr stats:
  inv_dist → min 0.0100, max 0.0999
  cos_sim → min 0.5139, max 1.0000
  type_factor → unique [0.5, 1.0]
tensor([[ 4.4771e+02,  2.5431e+01,  4.0000e+00,  ...,  5.5658e-01,
          1.0158e+00,  1.5076e+00],
        [ 7.0728e+02,  2.9043e+01,  2.0000e+00,  ...,  5.5658e-01,
          1.0158e+00,  1.5076e+00],
        [ 7.5144e+02,  3.2002e+01,  1.0000e+00,  ...,  4.8704e-01,
         -3.6008e-01, -8.3030e-01],
        ...,
        [ 1.9329e+03,  1.0469e+03,  3.0000e+00,  ...,  4.9010e-01,
         -1.2774e+00, -8.3030e-01],
        [ 1.8767e+03,  1.0782e+03,  4.0000e+00,  ...,  4.8006e-01,
         -2.0949e+00, -8.3030e-01],
        [ 1.9034e+03,  1.0892e+03,  4.0000e+00,  ...,  7.0956e-02,
         -3.6008e-01, -8.3030e-01]], device='cuda:0')
tensor([[ 4.4771e+02,  2.5431e+01,  4.0000e+00,  ...,  5.5658e-01,
     

In [None]:
import pandas as pd
import torch

# Load metadata
df = pd.read_csv('metadata.csv')  # Assumes columns: graph_path, label

# Iterate over each graph path and load the graph
for i, row in df.iterrows():
    graph_path = row['graph_path']
    label = row['label']

    # Load the graph object
    graph = torch.load(graph_path)

    # Check if node features are not of dimension 3
    try:
        if graph.x is None or graph.x.shape[1] != 8:
            print(f"\n--- Graph {i} ---")
            print(graph)
            print("Node feature shape:", graph.x.shape if graph.x is not None else "None")
            print("Edge index shape:", graph.edge_index.shape)
            print("Edge attr shape:", graph.edge_attr.shape if graph.edge_attr is not None else "None")
            print("Label:", label)
    except: 
        print(f"\n--- Graph {i} ---")
        print(graph_path)
        print("Node feature shape:", graph.x.shape if graph.x is not None else "None")
        print("Edge index shape:", graph.edge_index.shape)
        print("Edge attr shape:", graph.edge_attr.shape if graph.edge_attr is not None else "None")
        print("Label:", label)


In [None]:
import torch
import numpy as np
graph_path = "graphs_new_pannuke_edgeAtr/Invasive/3.pt" # example graph 
graph = torch.load(graph_path)
num_nodes = graph.num_nodes
# print(graph.x)
tensor_2d = torch.zeros((num_nodes, num_nodes))
for i in range(len(graph.x)):
    for j in range(len(graph.x)):
        tensor_2d[i][j] = np.linalg.norm(i - j)
        
# print(num_nodes)
print(tensor_2d)


In [None]:
# import torch
# import networkx as nx
# import pandas as pd
# from torch_geometric.data import Data

# def test_convert_nx_to_pyg(G, g_label, feature_names=None, max_nodes=100000):
#     """
#     Test and visualize the PyG Data object converted from NetworkX graph.
    
#     Args:
#         G: NetworkX graph
#         g_label: int, label for the graph
#         feature_names: list of node feature names (optional)
#         max_nodes: max nodes to visualize
    
#     Returns:
#         PyTorch Geometric Data object
#     """
#     print("🔄 Converting NetworkX → PyG...")
#     pyg_graph = convert_nx_to_pyg(G, g_label)

#     print("\n✅ Graph Structure:")
#     print(pyg_graph)

#     print("\n📐 Shapes:")
#     print(f"Node features shape: {pyg_graph.x.shape}")
#     print(f"Edge index shape: {pyg_graph.edge_index.shape}")
#     print(f"Edge attribute shape: {pyg_graph.edge_attr.shape}")
#     print(f"Graph label: {g_label}")

#     if feature_names is None:
#         feature_names = ['x', 'y', 'type', 'area', 'perimeter', 'eccentricity', 'solidity', 'circularity']

#     # 🧪 Show first 5 node features
#     print("\n🔬 First 5 node features:")
#     df_nodes = pd.DataFrame(pyg_graph.x.numpy(), columns=feature_names)
#     print(df_nodes.head())

#     # 🧪 Show first 5 edge features
#     if pyg_graph.edge_attr is not None:
#         edges = pyg_graph.edge_index.T.numpy()
#         edge_features = pyg_graph.edge_attr.numpy()
#         df_edges = pd.DataFrame(edges, columns=['src', 'dst'])
#         df_edges[['inv_dist', 'cos_sim', 'type_factor']] = edge_features
#         print("\n🔗 First 5 edge features:")
#         print(df_edges.head())

#     # 🎨 Visualize the graph
#     print("\n🎨 Visualizing Graph...")
#     visualize_pyg_graph(pyg_graph, max_nodes=max_nodes)

#     return pyg_graph


In [None]:
# f