#### **1. Import Libraries**

In [2]:
# Import libraries
import numpy as np
import pandas as pd 
import math
import os 

# Pytorch family
import torch
from torch.nn import functional as F
# Padding sequence
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader
# Pytorch lightning for training
import pytorch_lightning as pl

# Progress tracking 
from tqdm import tqdm
# Hugging face datasets library for processing data
from datasets import Dataset
# Python std library provides a decorator for making classes
from dataclasses import dataclass
# Pathlib deals with file path
from pathlib import Path
# Typing defines data path
from typing import Dict, Optional, List, Union, Tuple

# Encapsulate the outputs of the model
from transformers.modeling_outputs import BaseModelOutputWithPastAndCrossAttentions
# Handling chunking during the forward pass of a model
from transformers.pytorch_utils import apply_chunking_to_forward
# Transformer activations
from transformers.activations import ACT2FN
# Metrics
import torchmetrics as tm

In [3]:
# Define constants for the project
# Number of node operations
NODE_OP_CODES  = 120
# Number of node features
NODE_FEATS = 140
# Number of config features
CONFIG_FEATS = 24
# Number of combined node and configuration features
NODE_CONFIG_FEATS = 18

#### **2. Generating Tile DataFrame**

In [18]:
# Data path
DATA_DIR = '/kaggle/input/tpugraphs/npz'

In [25]:
def generate_tile_df() -> pd.DataFrame:
    # Create a list of file paths
    file_paths = []
    for elem in (Path(DATA_DIR) / 'tile').rglob("*"):
        if elem.is_file():
            file_paths.append(str(elem))

    # Create a DataFrame with a single column "paths" containing the file paths
    df = pd.DataFrame({'paths': file_paths})

    # Add new columns to the DataFrame
    df['split'] = df['paths'].apply(lambda x: Path(x).parent.name)
    df['configuration'] = df['paths'].apply(lambda x: Path(x).parent.parent.name).astype(str)
    df['extra'] = df['paths'].apply(lambda x: Path(x).parent.parent.parent.name).astype(str)
    df['model_name'] = df['paths'].apply(lambda x: Path(x).stem)
    df['collection'] = df['extra'] + ':' + df['configuration']
    df['ID'] = df['collection'] + ':' + df['model_name']

    return df

In [26]:
# Generate the tile DataFrame
tile_df = generate_tile_df()

# Display 
tile_df.head()

Unnamed: 0,paths,split,configuration,extra,model_name,collection,ID
0,/kaggle/input/tpugraphs/npz/tile/xla/valid/res...,valid,xla,tile,resnet_v1_50_official_batch_128_bf16_2bea628b7...,tile:xla,tile:xla:resnet_v1_50_official_batch_128_bf16_...
1,/kaggle/input/tpugraphs/npz/tile/xla/valid/inc...,valid,xla,tile,inception_v3_batch_128_train_40fa8f86f121f00a,tile:xla,tile:xla:inception_v3_batch_128_train_40fa8f86...
2,/kaggle/input/tpugraphs/npz/tile/xla/valid/inc...,valid,xla,tile,inception_v3_batch_128_train_-23e94c034a65a177,tile:xla,tile:xla:inception_v3_batch_128_train_-23e94c0...
3,/kaggle/input/tpugraphs/npz/tile/xla/valid/inc...,valid,xla,tile,inception_v3_batch_128_train_171f4371caf28639,tile:xla,tile:xla:inception_v3_batch_128_train_171f4371...
4,/kaggle/input/tpugraphs/npz/tile/xla/valid/mlp...,valid,xla,tile,mlperf_bert_batch_24_2x2_-25e30862c042a2b8,tile:xla,tile:xla:mlperf_bert_batch_24_2x2_-25e30862c04...


#### **3. Define functions**

In [46]:
def edges_adjacency(edges: torch.Tensor, add_diagonal = True) -> torch.tensor:
    """Create the adjacency matrix of edges"""
    
    adj_matrix = torch.zeros((edges.max() + 1, edges.max() + 1))
    adj_matrix[edges[:, 0], edges[:, 1]] = 1
    if add_diagonal:
        diag_idx = torch.arange(adj_matrix.shape[0])
        adj_matrix[diag_idx, diag_idx] = 1
    return adj_matrix

def tile_loader(path: str) -> dict:
    """Load data from a numpy file, convert to PyTorch tensors, and create an adjacency matrix."""
    tile_dict = dict(np.load(path))
    tile_dict = {k: torch.from_numpy(v) for k, v in np.load(path).item()}
    # add adjacency matrix to dictionary
    tile_dict['edges_adjacency'] = edges_adjacency(tile_dict[edge_index])
    return tile_dict

In [50]:
class TileDataset(torch.utils.data.Dataset):
    def __init__(self, df:pd.DataFrame, add_cls_token = True, num_configs = 10, max_configs = None):
        self.df = df
        self.add_cls_token = add_cls_token
        self.num_configs = num_configs
        self.max_configs = max_configs
        
    def __len__(self) -> int:
        return len(self.df)
    
    def select_configs(self, total_configs: int):
        if self.max_configs != None:
            total_configs = min(total_configs, self.max_configs)
        if self.num_configs == -1:
            return np.arange(total_configs)
        if total_configs < self.num_configs:
            return np.random.choice(total_configs, self.num_configs, replace = True)
        return np.random.choice(total_configs, self.num_configs, replace=False)
    
    def __getitem__(self, idx:int. selected_configs: List[int] = None):
        tile_dict = tile_loader(self.df.paths[idx])
        if selected_configs == None:
            selected_configs = self.select_configs(tile_dict['config_feature'].shape[0])
        tile_dict['node_config_feat'] = tile

SyntaxError: invalid syntax (3230492850.py, line 20)

In [49]:
# Example data
data = {
    "node_features": np.random.rand(5, 3),
    "edge_index": np.array([[0, 1], [1, 2], [2, 0], [3, 4], [4, 3]])
}

# Save the data as a numpy file
np.save("data.npy", data)

# Load the data with the tile_loader function
loaded_data = tile_loader("data.npy")

# Print the loaded data
for key, value in loaded_data.items():
    print(f"{key}:\n{value}\n")

IsADirectoryError: [Errno 21] Is a directory: '/kaggle/input/tpugraphs/npz'

In [36]:
edges = torch.tensor([[0, 1], [1, 2], [2, 0], [3, 4], [4, 3]])
x = edges_adjacency(edges)

In [44]:
print(x)
print(edges)
print(edges.max())
print(edges[:, 0])
print(edges[:, 1])

tensor([[1., 1., 0., 0., 0.],
        [0., 1., 1., 0., 0.],
        [1., 0., 1., 0., 0.],
        [0., 0., 0., 1., 1.],
        [0., 0., 0., 1., 1.]])
tensor([[0, 1],
        [1, 2],
        [2, 0],
        [3, 4],
        [4, 3]])
tensor(4)
tensor([0, 1, 2, 3, 4])
tensor([1, 2, 0, 4, 3])
