In [7]:
from kosmoss import CONFIG, PROCESSED_DATA_PATH
import os.path as osp
import shutil

In [11]:
# shutil.disk_usage reports statistics for the whole filesystem that holds the
# given path, not the size of the directory itself.
# The named field is read instead of unpacking into `_`, so IPython's
# last-output variable `_` is not overwritten.
used_space = shutil.disk_usage(
    osp.join(PROCESSED_DATA_PATH, f"graphs-{CONFIG['timestep']}")
).used
used_space // (2**30)  # bytes -> whole GiB

187

In [11]:
# NOTE(review): `psutil` is not imported in any visible cell, so this raises
# NameError on a fresh kernel — add `import psutil` to the imports cell.
# The recorded result (total/used/free/percent) describes the disk as a whole.
psutil.disk_usage(osp.join(PROCESSED_DATA_PATH, f"graphs-{CONFIG['timestep']}"))

sdiskusage(total=1055815524352, used=201310593024, free=854488154112, percent=19.1)

In [1]:
# Scratch check of integer division (same operation as `idx // shard_size` in
# GNNDataset.get); presumably index 926 with a toy shard size of 100 — TODO confirm.
926 // 100

9

In [2]:
# Scratch check of the remainder (same operation as `idx % shard_size` in
# GNNDataset.get); presumably the row inside the shard — TODO confirm.
926 % 100

26

In [3]:
# True division comes out as exactly 160.0, i.e. 6784 divides 1085440 evenly.
# Presumably dataset length / number of shards — TODO confirm against METADATA.
1085440 / 6784

160.0

In [4]:
# Integer quotient by 160; presumably the shard index that holds sample
# 1084000 when each shard has 160 rows — TODO confirm.
1084000 // 160

6775

In [3]:
# Removing the only key leaves an empty dict behind.
d = dict(hello="world")
del d["hello"]
d

{}

In [5]:
# Same quotient computation with the divisor spelled out as 1085440 // 6784
# (= 160); presumably the shard index for sample 922309 — TODO confirm.
922309 // (1085440 // 6784)

5764

In [1]:
# Dummy keyword-argument mapping (placeholder values) for the key-listing
# experiment.
kwargs = dict(
    logger="hi",
    gpus="hfdis",
    strategy="ddp",
    progress_bar_refresh_rate=0,
)

In [4]:
# Iterating a dict yields its keys in insertion order.
list(kwargs)

['logger', 'gpus', 'strategy', 'progress_bar_refresh_rate']

In [5]:
import pytorch_lightning as pl
pl.__version__

'1.5.10'

In [6]:
import ray
ray.__version__

'1.10.0'

In [4]:
import numpy as np
from ray import tune

# Categorical search space over the powers of two 4, 8, 16, 32
# (elements stay NumPy integers, as with the element-wise form).
h = tune.choice(list(2 ** np.arange(2, 6)))

In [11]:
# Powers of two 2**2 .. 2**5, kept as NumPy integer scalars.
l = list(2 ** np.arange(2, 6))

In [12]:
l

[4, 8, 16, 32]

In [15]:
# Raises IndexError: `l[3]` is a NumPy integer scalar (the list was built from
# np.arange), and a scalar cannot be indexed.
l[3][4]

IndexError: invalid index to scalar variable.

In [10]:
# Draw a single value from the categorical search space `h`.
h.sample()

32

In [77]:
import torch.nn as nn
from ray import tune


# Hyper-parameters that stay fixed.
config = {
    "batch_size": 512,
    "in_feats": 20,
    "out_feats": 4,
    "act": nn.SiLU(inplace=True),
}

# Hyper-parameters intended for Ray Tune. Several are currently pinned to a
# constant; the sampler each one can be swapped back to is noted alongside.
config.update({
    # alternatives: tune.randint(4, 20) or tune.choice([2 ** k for k in np.arange(2, 6)])
    "hidden_feats": 4,
    # alternatives: tune.randint(4, 20) or tune.choice([2 ** k for k in np.arange(2, 6)])
    "edge_dim": 4,
    # alternative: tune.randint(4, 10)
    "num_layers": 4,
    "lr": tune.loguniform(1e-4, 1e-1),
    "dropout": tune.uniform(0, 1),
    # alternative: tune.randint(4, 8)
    "heads": 4,
})

In [95]:
import copy

class PatchedGAT(pyg.nn.models.basic_gnn.BasicGNN):
    """BasicGNN variant whose convolution layers are `GATConv` instances."""

    def init_conv(self, in_channels: int, out_channels: int,
                  **kwargs) -> pyg.nn.conv.MessagePassing:
        """Build one `GATConv` layer from `in_channels` to `out_channels`.

        If `heads` is supplied and does not divide `out_channels` evenly, the
        layer is built with a single head instead. All other keyword
        arguments are forwarded unchanged, together with `self.dropout`.

        NOTE(review): `out_channels` is passed through undivided; with
        GATConv's head concatenation the layer output would presumably be
        `heads * out_channels` wide — confirm this matches the next layer.
        """
        # Work on a shallow copy so the caller's mapping is never modified.
        conv_kwargs = dict(kwargs)

        indivisible = 'heads' in conv_kwargs and out_channels % conv_kwargs['heads'] != 0
        if indivisible:
            conv_kwargs['heads'] = 1

        return pyg.nn.conv.GATConv(
            in_channels,
            out_channels,
            dropout=self.dropout,
            **conv_kwargs,
        )

In [99]:
import os.path as osp
from torch_geometric.data import Dataset
import torchmetrics.functional as F
from typing import List, Union

from kosmoss import ARTIFACTS_PATH, CONFIG, DATA_PATH, LOGS_PATH, METADATA
from kosmoss.dataproc.flows import BuildGraphsFlow



class GNNDataset(Dataset):
    """Dataset that serves samples out of pre-built graph shard files.

    The shard layout is ``graphs-<timestep>/data-<shard>.pt`` under the
    processed directory, for ``shard`` in ``0 .. num_shards - 1``. The
    timestep comes from ``CONFIG`` and the shard count / dataset length from
    the ``'features'`` entry of ``METADATA`` for that timestep.
    """

    def __init__(self) -> None:
        # These attributes are set before the base constructor runs because
        # the properties below read them; presumably the base class consults
        # those properties during construction — TODO confirm.
        self.timestep = str(CONFIG['timestep'])
        self.params = METADATA[self.timestep]['features']
        self.num_shards = self.params['num_shards']

        super().__init__(root=DATA_PATH)

    @property
    def raw_file_names(self) -> List[str]:
        """Placeholder: a single empty file name (no raw files are listed)."""
        return [""]

    @property
    def processed_file_names(self) -> List[str]:
        """Relative paths of every shard file, in shard order."""
        # Plain `range` avoids depending on a NumPy import from another cell
        # and renders the same shard numbers in the file names.
        return [osp.join(f"graphs-{self.timestep}", f"data-{shard}.pt")
                for shard in range(self.num_shards)]

    def download(self) -> None:
        """Always raises: the data must be produced by the earlier notebooks."""
        raise Exception("Execute the Notebooks in this Bootcamp following the order defined by the Readme.")

    def process(self) -> None:
        """Instantiate `BuildGraphsFlow`.

        NOTE(review): only the constructor is called here; whether that alone
        runs the flow is not visible from this cell — confirm.
        """
        BuildGraphsFlow()

    def len(self) -> int:
        """Total number of samples, as recorded in the metadata."""
        return self.params['dataset_len']

    def get(self, idx: int):
        """Load and return the sample at global position `idx`.

        The shard is chosen by integer division of `idx` by the per-shard
        size, and the row by the remainder. The whole shard file is
        deserialized on every call.

        NOTE(review): the per-shard size is `len() // num_shards`; if the
        length is not an exact multiple of `num_shards`, the highest indices
        map to a shard number equal to `num_shards` — confirm shards are
        always evenly sized.
        """
        shard_size = self.len() // self.num_shards
        fileidx, rowidx = divmod(idx, shard_size)

        shard_path = osp.join(self.processed_dir, f"graphs-{self.timestep}", f'data-{fileidx}.pt')
        data_list = torch.load(shard_path)

        return data_list[rowidx]
    
# Instantiate the dataset; the constructor reads CONFIG / METADATA and calls
# the base class with root=DATA_PATH.
ds = GNNDataset()

In [104]:
import pytorch_lightning as pl

class LitGNNDataModule(pl.LightningDataModule):
    """Lightning data module serving train/val/test splits of `GNNDataset`.

    The dataset is split by position, without shuffling: the first 80% is
    used for training, the next 10% for validation and the last 10% for
    testing.

    Args:
        batch_size: Batch size used by all three data loaders.
        num_workers: Worker processes per data loader (defaults to 4).
    """

    def __init__(self, batch_size: int, num_workers: int = 4) -> None:
        super().__init__()
        self.bs = batch_size
        self.num_workers = num_workers

    def prepare_data(self) -> None:
        """Nothing to prepare in this module."""
        pass

    def setup(self, stage: Union[str, None] = None) -> None:
        """Create the dataset and slice it into the three splits.

        `stage` is accepted to match the Lightning hook but is not used; it
        defaults to None so the method can also be called by hand.
        """
        dataset = GNNDataset()#.shuffle()
        length = len(dataset)

        # Split boundaries, computed once so adjacent splits share them.
        train_end = int(length * .8)
        val_end = int(length * .9)

        self.trainds = dataset[:train_end]
        self.valds = dataset[train_end:val_end]
        self.testds = dataset[val_end:]

    def _make_loader(self, dataset, shuffle: bool = False) -> torch.utils.data.DataLoader:
        """Build a PyG data loader with this module's batch size and workers."""
        return pyg.loader.DataLoader(dataset, batch_size=self.bs,
                                     num_workers=self.num_workers, shuffle=shuffle)

    def train_dataloader(self) -> torch.utils.data.DataLoader:
        """Shuffled loader over the training split."""
        return self._make_loader(self.trainds, shuffle=True)

    def val_dataloader(self) -> torch.utils.data.DataLoader:
        """Unshuffled loader over the validation split."""
        return self._make_loader(self.valds)

    def test_dataloader(self) -> torch.utils.data.DataLoader:
        """Unshuffled loader over the test split."""
        return self._make_loader(self.testds)
    
# Data module with batch size 16; the dataset itself is only created once
# setup() is called.
mo = LitGNNDataModule(16)

In [100]:
# Build the patched GAT straight from the config dict.
# NOTE(review): config['dropout'] is a Ray Tune sampler object (tune.uniform)
# in the config cell, not a float — presumably it has to be resolved by Tune
# (or sampled) before the model is actually run; TODO confirm.
# The name `net` is also bound by the pyg.nn.GAT cell.
net = PatchedGAT(
    in_channels=config['in_feats'], 
    out_channels=config['out_feats'],
    hidden_channels=config['hidden_feats'],
    num_layers=config['num_layers'],
    edge_dim=config['edge_dim'],
    dropout=config['dropout'],
    heads=config['heads'],
    # act=nn.SiLU(inplace=True),
    act=config['act']
)

In [82]:
# Ten independent 10 -> 10 linear layers held in a ModuleList.
m = nn.ModuleList(nn.Linear(10, 10) for _ in range(10))

In [83]:
# Adds one more layer in place (11 in the recorded output). Not idempotent:
# every re-run of this cell grows the list by another layer.
m.append(nn.Linear(10, 10))

ModuleList(
  (0): Linear(in_features=10, out_features=10, bias=True)
  (1): Linear(in_features=10, out_features=10, bias=True)
  (2): Linear(in_features=10, out_features=10, bias=True)
  (3): Linear(in_features=10, out_features=10, bias=True)
  (4): Linear(in_features=10, out_features=10, bias=True)
  (5): Linear(in_features=10, out_features=10, bias=True)
  (6): Linear(in_features=10, out_features=10, bias=True)
  (7): Linear(in_features=10, out_features=10, bias=True)
  (8): Linear(in_features=10, out_features=10, bias=True)
  (9): Linear(in_features=10, out_features=10, bias=True)
  (10): Linear(in_features=10, out_features=10, bias=True)
)

In [85]:
import torch

# Random 1-D input of length 10 (no seed is set, so values differ per run).
t = torch.rand(size=(10,))
t

tensor([0.5408, 0.0991, 0.3162, 0.7140, 0.7784, 0.8058, 0.9549, 0.8038, 0.1840,
        0.0950])

In [90]:
# Pass the random vector through every layer of `m` in order; `x` ends up
# holding the output of the last layer, while `t` itself is left untouched.
x = t
for layer in m:
    x = layer(x)

In [78]:
# Only builds the categorical sampler over `l`; no value is drawn here.
tune.choice(l)

<ray.tune.sample.Categorical at 0x7f87b4869250>

In [1]:
# Recorded as a NameError: this cell ran (execution count 1) in a kernel where
# `tune` had not been imported yet — run `from ray import tune` first.
tune.randint(4, 8)

NameError: name 'tune' is not defined

In [80]:
import torch_geometric as pyg

# Stock PyG GAT built from the same config entries as the PatchedGAT cell.
# NOTE(review): config['dropout'] is a Ray Tune sampler object (tune.uniform)
# in the config cell, not a float — presumably it has to be resolved by Tune
# (or sampled) before the model is actually run; TODO confirm.
net = pyg.nn.GAT(
            in_channels=config['in_feats'], 
            out_channels=config['out_feats'],
            hidden_channels=config['hidden_feats'],
            num_layers=config['num_layers'],
            edge_dim=config['edge_dim'],
            dropout=config['dropout'],
            heads=config['heads'],
            # act=nn.SiLU(inplace=True),
            act=config['act']
        )
# net = pyg.nn.GAT(
#             in_channels=20, 
#             out_channels=4,
#             hidden_channels=32,
#             num_layers=5,
#             edge_dim=32,
#             dropout=.17,
#             heads=8,
#             act=nn.SiLU(inplace=True),
#         )

In [23]:
type(config['out_feats'])

int

In [22]:
# NOTE(review): the recorded output (ray.tune.sample.Integer) is stale — the
# config cell now sets "heads" to the plain int 4. Re-run to refresh.
type(config['heads'])

ray.tune.sample.Integer