# GNN

## Installation

In [8]:
! pip install torch==2.0.0+cpu torchvision==0.15.0+cpu -f https://download.pytorch.org/whl/torch_stable.html


Looking in links: https://download.pytorch.org/whl/torch_stable.html


In [9]:
! pip install pyg-lib==0.3.1 torch_scatter torch_sparse torch_cluster torch_spline_conv -f https://data.pyg.org/whl/torch-2.0.0+cpu.html

Looking in links: https://data.pyg.org/whl/torch-2.0.0+cpu.html


In [10]:
! pip install 'torch-geometric==2.4.0'



## Data

In [11]:
from blocksnet import ServiceType
from blocksnet.utils.service_types import SERVICE_TYPES

# Lookup table: service type name -> ServiceType built from its raw specification.
service_types = {
    spec['name']: ServiceType(**spec)
    for spec in SERVICE_TYPES
}

In [12]:
import pandas as pd
import geopandas as gpd
import numpy as np
import torch
from tqdm import tqdm
from torch_geometric.data import Data

# NOTE(review): BATCH_SIZE is not referenced in the visible cells (the loader
# cell hard-codes batch_size=512) — confirm whether it is still needed.
BATCH_SIZE = 1
# Upper bound on an edge's `time_min`; node pairs above it get no edge in
# create_data (presumably minutes, judging by the column name — confirm).
MAX_ACCESSIBILITY = 60

# Columns of the input table used as node features.
COLUMNS = ['capacity', 'demand']
# Column of the input table used as the node target `y`.
TARGET_COLUMN = 'provision'

def create_data(service_type, acc_mx, gdf):
    """Build a PyTorch Geometric ``Data`` graph from a node table and an accessibility matrix.

    Parameters
    ----------
    service_type
        Object exposing an ``accessibility`` attribute; every kept travel time
        is divided by it to obtain the edge weight.
    acc_mx : pandas.DataFrame
        Square matrix of travel times between nodes. Its rows and columns are
        relabelled positionally (0..n-1), so it must have as many rows and
        columns as ``gdf`` has rows, in the same order.
    gdf : pandas.DataFrame
        One row per node; must contain the ``COLUMNS`` feature columns and the
        ``TARGET_COLUMN`` column. The caller's frame is not modified.

    Returns
    -------
    torch_geometric.data.Data
        ``x``: float32 node features, ``edge_index``: long tensor of shape
        (2, E), ``edge_attr``: float32 edge weights of length E, ``y``: float32
        node targets.
    """
    # Positional 0..n-1 index so that node ids can be used directly in edge_index.
    nodes = gdf.reset_index(drop=True)

    # Relabel a copy of the accessibility matrix with the same positional ids.
    acc_mx = acc_mx.copy()
    acc_mx.index = nodes.index
    acc_mx.columns = nodes.index

    # Scale all feature columns by their common (global) maximum.
    # A zero maximum would turn every feature into NaN (0 / 0), so in that
    # case the (all-zero) features are left untouched.
    scale = nodes[COLUMNS].max().max()
    if scale != 0:
        nodes[COLUMNS] = nodes[COLUMNS] / scale

    # Node features (X) and target (y).
    x = nodes[COLUMNS].to_numpy()
    y = nodes[TARGET_COLUMN].to_numpy()

    # Long-format edge list: one row per (i, j) pair with its travel time.
    edges = acc_mx.stack().reset_index()
    edges.columns = ['i', 'j', 'time_min']

    # Keep only pairs within the accessibility limit. The explicit copy makes
    # the following column assignment operate on an independent frame instead
    # of a filtered slice (avoids pandas' SettingWithCopyWarning).
    edges = edges.loc[edges['time_min'] <= MAX_ACCESSIBILITY].copy()
    edges['weight'] = edges['time_min'] / service_type.accessibility

    edge_index = edges[['i', 'j']].to_numpy().T  # transposed to shape (2, E)
    edge_attr = edges['weight'].to_numpy()  # one weight per edge

    return Data(
        x=torch.tensor(x, dtype=torch.float32),
        edge_index=torch.tensor(edge_index, dtype=torch.long),
        edge_attr=torch.tensor(edge_attr, dtype=torch.float32),
        y=torch.tensor(y, dtype=torch.float32),
    )

# Load the accessibility matrix and the provision table from files in the
# current working directory.
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
acc_mx = pd.read_pickle('acc_mx.pickle')
# NOTE(review): read with pandas rather than geopandas, so `gdf` is presumably
# a plain DataFrame here — confirm that geometry is not needed downstream.
gdf = pd.read_parquet('provision.parquet')
service_type = service_types['school']

# Build the single graph object used by the following cells.
data = create_data(service_type, acc_mx, gdf)

# datas = []

# for folder in tqdm([0,1,2,3]):
#     for service_type_name in ['school']:
#         service_type = service_types[service_type_name]
#         acc_mx = pd.read_pickle(f'./data/gnn/{folder}/adj_mx.pickle')
#         try:
#             gdf = gpd.read_parquet(f'./data/gnn/{folder}/{service_type.name}.parquet')
#             datas.append(create_data(service_type, acc_mx, gdf))
#         except:
#             continue

In [13]:
from torch_geometric.loader import NeighborLoader

# Neighbour-sampling settings, gathered in one place before building the loader.
sampling_options = {
    'num_neighbors': [10, 10],  # neighbours sampled per layer (two layers, for a 2-hop GAT)
    'batch_size': 512,  # number of seed nodes per mini-batch
    'shuffle': True,
    'num_workers': 4,  # worker processes to speed up loading
}

# The whole graph is passed in as one large Data object.
train_loader = NeighborLoader(data, **sampling_options)

# Draw one mini-batch and show its structure — each batch is already a subgraph.
batch = next(iter(train_loader))
print(batch)

Data(x=[14880, 2], edge_index=[2, 47200], edge_attr=[47200], y=[14880], n_id=[14880], e_id=[47200], num_sampled_nodes=[3], num_sampled_edges=[2], input_id=[512], batch_size=512)


In [14]:
# Persist the whole loader object to 'train_loader.pth' in the working directory.
# NOTE(review): torch.save pickles the object, so reloading it presumably needs
# compatible torch / torch_geometric versions — confirm this is the intended
# artefact rather than the underlying `data` graph.
torch.save(train_loader, 'train_loader.pth')