In [123]:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from torch.utils.data import Subset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import sys
sys.path.insert(0, '/pbabkin/nas-for-moe/code')
import nas_moe.vae
import nas_moe.dataset
import nas_moe.surrogate
import nas_moe.single_arch
import nas_moe.graph
import os
from torch.utils.data import DataLoader
from torch_geometric.data import Batch
from pathlib import Path
import torch.nn as nn
from torch.utils.data import DataLoader
from torch_geometric.data import Batch
from tqdm import tqdm
from nni.nas.hub.pytorch import DARTS as DartsSpace
from torch_geometric.utils import dense_to_sparse

In [124]:
train_dataset = torchvision.datasets.CIFAR10(
    root='/pbabkin/nas-for-moe/code/data',
    train=True,
    download=True,
    transform=transforms.ToTensor()
)

split_valid = int(10000)
num_samples = len(train_dataset)
indices = list(range(num_samples))
np.random.seed(42)
np.random.shuffle(indices)
valid_subset = Subset(train_dataset, indices[split_valid:])
valid_labels = [label for im, label in valid_subset]

Files already downloaded and verified


In [125]:
LATENT_DIM = 64
latent_space = nas_moe.vae.LatentSpace(LATENT_DIM)

In [126]:
transform = transforms.Compose([transforms.ToTensor()])
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

Files already downloaded and verified


In [127]:
latent_space.train_vae(train_loader, epochs=2)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:07<00:00,  3.98s/it]

Epoch [2/2] Loss: 81.1543





In [128]:
# Подготовка данных
images = []
labels = []

for img, label in valid_subset:
    if True:
        z = latent_space.encode_image(img)  # Получаем латентный вектор (тензор)
        images.append(z.cpu().numpy())  # Переводим в numpy
    else:
        img_array = (img.permute(1, 2, 0).numpy() * 255).astype(np.uint8)
        img_flat = img_array.flatten()
        images.append(img_flat)

    labels.append(label)

X = np.array(images)
y = np.array(labels)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print(f"Форма данных: {X.shape}")
# 3072 - просто

Форма данных: (40000, 64)


In [129]:
kmeans = KMeans(n_clusters=10, random_state=42, n_init=10, verbose=0)
kmeans.fit(X_scaled)

clusters = kmeans.labels_

print(f"Количество уникальных кластеров: {len(np.unique(clusters))}")

Количество уникальных кластеров: 10


In [132]:
base_path = Path('/pbabkin/nas-for-moe/code/data/CIFAR10_div/CIFAR10_div/architectures')
model_dicts_paths = [base_path / p for p in os.listdir(base_path)]
dataset_cluster_acc = nas_moe.dataset.ArchClusterACCDataset(
    model_dicts_paths, 
    clusters,
    valid_labels
)

In [133]:
dataset_cluster_acc[1]

Data(x=[21, 8], edge_index=[2, 29], y=[1, 10], index=1)

In [134]:
surr = nas_moe.surrogate.GAT(8, 10)

In [135]:
def collate_graphs(batch):
    """
    Объединяет список Data объектов в один Batch
    """
    return Batch.from_data_list(batch)

train_loader = DataLoader(
    dataset_cluster_acc,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_graphs
)


In [136]:
device = "cuda" if torch.cuda.is_available() else "cpu"
surr.to(device)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(surr.parameters(), lr=0.001)

for epoch in tqdm(range(50)):
    surr.train()
    for batch in train_loader:
        batch = batch.to(device)

        output = surr(batch.x, batch.edge_index, batch.batch)

        loss = criterion(output, batch.y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [07:56<00:00,  9.54s/it]

Epoch 50, Loss: 0.0004





# Inference

In [137]:
model_space = DartsSpace(
    width=16,
    num_cells=8,
    dataset='cifar'
)

In [138]:
arch_dicts = []
archGenerator = nas_moe.single_arch.ArchitectureGenerator(model_space, 8, 3)
K = 10000
arch_dicts += [archGenerator.generate_arch()['architecture'] for _ in range(K)]

In [139]:
surr.cpu()

def inference_surr(arch):
    graph = nas_moe.graph.Graph(arch, index=0)
    adj, _, features = graph.get_adjacency_matrix()
    
    features = torch.tensor(features, dtype=torch.float)
    adj = torch.tensor(adj, dtype=torch.float)
    
    edge_index, _ = dense_to_sparse(adj)
    
    batch = torch.zeros(features.size(0), dtype=torch.long)
    
    with torch.no_grad():
        output = surr(features, edge_index, batch)
    return output.squeeze(0)

In [140]:
cluster_accs = [inference_surr(arch) for arch in tqdm(arch_dicts)]

100%|███████████████████████████████████████████████████████████████████████████████████████████████| 10000/10000 [01:32<00:00, 107.84it/s]


In [144]:
stacked = torch.stack(cluster_accs)  # shape (5, 10)

# get top 2 values and their indices along dim=0 (for each of the 10 columns)
top2_values, top2_indices = torch.topk(stacked, k=3, dim=0)

print("Top 2 values:", top2_values)       # shape (2, 10)
print("Top 2 indices:", top2_indices)     # shape (2, 10)

# Second maximum indices (index of the second highest value per column)
second_max_indices = top2_indices[2]

print("Second max indices:", second_max_indices)

Top 2 values: tensor([[0.8747, 0.7712, 0.8515, 0.8066, 0.7858, 0.8335, 0.8673, 0.8084, 0.7968,
         0.8312],
        [0.8616, 0.7651, 0.8497, 0.7981, 0.7820, 0.8244, 0.8614, 0.8020, 0.7837,
         0.8289],
        [0.8587, 0.7637, 0.8474, 0.7958, 0.7799, 0.8235, 0.8573, 0.8006, 0.7829,
         0.8287]])
Top 2 indices: tensor([[5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844],
        [6621, 3956, 6621, 1072, 3185, 6336, 6336, 1463, 1463, 1463],
        [1521, 1990, 6336, 1521, 1463, 4668, 3185, 4668, 6336, 1521]])
Second max indices: tensor([1521, 1990, 6336, 1521, 1463, 4668, 3185, 4668, 6336, 1521])


In [141]:
stacked = torch.stack(cluster_accs)  # shape (5, 10)
max_indices = torch.argmax(stacked, dim=0)  # shape (10,)
print("Max indices:", max_indices)

Max indices: tensor([5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844])


In [142]:
torch.max(stacked, dim=0)

torch.return_types.max(
values=tensor([0.8747, 0.7712, 0.8515, 0.8066, 0.7858, 0.8335, 0.8673, 0.8084, 0.7968,
        0.8312]),
indices=tensor([5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844, 5844]))

In [1]:
import numpy as np
import torch
import torchvision
import torchvision.transforms as transforms
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from torch.utils.data import Subset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import sys
sys.path.insert(0, '/pbabkin/nas-for-moe/code')
from pathlib import Path

In [2]:
import nas_moe.graph
import os
import json

In [3]:
folder_path = Path('./data/CIFAR10_acc/trained_models_archs_1')
json_paths = [Path(x) for x in os.listdir(folder_path)]

In [4]:
valid_predictions_list = []
valid_acc_list = []
architecture_list = []

for json_path in json_paths:
    with open(folder_path / json_path, 'r') as file:
        data = json.load(file)

    valid_predictions_list += [data['valid_predictions']]
    valid_acc_list += [data['valid_accuracy']]
    architecture_list.append(data['architecture'])
        
    file_path = Path("data.json")

In [10]:
graph = nas_moe.graph.Graph(architecture_list[0])

In [12]:
adj_matrix, operations, one_hot_ops = graph.get_adjacency_matrix()

In [17]:
adj_matrix.shape

(21, 21)

In [19]:
import nas_moe.dataset

  from .autonotebook import tqdm as notebook_tqdm


In [20]:
dataset_cluster_acc = nas_moe.dataset.ArchClusterACCDataset(
    './data/CIFAR10_acc/trained_models_archs_1', 

)

nas_moe.dataset.ArchClusterACCDataset