In [None]:
# List the NVIDIA GPUs visible on this machine (quick sanity check before the GPU cells below).
!nvidia-smi -L

In [None]:
# Print conda's configuration summary so we can confirm the active environment is the intended one.
!conda info

# One-time setup (only run once)

In [None]:
# Fetch the StyleGAN2-ADA PyTorch repository (presumably the source of the `dnnlib` and
# `legacy` modules imported later) and make the checkout the kernel's working directory,
# so the remaining setup cells run relative to it.
!git clone https://github.com/NVlabs/stylegan2-ada-pytorch
%cd stylegan2-ada-pytorch

In [None]:
# Clone OpenAI's CLIP repository into ./CLIP, then move its top-level (non-hidden) contents
# into the current directory -- presumably so that `import clip` resolves from here without
# installing the package.
# NOTE(review): `mv` can overwrite same-named files already present in this directory
# (e.g. a README) -- confirm that is acceptable.
!git clone https://github.com/openai/CLIP CLIP
!mv CLIP/* .

In [None]:
import subprocess

# Ask the CUDA compiler for its version banner and pull out the "release X.Y" field;
# the version number is the last space-separated word of that field.
nvcc_banner = subprocess.check_output(["nvcc", "--version"]).decode("UTF-8")
release_fields = [field for field in nvcc_banner.split(", ") if field.startswith("release")]
CUDA_version = release_fields[0].split(" ")[-1]
print("CUDA version:", CUDA_version)

# Local-version suffix appended to the pinned torch/torchvision versions for each CUDA
# release; any release not listed here falls back to the CUDA 11.0 build.
torch_version_suffix = {
    "10.0": "+cu100",
    "10.1": "+cu101",
    "10.2": "",
}.get(CUDA_version, "+cu110")

In [None]:
# Only needed the first time
!pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex

In [None]:
!pip install click requests tqdm pyspng ninja gdown

In [None]:
!DNNLIB_CACHE_DIR=.cache

In [None]:
!gdown --id 1CNhPQH3cuDJSOX2RE5PDkFkD56WkC_u9

# Start here on every run (after the one-time setup above)

In [None]:
# Enter the repository checkout. The path is relative, so this expects the kernel's working
# directory to be the parent folder of stylegan2-ada-pytorch/.
%cd stylegan2-ada-pytorch/

In [None]:
import numpy as np
import torch
import pickle
import PIL.Image
import torch.nn.functional as F
import clip
import dnnlib
import legacy
from tqdm.autonotebook import tqdm

In [None]:
# Run configuration. w_avg_samples is the number of latent samples drawn below.
w_avg_samples = 65536 # total samples to cluster
num_categories = 64   # total number of top level clusters
# Number of latents synthesized and CLIP-encoded per loop iteration.
batch_size = 16
# Generator snapshot to load (opened through dnnlib.util.open_url).
network_pkl = 'network-snapshot-000088.pkl'
# prefix/suffix form the start and end of every saved .npy filename.
prefix = "flowers"
suffix = "64k"
device = torch.device('cuda')
# Per-channel mean/std used to normalize images right before they are passed to CLIP's
# encode_image; kept on the GPU so normalization runs on-device.
image_mean = torch.tensor([0.48145466, 0.4578275, 0.40821073]).to(device)
image_std = torch.tensor([0.26862954, 0.26130258, 0.27577711]).to(device)

In [None]:
# Load the generator stored under the 'G_ema' key of the network pickle, using the
# repository's legacy loader rather than a raw pickle.load.
with dnnlib.util.open_url(network_pkl) as pkl_file:
    G = legacy.load_network_pkl(pkl_file)['G_ema'] # type: ignore
# Inference only: switch off gradient tracking for the parameters, then move to the target device.
G = G.requires_grad_(False).to(device)

In [None]:
# Generate samples in W
# Draw z (and, for conditional generators, the label vectors) from a seeded generator so the
# saved latents can be regenerated exactly on a fresh kernel.
sample_seed = 0
rng = np.random.RandomState(sample_seed)
z_samples = rng.randn(w_avg_samples, G.z_dim)
labels = None
if G.mapping.c_dim:
    # Conditional generator: feed small random label vectors to the mapping network.
    labels = torch.from_numpy(0.2 * rng.randn(w_avg_samples, G.mapping.c_dim)).to(device)
w_samples = G.mapping(torch.from_numpy(z_samples).to(device), labels)  # [N, L, C]
w_samples = w_samples.cpu().numpy().astype(np.float32)                 # [N, L, C]
# Keep only the first entry along the layer axis (presumably all L entries are identical,
# since the synthesis step re-broadcasts a single vector across every layer).
# Index the axis explicitly instead of calling squeeze(): squeeze() would also drop the
# sample axis when N == 1 and hand a 1-D array to the cells below.
w_samples_1d = w_samples[:, 0, :].copy()                               # [N, C]
print(w_samples_1d.shape)
np.save(prefix + "_latents_" + suffix + ".npy", w_samples_1d)
w = w_samples_1d

In [None]:
# Load the two CLIP encoders used for feature extraction. clip.load returns a pair; only
# its first element (the model) is kept, because the images are resized, cropped and
# normalized by hand on the GPU in the feature-extraction cell.
clip_model_names = ("ViT-B/32", "RN50x4")
model1, model2 = [clip.load(name, device=device)[0] for name in clip_model_names]

In [None]:
# Generate images to get CLIP image features (two models used); could also get other perceptual model features in this step, such as LPIPS
# This step is certainly the slowest, will be faster with a lower number of total samples, or with fewer models being run.

def _encode_with_clip(images, model, resize_to, crop_to):
    """Return CLIP image features for a batch of generator outputs as a NumPy array.

    images:    [N, C, H, W] generator output; values are clamped to [-1, 1] and rescaled to [0, 1].
    model:     CLIP model exposing encode_image.
    resize_to: side length the images are area-resampled to.
    crop_to:   side length of the centre crop that is fed to the model.

    Normalization uses the notebook-level image_mean / image_std tensors.
    """
    margin = (resize_to - crop_to) // 2
    image_input = (torch.clamp(images, -1, 1) + 1) * 0.5
    image_input = F.interpolate(image_input, size=(resize_to, resize_to), mode='area')
    image_input = image_input[:, :, margin:margin + crop_to, margin:margin + crop_to]  # centre crop
    image_input = (image_input - image_mean[None, :, None, None]) / image_std[None, :, None, None]
    return model.encode_image(image_input).cpu().numpy()


latents = w
num_latents = latents.shape[0]
features1 = []
features2 = []
# Inference only: keep both synthesis and encoding out of autograd.
with torch.no_grad():
    # Step by batch_size up to num_latents so a trailing partial batch is processed too;
    # range(num_latents // batch_size) would silently drop it and leave the saved features
    # with fewer rows than the saved latents.
    for start in tqdm(range(0, num_latents, batch_size)):
        w_batch = torch.tensor(latents[start:start + batch_size], dtype=torch.float32, device=device)
        # Broadcast each latent across all synthesis layers: [B, C] -> [B, num_ws, C].
        ws = w_batch[:, None, :].repeat(1, G.mapping.num_ws, 1)
        images = G.synthesis(ws, noise_mode='const')
        features1.append(_encode_with_clip(images, model1, resize_to=256, crop_to=224))  # 256 -> 224
        features2.append(_encode_with_clip(images, model2, resize_to=324, crop_to=288))  # 324 -> 288

# Concatenate along the sample axis; unlike np.array(...).reshape(...), this also works when
# the last batch is smaller than the others.
logits1 = np.concatenate(features1, axis=0)
print(logits1.shape)
clip1 = logits1
np.save(prefix + "_clip_" + suffix + "_1.npy", clip1)

logits2 = np.concatenate(features2, axis=0)
print(logits2.shape)
clip2 = logits2
np.save(prefix + "_clip_" + suffix + "_2.npy", clip2)