In [1]:
# =============================================================================
# 0. One-Time Setup (Critical Fix)
# =============================================================================
!sudo apt install build-essential ninja-build -y
!pip install ninja torchvision
!pip install gfpgan realesrgan
!rm -rf stylegan2-ada-pytorch  # Clean previous installs
!git clone https://github.com/NVlabs/stylegan2-ada-pytorch.git
%cd stylegan2-ada-pytorch
!python setup.py install --force
%cd ..

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
build-essential is already the newest version (12.9ubuntu3).
ninja-build is already the newest version (1.10.1-1).
0 upgraded, 0 newly installed, 0 to remove and 29 not upgraded.
Cloning into 'stylegan2-ada-pytorch'...
remote: Enumerating objects: 131, done.
remote: Counting objects: 100% (2/2), done.
remote: Compressing objects: 100% (2/2), done.
remote: Total 131 (delta 0), reused 0 (delta 0), pack-reused 129 (from 2)
Receiving objects: 100% (131/131), 1.13 MiB | 12.19 MiB/s, done.
Resolving deltas: 100% (57/57), done.
/content/stylegan2-ada-pytorch
python3: can't open file '/content/stylegan2-ada-pytorch/setup.py': [Errno 2] No such file or directory
/content


In [2]:
# =============================================================================
# 1. Install Dependencies (Simplified)
# =============================================================================
# Quiet (-q) installs of the Python packages imported by the later sections.
!pip install -q torch torchvision matplotlib
# facenet-pytorch supplies InceptionResnetV1 and deepface supplies DeepFace,
# both of which are imported in Section 11.
!pip install -q facenet-pytorch deepface
# CLIP is installed directly from its GitHub repository.
!pip install -q git+https://github.com/openai/CLIP.git

  Preparing metadata (setup.py) ... done


In [3]:
# =============================================================================
# 2. Path Configuration (NEW CRITICAL STEP)
# =============================================================================
import sys

# Each entry is pushed to the front of sys.path, so the last directory listed
# here ends up being searched first.
for repo_dir in (
    '/content/stylegan2-ada-pytorch',
    '/content/stylegan2-ada-pytorch/training',
):
    sys.path.insert(0, repo_dir)

In [4]:
# =============================================================================
# 3. Load Pretrained Model (MODIFIED IMPORTS)
# =============================================================================
import torch
import dnnlib
# `legacy` is imported as a top-level module, which relies on the sys.path
# entries added in Section 2.
from legacy import load_network_pkl  # Now works after Section 2

# Open the pretrained FFHQ pickle by URL, take the 'G_ema' entry of the loaded
# mapping, switch it to eval mode and move it to the GPU.
# NOTE(review): this fetches a .pkl over the network and presumably unpickles
# it; only point it at a source you trust.
with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl') as f:
    generator = load_network_pkl(f)['G_ema'].eval().cuda()

In [5]:
# =============================================================================
# 4. Face Attribute Verification (File-Based)
# =============================================================================
class FaceVerifier:
    """Face analysis helper combining a FaceNet model with DeepFace attributes.

    Args:
        device: Torch device the FaceNet model is moved to. Defaults to
            "cuda", matching the previously hard-coded behaviour.
    """

    def __init__(self, device="cuda"):
        self.device = device
        self.facenet = InceptionResnetV1(pretrained='vggface2').eval().to(device)
        self.attributes = ['age', 'gender', 'race', 'emotion']

    def get_details(self, img_tensor):
        """Run DeepFace attribute analysis on the first image of a batch.

        Args:
            img_tensor: Image batch; element 0 is read as a channel-first RGB
                image whose values are mapped from [-1, 1] to [0, 255].

        Returns:
            Whatever ``DeepFace.analyze`` returns for ``self.attributes``.
        """
        pixels = (img_tensor[0].permute(1, 2, 0).cpu().detach().numpy() + 1) * 127.5
        # Clamp before the uint8 cast: values outside [-1, 1] would otherwise
        # overflow the 0..255 range instead of saturating.
        img_np = np.clip(pixels, 0, 255).astype(np.uint8)
        img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)

        # DeepFace is handed a file path, so round-trip through a temp JPEG.
        # Only the name is reserved here; the handle is closed before writing.
        with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
            temp_path = temp_file.name

        try:
            cv2.imwrite(temp_path, img_np)
            return DeepFace.analyze(temp_path, actions=self.attributes, enforce_detection=False)
        finally:
            # Always remove the temp file, including when the analysis raises.
            os.unlink(temp_path)

    def realism_score(self, img_tensor):
        """Return the norm of the FaceNet output for ``img_tensor`` as a Python float.

        Because the value is converted with ``.item()``, it carries no gradient.
        """
        return self.facenet(img_tensor).norm().item()

In [6]:
# =============================================================================
# 5. Text-to-Attribute Parser
# =============================================================================
def parse_prompt(prompt):
    """Extract a small set of facial attributes from a free-text prompt.

    Matching is a case-insensitive substring search for a fixed list of
    phrases, so "Blue Eyes" and "blue eyes" are treated the same.

    Args:
        prompt: Description text. ``None`` or an empty string yields a result
            with no attributes set.

    Returns:
        A nested dict with 'hair', 'eyes' and 'face' sub-dicts (unset fields
        are ``None``) and a 'features' list.
    """
    attributes = {
        'hair': {'color': None, 'length': None, 'style': None},
        'eyes': {'color': None, 'shape': None},
        'face': {'shape': None, 'skin': None},
        'features': []
    }

    # (phrase to look for, attribute group, field, value to store).
    # Basic parsing (expand this table for better accuracy).
    phrase_rules = (
        ('blue eyes', 'eyes', 'color', 'blue'),
        ('long hair', 'hair', 'length', 'long'),
        ('oval face', 'face', 'shape', 'oval'),
    )

    normalized = (prompt or '').lower()
    for phrase, group, field, value in phrase_rules:
        if phrase in normalized:
            attributes[group][field] = value

    return attributes

In [7]:
'''
# =============================================================================
# 6. Universal Latent Vector Initialization
# =============================================================================
def create_proper_latent(generator):
    # Generate initial latent vector
    z = torch.randn([1, generator.z_dim]).to('cuda')
    w = generator.mapping(z, None)

    num_layers = generator.synthesis.num_ws
    w_plus = None  # Initialize w_plus to avoid UnboundLocalError

    # Handle different StyleGAN versions
    if w.dim() == 2:  # Standard W space [1, 512]
        w_plus = w.unsqueeze(1).repeat(1, num_layers, 1)
    elif w.dim() == 3:  # Already W+ space [1, num_layers, 512]
        if w.shape[1] != num_layers:
            w_plus = w[:, :num_layers, :]  # Truncate or repeat as needed
            if w_plus.shape[1] < num_layers:
                w_plus = torch.cat([w_plus, w_plus[:, -1:, :].repeat(1, num_layers - w_plus.shape[1], 1)], dim=1)
        else:
            w_plus = w  # Directly assign if already correct

    # Ensure w_plus is valid
    if w_plus is None:
        raise ValueError(f"Unexpected latent dimension: {w.dim()}")

    # Final verification
    assert w_plus.shape == (1, num_layers, 512), f"Final latent shape invalid: {w_plus.shape}"

    print(f"Proper latent shape: {w_plus.shape}")
    return w_plus.requires_grad_(True)

# Usage (Ensure generator is properly initialized)
w = create_proper_latent(generator)
'''
# =============================================================================
# 6. Universal Latent Vector Initialization (100% Working)
# =============================================================================
def create_proper_latent(generator):
    """Sample a random latent and return it as a trainable per-layer tensor.

    Args:
        generator: Object exposing ``z_dim``, ``mapping(z, c)`` and
            ``synthesis.num_ws``.

    Returns:
        A leaf tensor with ``requires_grad=True`` holding one copy of the
        mapped latent for each of the ``synthesis.num_ws`` layers.
    """
    noise = torch.randn([1, generator.z_dim], device="cuda")
    mapped = generator.mapping(noise, None)

    # A 3-D result is already expanded per layer; keep only its first layer.
    if mapped.dim() == 3:
        mapped = mapped[:, 0, :]

    # Copy the single latent once per synthesis layer.
    layer_count = generator.synthesis.num_ws
    per_layer = mapped.unsqueeze(1).repeat(1, layer_count, 1)

    # Cut the link to the mapping pass so the optimizer receives a leaf tensor.
    latent = per_layer.clone().detach()
    latent.requires_grad_(True)

    print(f"Final latent shape: {latent.shape}")
    return latent

In [8]:
# =============================================================================
# 7. Core Optimization Loop (Full Working Version)
# =============================================================================
def generate_custom_face(prompt, num_steps=800, truncation_psi=0.7):
    """Optimise a latent so the synthesised face matches ``prompt`` under CLIP.

    Relies on the notebook globals ``generator``, ``clip``, ``clip_model`` and
    ``face_verifier`` having been initialised before the call.

    Args:
        prompt: Text description; ", high quality photo" is appended to it.
        num_steps: Number of Adam updates applied to the latent.
        truncation_psi: Interpolation weight between the average latent and
            the sampled latent (1.0 keeps the sample unchanged).

    Returns:
        The image synthesised from the optimised latent.
    """
    # Initialize latent vector properly
    w = create_proper_latent(generator)

    # Truncation pulls the sample toward the mapping network's average latent.
    # If the mapping network exposes no `w_avg`, fall back to plain scaling,
    # which is what this function did before.
    with torch.no_grad():
        w_avg = getattr(generator.mapping, 'w_avg', None)
        if w_avg is None:
            w = w * truncation_psi
        else:
            w = w_avg + (w - w_avg) * truncation_psi
    w = w.clone().detach().requires_grad_(True)  # Make new leaf tensor

    # Verify tensor properties
    print(f"Is leaf: {w.is_leaf}, Requires grad: {w.requires_grad}")

    # Initialize optimizer ONLY with leaf tensor
    optimizer = torch.optim.Adam([w], lr=0.01)

    # CLIP text encoding (constant across steps, so computed once without grad)
    text_input = clip.tokenize([prompt + ", high quality photo"]).to('cuda')
    with torch.no_grad():
        text_features = clip_model.encode_text(text_input)

    for step in range(num_steps):
        # Forward pass
        img = generator.synthesis(w, noise_mode='const')

        # CLIP preprocessing: map [-1, 1] to [0, 1] and resize to 224x224
        img_prepped = (img + 1) / 2
        img_prepped = torch.nn.functional.interpolate(img_prepped, size=224,
                                                    mode='bicubic', antialias=True)

        # CLIP loss
        image_features = clip_model.encode_image(img_prepped)
        clip_loss = 1 - torch.cosine_similarity(image_features, text_features).mean()

        # Face realism loss
        realism_loss = -face_verifier.realism_score(img) * 0.01

        # The per-step DeepFace attribute lookup was dropped: its result was
        # never turned into a loss (the old code stopped at an undefined
        # `attr_loss` name), so it could not influence the optimisation.
        total_loss = clip_loss + realism_loss

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        if step % 50 == 0:
            # float() accepts both 0-dim tensors and plain Python numbers.
            print(f"Step {step}: CLIP Loss={float(clip_loss):.2f}, Realism={float(realism_loss):.2f}")

    return generator.synthesis(w, noise_mode='const')

In [9]:
# =============================================================================
# 8. Face Generation with Dimension Safety
# =============================================================================
def generate_custom_face(prompt, num_steps=300, w=None):
    """Optimise a latent against a CLIP text prompt and return the final image.

    Relies on the notebook globals ``generator``, ``clip`` and ``clip_model``.

    Args:
        prompt: Text description the image should match.
        num_steps: Number of Adam updates applied to the latent.
        w: Optional leaf latent to optimise in place. When omitted, a fresh one
            is created, so the function no longer depends on a notebook-global
            ``w`` having been defined by some earlier cell.

    Returns:
        The image synthesised from the optimised latent.

    Raises:
        AssertionError: If the latent's layer count differs from
            ``generator.synthesis.num_ws``.
    """
    if w is None:
        w = create_proper_latent(generator)
    optimizer = torch.optim.Adam([w], lr=0.05)

    # Verify latent dimensions before optimization
    assert w.shape[1] == generator.synthesis.num_ws, \
        f"Latent dim mismatch! Current: {w.shape[1]}, Required: {generator.synthesis.num_ws}"

    # The prompt embedding does not change between steps: encode it once,
    # without autograd, instead of re-tokenising and re-encoding every step.
    text_input = clip.tokenize([prompt]).to('cuda')
    with torch.no_grad():
        text_features = clip_model.encode_text(text_input)

    for step in range(num_steps):
        # Generate image with verified latent
        img = generator.synthesis(w, noise_mode='const')

        # Map [-1, 1] to [0, 1] and resize to 224x224 for CLIP
        img_prepped = (img + 1) / 2
        img_prepped = torch.nn.functional.interpolate(img_prepped, size=224)

        image_features = clip_model.encode_image(img_prepped)

        clip_loss = 1 - torch.cosine_similarity(image_features, text_features).mean()

        optimizer.zero_grad()
        clip_loss.backward()
        optimizer.step()

        if step % 50 == 0:
            print(f"Step {step}: Loss={clip_loss.item():.2f}")

    return generator.synthesis(w, noise_mode='const')

In [10]:
# =============================================================================
# 9. Quality Enhancement (Pure OpenCV)
# =============================================================================
def enhance_face_quality(img_tensor):
    """Upscale and restore the first image of a batch, returning a new tensor.

    Args:
        img_tensor: Image batch; element 0 is read as a channel-first RGB image
            whose values are mapped from [-1, 1] to [0, 255].

    Returns:
        A float tensor with a leading batch axis of size 1, channel-first,
        rescaled to [-1, 1] and moved to the GPU.

    NOTE(review): ``RealESRGANer`` and ``GFPGANer`` are not imported in any
    cell shown here, and their constructors presumably need model weights or
    paths beyond the arguments passed below - confirm against the library docs.
    """
    pixels = (img_tensor[0].permute(1, 2, 0).cpu().detach().numpy() + 1) * 127.5
    # Clamp before the uint8 cast: values outside [-1, 1] would otherwise
    # overflow the 0..255 range instead of saturating.
    rgb = np.clip(pixels, 0, 255).astype(np.uint8)
    # Reversing the channel axis gives a negatively strided view; hand the
    # enhancers a real contiguous BGR array instead.
    img_np = np.ascontiguousarray(rgb[..., ::-1])

    # Enhance: super-resolve first, then run face restoration on the result.
    upsampler = RealESRGANer(scale=4)
    face_enhancer = GFPGANer()
    sr_img, _ = upsampler.enhance(img_np)
    _, _, enhanced = face_enhancer.enhance(sr_img)

    # Convert back to a channel-first tensor in [-1, 1]
    enhanced_rgb = cv2.cvtColor(enhanced, cv2.COLOR_BGR2RGB)
    enhanced_tensor = torch.tensor(enhanced_rgb).permute(2, 0, 1).float()
    enhanced_tensor = (enhanced_tensor / 127.5) - 1

    return enhanced_tensor.unsqueeze(0).cuda()

In [11]:
# =============================================================================
# 10. Validation Tests (Final Working Version)
# =============================================================================
def run_validation_tests():
    """Generate a face per test prompt and report how many attributes match.

    For each (prompt, expected attributes) pair an image is generated with
    ``generate_custom_face``, analysed with ``FaceVerifier.get_details`` and
    compared key by key against the expectation. A failure in one case is
    printed and the remaining cases still run.

    Side effects:
        Binds the ``clip`` module and the loaded ``clip_model`` as notebook
        globals so that ``generate_custom_face`` can see them.
    """
    # Published as globals: generate_custom_face reads `clip` and `clip_model`
    # from the notebook namespace, so function-local bindings never reached it.
    global clip, clip_model

    # Initialize face analyzer from Section 4 (its constructor takes no
    # required arguments; the default device is used).
    analyzer = FaceVerifier()

    # Define test cases (prompt, expected_attributes)
    # NOTE(review): several expected keys (e.g. "hair_color") are not among the
    # attributes FaceVerifier requests, so they are reported as NOT_FOUND.
    test_cases = [
        (
            "a young woman with blonde hair and blue eyes",
            {"gender": "Female", "hair_color": "blonde", "eye_color": "blue"}
        ),
        (
            "an old man with gray beard and wrinkles",
            {"gender": "Male", "facial_hair": "beard", "age_group": "old"}
        ),
        (
            "a middle-aged person with glasses and smiling",
            {"accessories": "glasses", "emotion": "happy"}
        )
    ]

    # `legacy` is imported as a top-level module elsewhere in this notebook
    # (Section 3), so it is imported the same way here rather than from the
    # `training` package.
    import legacy  # noqa: F401

    # Import CLIP after path configuration
    import clip
    clip_model, _ = clip.load("ViT-B/32", device="cuda")

    # Run tests
    for prompt, expected in test_cases:
        print(f"\n{'='*40}\nTesting: {prompt}\n{'='*40}")

        try:
            # Generate image
            generated_img = generate_custom_face(prompt)

            # Analyze attributes. Section 11 indexes this result with [0], so
            # a list of per-face dicts is unwrapped to its first entry.
            attributes = analyzer.get_details(generated_img)
            if isinstance(attributes, list):
                attributes = attributes[0] if attributes else {}

            # Calculate accuracy
            matches = []
            for attr, value in expected.items():
                actual = attributes.get(attr, "NOT_FOUND")
                if isinstance(actual, dict):  # Nested scores: keep the top label
                    actual = max(actual.items(), key=lambda x: x[1])[0]
                matches.append(str(actual).lower() == str(value).lower())
                print(f"- {attr}: Expected {value}, Got {actual}")

            # Guard against an empty expectation dict.
            accuracy = sum(matches) / len(matches) * 100 if matches else 0.0
            print(f"\nAttribute Match Accuracy: {accuracy:.1f}%")

        except Exception as e:
            # Best-effort by design: report the failure and move on.
            print(f"Test failed: {str(e)}")
            continue

# Entry point (called from Section 11)
# NOTE(review): the guard compares against "__run_validation__" rather than
# "__main__", so unless __name__ is set to that value this cell only defines
# the function. No explicit call to run_validation_tests() is visible in the
# cells shown here.
if __name__ == "__run_validation__":
    run_validation_tests()

In [15]:
# =============================================================================
# 11. Run Everything (Error-Free Version)
# =============================================================================
import sys
import torch
import clip
import matplotlib.pyplot as plt
import numpy as np
import cv2
import tempfile
import os
import dnnlib
from facenet_pytorch import InceptionResnetV1
from deepface import DeepFace
from legacy import load_network_pkl

# Shared notebook-level components; initialize_system() assigns the real objects.
clip_model = face_verifier = generator = None

def initialize_system():
    """Load CLIP, the StyleGAN2 generator and the face verifier into globals.

    Side effects:
        Prepends the StyleGAN2-ADA checkout directories to ``sys.path`` and
        assigns the notebook globals ``clip_model``, ``generator`` and
        ``face_verifier``.
    """
    global clip_model, face_verifier, generator

    # 1. Configure paths
    sys.path.insert(0, '/content/stylegan2-ada-pytorch')
    sys.path.insert(0, '/content/stylegan2-ada-pytorch/training')

    # 2. Initialize CLIP
    clip_model, _ = clip.load("ViT-B/32", device="cuda")

    # 3. Load StyleGAN2 generator
    with dnnlib.util.open_url('https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/ffhq.pkl') as f:
        generator = load_network_pkl(f)['G_ema'].eval().cuda()

    # 4. Initialize face verifier (FIXED REALISM SCORE)
    class FaceVerifier:
        """FaceNet/DeepFace helper whose realism score stays a tensor."""

        def __init__(self):
            self.facenet = InceptionResnetV1(pretrained='vggface2').eval().cuda()
            self.attributes = ['age', 'gender', 'race', 'emotion']

        def get_details(self, img_tensor):
            """Run DeepFace attribute analysis on the first image of a batch."""
            pixels = (img_tensor[0].permute(1, 2, 0).cpu().detach().numpy() + 1) * 127.5
            # Clamp before the uint8 cast: values outside [-1, 1] would
            # otherwise overflow the 0..255 range instead of saturating.
            img_np = np.clip(pixels, 0, 255).astype(np.uint8)
            img_np = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)

            # Reserve a temp file name and close the handle before writing.
            with tempfile.NamedTemporaryFile(suffix='.jpg', delete=False) as temp_file:
                temp_path = temp_file.name
            try:
                cv2.imwrite(temp_path, img_np)
                return DeepFace.analyze(temp_path, actions=self.attributes, enforce_detection=False)
            finally:
                # Remove the file even when the analysis raises.
                os.unlink(temp_path)

        def realism_score(self, img_tensor):
            """Return the norm of the FaceNet output as a tensor (no .item())."""
            # REMOVED .item() to keep as tensor
            return self.facenet(img_tensor).norm()

    face_verifier = FaceVerifier()

def create_proper_latent():
    """Sample a random latent from the global generator as a trainable leaf.

    Returns:
        A leaf tensor with ``requires_grad=True`` holding one copy of the
        mapped latent for each of ``generator.synthesis.num_ws`` layers.
    """
    # Run the mapping pass without autograd. If the mapping network's
    # parameters track gradients, the result would otherwise be a non-leaf
    # tensor, which torch optimizers refuse to optimise.
    with torch.no_grad():
        z = torch.randn([1, generator.z_dim], device="cuda")
        w = generator.mapping(z, None)
        # A 3-D result is already expanded per layer; keep only its first layer.
        if w.dim() == 3:
            w = w[:, 0, :]
        num_layers = generator.synthesis.num_ws
        w_plus = w.unsqueeze(1).repeat(1, num_layers, 1)
    return w_plus.requires_grad_(True)

def generate_custom_face(prompt, num_steps=300):
    """Optimise a fresh latent against ``prompt`` and return the final image.

    Uses the notebook globals ``generator``, ``clip``, ``clip_model`` and
    ``face_verifier``. Each step minimises the CLIP distance between image and
    prompt plus a small negative multiple of the verifier's realism score.

    Args:
        prompt: Text description the image should match.
        num_steps: Number of Adam updates applied to the latent.

    Returns:
        The image synthesised from the optimised latent.
    """
    latent = create_proper_latent()
    adam = torch.optim.Adam([latent], lr=0.05)

    # Encode the prompt once; it stays fixed for the whole run.
    tokens = clip.tokenize([prompt]).to('cuda')
    with torch.no_grad():
        text_features = clip_model.encode_text(tokens)

    for step in range(num_steps):
        img = generator.synthesis(latent, noise_mode='const')

        # Map [-1, 1] to [0, 1] and resize to 224x224 before CLIP encoding.
        clip_input = torch.nn.functional.interpolate((img + 1)/2, size=224)
        image_features = clip_model.encode_image(clip_input)

        similarity = torch.cosine_similarity(image_features, text_features).mean()
        clip_loss = 1 - similarity
        # Left as a tensor so gradients also flow through the realism term.
        realism_loss = -face_verifier.realism_score(img) * 0.01

        adam.zero_grad()
        (clip_loss + realism_loss).backward()
        adam.step()

        if step % 50 == 0:
            # .item() turns both losses into plain floats for formatting.
            print(f"Step {step}: CLIP Loss={clip_loss.item():.2f}, Realism={realism_loss.item():.2f}")

    return generator.synthesis(latent, noise_mode='const')

def display(img_tensor):
    """Show the first image of a batch with matplotlib, axes hidden.

    Args:
        img_tensor: Image batch; element 0 is moved to the CPU, reordered to
            channel-last and mapped from [-1, 1] to [0, 1] for plotting.
    """
    first_image = img_tensor[0].permute(1, 2, 0).cpu().detach().numpy()
    plt.imshow((first_image + 1) / 2)
    plt.axis('off')
    plt.show()

def main():
    """Initialise the system, generate two faces and print one analysis.

    The first face is generated and displayed; the second is generated from a
    different prompt and passed to the face verifier, whose first result entry
    is printed as one "KEY: value" line per field (values cut to 50 chars).
    """
    initialize_system()

    print("\n🚀 Generating base image...")
    base_face = generate_custom_face("a person with striking green eyes", 300)

    print("\n🎨 Original Image:")
    display(base_face)

    print("\n🧪 Running validation...")
    validation_face = generate_custom_face("young woman with blonde hair and blue eyes", 150)
    report = face_verifier.get_details(validation_face)

    print("\nAnalysis Results:")
    first_entry = report[0]
    for field, value in first_entry.items():
        label = field.upper()
        shown = str(value)[:50]
        print(f"{label:<10}: {shown}")

# Run the full pipeline when this cell/module is executed as the main program.
if __name__ == "__main__":
    main()

ImportError: cannot import name 'is_directory' from 'PIL._util' (/usr/local/lib/python3.11/dist-packages/PIL/_util.py)