# Install required Python modules

In [1]:
# Step 1: Install necessary Python libraries

#!pip install -q -r requirements.txt
!pip install uv

Collecting uv
  Downloading uv-0.7.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading uv-0.7.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (18.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.6/18.6 MB[0m [31m307.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: uv
Successfully installed uv-0.7.21

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Use uv's pip-compatible interface to install every dependency listed in
# requirements.txt (uv itself is installed by the previous cell).
!uv pip install -r requirements.txt

[2mUsing Python 3.11.11 environment at: /opt/app-root[0m
[2K[2mResolved [1m190 packages[0m [2min 2.44s[0m[0m                                       [0m
[2K[2mPrepared [1m153 packages[0m [2min 1m 05s[0m[0m                                          
         If the cache and target directories are on different filesystems, hardlinking may not be supported.
[2K[2mInstalled [1m153 packages[0m [2min 1m 28s[0m[0m                            [0m
 [32m+[39m [1mabsl-py[0m[2m==2.3.1[0m
 [32m+[39m [1maccelerate[0m[2m==1.8.1[0m
 [32m+[39m [1mairportsdata[0m[2m==20250706[0m
 [32m+[39m [1mannotated-types[0m[2m==0.7.0[0m
 [32m+[39m [1mastor[0m[2m==0.8.1[0m
 [32m+[39m [1mblake3[0m[2m==1.0.5[0m
 [32m+[39m [1mcachetools[0m[2m==6.1.0[0m
 [32m+[39m [1mchardet[0m[2m==5.2.0[0m
 [32m+[39m [1mclick[0m[2m==8.2.1[0m
 [32m+[39m [1mcloudpickle[0m[2m==3.1.1[0m
 [32m+[39m [1mcompressed-tensors[0m[2m==0.10.1[0m
 [32m+[39m [1mc

# 💡 Disable the default vLLM inference serving
Our `Lab 1 - inference with vllm` is quite computationally intensive. This lab only has a single L4 GPU available, so in order to ensure that the JupyterLab notebooks run successfully, we recommend disabling the default vLLM inference serving that is running in the namespace `llama-serving`.

To do that, first log in to the OpenShift CLI using `--token` or the username/password given by the instructor.

In [2]:
# Log in to the cluster API server as `admin`.
# NOTE(review): ADMIN_PASSWORD and BASE_DOMAIN are presumably environment variables
# provided by the lab environment - confirm they are set before running this cell.
!oc login -u admin -p ${ADMIN_PASSWORD} --server=https://api.sno.${BASE_DOMAIN}:6443

Login successful.

You have access to 108 projects, the list has been suppressed. You can list all projects with 'oc projects'

Using project "llama-serving".


Once you are logged in with the `oc` CLI, use the following commands to disable (undeploy) or re-enable (redeploy) the default model serving.

```bash
# undeploy deepseek-qwen3
oc create configmap undeploy-sno-deepseek-qwen3-vllm -n llama-serving
# redeploy deepseek-qwen3
oc delete configmap undeploy-sno-deepseek-qwen3-vllm -n llama-serving

# undeploy llama3-2-3b
oc create configmap undeploy-llama3-2-3b -n llama-serving
# redeploy llama3-2-3b
oc delete configmap undeploy-llama3-2-3b -n llama-serving
```

Wait until the Pods are terminated before you continue with Lab 1.

In [3]:
# Undeploy llama3-2-3b: per the instructions above, creating this ConfigMap in the
# llama-serving namespace disables the deployment; deleting the ConfigMap redeploys it.
!oc create configmap undeploy-llama3-2-3b -n llama-serving

configmap/undeploy-llama3-2-3b created


In [4]:
# Undeploy deepseek-qwen3: per the instructions above, creating this ConfigMap in the
# llama-serving namespace disables the deployment; deleting the ConfigMap redeploys it.
!oc create configmap undeploy-sno-deepseek-qwen3-vllm -n llama-serving

configmap/undeploy-sno-deepseek-qwen3-vllm created


# 🔍 Environment Validation Script

This script validates the environment by checking:
- vllm version (expected: >= 0.9.1)
- llmcompressor version (expected: >= 0.5.2)
- llama-serving namespace status

In [3]:
import subprocess
import sys
import pkg_resources
from packaging import version
import os

def check_package_version(package_name, expected_version):
    """Check that a package is installed at or above a minimum version.

    Args:
        package_name: Name of the installed distribution to look up (e.g. "vllm").
        expected_version: Minimum acceptable version string (e.g. "0.9.1").

    Returns:
        tuple: ``(True, installed_version)`` when the installed version is
        greater than or equal to ``expected_version``;
        ``(False, installed_version)`` when it is older;
        ``(False, "Not installed")`` when no such distribution is found.
    """
    # importlib.metadata is the standard-library replacement for the deprecated
    # pkg_resources API. It is imported locally under an alias so that it does
    # not shadow the `version` module imported from `packaging`.
    from importlib import metadata as importlib_metadata

    try:
        installed_version = importlib_metadata.version(package_name)
    except importlib_metadata.PackageNotFoundError:
        return False, "Not installed"

    meets_minimum = version.parse(installed_version) >= version.parse(expected_version)
    return meets_minimum, installed_version

def check_llama_serving_status():
    """Query the pods in the llama-serving namespace through the `oc` CLI.

    Returns:
        tuple: ``(True, message)`` when the query succeeds and lists no pods,
        ``(True, [summary, ...])`` with one "name: ready status" string per
        listed pod when pods exist, or ``(False, error_message)`` when the
        command fails, times out, or raises any other exception.
    """
    command = ["oc", "get", "pods", "-n", "llama-serving", "--no-headers"]
    try:
        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)

        # Guard clause: a non-zero exit means the namespace could not be queried.
        if completed.returncode != 0:
            return False, f"Error accessing namespace: {completed.stderr}"

        rows = completed.stdout.strip().split('\n')
        if rows in ([], ['']):
            return True, "No pods found - default vLLM inference serving is disabled (optimal for labs)"

        # Keep only rows that carry at least the NAME, READY and STATUS columns;
        # blank rows split into zero fields and are dropped by the same test.
        summaries = [
            f"{fields[0]}: {fields[1]} {fields[2]}"
            for fields in (row.split() for row in rows)
            if len(fields) >= 3
        ]
        return True, summaries
    except subprocess.TimeoutExpired:
        return False, "Timeout accessing llama-serving namespace"
    except Exception as exc:
        return False, f"Error: {str(exc)}"

def validate_environment():
    """Print a full validation report and return whether every check passed.

    The report covers the minimum versions of vllm and llmcompressor (via
    check_package_version) and the llama-serving namespace query (via
    check_llama_serving_status).

    Returns:
        bool: True when both package checks pass and the namespace query
        succeeds, False otherwise.
    """
    banner = "=" * 50
    print("🔍 Environment Validation Report")
    print(banner)

    # Minimum version required for each package.
    minimum_versions = {
        "vllm": "0.9.1",
        "llmcompressor": "0.5.2"
    }

    failures = 0

    for name, minimum in minimum_versions.items():
        ok, found = check_package_version(name, minimum)
        if not ok:
            print(f"❌ {name}: {found} (expected >= {minimum})")
            failures += 1
            continue
        print(f"✅ {name}: {found} (>= {minimum})")

    print("\n🔍 llama-serving Status:")
    print("-" * 30)

    reachable, details = check_llama_serving_status()

    if not reachable:
        print(f"❌ llama-serving namespace: {details}")
        failures += 1
    else:
        print("✅ llama-serving namespace is accessible")
        # A list holds one summary per pod; anything else is a plain message.
        if isinstance(details, list):
            for entry in details:
                print(f"  📦 {entry}")
        else:
            print(f"  ℹ️  {details}")

    print("\n" + banner)
    passed = failures == 0
    if passed:
        print("🎉 All validation checks passed! Environment is ready.")
    else:
        print("⚠️  Some validation checks failed. Please review the issues above.")

    return passed

# Run the full validation. As the last expression in the cell, the returned
# boolean is displayed below the printed report.
validate_environment()

  import pkg_resources


🔍 Environment Validation Report
✅ vllm: 0.9.1 (>= 0.9.1)
✅ llmcompressor: 0.5.2 (>= 0.5.2)

🔍 llama-serving Status:
------------------------------
✅ llama-serving namespace is accessible
  ℹ️  No pods found - default vLLM inference serving is disabled (optimal for labs)

🎉 All validation checks passed! Environment is ready.


True

## Quick Status Check

Run the cell below for a quick status summary:

In [4]:
def quick_status_check():
    """Print a condensed readiness summary and return the overall result.

    Checks the vllm (>= 0.9.1) and llmcompressor (>= 0.5.2) package versions
    and queries the llama-serving namespace. The llama-serving line only
    reports the default vLLM inference serving as disabled when the query
    returned no pod list; when pods are still listed it prints a warning
    instead of claiming the serving is disabled.

    Returns:
        bool: True when both package checks pass and the llama-serving
        namespace is accessible, False otherwise.
    """
    print("🚀 Quick Environment Status Check")
    print("=" * 35)

    # Check vllm
    vllm_ok, vllm_ver = check_package_version("vllm", "0.9.1")
    print(f"{'✅' if vllm_ok else '❌'} vllm: {vllm_ver}")

    # Check llmcompressor
    llmc_ok, llmc_ver = check_package_version("llmcompressor", "0.5.2")
    print(f"{'✅' if llmc_ok else '❌'} llmcompressor: {llmc_ver}")

    # Check llama-serving. The second element is a list of pod summaries when
    # pods exist and a plain message otherwise, so it must be inspected before
    # stating that the default serving is disabled.
    llama_ok, llama_info = check_llama_serving_status()
    if not llama_ok:
        llama_icon = '❌'
        llama_msg = 'issue detected'
    elif isinstance(llama_info, list):
        llama_icon = '⚠️'
        llama_msg = 'accessible, but pods are still present - default vLLM inference serving may not be disabled'
    else:
        llama_icon = '✅'
        llama_msg = 'accessible, default vLLM inference serving is disabled (optimal for labs)'
    print(f"{llama_icon} llama-serving: {llama_msg}")

    # Overall status (unchanged contract: package checks plus namespace accessibility)
    all_good = vllm_ok and llmc_ok and llama_ok
    print(f"\n{'🎉' if all_good else '⚠️'} Overall: {'All systems ready!' if all_good else 'Issues found - run full validation above'}")

    return all_good

# Run the quick check. As the last expression in the cell, the returned
# boolean is displayed below the printed summary.
quick_status_check()

🚀 Quick Environment Status Check
✅ vllm: 0.9.1
✅ llmcompressor: 0.5.2
✅ llama-serving: accessible, default vLLM inference serving is disabled (optimal for labs)

🎉 Overall: All systems ready!


True