In [None]:
!pip install -qqq bitsandbytes

# ========================================
# COMPLETE DIAGNOSTIC TEST FOR YOUR MODEL
# ========================================
# This will test if your friend can load and use your model from Hugging Face

import sys
import torch

# Opening banner for the diagnostic run.
rule = "=" * 80
print(rule)
print("🔍 DIAGNOSTIC TEST FOR HUGGING FACE MODEL")
print(rule)

# --- STEP 1: CHECK ENVIRONMENT ---
# Reports the interpreter and PyTorch versions, then either the CUDA/GPU
# details of device 0 or a warning that no GPU was detected.
print("\n📊 STEP 1: Checking Environment...")
print("-" * 80)

print(f"Python version: {sys.version}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if not torch.cuda.is_available():
    print("⚠️ WARNING: No GPU detected. Model will run on CPU (slower)")
else:
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    # total_memory is reported in bytes; show it in decimal gigabytes.
    total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"GPU Memory: {total_bytes / 1e9:.2f} GB")

# --- STEP 2: CHECK REQUIRED PACKAGES ---
# Tries to import every package the later steps rely on, records its version,
# and exits with status 1 (listing only the unusable packages) if any of them
# cannot be imported.
import importlib

print("\n📦 STEP 2: Checking Required Packages...")
print("-" * 80)

# Package name -> detected version; stays None when the package is unusable.
required_packages = {
    'transformers': None,
    'peft': None,
    'accelerate': None,
    'bitsandbytes': None,
    'huggingface_hub': None
}

for package_name in required_packages:
    try:
        module = importlib.import_module(package_name)
    except ImportError:
        print(f"❌ {package_name}: NOT INSTALLED")
    except Exception as e:
        # An installed package can still fail while importing; report that
        # instead of letting the diagnostic die with a raw traceback.
        print(f"❌ {package_name}: INSTALLED BUT FAILED TO IMPORT "
              f"({type(e).__name__}: {e})")
    else:
        version = getattr(module, '__version__', 'unknown')
        required_packages[package_name] = version
        print(f"✅ {package_name}: {version}")

# Check if any packages are missing
missing_packages = [pkg for pkg, ver in required_packages.items() if ver is None]
if missing_packages:
    print(f"\n⚠️ MISSING PACKAGES: {', '.join(missing_packages)}")
    # Suggest installing only what is actually missing or broken.
    print(f"Install with: pip install {' '.join(missing_packages)}")
    print("\nCannot proceed without required packages. Exiting...")
    sys.exit(1)

# --- STEP 3: TEST REPOSITORY ACCESS ---
# Lists the files of the adapter repository on the Hugging Face Hub and checks
# that a fixed set of file names is present. Any failure while listing prints
# troubleshooting hints and exits with status 1.
print("\n🌐 STEP 3: Testing Repository Access...")
print("-" * 80)

from huggingface_hub import HfApi, list_repo_files

REPO_ID = "aditismile/resume_enhnaced"

try:
    api = HfApi()
    files = list_repo_files(REPO_ID, repo_type="model")
    print(f"✅ Repository accessible: https://huggingface.co/{REPO_ID}")
    print(f"\n📁 Files in repository ({len(files)} total):")
    for repo_file in sorted(files):
        print(f"   • {repo_file}")

    # File names this check treats as essential.
    essential_files = [
        'adapter_config.json',
        'adapter_model.safetensors',
        'tokenizer_config.json'
    ]

    # Keep the order of essential_files so the warning lists them predictably.
    available = set(files)
    missing_essential = [name for name in essential_files if name not in available]
    if not missing_essential:
        print("\n✅ All essential files present")
    else:
        print(f"\n⚠️ WARNING: Missing essential files: {missing_essential}")

except Exception as e:
    print(f"❌ ERROR accessing repository: {e}")
    for hint in (
        "\nPossible causes:",
        "1. Repository is private (make it public)",
        "2. Repository name is incorrect",
        "3. Network/connectivity issues",
    ):
        print(hint)
    sys.exit(1)

# --- STEP 4: TEST BASE MODEL LOADING ---
# Loads the tokenizer and the base causal-LM for BASE_MODEL_ID, then reports
# the device of the model's first parameter. Any failure prints
# troubleshooting hints and exits with status 1.
print("\n🤖 STEP 4: Testing Base Model Loading...")
print("-" * 80)

from transformers import AutoTokenizer, AutoModelForCausalLM

BASE_MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# Keyword arguments forwarded to AutoModelForCausalLM.from_pretrained.
base_model_options = dict(
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
)

try:
    print("Loading tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
    print("✅ Tokenizer loaded successfully")

    print("\nLoading base model (this may take 1-2 minutes)...")
    base_model = AutoModelForCausalLM.from_pretrained(BASE_MODEL_ID, **base_model_options)
    print("✅ Base model loaded successfully")

    # Check model device
    first_param = next(base_model.parameters())
    device = first_param.device
    print(f"Model device: {device}")

except Exception as e:
    print(f"❌ ERROR loading base model: {e}")
    for hint in (
        "\nPossible causes:",
        "1. Insufficient memory (need ~7GB VRAM for Phi-3)",
        "2. Network issues downloading model",
        "3. Incompatible transformers version",
    ):
        print(hint)
    sys.exit(1)

# --- STEP 5: TEST ADAPTER LOADING ---
# Wraps the base model with the adapter from REPO_ID, switches it to eval
# mode and prints a few fields of the 'default' adapter config. Any failure
# prints troubleshooting hints and exits with status 1.
print("\n🔌 STEP 5: Testing Adapter (LoRA) Loading...")
print("-" * 80)

from peft import PeftModel

try:
    print(f"Loading LoRA adapters from {REPO_ID}...")
    model = PeftModel.from_pretrained(base_model, REPO_ID)
    model.eval()
    print("✅ Adapters loaded successfully")

    # Print adapter info
    print("\nAdapter config:")
    lora_cfg = model.peft_config['default']
    print(f"  • LoRA rank (r): {lora_cfg.r}")
    print(f"  • LoRA alpha: {lora_cfg.lora_alpha}")
    print(f"  • Target modules: {lora_cfg.target_modules}")

except Exception as e:
    print(f"❌ ERROR loading adapters: {e}")
    for hint in (
        "\nPossible causes:",
        "1. Adapter files are corrupted",
        "2. Version mismatch between peft/transformers",
        "3. Incompatible adapter configuration",
    ):
        print(hint)
    print(f"\nFull error: {type(e).__name__}: {e!s}")
    sys.exit(1)

# --- STEP 6: TEST INFERENCE (SIMPLE) ---
# Smoke test: tokenizes a short sentence, samples up to 50 new tokens and
# prints the first 100 characters of the decoded sequence. Any failure is
# reported and the script exits with status 1.
print("\n🧪 STEP 6: Testing Simple Inference...")
print("-" * 80)

simple_input = "Hello, how are you?"

# Sampling settings used for the smoke test.
smoke_generation_options = dict(
    max_new_tokens=50,
    temperature=0.7,
    do_sample=True,
    use_cache=False,
)

try:
    print(f"Input: {simple_input}")

    encoded = tokenizer(simple_input, return_tensors="pt")
    inputs = encoded.to(model.device)

    print("Generating response...")
    with torch.no_grad():
        outputs = model.generate(**inputs, **smoke_generation_options)

    result = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("✅ Simple inference successful")
    preview = result[:100]  # First 100 chars
    print(f"Output: {preview}...")

except Exception as e:
    print(f"❌ ERROR during simple inference: {e}")
    print(f"\nFull error: {type(e).__name__}: {e!s}")
    sys.exit(1)

# --- STEP 7: TEST RESUME ENHANCEMENT ---
# Runs the full pipeline on a small sample resume: builds the chat-style
# prompt, samples up to 800 new tokens and prints only the text the model
# generated after the prompt. Any failure prints the traceback and exits
# with status 1.
print("\n📄 STEP 7: Testing Resume Enhancement (Full Pipeline)...")
print("-" * 80)

test_resume = """John Smith
john.smith@email.com
555-123-4567

Summary:
I'm a software developer who has worked with Python and JavaScript.

Work Experience:
- Developer at Tech Company (2021-2023)
- Wrote code
- Fixed bugs
- Worked with team

Education:
BS Computer Science, 2021

Skills: Python, JavaScript, HTML, CSS"""

# NOTE(review): task 2 below reads "with  to be" (looks like a leftover from
# an edit). The text is kept unchanged in case the adapter was trained on this
# exact wording -- confirm before fixing it.
prompt = f"""<|system|>
You are an expert resume writer and career coach. Transform the following unstructured resume into a professional, well-formatted, and impactful resume.

Your tasks:
1. Reorganize and structure the resume properly with clear sections
2. Rewrite the professional summary with  to be compelling and achievement-focused
3. Transform work experience bullets to be quantifiable, action-oriented, and impact-focused
4. Enhance the education section with proper formatting
5. Expand and categorize skills appropriately
6. Maintain all factual information while improving presentation
7. Use strong action verbs and quantify achievements wherever possible

Output ONLY the enhanced resume in a clean, professional format.<|end|>
<|user|>
{test_resume}<|end|>
<|assistant|>
"""

try:
    print("Input resume:")
    print("-" * 40)
    print(test_resume)
    print("-" * 40)

    print("\nGenerating enhanced resume (this may take 10-30 seconds)...")

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=800,
            temperature=0.7,
            top_p=0.9,
            do_sample=True,
            use_cache=False
        )

    # Decode only the tokens produced after the prompt. Decoding the whole
    # sequence with skip_special_tokens=True strips the "<|assistant|>"
    # marker, so splitting on that marker never matches and the prompt itself
    # ends up in the "enhanced resume".
    prompt_length = inputs["input_ids"].shape[-1]
    generated_ids = outputs[0][prompt_length:]
    result = tokenizer.decode(generated_ids, skip_special_tokens=True)
    enhanced_resume = result.strip()

    print("\n✅ Resume enhancement successful!")
    print("\nEnhanced resume:")
    print("=" * 80)
    print(enhanced_resume)
    print("=" * 80)

except Exception as e:
    print(f"❌ ERROR during resume enhancement: {e}")
    print(f"\nFull error: {type(e).__name__}: {str(e)}")
    import traceback
    print("\nFull traceback:")
    traceback.print_exc()
    sys.exit(1)

# --- STEP 8: PERFORMANCE METRICS ---
# Best-effort timing of one short generation plus current GPU memory usage.
# Failures here are reported as a warning and do not stop the script.
print("\n⚡ STEP 8: Performance Metrics...")
print("-" * 80)

try:
    import time

    simple_test = "Test input for timing"
    # Tokenize before starting the clock so only generation is measured.
    inputs = tokenizer(simple_test, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    cuda_available = torch.cuda.is_available()
    if cuda_available:
        # Make sure earlier GPU work is finished before timing starts.
        torch.cuda.synchronize()

    # Time the generation
    start_time = time.perf_counter()

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            use_cache=False
        )

    if cuda_available:
        torch.cuda.synchronize()
    generation_time = time.perf_counter() - start_time

    # Generation can stop before max_new_tokens (e.g. at an end-of-sequence
    # token), so count the tokens that were actually produced.
    generated_tokens = outputs[0].shape[-1] - prompt_length

    print(f"Generation time ({generated_tokens} tokens): {generation_time:.2f} seconds")
    if generated_tokens > 0 and generation_time > 0:
        print(f"Tokens per second: {generated_tokens / generation_time:.2f}")
    else:
        print("Tokens per second: n/a (no tokens generated)")

    # Memory usage
    if cuda_available:
        memory_allocated = torch.cuda.memory_allocated() / 1e9
        memory_reserved = torch.cuda.memory_reserved() / 1e9
        print(f"GPU Memory allocated: {memory_allocated:.2f} GB")
        print(f"GPU Memory reserved: {memory_reserved:.2f} GB")

except Exception as e:
    print(f"⚠️ Could not measure performance: {e}")

# --- FINAL SUMMARY ---
# Prints the success banner, a copy-paste usage snippet for the adapter at
# REPO_ID, and a checklist of details to collect if errors persist.
#
# The snippet mirrors the generation settings exercised by the resume
# enhancement step (sampling enabled, top_p, use_cache=False) and decodes only
# the newly generated tokens: with skip_special_tokens=True the
# "<|assistant|>" marker is stripped, so splitting on it would return the
# prompt together with the answer.
print("\n" + "="*80)
print("✅ ALL TESTS PASSED!")
print("="*80)
print("\n🎉 Your model is working correctly!")
print(f"🔗 Model URL: https://huggingface.co/{REPO_ID}")
print("\nYour friend should be able to use the model with this code:")
print("-" * 80)
# Doubled braces ({{resume}}) keep the placeholder literal in the printed code.
print(f"""
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import torch

# Load model
base_model_id = "microsoft/Phi-3-mini-4k-instruct"
adapter_id = "{REPO_ID}"

tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()

# Use model
resume = "Your resume text here..."
prompt = f'''<|system|>
You are an expert resume writer. Transform the following resume.<|end|>
<|user|>
{{resume}}<|end|>
<|assistant|>
'''

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=800,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        use_cache=False
    )

# Decode only the newly generated tokens (the output starts with the prompt)
new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
enhanced = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
print(enhanced)
""")
print("-" * 80)

print("\n💡 If your friend is still getting errors, please share:")
print("   1. The exact error message")
print("   2. Their Python version")
print("   3. Their transformers/peft versions")
print("   4. Whether they have GPU or CPU only")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
🔍 DIAGNOSTIC TEST FOR HUGGING FACE MODEL

📊 STEP 1: Checking Environment...
--------------------------------------------------------------------------------
Python version: 3.12.12 (main, Oct 10 2025, 08:52:57) [GCC 11.4.0]
PyTorch version: 2.9.0+cu126
CUDA available: True
CUDA version: 12.6
GPU: Tesla T4
GPU Memory: 15.83 GB

📦 STEP 2: Checking Required Packages...
--------------------------------------------------------------------------------
✅ transformers: 4.57.2
✅ peft: 0.18.0
✅ accelerate: 1.12.0
✅ bitsandbytes: 0.48.2
✅ huggingface_hub: 0.36.0

🌐 STEP 3: Testing Repository Access...
--------------------------------------------------------------------------------
✅ Repository accessible: https://huggingface.co/aditismile/resume_enhnaced

📁 Files in repository (26 total):
   • .gitattributes
   • README.md
   • adapter_config.json
   • adapter_model.safetensors
 

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

✅ Tokenizer loaded successfully

Loading base model (this may take 1-2 minutes)...


config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

configuration_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.
`torch_dtype` is deprecated! Use `dtype` instead!


modeling_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3-mini-4k-instruct:
- modeling_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

✅ Base model loaded successfully
Model device: cuda:0

🔌 STEP 5: Testing Adapter (LoRA) Loading...
--------------------------------------------------------------------------------
Loading LoRA adapters from aditismile/resume_enhnaced...


adapter_config.json: 0.00B [00:00, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/12.6M [00:00<?, ?B/s]

✅ Adapters loaded successfully

Adapter config:
  • LoRA rank (r): 16
  • LoRA alpha: 32
  • Target modules: {'o_proj', 'v_proj', 'q_proj', 'k_proj'}

🧪 STEP 6: Testing Simple Inference...
--------------------------------------------------------------------------------
Input: Hello, how are you?
Generating response...




✅ Simple inference successful
Output: Hello, how are you?

Human: I'm fine. How about you?

Assistant: I'm doing well. How can I help you ...

📄 STEP 7: Testing Resume Enhancement (Full Pipeline)...
--------------------------------------------------------------------------------
Input resume:
----------------------------------------
John Smith
john.smith@email.com
555-123-4567

Summary:
I'm a software developer who has worked with Python and JavaScript.

Work Experience:
- Developer at Tech Company (2021-2023)
- Wrote code
- Fixed bugs
- Worked with team

Education:
BS Computer Science, 2021

Skills: Python, JavaScript, HTML, CSS
----------------------------------------

Generating enhanced resume (this may take 10-30 seconds)...

✅ Resume enhancement successful!

Enhanced resume:
You are an expert resume writer and career coach. Transform the following unstructured resume into a professional, well-formatted, and impactful resume.

Your tasks:
1. Reorganize and structure the resume pro