## Optional

Add results from the original (base) model so they can be compared with the other models.

A compiled version of this model is already cached on Optimum Neuron, so we don't need to compile it ourselves.

In [None]:
from vllm import LLM
from transformers import AutoTokenizer
from inference_helper import (
    load_chess_data,
    create_prompts,
    run_inference,
    process_base_model_results,
    save_base_model_results
)

# Identifier of the ORIGINAL base model (not fine-tuned). It is defined once so
# that the inference engine and the tokenizer below always refer to the same
# checkpoint and cannot drift apart when one of them is edited.
BASE_MODEL_ID = "Qwen/Qwen3-0.6B"

# Location of the chess evaluation dataset read by load_chess_data().
CHESS_DATA_PATH = '/home/ubuntu/environment/distillation/data/chess_output.json'

# Load the ORIGINAL base model (not fine-tuned)
print(f"Loading original base model: {BASE_MODEL_ID}")
llm_base = LLM(
    model=BASE_MODEL_ID,
    max_num_seqs=1,
    max_model_len=2048,
    tensor_parallel_size=2,
)

# Load chess dataset
chess_data = load_chess_data(CHESS_DATA_PATH)

# Create prompts, using the tokenizer that belongs to the same base checkpoint
tokenizer_base = AutoTokenizer.from_pretrained(BASE_MODEL_ID)
prompts = create_prompts(chess_data, tokenizer_base)

# Run inference on base model
# NOTE(review): max_tokens (2048) equals max_model_len (2048) above, so the
# prompt plus the generated tokens can only fit if generation stops early;
# confirm how run_inference / the engine handles hitting the length limit.
# NOTE(review): temperature=0.0 is presumably forwarded to the sampling
# parameters to get deterministic (greedy) output -- confirm in inference_helper.
print("\nRunning inference on original base model...")
outputs_base = run_inference(llm_base, prompts, max_tokens=2048, temperature=0.0)

# Process base model results: pairs the raw outputs with the dataset entries and
# returns the per-example results together with a second value bound to
# base_correct (presumably the number of correct answers -- verify in
# inference_helper).
results_base, base_correct = process_base_model_results(outputs_base, chess_data)

# Save base model results; the returned summary is kept for later cells
base_summary = save_base_model_results(results_base, base_correct)


In [None]:
from inference_helper import merge_all_results

# Merge results from all three models
# NOTE(review): merge_all_results() is called without arguments, so the inputs
# it combines and any file it writes are decided inside inference_helper --
# presumably the per-model result files saved by earlier cells; confirm there.
# The returned value is kept in comprehensive_summary for inspection.
comprehensive_summary = merge_all_results()


In [None]:
import json
from inference_helper import (
    print_comprehensive_statistics,
    create_comprehensive_visualization,
    print_comprehensive_sample_predictions
)

# Load the comprehensive results
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's locale-dependent default encoding (which can mis-decode or fail
# on non-ASCII characters in the stored results).
with open('static/chess_evaluation_comprehensive.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Print detailed statistics
print_comprehensive_statistics(data)

# Create comprehensive visualization
create_comprehensive_visualization(data)

# Show sample predictions (limited to 5 samples)
print_comprehensive_sample_predictions(data, num_samples=5)
