# In [None]:
import gc
import json
import random
import re
import shutil
import tempfile
import zipfile
from pathlib import Path

import numpy as np
import pandas as pd
import torch
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer

# In [None]:
def _load_mmlu_samples(data_path, rng, max_files=10, per_file=3):
    """Collect multiple-choice samples from the CSV files under *data_path*.

    Rows are read positionally: column 0 is the question, columns 1-4 are the
    answer options, and column 5 (when present and one of the letters A-D) is
    used as the gold label.

    NOTE(review): the column layout is assumed from how the rows are indexed
    here; confirm it against the actual dataset. ``pd.read_csv`` is called with
    its default header handling, so if the files have no header row the first
    question of every file is consumed as column names -- verify.

    Args:
        data_path: Directory searched recursively for ``*.csv`` files.
        rng: ``random.Random`` used only for the fallback label (see below).
        max_files: Maximum number of CSV files to read.
        per_file: Maximum number of rows sampled from each file.

    Returns:
        A list of dicts with keys ``'question'``, ``'choices'`` and ``'answer'``.
    """
    samples = []
    unlabeled = 0
    # Sort so the selected files do not depend on filesystem listing order.
    csv_files = sorted(Path(data_path).rglob("*.csv"))[:max_files]

    for csv_file in csv_files:
        try:
            df = pd.read_csv(csv_file)
        except (OSError, ValueError) as err:
            # pandas parser/decoding errors derive from ValueError; unreadable
            # files raise OSError. Skip the file but say so.
            print(f"   Skipping {csv_file}: {err}")
            continue

        if len(df) == 0 or df.shape[1] < 5:
            continue

        picked = df.sample(n=min(per_file, len(df)), random_state=42)
        for _, row in picked.iterrows():
            question = str(row.iloc[0])[:200]
            if not question:
                continue

            answer = None
            if len(row) > 5:
                label = str(row.iloc[5]).strip().upper()
                if label in ('A', 'B', 'C', 'D'):
                    answer = label
            if answer is None:
                # No usable gold label in the file: keep the sample (as the
                # previous implementation did) but count it, because accuracy
                # measured against a random label is pure noise.
                unlabeled += 1
                answer = rng.choice(['A', 'B', 'C', 'D'])

            samples.append({
                'question': question,
                'choices': [str(row.iloc[i])[:100] for i in range(1, 5)],
                'answer': answer,
            })

    if unlabeled:
        print(f"   WARNING: {unlabeled} of {len(samples)} samples have no A-D "
              f"label in column 5; random labels were assigned to them")
    return samples


def _locate_model_dir(root):
    """Return the directory under *root* containing ``config.json``.

    Handles archives that wrap the checkpoint in a single top-level folder.
    Falls back to *root* itself when no ``config.json`` is found.
    """
    root = Path(root)
    if (root / "config.json").is_file():
        return root
    nested = next(root.rglob("config.json"), None)
    return nested.parent if nested is not None else root


def _load_model(model_dir):
    """Load a causal LM and its tokenizer from *model_dir* (local path or hub id)."""
    # NOTE(security): trust_remote_code=True executes Python shipped with the
    # checkpoint. Kept for compatibility with custom quantized checkpoints;
    # only use it with archives you trust.
    model = AutoModelForCausalLM.from_pretrained(
        str(model_dir),
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True
    )
    tokenizer = AutoTokenizer.from_pretrained(
        str(model_dir),
        trust_remote_code=True
    )
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return model, tokenizer


def _multiple_choice_accuracy(model, tokenizer, samples):
    """Return the percentage of *samples* answered with the expected letter.

    Returns 50 when *samples* is empty (placeholder value kept from the
    previous implementation). Samples whose generation fails count as wrong.
    """
    if not samples:
        return 50

    # Only a standalone capital letter counts as an answer. Upper-casing the
    # whole output first would turn ordinary words ("Based", "a") into matches.
    answer_pattern = re.compile(r'\b[ABCD]\b')
    device = next(model.parameters()).device
    correct = 0
    failures = 0
    last_error = None

    for sample in tqdm(samples, desc="Оценка"):
        option_lines = "".join(
            f"{chr(65 + i)}. {choice}\n" for i, choice in enumerate(sample['choices'])
        )
        prompt = f"Q: {sample['question']}\n{option_lines}A:"

        try:
            inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
            inputs = {k: v.to(device) for k, v in inputs.items()}
            with torch.no_grad():
                # Greedy decoding; a temperature is meaningless (and 0.0 is an
                # invalid value) when do_sample=False, so none is passed.
                outputs = model.generate(
                    **inputs,
                    max_new_tokens=3,
                    do_sample=False,
                    pad_token_id=tokenizer.pad_token_id
                )
        except Exception as err:
            # Best effort: one failing sample must not abort the run, but it
            # is reported below instead of being silently swallowed.
            failures += 1
            last_error = err
            continue

        prompt_length = inputs['input_ids'].shape[1]
        answer = tokenizer.decode(outputs[0][prompt_length:],
                                  skip_special_tokens=True).strip()
        match = answer_pattern.search(answer)
        if match and match.group(0) == sample['answer']:
            correct += 1

    if failures:
        print(f"   WARNING: {failures} of {len(samples)} samples failed during "
              f"generation (last error: {last_error!r})")
    return correct / len(samples) * 100


def _score_checkpoint(path, samples):
    """Load the checkpoint at *path*, score it on *samples*, then release it.

    A ``.zip`` path is extracted to a temporary directory that is removed
    again before returning.

    Returns:
        ``(accuracy_percent, parameter_megabytes)`` where the second value is
        the in-memory size of the loaded parameters.
    """
    extracted_dir = None
    model = tokenizer = None
    try:
        if str(path).endswith('.zip'):
            extracted_dir = tempfile.mkdtemp()
            with zipfile.ZipFile(path, 'r') as archive:
                archive.extractall(extracted_dir)
            model_dir = _locate_model_dir(extracted_dir)
        else:
            model_dir = path

        model, tokenizer = _load_model(model_dir)
        accuracy = _multiple_choice_accuracy(model, tokenizer, samples)
        parameter_mb = sum(
            p.numel() * p.element_size() for p in model.parameters()
        ) / (1024**2)
    finally:
        # Free the weights before the next checkpoint is loaded, and do not
        # leave the extracted archive behind.
        del model, tokenizer
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        if extracted_dir is not None:
            shutil.rmtree(extracted_dir, ignore_errors=True)
    return accuracy, parameter_mb


def _size_on_disk_mb(path):
    """Return the size in MB of a ``.zip`` file or local directory.

    Returns ``None`` when *path* is neither (e.g. a hub model id), so the
    caller can fall back to another measurement instead of using 0.
    """
    if str(path).endswith('.zip'):
        return Path(path).stat().st_size / (1024**2)
    directory = Path(path)
    if not directory.is_dir():
        return None
    total = sum(f.stat().st_size for f in directory.rglob('*') if f.is_file())
    return total / (1024**2)


def evaluate_model(original_path="Qwen/Qwen3-8B", compressed_path="model.zip", num_samples=30,
                   *, data_path="/kaggle/input/mmlu-massive-multitask-language-understanding",
                   seed=42):
    """Compare a compressed checkpoint with the original one and score it.

    Both checkpoints are scored on the same multiple-choice samples, one after
    the other so that only one model is held in memory at a time. The result
    is printed and written to ``results.json`` in the current directory.

    Args:
        original_path: Local directory, ``.zip`` archive or hub id of the
            reference model.
        compressed_path: Local directory, ``.zip`` archive or hub id of the
            compressed model.
        num_samples: Maximum number of samples to evaluate on.
        data_path: Directory searched recursively for the CSV question files.
        seed: Seed for sample selection; pass ``None`` for a different
            selection on every run.

    Returns:
        dict with ``compression_ratio``, ``performance_drop``, ``final_score``,
        ``original_accuracy`` and ``compressed_accuracy`` (all floats).
        ``performance_drop`` is the signed relative accuracy change;
        ``final_score`` is ``compression_ratio / (1 + max(drop, 0))``.
    """
    rng = random.Random(seed)

    all_samples = _load_mmlu_samples(data_path, rng)
    num_samples = max(0, min(num_samples, len(all_samples)))
    samples = rng.sample(all_samples, num_samples)
    print(f"   Загружено {len(samples)} примеров")
    if not samples:
        print(f"   WARNING: no samples found under {data_path}; "
              f"accuracies below are placeholders")

    print("\n2. Загрузка моделей...")

    orig_acc, orig_param_mb = _score_checkpoint(original_path, samples)
    comp_acc, comp_param_mb = _score_checkpoint(compressed_path, samples)

    print(f"   Оригинал: {orig_acc:.1f}%")
    print(f"   Сжатая:   {comp_acc:.1f}%")

    # A hub id has no files on local disk to measure; use the loaded
    # parameter footprint instead of reporting a size of 0.
    orig_size = _size_on_disk_mb(original_path) or orig_param_mb
    comp_size = _size_on_disk_mb(compressed_path) or comp_param_mb
    compression = orig_size / comp_size

    print(f"   Оригинал: {orig_size:.1f} MB")
    print(f"   Сжатая:   {comp_size:.1f} MB")
    print(f"   Сжатие:   {compression:.2f}x")

    drop = (orig_acc - comp_acc) / orig_acc if orig_acc > 0 else 0
    # Clamp inside the score only: a negative drop would otherwise inflate the
    # score, divide by zero at drop == -1, or flip its sign below -1.
    score = compression / (1 + max(drop, 0.0))

    print(f"Сжатие:    {compression:.2f}x")
    print(f"Падение:   {drop:.4f}")
    print(f"Score:     {score:.4f}")

    results = {
        'compression_ratio': float(compression),
        'performance_drop': float(drop),
        'final_score': float(score),
        'original_accuracy': float(orig_acc),
        'compressed_accuracy': float(comp_acc)
    }

    with open('results.json', 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=2)

    return results

if __name__ == "__main__":
    # Entry point when run as a script: compare the quantized archive in the
    # Kaggle working directory with the reference checkpoint on 30 samples.
    results = evaluate_model(
        "Qwen/Qwen3-8B",
        "/kaggle/working/qwen3-08b-quantized.zip",
        30,
    )