In [1]:
!pip install -q lpips scikit-image opencv-python Pillow
!pip install -q transformers torch torchvision

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/53.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.8/53.8 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
"""
Reference-Based Evaluation
Сравнивает генерации с ground truth эталоном
"""

import torch
import numpy as np
from PIL import Image
from typing import Dict, List, Tuple, Optional
import warnings
import lpips
warnings.filterwarnings('ignore')

class LightweightLPIPS:
    def __init__(self, net='alex', use_gpu=None):


        if use_gpu is None:
            use_gpu = torch.cuda.is_available()

        self.device = 'cuda' if use_gpu else 'cpu'

        print(f"Loading LPIPS ({net})...")


        self.model = lpips.LPIPS(net=net, verbose=False)
        self.model = self.model.to(self.device)
        self.model.eval()


        for param in self.model.parameters():
            param.requires_grad = False


        if self.device == 'cuda':
            allocated = torch.cuda.memory_allocated() / 1024**2
            print(f"   GPU Memory: {allocated:.1f} MB")

    def compute(self, img1: Image.Image, img2: Image.Image) -> float:

        tensor1 = self._preprocess(img1)
        tensor2 = self._preprocess(img2)

        with torch.no_grad():
            distance = self.model(tensor1, tensor2)

        similarity = 1.0 - distance.item()


        if self.device == 'cuda':
            torch.cuda.empty_cache()

        return max(0.0, min(1.0, similarity))

    def _preprocess(self, img: Image.Image) -> torch.Tensor:

        max_size = 512
        if max(img.size) > max_size:
            ratio = max_size / max(img.size)
            new_size = tuple(int(dim * ratio) for dim in img.size)
            img = img.resize(new_size, Image.LANCZOS)

        img_array = np.array(img).astype(np.float32) / 255.0
        tensor = torch.from_numpy(img_array).permute(2, 0, 1).unsqueeze(0)


        tensor = tensor * 2 - 1

        return tensor.to(self.device)

    def __del__(self):

        if hasattr(self, 'device') and self.device == 'cuda':
            torch.cuda.empty_cache()


class ReferenceBasedEvaluator:

    def __init__(
        self,
        use_lpips: bool = True,
        use_ssim: bool = True,
        use_clip: bool = False,  # По умолчанию выключен для экономии памяти
        lpips_net: str = 'alex',
        device: str = None
    ):

        if device is None:
            device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = device


        print("Initializing Reference-Based Evaluator")
        print(f"Device: {self.device}")
        print(f"LPIPS: {use_lpips}")
        print(f"SSIM: {use_ssim}")
        print(f"CLIP: {use_clip}")
        print()

        self.lpips_model = None
        self.clip_model = None
        self.use_ssim = use_ssim


        if use_lpips:
            try:
                self.lpips_model = LightweightLPIPS(
                    net=lpips_net,
                    use_gpu=(self.device == 'cuda')
                )
            except Exception as e:
                print(f" LPIPS loading failed: {e}")

        if use_clip and self.device == 'cuda':
            try:

                from transformers import CLIPModel, CLIPProcessor

                self.clip_model = CLIPModel.from_pretrained(
                    "openai/clip-vit-base-patch32"
                ).to(self.device)
                self.clip_processor = CLIPProcessor.from_pretrained(
                    "openai/clip-vit-base-patch32"
                )
                self.clip_model.eval()


            except Exception as e:
                print(f"CLIP loading failed: {e}")
                self.clip_model = None

        print(" Evaluator initialized")


    def compute_lpips(
        self,
        generated: Image.Image,
        ground_truth: Image.Image
    ) -> float:

        if self.lpips_model is None:
            return 0.0

        try:
            return self.lpips_model.compute(generated, ground_truth)
        except Exception as e:
            print(f" LPIPS computation failed: {e}")
            return 0.0


    def compute_ssim(
        self,
        generated: Image.Image,
        ground_truth: Image.Image
    ) -> float:

        if not self.use_ssim:
            return 0.0

        try:
            from skimage.metrics import structural_similarity as ssim


            gen_array = np.array(generated.convert('RGB'))
            gt_array = np.array(ground_truth.convert('RGB'))


            if gen_array.shape != gt_array.shape:
                from skimage.transform import resize
                gen_array = resize(gen_array, gt_array.shape, anti_aliasing=True)
                gen_array = (gen_array * 255).astype(np.uint8)


            score = ssim(
                gen_array,
                gt_array,
                channel_axis=2,
                data_range=255
            )

            return float(score)

        except Exception as e:
            print(f"⚠SSIM computation failed: {e}")
            return 0.0

    def compute_psnr(
        self,
        generated: Image.Image,
        ground_truth: Image.Image
    ) -> float:

        try:
            gen_array = np.array(generated.convert('RGB')).astype(np.float64)
            gt_array = np.array(ground_truth.convert('RGB')).astype(np.float64)


            if gen_array.shape != gt_array.shape:
                from skimage.transform import resize
                gen_array = resize(gen_array, gt_array.shape, anti_aliasing=True)
                gen_array = gen_array * 255

            mse = np.mean((gen_array - gt_array) ** 2)

            if mse == 0:
                return 100.0

            max_pixel = 255.0
            psnr = 20 * np.log10(max_pixel / np.sqrt(mse))

            return float(psnr)

        except Exception as e:
            print(f" PSNR computation failed: {e}")
            return 0.0


    def compute_clip_similarity(
        self,
        generated: Image.Image,
        ground_truth: Image.Image
    ) -> float:

        if self.clip_model is None:
            return 0.0

        try:
            inputs = self.clip_processor(
                images=[generated, ground_truth],
                return_tensors="pt"
            ).to(self.device)

            with torch.no_grad():
                image_features = self.clip_model.get_image_features(**inputs)

                # Normalize
                image_features = image_features / image_features.norm(dim=-1, keepdim=True)

                # Cosine similarity
                similarity = torch.cosine_similarity(
                    image_features[0:1],
                    image_features[1:2]
                ).item()

            # Cleanup
            if self.device == 'cuda':
                torch.cuda.empty_cache()

            return float(similarity)

        except Exception as e:
            print(f" CLIP computation failed: {e}")
            return 0.0


    def compute_color_similarity(
        self,
        generated: Image.Image,
        ground_truth: Image.Image
    ) -> float:

        try:
            import cv2

            gen_array = np.array(generated.convert('RGB'))
            gt_array = np.array(ground_truth.convert('RGB'))


            if gen_array.shape != gt_array.shape:
                gen_array = cv2.resize(gen_array, (gt_array.shape[1], gt_array.shape[0]))


            hist_gen = cv2.calcHist(
                [gen_array], [0, 1, 2], None,
                [8, 8, 8], [0, 256, 0, 256, 0, 256]
            )
            hist_gt = cv2.calcHist(
                [gt_array], [0, 1, 2], None,
                [8, 8, 8], [0, 256, 0, 256, 0, 256]
            )


            hist_gen = cv2.normalize(hist_gen, hist_gen).flatten()
            hist_gt = cv2.normalize(hist_gt, hist_gt).flatten()


            correlation = cv2.compareHist(
                hist_gen.reshape(-1, 1),
                hist_gt.reshape(-1, 1),
                cv2.HISTCMP_CORREL
            )

            return float(correlation)

        except Exception as e:
            print(f"⚠️  Color similarity computation failed: {e}")
            return 0.0



    def evaluate_single(
        self,
        generated: Image.Image,
        ground_truth: Image.Image,
        weights: Optional[Dict[str, float]] = None,
        verbose: bool = False
    ) -> Dict[str, float]:

        if verbose:
            print("Evaluating...", end=" ")

        results = {}


        if weights is None:
            weights = {
                'lpips': 0.50,
                'ssim': 0.30,
                'clip': 0.0,
                'color': 0.20
            }

        if verbose:
            print("LPIPS...", end=" ")
        results['lpips_similarity'] = self.compute_lpips(generated, ground_truth)


        if verbose:
            print("SSIM...", end=" ")
        results['ssim'] = self.compute_ssim(generated, ground_truth)

        if verbose:
            print("PSNR...", end=" ")
        results['psnr'] = self.compute_psnr(generated, ground_truth)


        if self.clip_model is not None:
            if verbose:
                print("CLIP...", end=" ")
            results['clip_similarity'] = self.compute_clip_similarity(
                generated, ground_truth
            )


        if verbose:
            print("Color...", end=" ")
        results['color_similarity'] = self.compute_color_similarity(
            generated, ground_truth
        )


        combined = 0.0
        total_weight = 0.0

        for metric, weight in weights.items():
            metric_key = f'{metric}_similarity' if metric != 'ssim' else metric

            if metric_key in results:
                value = results[metric_key]


                if metric == 'psnr':
                    value = min(1.0, max(0.0, (value - 20) / 20))

                combined += value * weight
                total_weight += weight

        results['combined_score'] = combined / total_weight if total_weight > 0 else 0.0

        if verbose:
            print(f"Done! Score: {results['combined_score']:.4f}")

        return results


In [3]:

    def evaluate_batch(
        self,
        generated_images: List[Image.Image],
        ground_truth: Image.Image,
        model_names: Optional[List[str]] = None,
        verbose: bool = True
    ) -> List[Dict[str, any]]:

        if model_names is None:
            model_names = [f"Model_{i+1}" for i in range(len(generated_images))]

        results = []

        for i, (gen_img, model_name) in enumerate(zip(generated_images, model_names)):
            if verbose:
                print(f"\n[{i+1}/{len(generated_images)}] {model_name}: ", end="")

            scores = self.evaluate_single(gen_img, ground_truth, verbose=verbose)
            scores['model_name'] = model_name
            scores['model_index'] = i

            results.append(scores)

        return results

    def select_best(
        self,
        generated_images: List[Image.Image],
        ground_truth: Image.Image,
        model_names: Optional[List[str]] = None,
        metric: str = 'combined_score'
    ) -> Tuple[int, Dict[str, any]]:
        """
        Выбирает лучшую генерацию

        Returns:
            (best_index, best_scores)
        """
        results = self.evaluate_batch(generated_images, ground_truth, model_names)

        best_idx = max(range(len(results)), key=lambda i: results[i].get(metric, 0))

        return best_idx, results[best_idx]



In [6]:
evaluator = ReferenceBasedEvaluator(
        use_lpips=True,
        use_ssim=True,
        use_clip=False,
        lpips_net='alex'
)


ground_truth = Image.open('/content/ground_truth.jpg')
generated = Image.open('/content/model_output.jpg')

scores = evaluator.evaluate_single(generated, ground_truth, verbose=True)
print(f"Combined Score: {scores['combined_score']:.4f}")
print(f"LPIPS: {scores['lpips_similarity']:.4f}")
print(f"SSIM: {scores['ssim']:.4f}")


Initializing Reference-Based Evaluator
Device: cpu
LPIPS: True
SSIM: True
CLIP: False

Loading LPIPS (alex)...
 Evaluator initialized
Evaluating... LPIPS... SSIM... PSNR... Color... Done! Score: 0.6427
Combined Score: 0.6427
LPIPS: 0.6753
SSIM: 0.6417


In [7]:
evaluator = ReferenceBasedEvaluator(
        use_lpips=True,
        use_ssim=True,
        use_clip=True,
        lpips_net='alex'
)


ground_truth = Image.open('/content/ground_truth.jpg')
generated = Image.open('/content/model_output.jpg')

scores = evaluator.evaluate_single(generated, ground_truth, verbose=True)
print(f"Combined Score: {scores['combined_score']:.4f}")
print(f"LPIPS: {scores['lpips_similarity']:.4f}")
print(f"SSIM: {scores['ssim']:.4f}")


Initializing Reference-Based Evaluator
Device: cpu
LPIPS: True
SSIM: True
CLIP: True

Loading LPIPS (alex)...
 Evaluator initialized
Evaluating... LPIPS... SSIM... PSNR... Color... Done! Score: 0.6427
Combined Score: 0.6427
LPIPS: 0.6753
SSIM: 0.6417
