In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
#!unzip '/content/drive/My Drive/523_pipeline/GAN/ground_truth.zip' -d '/content/drive/My Drive/523_pipeline/GAN/ground_truth'
#!unzip '/content/drive/My Drive/523_pipeline/GAN/generated.zip' -d '/content/drive/My Drive/523_pipeline/GAN/generated'
#!unzip '/content/drive/My Drive/523_pipeline/GAN/description.zip' -d '/content/drive/My Drive/523_pipeline/GAN/text'

In [2]:
generated_dir = '/content/drive/My Drive/523_pipeline/GAN/generated'
ground_truth_dir = '/content/drive/My Drive/523_pipeline/GAN/ground_truth'
text_dir = '/content/drive/My Drive/523_pipeline/GAN/text'

In [3]:
len(generated_dir)

50

In [5]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import os

# Load a pre-trained ResNet model
model = models.resnet50(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Define a transform to preprocess the image
transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Function to load an image and transform it
def load_image(image_path):
    image = Image.open(image_path).convert('RGB')  # Convert image to RGB
    image = transform(image).unsqueeze(0)  # Add batch dimension
    return image

# Function to extract features using the PyTorch model
def extract_features(input_image, model):
    with torch.no_grad():  # Ensure no gradients are calculated
        features = model(input_image)
    return features

# Initialize the list to store the similarity score for each ground truth image
scores = []

# Loop through each ground truth image
for i in range(572):
    gt_img_path = os.path.join(ground_truth_dir, f'ground_truth_{i}.png')
    gen_img_path = os.path.join(generated_dir, f'generated_{i}.png')

    # Load and process both images
    gt_image_tensor = load_image(gt_img_path)
    gen_image_tensor = load_image(gen_img_path)

    # Extract features
    gt_features = extract_features(gt_image_tensor, model)
    gen_features = extract_features(gen_image_tensor, model)

    # Calculate cosine similarity
    similarity = torch.nn.functional.cosine_similarity(gen_features, gt_features, dim=1).item()

    # Store the similarity score
    scores.append(similarity)

# At this point, 'scores' contains the similarity scores for each corresponding pair of images
print(scores)



Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:01<00:00, 64.5MB/s]


[0.5231679677963257, 0.003889988176524639, 0.24673445522785187, 0.5102072358131409, 0.2770373523235321, 0.23656520247459412, 0.1549530178308487, 0.3769156038761139, 0.3274129629135132, 0.2860913574695587, 0.4865958094596863, 0.7063395380973816, 0.39870768785476685, 0.28962621092796326, 0.13690388202667236, 0.3443750739097595, 0.4811020791530609, 0.5986828804016113, 0.3728475272655487, 0.35326141119003296, 0.29513630270957947, 0.2151656448841095, 0.3163531422615051, 0.3201267719268799, 0.544203519821167, 0.5209226608276367, 0.11017825454473495, 0.03433166816830635, 0.3593931794166565, 0.2044898271560669, 0.30120179057121277, 0.3067903220653534, 0.3674444556236267, 0.3467615842819214, 0.5181602835655212, 0.3625611662864685, 0.4895329773426056, 0.6097838878631592, 0.11414984613656998, 0.36577001214027405, 0.5347099304199219, 0.2577883303165436, 0.07592221349477768, 0.3740089237689972, 0.3365347385406494, 0.36838433146476746, 0.36312538385391235, 0.3688637614250183, 0.3156375586986542, 0.5

In [6]:
len(scores)

572

In [7]:
average_score = sum(scores) / len(scores)
average_score

0.3640210438833956

In [9]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import os

# Initialize CLIP model and processor
model = CLIPModel.from_pretrained('openai/clip-vit-base-patch32')
processor = CLIPProcessor.from_pretrained('openai/clip-vit-base-patch32')

def calculate_text_image_correlation(image_path, text):
    image = Image.open(image_path).convert('RGB').resize((256, 256))
    inputs = processor(text=[text], images=image, return_tensors="pt", padding=True, truncation=True)
    outputs = model(**inputs)
    similarity = torch.nn.functional.cosine_similarity(outputs.text_embeds, outputs.image_embeds, dim=1)
    return similarity.item()

# List to store the similarity score for each ground truth image and corresponding text
text_scores = []

# Assume 'text_dir' and 'generated_dir' are defined and have paths set up correctly
# Loop through each text file (302 total)
for i in range(572):
    text_path = os.path.join(text_dir, f'description_{i}.txt')
    gen_img_path = os.path.join(generated_dir, f'generated_{i}.png')

    # Read the text from the file
    with open(text_path, 'r') as file:
        text = file.read()

    # Calculate the similarity between the text and the corresponding single generated image
    similarity = calculate_text_image_correlation(gen_img_path, text)

    # Store the similarity score
    text_scores.append(similarity)

# Now 'text_scores' contains the similarity scores for each ground truth and its corresponding text
print(text_scores)


[0.1550765037536621, 0.16004034876823425, 0.16410629451274872, 0.15517334640026093, 0.19532428681850433, 0.22272467613220215, 0.23333968222141266, 0.1743948757648468, 0.23465080559253693, 0.2337178736925125, 0.2140197455883026, 0.2053217887878418, 0.18188832700252533, 0.184774711728096, 0.16123227775096893, 0.20737133920192719, 0.22550655901432037, 0.18162432312965393, 0.22136326134204865, 0.1421329230070114, 0.17789019644260406, 0.1794363558292389, 0.22706985473632812, 0.17944610118865967, 0.25326046347618103, 0.20169317722320557, 0.22196762263774872, 0.1455652415752411, 0.13280758261680603, 0.19286935031414032, 0.17443905770778656, 0.21472202241420746, 0.13304483890533447, 0.20997387170791626, 0.21450790762901306, 0.20489981770515442, 0.17398688197135925, 0.1904512345790863, 0.18359261751174927, 0.16579385101795197, 0.1772695928812027, 0.1748124361038208, 0.17514877021312714, 0.17008063197135925, 0.1471242755651474, 0.17823107540607452, 0.1506902426481247, 0.2242370992898941, 0.16497

In [10]:
average_text_score = sum(text_scores) / len(text_scores)
average_text_score

0.1900303592397408

In [11]:
len(text_scores)

572

In [12]:
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
import torch
import os

# Initialize the InceptionV3 model
model = models.inception_v3(pretrained=True)
model.eval()  # Set the model to evaluation mode

# Define image transformation
transform = transforms.Compose([
    transforms.Resize(299),  # Resize the image to fit the input size of the model
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# List of image file paths
image_files = sorted([os.path.join(generated_dir, f) for f in os.listdir(generated_dir) if f.endswith('.png')])

# List to store the confidence scores for each image
image_quality_score = []

# Process each image individually
for image_path in image_files:
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0)  # Add a batch dimension

    # Get model prediction and confidence
    with torch.no_grad():
        outputs = model(image)
        _, predicted = outputs.max(1)
        confidence = torch.nn.functional.softmax(outputs, dim=1)[0, predicted].item()

    # Store the confidence score
    image_quality_score.append(confidence)

# Print or return the list of confidence scores
print(image_quality_score)


Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 150MB/s] 


[0.06147686019539833, 0.13796567916870117, 0.08222496509552002, 0.11066687107086182, 0.571174144744873, 0.061188530176877975, 0.7855029702186584, 0.2759243845939636, 0.049164850264787674, 0.13165532052516937, 0.06727848947048187, 0.1888245940208435, 0.9992737174034119, 0.35118257999420166, 0.06944916397333145, 0.14991579949855804, 0.3324195444583893, 0.12153057754039764, 0.3657272458076477, 0.11805913597345352, 0.20507171750068665, 0.03908076137304306, 0.7044496536254883, 0.2838281989097595, 0.11001785844564438, 0.07697204500436783, 0.2740101218223572, 0.11749330163002014, 0.11325754970312119, 0.07874540239572525, 0.12278660386800766, 0.43315890431404114, 0.07955023646354675, 0.0533788800239563, 0.05021338537335396, 0.5387910008430481, 0.12774163484573364, 0.8359546065330505, 0.7529483437538147, 0.13757416605949402, 0.42686089873313904, 0.11691740900278091, 0.1154380738735199, 0.20045912265777588, 0.06146537885069847, 0.13903148472309113, 0.22046403586864471, 0.0623379610478878, 0.1345

In [14]:
average_text_score = sum(image_quality_score) / len(image_quality_score)
average_text_score

0.2244212239023324

In [13]:
len(image_quality_score)

572

In [10]:
import torch
from torchvision import models, transforms
from PIL import Image
import os
import numpy as np
from scipy.linalg import sqrtm

# Initialize InceptionV3 model
try:
    from torchvision.models import inception_v3, InceptionV3_Weights
    model = inception_v3(weights=InceptionV3_Weights.IMAGENET1K_V1).eval()
except ImportError:
    model = models.inception_v3(pretrained=True).eval()
model.fc = torch.nn.Identity()
model.cuda()

# Define the transformation
transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

def extract_features(image_paths):
    features = []
    for image_path in image_paths:
        image = Image.open(image_path).convert('RGB')
        image = transform(image).unsqueeze(0).cuda()
        with torch.no_grad():
            feature = model(image).cpu().numpy()
        features.append(feature.squeeze())
    return np.array(features)

def calculate_fid(features1, features2, eps=1e-6):
    mu1, sigma1 = np.mean(features1, axis=0), np.cov(features1, rowvar=False)
    mu2, sigma2 = np.mean(features2, axis=0), np.cov(features2, rowvar=False)

    # Regularize the covariance matrices by adding a small value `eps` to the diagonals
    sigma1 += np.eye(sigma1.shape[0]) * eps
    sigma2 += np.eye(sigma2.shape[0]) * eps

    ssdiff = np.sum((mu1 - mu2) ** 2)
    try:
        covmean = sqrtm(sigma1.dot(sigma2))
        if np.iscomplexobj(covmean):
            covmean = covmean.real
    except Exception as e:
        print(f"Error computing sqrtm: {e}")
        return float('nan')  # Return NaN to indicate failure

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2 * covmean)
    return fid

# Directories
generated_dir = '/content/drive/My Drive/523_pipeline/GAN/generated'
ground_truth_dir = '/content/drive/My Drive/523_pipeline/GAN/ground_truth'

# Get image paths
generated_image_paths = [os.path.join(generated_dir, f'generated_{i}.png') for i in range(572)]
ground_truth_image_paths = [os.path.join(ground_truth_dir, f'ground_truth_{i}.png') for i in range(572)]

# Extract features
generated_features = extract_features(generated_image_paths)
ground_truth_features = extract_features(ground_truth_image_paths)

# Calculate FID
fid_score = calculate_fid(generated_features, ground_truth_features)

# Output FID score
print("FID Score:", fid_score)


FID Score: 236.6387728478329
