<a href="https://colab.research.google.com/github/besimorhino/PPT_Conversion/blob/main/image_vectorize_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Cell 1: Install dependencies
!pip install -q torchvision matplotlib scikit-learn pillow

In [None]:
# Cell 2: Imports
from io import BytesIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import torch
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image, UnidentifiedImageError
from sklearn.decomposition import PCA
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Cell 3: Load pretrained model (ResNet50, remove final classification layer)
# `pretrained=True` is deprecated in torchvision >= 0.13 in favour of the
# `weights` argument. IMAGENET1K_V1 is the checkpoint `pretrained=True` selected,
# so the extracted features stay the same.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
model.eval()  # evaluation mode: we only run inference, never train
# Keep every child module except the last one (the classification layer), so the
# network outputs a feature vector per image instead of class scores.
feature_extractor = torch.nn.Sequential(*list(model.children())[:-1])



In [None]:
# Cell 4: Define image transform
# Preprocessing applied to every image before it is fed to the model:
# resize, centre-crop to 224x224, convert to a tensor, then normalise each
# colour channel with a fixed mean and standard deviation.
transform = transforms.Compose(
    [
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],  # per-channel mean
            [0.229, 0.224, 0.225],  # per-channel std
        ),
    ]
)

In [None]:
# Cell 5: Image URLs to load (edit this list or add your own; jpg only though)
# Note: some sites may refuse requests coming from this notebook, in which case
# the image is reported as failed by the loading cell and skipped.
image_urls = [
    "https://upload.wikimedia.org/wikipedia/commons/b/b6/Felis_catus-cat_on_snow.jpg",             # cat
    "https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg",                               # another cat
    "https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Viceroy_Butterfly.jpg/960px-Viceroy_Butterfly.jpg" # butterfly
]

In [None]:
# Cell 6: Load and process images
# Some hosts (notably upload.wikimedia.org, per Wikimedia's User-Agent policy)
# answer HTTP 403 to clients that do not send a descriptive User-Agent, so this
# notebook identifies itself explicitly.
REQUEST_HEADERS = {
    "User-Agent": "image-vectorize-demo/1.0 (https://github.com/besimorhino/PPT_Conversion)"
}
REQUEST_TIMEOUT_S = 30  # give up instead of hanging forever on an unresponsive host


def fetch_image(url):
    """Download `url` and return its content as an RGB PIL image.

    Args:
        url: HTTP(S) address of the image file.

    Returns:
        The decoded image converted to RGB mode.

    Raises:
        requests.exceptions.RequestException: on a network failure, timeout,
            or HTTP error status.
        UnidentifiedImageError: if the downloaded bytes are not an image
            Pillow can decode.
    """
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT_S)
    response.raise_for_status()  # Raise an exception for bad status codes
    # Decode straight from memory; no temporary file is needed.
    return Image.open(BytesIO(response.content)).convert('RGB')


images = []
tensors = []

for url in image_urls:
    try:
        img = fetch_image(url)
        tensor = transform(img)
    except UnidentifiedImageError:
        print(f"Could not identify image format from URL: {url}. The downloaded content might not be a valid image or in a supported format.")
    except requests.exceptions.RequestException as e:
        print(f"Error fetching image from URL: {url}: {e}")
    except Exception as e:
        # Best effort: one bad URL must not stop the remaining images from loading.
        print(f"An unexpected error occurred while processing {url}: {e}")
    else:
        # Append to both lists only after every step succeeded, so that
        # images[i] always corresponds to tensors[i].
        images.append(img)
        tensors.append(tensor)
        print(f"Successfully loaded image from URL: {url}")

if not tensors:
    # Stop here: the following cells need `batch`, and a `batch` left over from
    # an earlier run must not be picked up silently.
    raise RuntimeError("No images were successfully loaded. Please ensure the URLs are correct and point to valid image files.")

batch = torch.stack(tensors)

Error fetching image from URL: https://upload.wikimedia.org/wikipedia/commons/b/b6/Felis_catus-cat_on_snow.jpg: 403 Client Error: Forbidden for url: https://upload.wikimedia.org/wikipedia/commons/b/b6/Felis_catus-cat_on_snow.jpg
Error fetching image from URL: https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg: 403 Client Error: Forbidden for url: https://upload.wikimedia.org/wikipedia/commons/3/3a/Cat03.jpg
Error fetching image from URL: https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Viceroy_Butterfly.jpg/960px-Viceroy_Butterfly.jpg: 403 Client Error: Forbidden for url: https://upload.wikimedia.org/wikipedia/commons/thumb/5/55/Viceroy_Butterfly.jpg/960px-Viceroy_Butterfly.jpg
No images were successfully loaded. Please ensure the URLs are correct and point to valid image files.


# Checkpoint!
If you see the message that no images were successfully loaded, go back and check that each image URL is reachable from this notebook before continuing.

In [None]:
# Cell 7: Extract vectors
with torch.no_grad():  # inference only, so skip gradient tracking
    # Flatten everything after the batch dimension so there is one row per image.
    # A bare .squeeze() would also remove the batch dimension when exactly one
    # image was loaded, producing a 1-D array that cosine_similarity rejects.
    features = torch.flatten(feature_extractor(batch), start_dim=1)

vectors = features.numpy()


NameError: name 'batch' is not defined

In [None]:
# Cell 8: Cosine similarity matrix
similarities = cosine_similarity(vectors)
# Label rows and columns "Image 1", "Image 2", ... in the order the images were
# loaded, so each cell of the matrix can be traced back to a pair of images.
image_labels = [f"Image {i + 1}" for i in range(len(similarities))]
df = pd.DataFrame(similarities, index=image_labels, columns=image_labels)
# Last expression of the cell: rendered as a shaded table (darker = more similar).
df.style.background_gradient(cmap='Blues')


In [None]:
# Cell 9: Visualize in 2D
# Project the embedding vectors onto their first two principal components.
pca = PCA(n_components=2)
reduced = pca.fit_transform(vectors)

fig, ax = plt.subplots(figsize=(10, 6))
for i, (x, y) in enumerate(reduced):
    ax.scatter(x, y)
    # Offset the label in screen points (not data units) so it stays next to
    # its marker whatever the scale of the PCA axes turns out to be.
    ax.annotate(f"Image {i + 1}", (x, y), xytext=(5, 5),
                textcoords="offset points", fontsize=9)
ax.set_title("Image Embeddings in 2D Space (via PCA)")
ax.set_xlabel("Principal component 1")
ax.set_ylabel("Principal component 2")
ax.grid(True)
plt.show()


In [None]:
# Cell 10: Show images side-by-side
plt.figure(figsize=(15, 3))
for i, img in enumerate(images):
    # One row with a column per image; subplot indices are 1-based.
    plt.subplot(1, len(images), i + 1)
    plt.imshow(img)
    plt.axis('off')
    # f-string so the title shows the actual image number, not the text "{i+1}".
    plt.title(f"Image {i + 1}")
plt.show()
