In [None]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torchvision.transforms import ToTensor

# Root folder handed to torchvision for the CIFAR-10 files.
DATA_DIRECTORY = './data'

# CIFAR-10 training split, downloaded on demand; no transform is attached here.
cifar10_dataset = torchvision.datasets.CIFAR10(
    root=DATA_DIRECTORY,
    train=True,
    download=True,
)

# Lookup table: CIFAR-10 label index -> human-readable class name.
class_metadata = dict(enumerate((
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
)))

def visualize_cifar10_images(dataset, start_index, num_images):
    """Show `num_images` consecutive samples of `dataset` side by side in one row.

    Args:
     - dataset: indexable dataset; `dataset[i]` must return `(image, label)` with an
       image that `ToTensor` accepts, and `dataset.targets[i]` must be a key of
       `class_metadata`.
     - start_index: index of the first sample to draw.
     - num_images: number of samples to draw; must be at least 1.

    Raises:
        ValueError: if `num_images` is smaller than 1.
    """
    if num_images < 1:
        raise ValueError(f'`num_images` must be at least 1, got {num_images}')

    # squeeze=False keeps `axes` a 2-D array even for a single column; with the
    # default, plt.subplots(1, 1) returns a bare Axes and indexing it fails.
    fig, axes = plt.subplots(1, num_images, figsize=(12, 3), squeeze=False)

    for offset, ax in enumerate(axes[0]):
        index = start_index + offset
        image, _ = dataset[index]
        class_label = class_metadata[dataset.targets[index]]

        tensor_image = ToTensor()(image)  # Convert PIL image to Torch Tensor

        ax.imshow(tensor_image.permute(1, 2, 0))  # Transpose tensor dimensions for visualization
        ax.set_title(f'ID: {index}, Class: {class_label}')
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Demo: draw five images beginning at dataset position 0.
start_index, num_images = 0, 5
visualize_cifar10_images(cifar10_dataset, start_index=start_index, num_images=num_images)

# Cell output: whether PyTorch reports a usable CUDA device.
torch.cuda.is_available()


In [None]:
import os
import requests

import tqdm
import httpimport
import pinecone
import numpy as np
from PIL import Image

import torch

# NOTE(review): `h` is not bound in any cell visible here -- presumably a helper module
# loaded through `httpimport`; confirm it is defined before this cell runs.
DATA_DIRECTORY = 'tmp'  # rebinds the constant of the same name set in the first cell
INDEX_NAME = 'image-search'
INDEX_DIMENSION = 1000
BATCH_SIZE = 200

# Both datasets are keyed by class name and share the `h.preprocess` transform.
datasets = {
    dataset_cls.__name__: dataset_cls(DATA_DIRECTORY, transform=h.preprocess, download=True)
    for dataset_cls in (torchvision.datasets.CIFAR10, torchvision.datasets.CIFAR100)
}

combined_dataset = torch.utils.data.ConcatDataset([*datasets.values()])

# Number of elements in one preprocessed sample (first item of the combined dataset).
sample = combined_dataset[0][0]
dimension = sample.numel()
print(f"Combined dataset dimension: {dimension}")

h.show_random_images_from_full_dataset(datasets['CIFAR100'])


In [None]:
# SqueezeNet 1.1 with pretrained weights, put in eval mode because it is only used for inference.
model = torchvision.models.squeezenet1_1(pretrained=True).eval()
# authenticate with Pinecone API, keys and environment available at your project at https://app.pinecone.io
# (the opening quote of the environment string was a typographic quote, which is a SyntaxError)
pinecone.init(h.pinecone_api_key, environment='Your environment')
# if the index does not already exist, we create it
# if INDEX_NAME not in pinecone.list_indexes():
#     pinecone.create_index(name=INDEX_NAME, dimension=INDEX_DIMENSION)
# instantiate connection to your Pinecone index
# NOTE(review): this connects to an index literally named 'index', not INDEX_NAME -- confirm that is intended.
index = pinecone.Index('index')

def get_vector_ids(batch_number, batch_size, prefix):
    """Return a lazy iterator over the string ids of one batch.

    The numeric part runs from `batch_number * batch_size` (inclusive) for
    `batch_size` consecutive values; each id is rendered as '<prefix>.<number>',
    e.g. prefix 'CIFAR10' and number 5 give 'CIFAR10.5'.

    The result is a `map` object, so it can be consumed only once.
    """
    first = batch_number * batch_size
    numeric_ids = np.arange(first, first + batch_size)

    def with_prefix(numeric_id):
        return f'{prefix}.{str(numeric_id)}'

    return map(with_prefix, numeric_ids)

def get_vector_metadata(label_indices, class_list):
    """Return a lazy iterator of `{'label': <class name>}` dicts, one per label index.

    Each entry of `label_indices` is used to index `class_list`. The result is a
    `map` object, so it can be consumed only once.
    """
    def as_metadata(label_index):
        return {'label': class_list[label_index]}

    return map(as_metadata, label_indices)

def get_vectors_from_batch(preprocessed_data, label_indices, batch_number, dataset):
    """Embed one batch and return `(vector_id, vector_values, vector_metadata)` tuples.

    Args:
     - preprocessed_data: batch passed straight to the module-level `model`.
     - label_indices: per-record label indices, resolved through `dataset.classes`.
     - batch_number: position of this batch, used to number the ids.
     - dataset: dataset the batch came from; its class name becomes the id prefix.

    Ids start at `batch_number * len(preprocessed_data)`, i.e. the offset is
    derived from the size of *this* batch.
    NOTE(review): a batch shorter than the preceding ones would therefore reuse
    earlier id numbers -- confirm callers only pass equally sized batches.
    """
    batch_len = len(preprocessed_data)
    id_prefix = type(dataset).__name__

    # Embeddings are only read out, so gradient tracking is switched off.
    with torch.no_grad():
        embeddings = model(preprocessed_data).tolist()

    metadata = get_vector_metadata(label_indices, dataset.classes)
    ids = get_vector_ids(batch_number, batch_len, id_prefix)
    return list(zip(ids, embeddings, metadata))

# Try the embedding pipeline on the first BATCH_SIZE samples of CIFAR-100.
dataset = datasets['CIFAR100']
list_of_preprocessed_tensors, label_indices = zip(*(dataset[i] for i in range(BATCH_SIZE)))
preprocessed_data = torch.stack(list_of_preprocessed_tensors)
vectors = get_vectors_from_batch(preprocessed_data, label_indices, batch_number=0, dataset=dataset)

# Print one record of the batch (position 123): id, first three values, metadata.
id_, embedding, metadata = vectors[123]
print(id_, embedding[:3], metadata, sep=', ')

def upsert_image_embeddings(dataset, pinecone_index, batch_size=BATCH_SIZE, num_rows=None):
    """Iterate through dataset, generate embeddings and upsert in batches to Pinecone index.

    Args:
     - dataset: a PyTorch Dataset
     - pinecone_index: your Pinecone index
     - batch_size: batch size
     - num_rows: Number of initial rows to use of dataset, use all rows if None.

    Raises:
        ValueError: if `num_rows` is negative or exceeds `len(dataset)`.
    """
    # `import tqdm` alone does not guarantee that the `tqdm.notebook` submodule is loaded.
    import tqdm.notebook

    if num_rows is None:
        # Comparing None with an int raises TypeError, so the "all rows" default
        # has to be handled before any range check.
        sampler = None
    else:
        if num_rows < 0:
            raise ValueError(f'`num_rows` should not be negative: {num_rows}')
        if num_rows > len(dataset):
            raise ValueError(f'`num_rows` should not exceed length of dataset: {len(dataset)}')
        sampler = range(num_rows)

    # Honour the `batch_size` argument instead of the module-level BATCH_SIZE.
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=sampler)

    # presumably a dict of keyword arguments for tqdm (per the helper's name), so it is
    # unpacked; passed positionally it would end up in tqdm's `desc` parameter.
    tqdm_kwargs = h.get_tqdm_kwargs(dataloader)

    # NOTE(review): get_vectors_from_batch numbers ids from the size of each batch, so a
    # shorter final batch (row count not divisible by batch_size) reuses earlier ids.
    for batch_number, (data, label_indices) in tqdm.notebook.tqdm(enumerate(dataloader), **tqdm_kwargs):
        vectors = get_vectors_from_batch(
            data,
            label_indices,
            batch_number,
            dataloader.dataset)
        pinecone_index.upsert(vectors)

# Index the first 50,000 rows of every dataset.
for dataset in datasets.values():
    upsert_image_embeddings(dataset, index, num_rows=50_000)

# Fetch the query image.
url = 'https://cdn.britannica.com/40/109040-050-62EEDEA6/Male-white-tailed-deer.jpg'
r = requests.get(url, stream=True)
# Stop on an HTTP error here rather than letting PIL fail on an error page.
r.raise_for_status()
query_image = Image.open(r.raw)
h.printmd("#### A sample image")
query_image.resize((125,125))


In [None]:
# Embed the query image without gradient tracking, as get_vectors_from_batch does
# for the indexed images.
with torch.no_grad():
    query_embedding = model(h.preprocess(query_image).unsqueeze(0)).tolist()
response = index.query(query_embedding, top_k=4, include_metadata=True)
h.printmd(f"```python\n{response}\n```")

def visualize_images_with_ids(response, dataset):
    """Plot, in one row, the images of `dataset` referenced by a query response.

    Args:
     - response: query result; `response['matches']` is iterated and the `'id'` of
       each match is expected to look like '<prefix>.<integer>'.
     - dataset: indexable dataset; `dataset[i]` must return `(image, label)` with an
       image that `ToTensor` accepts, and `dataset.targets[i]` must be a key of
       `class_metadata`.

    NOTE(review): only the integer part of the id is used. The ids built in
    get_vectors_from_batch carry the dataset class name as prefix, so a match that
    comes from a different dataset is looked up in `dataset` by number alone --
    confirm the index only holds vectors of the dataset passed in.
    """
    matches = response['matches']
    if len(matches) == 0:
        # plt.subplots cannot create a grid with zero columns.
        print('No matches to display.')
        return

    # squeeze=False keeps `axes` a 2-D array even for a single match; with the
    # default, plt.subplots(1, 1) returns a bare Axes and indexing it fails.
    fig, axes = plt.subplots(1, len(matches), figsize=(12, 3), squeeze=False)

    for ax, match in zip(axes[0], matches):
        image_id = int(match['id'].split('.')[1])
        image, _ = dataset[image_id]
        class_label = class_metadata[dataset.targets[image_id]]

        tensor_image = ToTensor()(image)  # Convert PIL image to Torch Tensor

        ax.imshow(tensor_image.permute(1, 2, 0))  # Transpose tensor dimensions for visualization
        ax.set_title(f'ID: {image_id}, Class: {class_label}')
        ax.axis('off')

    plt.tight_layout()
    plt.show()

# Show the images behind the matches returned for the query above.
visualize_images_with_ids(response=response, dataset=cifar10_dataset)


In [None]:
url = 'https://t4.ftcdn.net/jpg/00/97/58/97/360_F_97589769_t45CqXyzjz0KXwoBZT9PRaWGHRk5hQqQ.jpg'
r = requests.get(url, stream=True)
# Stop on an HTTP error here rather than letting PIL fail on an error page.
r.raise_for_status()
query_image = Image.open(r.raw)
h.printmd("#### A sample image")
query_image.resize((125,125))


In [None]:

# Figure 11.6: Image of a cat
# Embed the query image without gradient tracking, as get_vectors_from_batch does
# for the indexed images.
with torch.no_grad():
    query_embedding = model(h.preprocess(query_image).unsqueeze(0)).tolist()
response = index.query(query_embedding, top_k=4, include_metadata=True)
h.printmd(f"```python\n{response}\n```")

# visualize_images_with_ids is already defined in the first query cell; the
# identical redefinition that used to sit here was dropped.
visualize_images_with_ids(response, cifar10_dataset)


In [None]:
url = 'https://t3.ftcdn.net/jpg/00/20/13/60/240_F_20136083_gk0ppzak6UdK9PcDRgPdLjcuAdo7o1LK.jpg'
r = requests.get(url, stream=True)
# Stop on an HTTP error here rather than letting PIL fail on an error page.
r.raise_for_status()
query_image = Image.open(r.raw)
h.printmd("#### A sample image")
query_image.resize((125,125))


In [None]:
# Embed the query image without gradient tracking, as get_vectors_from_batch does
# for the indexed images.
with torch.no_grad():
    query_embedding = model(h.preprocess(query_image).unsqueeze(0)).tolist()
response = index.query(query_embedding, top_k=4, include_metadata=True)
h.printmd(f"```python\n{response}\n```")

# visualize_images_with_ids is already defined in the first query cell; the
# identical redefinition that used to sit here was dropped.
visualize_images_with_ids(response, cifar10_dataset)