First, let's ensure we have the proper dependencies installed with pip by running the following code.

In [None]:
pip install jigsawstack python-dotenv

Next, grab your API key and set it in `.env`. You can create a new API key at https://jigsawstack.com/dashboard

You can do this manually or use the following Python script to do it for you.

In [5]:
import getpass
import os

file_path = ".env"

# getpass hides what is typed, so the key is not echoed into the cell output
# (where a saved notebook could retain it). Whitespace picked up while
# copy/pasting the key is stripped.
api_key = getpass.getpass("Enter your API key here.").strip()
set_api_line = f"JIGSAWSTACK_API_KEY='{api_key}'\n"

# Preserve any unrelated settings already stored in .env; only a previous
# JIGSAWSTACK_API_KEY entry is dropped so the new one replaces it.
kept_lines = []
if os.path.exists(file_path):
    with open(file_path, encoding="utf-8") as file:
        kept_lines = [
            line
            for line in file
            if not line.lstrip().startswith("JIGSAWSTACK_API_KEY=")
        ]
    # Make sure the new entry starts on its own line.
    if kept_lines and not kept_lines[-1].endswith("\n"):
        kept_lines[-1] += "\n"

# Rewrite the file with the preserved lines followed by the new key.
with open(file_path, "w", encoding="utf-8") as file:
    file.writelines([*kept_lines, set_api_line])

We can now call the vOCR API to analyze some images and create some text identifiers to help us categorize these images.

In [None]:
import json
from jigsawstack import JigsawStack
from dotenv import load_dotenv

# Load the variables defined in .env before the client is created.
load_dotenv()

# No key is passed here: the client reads it from the environment (populated by dotenv).
jigsawstack = JigsawStack()

# Image that will be sent to vOCR
url = "https://images.unsplash.com/photo-1719129271869-7ea84f6335c0?w=900&auto=format&fit=crop&q=60&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxleHBsb3JlLWZlZWR8NHx8fGVufDB8fHx8fA%3D%3D"

# Request payload: the image location plus the instruction for the model.
vocr_request = {"url": url, "prompt": "Describe the image in detail"}
result = jigsawstack.vision.vocr(vocr_request)

# Pretty-print the response as indented JSON
print(json.dumps(result, indent=4))

We can store this data in a dictionary, using each descriptive tag as a key and a list of the URLs of the images that have that tag as the value.

In [None]:
# Maps each tag to the list of image URLs that were labelled with it.
tag_dict = {}

def populate_dict(response, url):
    """Record ``url`` under every tag listed in ``response["tags"]``.

    Args:
        response: Mapping with a ``"tags"`` entry holding an iterable of tags.
        url: URL of the image the tags belong to.

    The module-level ``tag_dict`` is updated in place. A URL is stored at
    most once per tag, and nothing is returned.
    """
    for label in response["tags"]:
        # Fetch the list for this tag, creating an empty one on first sight.
        urls_for_label = tag_dict.setdefault(label, [])
        # Skip URLs that are already listed for this tag.
        if url not in urls_for_label:
            urls_for_label.append(url)

# Index the image analyzed above, then display the tag -> URLs mapping so far.
populate_dict(response=result, url=url)
print(json.dumps(tag_dict, indent=4))


Now we'll turn this process into an easily callable function and repeat with a few more images to populate our database.

In [None]:
# Helper that combines the vOCR call and the tag bookkeeping in one step
def analyze_image(url, prompt="Describe the image in detail"):
    """Run vOCR on one image and index it under each tag in the response.

    Args:
        url: URL of the image to analyze.
        prompt: Instruction sent to vOCR together with the image. The default
            is the prompt used by the other cells of this notebook.

    Returns:
        None. The response is handed to ``populate_dict``, which records the
        URL under the response's tags.
    """
    result = jigsawstack.vision.vocr({"url": url, "prompt": prompt})
    populate_dict(result, url)

# Additional images to index, processed in the order listed.
more_image_urls = (
    "https://images.unsplash.com/photo-1719240286501-dfd3f531b919?w=900&auto=format&fit=crop&q=60&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxleHBsb3JlLWZlZWR8OHx8fGVufDB8fHx8fA%3D%3D",
    "https://images.unsplash.com/photo-1719307893825-f652cc5316e4?w=900&auto=format&fit=crop&q=60&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxleHBsb3JlLWZlZWR8MTJ8fHxlbnwwfHx8fHw%3D",
    "https://images.unsplash.com/photo-1719115131591-eddecb64ca8b?w=900&auto=format&fit=crop&q=60&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxleHBsb3JlLWZlZWR8MzF8fHxlbnwwfHx8fHw%3D",
    "https://images.unsplash.com/photo-1718965107564-daa12e777876?q=80&w=3087&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
    "https://images.unsplash.com/photo-1719115067961-439b3e5fcc78?q=80&w=2953&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
)

# A distinct loop name keeps the notebook-level `url` variable untouched.
for image_url in more_image_urls:
    analyze_image(image_url)

# Show the mapping now that every image has been indexed.
print(json.dumps(tag_dict, indent=4))

Now, we can use this data in a few ways. One cool way is to input a new image and return the top 5 images stored in our database, ranked by how many tags they share with it.

In [None]:
from collections import Counter

url = "https://images.unsplash.com/photo-1718963927757-6ebadd0600d6?w=900&auto=format&fit=crop&q=60&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxleHBsb3JlLWZlZWR8NjF8fHxlbnwwfHx8fHw%3D"

def find_matches(url, top_n=5):
    """Print the stored images that share the most tags with the image at ``url``.

    The image is sent to vOCR, and each tag in the response is looked up in
    the module-level ``tag_dict``. Every stored URL is scored by the number
    of distinct tags it has in common with the query image.

    Args:
        url: URL of the image to find matches for.
        top_n: Maximum number of matches to print, best first. Pass ``None``
            to print every match.

    Returns:
        None. The ranking is printed as indented JSON mapping each matching
        URL to its shared-tag count.
    """
    base_image = jigsawstack.vision.vocr({"url": url, "prompt": "Describe the image in detail"})

    matching_dict = Counter()
    # dict.fromkeys removes repeated tags while keeping their order, so a tag
    # listed twice in the response cannot inflate a score.
    for tag in dict.fromkeys(base_image["tags"]):
        # Tags that were never indexed contribute nothing.
        matching_dict.update(tag_dict.get(tag, ()))

    # Most shared tags first; the stable sort keeps ties in first-seen order.
    ranked = sorted(matching_dict.items(), key=lambda item: item[1], reverse=True)
    most_matches = dict(ranked[:top_n])
    print(json.dumps(most_matches, indent=4))

find_matches(url)

And there you have it! A sorted list of the images based on matching tags. This is only the beginning, basic sample