In [1]:
import io

import numpy as np
import ee
import cv2

In [2]:
# get test data

PROJECT_ID='dgena-ee-training' #@param {type:"string"}

ee.Authenticate()
ee.Initialize(project=PROJECT_ID)

In [3]:
def get_image_patch(image, center, size, scale=None):
  # Make a projection to discover the scale in degrees.
  proj = image.projection().getInfo()

  # Get scales out of the transform.
  scale_x = proj['transform'][0]
  scale_y = -proj['transform'][4]

  if scale is not None:
    scale_x = scale
    scale_y = scale

  # Transform coordinates from lon/lat to local coordinate system
  center = ee.Geometry.Point(center)
  center = center.transform(proj['crs']).getInfo()

  # Define request for computePixels
  request = {
      'expression': image,
      'fileFormat': 'npy',
      'bandIds': ['R', 'G', 'B'],
      'grid': {
          'dimensions': {
              'width': size,
              'height': size
          },
          'affineTransform': {
              'scaleX': scale_x,
              'shearX': 0,
              'translateX': center['coordinates'][0],
              'shearY': 0,
              'scaleY': scale_y,
              'translateY': center['coordinates'][1]
          },
          'crsCode': proj['crs'],
      }
  }

  v = ee.data.computePixels(request)
  v = np.load(io.BytesIO(v))
  v = np.dstack([v['R'], v['G'], v['B']])

  return v

In [4]:
# initialize vision (image to text) model
import vertexai
from vertexai.preview.vision_models import Image, ImageTextModel

def print_caption(input_file):

  vertexai.init(project=PROJECT_ID, location="us-central1")

  MODEL_ID = "imagetext@001" # @param {type:"string"}
  model = ImageTextModel.from_pretrained("imagetext@001") # PaLI
  source_img = Image.load_from_file(location=input_file)

  captions = model.get_captions(
      image=source_img,
      # Optional parameters
      language="en",
      number_of_results=1,
  )

  print(captions)

In [68]:
# initialize a smarter model
from vertexai.generative_models import GenerativeModel, GenerationConfig

MODEL_ID = "gemini-2.0-flash-exp" # @param {type:"string"}
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel(MODEL_ID)

# Set model parameters
generation_config = GenerationConfig(
    temperature=0.2,
    top_p=1.0,
    top_k=32,
    candidate_count=1,
    max_output_tokens=8192,
)

def print_answer(image_file, prompt):
  contents = [image_file, prompt]
  response = model.generate_content(contents, generation_config=generation_config)

  print(response.text)

In [84]:
# https://code.earthengine.google.com/b12ec447360ff8300ddae52086d2d6da

coords = [-77.35704518094273, 34.70350144108098]

image = ee.ImageCollection("USDA/NAIP/DOQQ").filterDate('2022', '2024').filterBounds(ee.Geometry.Point(coords)).first().resample('bicubic')

size = 320

patch = get_image_patch(image, coords, size, 0.25)
patch = patch[::-1, :, :].copy()

cv2.imwrite('patch_test1.png', patch[:, :, ::-1])

patch

In [85]:
# good
print_caption('patch_test1.png')

['an aerial view of three tennis courts and a basketball court']


In [86]:
# bad
print_answer('patch_test1.png', 'What do you see in this aerial image?')

Okay, I've analyzed the aerial image you sent (patch_test1.png). Here's what I see:

**Overall Impression:**

The image appears to be an aerial view of a **primarily agricultural area**. It's a patchwork of fields with varying colors and textures, suggesting different crops or stages of growth.

**Specific Details:**

*   **Fields:** The dominant feature is the presence of numerous rectangular and irregularly shaped fields. These fields are the main focus of the image.
*   **Color Variation:** There's a noticeable variation in color across the fields. Some are a vibrant green, likely indicating healthy, growing crops. Others are lighter, possibly tan or brown, which could suggest harvested fields, fallow land, or different types of vegetation. There are also some darker patches which could be soil or different crops.
*   **Field Boundaries:** The fields are clearly delineated by lines, which could be roads, paths, or natural boundaries like hedgerows or drainage ditches.
*   **Texture:

In [87]:
coords = [7.8975600621706965, 47.308244883568]

image = ee.ImageCollection("Switzerland/SWISSIMAGE/orthos/10cm").filterBounds(ee.Geometry.Point(coords)).first()

size = 320

patch = get_image_patch(image, coords, size)
patch = patch[::-1, :, :].copy()

cv2.imwrite('patch_test2.png', patch[:, :, ::-1])

patch

In [88]:
print_caption('patch_test2.png')

['an aerial view of a house with a swimming pool']


In [89]:
print_answer('patch_test2.png', 'How many chimneys are on the roof of this building?')

Based on the image you sent, there are **two** chimneys visible on the roof of the building.


In [90]:
coords = [-77.35179341569157, 34.70511797290014]

image = ee.ImageCollection("USDA/NAIP/DOQQ").filterDate('2022', '2024').filterBounds(ee.Geometry.Point(coords)).first().resample('bicubic')

size = 128

patch = get_image_patch(image, coords, size, 0.25)
patch = patch[::-1, :, :].copy()

cv2.imwrite('patch_test3.png', patch[:, :, ::-1])

patch

In [91]:
print_caption('patch_test3.png')

['an aerial view of a house with a shadow on the ground']


In [97]:
# good - bad - good - bad, results seem to be unstable
print_answer('patch_test3.png', 'Does the tree in the image significantly overlaps with the roof of the building?')

Based on the image you sent, **patch_test3.png**, the tree does **not** significantly overlap with the roof of the building. 

While there are some branches that appear to be close to the roofline, they don't seem to be directly on top of or heavily intertwined with the roof. There's a clear separation between the tree's foliage and the building's roof.

Therefore, the answer is **no**, the tree does not significantly overlap with the roof.



In [98]:
coords = [-77.35079908301183, 34.70400309272154]

image = ee.ImageCollection("USDA/NAIP/DOQQ").filterDate('2022', '2024').filterBounds(ee.Geometry.Point(coords)).first().resample('bicubic')

size = 170

patch = get_image_patch(image, coords, size, 0.25)
patch = patch[::-1, :, :].copy()

cv2.imwrite('patch_test4.png', patch[:, :, ::-1])

patch

In [99]:
print_caption('patch_test4.png')

['an aerial view of a house with a tree in front of it']


In [100]:
print_answer('patch_test4.png', 'Does the tree in the image significantly overlaps with the roof of the building?')

Based on the image you sent, **yes, the tree does significantly overlap with the roof of the building.** 

The branches and foliage of the tree extend over a considerable portion of the roof, creating a clear overlap.

