# Structured extraction over NFL games (and upload to airtable)

In this tutorial, we'll build a flow for 1) extracting info from screenshots of NFL games and 2) uploading that info to a database we will create on Airtable.

In [14]:
# Install dependencies if needed
!pip install requests Pillow torch-nos



In [3]:
# Set up VLM-1 API.

import os
import requests


VLM_BASE_URL = "https://vlm-dev.nos.run/v1"
response = requests.get(f"{VLM_BASE_URL}/health")
response.raise_for_status()
assert response.status_code == 200

VLM_API_KEY = os.getenv("VLM_API_KEY", None)
if VLM_API_KEY is None:
    VLM_API_KEY = input()
print(f"Using API key: {VLM_API_KEY[:4]}...")

headers = {
    "Content-Type": "application/json",
    "X-API-Key": VLM_API_KEY,
}
response = requests.get(f"{VLM_BASE_URL}/models", headers=headers)
response.raise_for_status()

Using API key: sk-t...


In [4]:
# %load ../vlm_tools/image.py
import json
from base64 import b64encode
from io import BytesIO
from typing import Literal, Union

import requests
from PIL import Image


def pprint(data):
    print(json.dumps(data, indent=2))


def encode_image(image: Image.Image, format: Literal["PNG", "JPEG"] = "PNG") -> Union[str, bytes]:
    """Convert an image to a base64 string."""
    buffered = BytesIO()
    image_format = image.format or format
    image.save(buffered, format=image_format)
    img_str = b64encode(buffered.getvalue()).decode()
    return f"data:image/{image_format.lower()};base64,{img_str}"


def download_image(url: str) -> Image.Image:
    """Download an image from a URL."""
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:106.0) Gecko/20100101 Firefox/106.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
    }
    bytes = BytesIO(requests.get(url, headers=headers).content)
    bytes.seek(0)
    return Image.open(bytes).convert("RGB")


In [5]:
from pathlib import Path
from typing import Union

from PIL import Image
from IPython.display import display


def vlm(image: Image.Image, domain: str):
    """Send an image to the VLM API."""
    data = {
        "model": "vlm-1",
        "domain": domain,
        "image": encode_image(image),
    }
    response = requests.post(f"{VLM_BASE_URL}/generate", headers=headers, json=data)
    response.raise_for_status()
    return response.json()

In [10]:
# Init our airtable API
import os
from pyairtable import Api

airtable_app_id = os.getenv("AIRTABLE_APP_ID")
airtable_table_name = os.getenv("AIRTABLE_TABLE_NAME")
airtable_auth = os.getenv("AIRTABLE_AUTH_KEY")
print('init airtable with auth key: ', airtable_auth)
airtable_api = Api(api_key=airtable_auth)

# check that we have a base app
base = airtable_api.bases()[0]
print(base.tables()[0].all())

def update_airtable(reponse, airtable_api, table_name):
    # Create the table if it doesn't already exist
    base = airtable_api.bases()[0]
    fields = [] # The schema for the airtable, these are the field names and descriptions that we will populate.
    new_entry = {}
    # Due to limitations in Airtable, we currently only support string entries, so we'll skip recursive fields in this schema
    # and convert any ints to strs.
    for field in reponse.keys():
        if not isinstance(response[field], dict) and not isinstance(response[field], list) and response[field] is not None:
            fields.append({"description": f"Description for {field}", "name": field, "type": "singleLineText"})
            new_entry[field] = str(response[field])
        else:
            print(f"Skipping field {field} because it is a dict or list")

    try:
        print('creating table with fields: ', fields)
        table = base.create_table(table_name, fields)
    except Exception:
        table = base.table(table_name)
        schema_fields= table.schema().fields
        # only update fields that already exist in the table if we didn't just create it:
        table_fields = [field.name for field in schema_fields]
        new_entry = {k: v for k, v in new_entry.items() if k in table_fields}

    print(new_entry)
    table.create(new_entry)



init airtable with auth key:  pat0X1VzA8RSsCf9h.679e5ecd9da6ca6d76e2b97bf112b53b39f98ab9b1de19d9fbf74a766155c135
base: <Base id='app6mbowPIs3A75BO' name='NFL Play Breakdowns' permission_level='create'>
[{'id': 'recLsHbpBl1CKLy0H', 'createdTime': '2024-03-12T06:45:09.000Z', 'fields': {'record_id': '4', 'image': [{'id': 'attH1K7JpgdomYHxj', 'width': 420, 'height': 236, 'url': 'https://v5.airtableusercontent.com/v3/u/27/27/1711749600000/111zpGqblIUwjoy8le1PpA/Jmv8UeW_vf-8k2oLM3IqmN76Aa69VekhFe8wx46hMdHQhGxt_1nma2ylUg0Dm3yX2Yp2NNOmW5MLtAVhGbnh2qc-BnPvxTOfqEDJV-UegjW9RKe0sIs_dTHesPqyyhGw9UrB2RQB_vNfKRr_-R1LSQ/TZ1tQOtTr9DhDtRQxnfOnRnXNnDPRCh_HnJpkZO67_w', 'filename': 'sf_kc.webp', 'size': 16550, 'type': 'image/webp', 'thumbnails': {'small': {'url': 'https://v5.airtableusercontent.com/v3/u/27/27/1711749600000/odbAfoSuLpjPcyQxn-cB8g/hlvv3CZ-8TPXu3mOzJkXfjLKqUB1i8_y36x9ygiocp3deRFjy4e-8jKL0U0SIqSxAaFSWEpBNi_ty-ejJDO7fIlPz7XjqblSitJlDIWxe2iXgdWd0gPjgpq6VQquTuA2blfSG3R7-l6eS9EkbfQc3g/wywos26um9QA

In [11]:
# Process and visualize a few urls:
URLS = [
    "https://static.www.nfl.com/image/private/t_editorial_landscape_12_desktop/league/osfac1nvcxkjdfa5yhtn",
    "https://i.insider.com/5e24c8ba62fa813a7d022fe3?width=1200&format=jpeg",
    "https://pbs.twimg.com/amplify_video_thumb/1485009987269128195/img/JIIA0Cea5o1LpkHZ.jpg:large",
    "https://static.www.nfl.com/image/private/t_editorial_landscape_12_desktop/league/d33yyhuped2bmobifqoy",
    "https://static.www.nfl.com/image/upload/t_editorial_landscape_12_desktop/league/hid31t8byzxfpz0vtak4",
    "https://cdn-images-1.medium.com/max/800/1*eluBF0MOTAqNCSc4vCEXmQ.png",
    "https://cdn.geekwire.com/wp-content/uploads/2017/12/Screen-Shot-2017-09-28-at-5.50.14-PM-1260x700-630x350.png",
    "https://pbs.twimg.com/amplify_video_thumb/1738699907484065792/img/s-pzq7wLBByXvsuX.jpg",
]

frames = (download_image(_url) for _url in URLS)

for frame in frames:
    response = vlm(frame, domain="sports.nfl")
    pprint(response)
    print(response.keys())
    update_airtable(response, airtable_api, 'nfl_frames_annotated')

{
  "cls_name": "SportsNFLGameState",
  "description": "The Dallas Cowboys are playing against the Washington Redskins. The Cowboys are leading with a score of 17 to 13. It is the 3rd quarter with 2 minutes and 27 seconds remaining on the clock. The Redskins have the ball on the 2nd down with 9 yards to go.",
  "teams": [
    {
      "name": "Washington Redskins",
      "score": 13
    },
    {
      "name": "Dallas Cowboys",
      "score": 17
    }
  ],
  "status": "in_progress",
  "quarter": 3,
  "clock_time": "2:27",
  "possession_team": "Washington Redskins",
  "down": "2nd",
  "distance": 9,
  "yard_line": null,
  "network": null,
  "is_shown": true
}
dict_keys(['cls_name', 'description', 'teams', 'status', 'quarter', 'clock_time', 'possession_team', 'down', 'distance', 'yard_line', 'network', 'is_shown'])
field type:  <class 'str'> field:  cls_name
set field to string:  SportsNFLGameState
field type:  <class 'str'> field:  description
set field to string:  The Dallas Cowboys are 