# Setup Notebook

In [1]:
#@markdown import libraries
import os
import sys
import json
import logging
from typing import List
import ipywidgets as widgets
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

import sqlalchemy as sqla
from sqlalchemy import create_engine
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base

# Setup Cloud Database

In [2]:
from google.colab import auth

auth.authenticate_user()

# Setup Google Cloud Project

In [3]:
#@markdown Please enter your GCP Project ID
project_id = "locust-early-warning" #@param {type:"string"}
assert project_id, "Please enter your Google Project ID to continue"
!gcloud config set project {project_id}

Updated property [core/project].


In [4]:
#@markdown Set IAM policy binding
user_account = !gcloud auth list --filter=status:ACTIVE --format="value(account)"
print("Active User Account: ", user_account[0])

!gcloud projects add-iam-policy-binding {project_id} \
  --member=user:{user_account[0]} \
  --role="roles/cloudsql.client"

Active User Account:  k.panford-quainoo@instadeep.com
bindings:
- members:
- members:
  - serviceAccount:service-835848499535@gcp-sa-aiplatform-cc.iam.gserviceaccount.com
  role: roles/aiplatform.customCodeServiceAgent
- members:
  - serviceAccount:service-835848499535@gcp-sa-aiplatform.iam.gserviceaccount.com
  role: roles/aiplatform.serviceAgent
- members:
  - user:a.pretorius@instadeep.com
  - user:ababiker@google.com
  - user:hectoramoah@google.com
  - user:i.yusuf@instadeep.com
  - user:jyh@google.com
  - user:m.moussa@instadeep.com
  - user:perrynelson@google.com
  - user:s.morjane@instadeep.com
  - user:t.tumiel@instadeep.com
  role: roles/aiplatform.tensorboardWebAppUser
- members:
  role: roles/aiplatform.user
- members:
  - serviceAccount:service-835848499535@gcp-sa-artifactregistry.iam.gserviceaccount.com
  role: roles/artifactregistry.serviceAgent
- members:
  - serviceAccount:835848499535@cloudbuild.gserviceaccount.com
  role: roles/cloudbuild.builds.builder
- members:
  -

In [5]:
#@title Connect to Cloud SQL Instance

#@markdown Please fill in the both the Google Cloud region and name of your Cloud SQL instance. Once filled in, run the cell.

# Please fill in these values.
region = "us-central1" #@param {type:"string"}
instance_name = "my-test-db" #@param {type:"string"}

assert region, "Please enter a Google Cloud region"
assert instance_name, "Please enter the name of your instance"

# check if Cloud SQL instance exists in the provided region
database_version = !gcloud sql instances describe {instance_name} --format="value(databaseVersion)"
if database_version[0].startswith("POSTGRES"):
  print("Existing Postgres Cloud SQL Instance found!")
else:
  print("No existing Cloud SQL instance found!")


Existing Postgres Cloud SQL Instance found!


In [6]:
instance_connection_name = f"{project_id}:{region}:{instance_name}"
print("Instance Connection Name: ", instance_connection_name)



# Connect to Database

In [7]:
#@title Install Cloud SQL connector
!{sys.executable} -m pip install -q cloud-sql-python-connector["pg8000"]

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/189.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━[0m [32m184.3/189.2 kB[0m [31m6.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m189.2/189.2 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.2/54.2 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 kB[0m [31m11.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires google-auth==2.27.0, but you have google-auth 2.29.0 which is incompatible.[0m[31m
[0m

In [8]:
#@markdown Database Manager
import logging
from sqlalchemy import create_engine, exc, inspect
from sqlalchemy.orm import sessionmaker, scoped_session
from google.cloud.sql.connector import Connector

class ExternalDatabaseManager:
  def __init__(self, username: str, password: str, db_name: str, instance_connection_name: str):
    self.username = username
    self.password = password
    self.db_name = db_name
    self.instance_connection_name = instance_connection_name
    self._engine = None
    self._session = None

  def setup(self):
    try:
      connector = Connector()
      conn = connector.connect(
          self.instance_connection_name,
          "pg8000",
          user=self.username,
          password=self.password,
          db=self.db_name
      )
      self._engine = create_engine("postgresql+pg8000://", creator=lambda: conn)
      Session = sessionmaker(bind=self._engine)
      self._session = scoped_session(Session)
      logging.info("Database Manager Initialized")
    except Exception as e:
      logging.error(f"Error setting up database: {e}")

  def get_session(self):
    return self._session

  def list_tables(self):
    inspector = inspect(self._engine)
    tables = inspector.get_table_names()
    return tables

  def load_table_by_tablename(self, tablename: str):
    Base = automap_base()
    Base.prepare(autoload_with=self._engine)
    return Base.classes.get(tablename)

  # def get_project_configs(self):
  #   Base = automap_base()
  #   Base.prepare(self._engine, reflect=True)
  #   project_configurations = Base.classes.ProjectConfigurations
  #   return project_configurations

In [12]:
#@markdown Enter name of Cloud SQL database to connect to
database_name = "ann-otate" #@param {type: "string"}
assert database_name, "Please enter the name of the database to connect to"

#@markdown Enter username to access Cloud SQL Database
username = "postgres" #@param {type: "string"}
assert username, "Please enter a valid username"

#@markdown Enter password to access Cloud SQL Database (leave empty to be prompted when the cell runs)
password = "" #@param {type:"string"}
if not password:
  # Prompt at run time so the password does not have to be saved in the notebook.
  from getpass import getpass
  password = getpass("Cloud SQL password: ")

auth_configs = {'username': username,
                'password': password,
                'db_name': database_name,
                'instance_connection_name': instance_connection_name
                }

extern_database_manager = ExternalDatabaseManager(**auth_configs)
extern_database_manager.setup()
extern_session = extern_database_manager.get_session()
# setup() only logs connection errors, so stop here instead of failing in a later cell.
assert extern_session is not None, "Could not connect to the Cloud SQL database; check the logged error"

# Setup Project Configs

In [13]:
extern_database_manager.list_tables()

['project_configurations',
 'examples',
 'annotations',
 'annotators',
 'assigned_annotators']

In [14]:
# Reflect the configuration table and read its first row.
project_configs_table = extern_database_manager.load_table_by_tablename('project_configurations')
assert project_configs_table is not None, "Table 'project_configurations' was not found in the database"
configs = extern_session.query(project_configs_table).first()
# The next cell reads attributes from `configs`, so an empty table must be caught here.
assert configs is not None, "Table 'project_configurations' contains no rows"

In [15]:
project_title = configs.project_title
completion_deadline = configs.completion_deadline
bucket_name = configs.cloud_bucket_name
bucket_prefix = configs.cloud_bucket_prefix
comma_separated_labels = configs.comma_separated_labels
max_annotators_per_example = configs.max_annotation_per_example
creation_date = configs.created_at

print(f'Global Project Configurations\n{"*" * 26} \n' +
      f'Project Title = {project_title!r}\n' +
      f'Cloud Bucket Name = {bucket_name!r}\n' +
      f'Cloud Bucket Prefix = {bucket_prefix!r}\n' +
      f'Label Options = {comma_separated_labels!r}\n' +
      f'Max Annotation Per Example = {max_annotators_per_example!r}\n' +
      f'Expected Completion Deadline = {completion_deadline!r}\n' +
      f'Project Creation Date = {creation_date!r}')

Global Project Configurations
************************** 
Project Title = 'SKAI Data Annotation'
Cloud Bucket Name = 'skai-project'
Cloud Bucket Prefix = 'skai-source_storage'
Label Options = 'no_damage, destroyed, major_damage, minor_damage, None of the above'
Max Annotation Per Example = 3
Expected Completion Deadline = datetime.datetime(2024, 4, 30, 0, 0)
Project Creation Date = datetime.datetime(2024, 4, 15, 14, 26, 8, 745105)


In [16]:
def preprocess_labels(labels_string: str):
  """Turn a comma-separated string into a list of label names.

  Each piece has its leading and trailing space characters removed; pieces
  that are empty after that are left out. The original order is kept.
  """
  trimmed = (piece.strip(" ") for piece in labels_string.split(","))
  return [piece for piece in trimmed if piece]

config_project_labels = preprocess_labels(comma_separated_labels)

In [17]:
#@title Set Local Project Configs
#@markdown Enter local storage path
local_images_path = "images" #@param {type: "string"}
assert local_images_path, "Please enter a pathname where images to be labeled are stored"

# Retrieve Assigned Tasks

In [18]:
extern_examples_table = extern_database_manager.load_table_by_tablename('examples')
examples = extern_session.query(extern_examples_table).all()

In [19]:
my_assigned_examples = examples[:10]
len(my_assigned_examples)

10

# Internal Database

In [None]:
#@markdown
from sqlalchemy import Column
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy import ForeignKey
from sqlalchemy.orm import declarative_base

# Define Base class
SQLiteBase = declarative_base()

class Example(SQLiteBase):
  """ORM model for one row of the local 'examples_table' table."""
  __tablename__ = 'examples_table'

  # Auto-incrementing primary key of the local table.
  id = Column(Integer, primary_key=True, autoincrement=True)
  # Example identifier; required and unique.
  example_id = Column(String(255), nullable=False, unique=True)
  # Image path as passed in when the row is created; required.
  image_path = Column(String(255), nullable=False)
  # File name of the image; required.
  image_filename = Column(String(255), nullable=False)
  # Label assigned to the example; NULL until one is set.
  label = Column(String(255))

  def __repr__(self) -> str:
    """Return a debug representation listing every field except `id`."""
    return (f'Example(example_id={self.example_id!r},' +
            f'image_path={self.image_path!r},' +
            f'image_filename={self.image_filename!r},' +
            f'label={self.label!r})'
            )

# Internal Database Manager


In [23]:
#@markdown
from typing import List
import sqlalchemy as sqla
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.orm import declarative_base

class InternalDatabaseManager:
  """Owns the local SQLite database: engine, schema creation and session."""

  def __init__(self, db_path: str, **kwargs) -> None:
    """Store the settings; nothing is opened until `setup()` is called.

    Args:
      db_path: Path of the SQLite database file.
      **kwargs: Extra keyword arguments forwarded to `sqlalchemy.create_engine`.
    """
    self.db_path = db_path
    self.kwargs = kwargs
    self._engine = None
    self._session = None

  def setup(self) -> None:
    """Initialize the database.

    Creates the engine, creates every table declared on `SQLiteBase` that does
    not exist yet, and builds the scoped session.
    """
    self._engine = sqla.create_engine(f'sqlite:///{self.db_path}', **self.kwargs)
    # The engine is passed explicitly everywhere; `MetaData.bind` was removed
    # in SQLAlchemy 2.0, so it is not set.
    SQLiteBase.metadata.create_all(self._engine)

    session_factory = sessionmaker(autocommit=False, autoflush=False, bind=self._engine)
    self._session = scoped_session(session_factory)

    # Lets models be queried as `Model.query` through the scoped session.
    SQLiteBase.query = self._session.query_property()
    print("Database Manager Initialized")

  def status(self) -> None:
    """Check Database Status.

    Raises:
      ValueError: If `setup()` has not been called.
    """
    if self._engine is None:
        raise ValueError("Database Manager not initialized.")

  def drop_db(self) -> None:
    """Drop every table declared on `SQLiteBase`.

    Raises:
      ValueError: If `setup()` has not been called.
    """
    self.status()
    SQLiteBase.metadata.drop_all(self._engine)

  def get_session(self) -> "scoped_session":
    """Get the scoped database session, or None before `setup()`."""
    return self._session

# Internal Task Manager

In [44]:
#@markdown
from google.cloud import storage
class InternalTaskManager:
  """Keeps the annotator's assigned examples in the local database.

  Downloads the assigned images from Cloud Storage, records them as `Example`
  rows and reads or updates their labels.
  """

  def __init__(self, assigned_examples, db_configs):
    """Store the inputs; call `init()` before using the database methods.

    Args:
      assigned_examples: Example records assigned to this annotator; each one
        is read through its `example_id` and `image_path` attributes.
      db_configs: Keyword arguments for `InternalDatabaseManager`.
    """
    self.assigned_examples = assigned_examples
    self.db_configs = db_configs
    self._database_manager = None
    self.session = None

  def init(self):
    """Create and set up the internal database manager."""
    self._database_manager = InternalDatabaseManager(**self.db_configs)
    self._database_manager.setup()

  def get_session(self):
    """Return the session of the internal database.

    Raises:
      ValueError: If `init()` has not been called.
    """
    if self._database_manager is None:
      raise ValueError("Task Manager not initialized; call init() first.")
    return self._database_manager.get_session()

  def get_remote_image_paths(self):
    """Return the `image_path` of every assigned example."""
    return [example.image_path for example in self.assigned_examples]

  def download_assigned_images(
    self,
    bucket_name: str,
    bucket_prefix: str,
    output_path: str
  ) -> None:
    """Download the image of every assigned example and register it locally.

    An example whose download fails is reported and skipped, so the local
    database only lists images that were fetched.

    Args:
      bucket_name: Name of the Cloud Storage bucket.
      bucket_prefix: Prefix under which the images are stored in the bucket.
      output_path: Local directory for the downloads; created if missing.
    """
    os.makedirs(output_path, exist_ok=True)

    client = storage.Client()
    bucket = client.bucket(bucket_name)

    for example in self.assigned_examples:
      image_filename = example.image_path.split('/')[-1]
      # Build the object name with '/' explicitly rather than with the local
      # OS path separator.
      if bucket_prefix:
        blob_name = f"{bucket_prefix.rstrip('/')}/{image_filename}"
      else:
        blob_name = image_filename
      blob = bucket.blob(blob_name)
      image_filepath = os.path.join(output_path, image_filename)

      try:
        blob.download_to_filename(image_filepath)
      except Exception as e:
        print(f"Error downloading image: {e}")
        continue

      example.image_filename = image_filename
      self.add_example_to_database(example_id=example.example_id,
                    image_path=example.image_path,
                    image_filename=image_filename
                    )

  def get_all_examples(self) -> List[Example]:
    """Get all examples from the database."""
    session = self.get_session()
    return session.query(Example).all()

  def get_example_by_id(self, id: str):
    """Get an example by its `example_id`, or None if there is none."""
    session = self.get_session()
    # `example_id` is a string column, so compare against the string form.
    return session.query(Example).filter(Example.example_id == str(id)).scalar()

  def add_example_to_database(self, example_id, image_path, image_filename, label=None) -> None:
    """Add an example to the database unless its ID is already present.

    Args:
      example_id: Identifier of the example; stored as a string.
      image_path: Image path to store with the example.
      image_filename: File name of the image.
      label: Optional initial label; a falsy value leaves the label unset.
    """
    session = self.get_session()
    # Use the same string form for the lookup and for the stored value.
    example_id = str(example_id)

    if session.query(Example).filter_by(example_id=example_id).first() is not None:
      print(f"Example with ID {example_id} already exists.")
      return

    example = Example(example_id=example_id, image_path=image_path, image_filename=image_filename)
    if label:
      example.label = label
    session.add(example)
    session.commit()

  def update_label(self, image_filename, new_label) -> None:
    """Set the label of the example stored under `image_filename`.

    Logs an error and changes nothing when no such example exists.
    """
    session = self.get_session()
    example = session.query(Example).filter(Example.image_filename == image_filename).first()
    if example:
      example.label = new_label
      session.commit()
      logging.info(f"Label updated for Image {image_filename}")
    else:
      logging.error(f"No example found with name {image_filename}")

  def get_label(self, image_filename):
    """Return the label stored for `image_filename`.

    Returns None when the example has no label or no example has that name.
    """
    session = self.get_session()
    example = session.query(Example).filter(Example.image_filename == image_filename).first()
    return example.label if example is not None else None

  def update_example(self, example: Example) -> None:
    """Add `example` to the session and commit it."""
    session = self.get_session()
    session.add(example)
    session.commit()

  def delete_example(self, example: Example) -> None:
    """Delete `example` from the database and commit."""
    session = self.get_session()
    session.delete(example)
    session.commit()

In [45]:
db_configs = {'db_path': 'sqlite.db'}
intern_task_manager = InternalTaskManager(my_assigned_examples, db_configs)
intern_task_manager.init()

Database Manager Initialized


In [40]:
intern_task_manager.download_assigned_images(bucket_name, bucket_prefix, local_images_path)

Example with ID 4ec2f03040470970a07931e1e0167225 already exists.
Example with ID dce01ef4824ed6098cd2936e872d4297 already exists.
Example with ID 6e4a1da94c1e7c72315b437606fa77e1 already exists.
Example with ID c33377fe1c83dd9ddf2dd9cf9f83c481 already exists.
Example with ID 6bedaea56bf9ffd4aff3783f084ea2f8 already exists.
Example with ID 3f2799b9a90acb00ab130946898d157b already exists.
Example with ID 6eb7d3e7140621c19c81ff6ce81c8d07 already exists.
Example with ID d8691e96ae7d0a6b1d09ba533a3857a8 already exists.
Example with ID 1bbad5bae6241cf6846923c022d1f3a7 already exists.
Example with ID e41693b91f189f22cfaf3a1466faf166 already exists.


In [41]:
intern_task_manager.get_all_examples()

[Example(example_id='4ec2f03040470970a07931e1e0167225',image_path='gs://skai-project/skai-source_storage/4ec2f03040470970a07931e1e0167225.png',image_name='4ec2f03040470970a07931e1e0167225.png',label=None,
 Example(example_id='dce01ef4824ed6098cd2936e872d4297',image_path='gs://skai-project/skai-source_storage/dce01ef4824ed6098cd2936e872d4297.png',image_name='dce01ef4824ed6098cd2936e872d4297.png',label=None,
 Example(example_id='6e4a1da94c1e7c72315b437606fa77e1',image_path='gs://skai-project/skai-source_storage/6e4a1da94c1e7c72315b437606fa77e1.png',image_name='6e4a1da94c1e7c72315b437606fa77e1.png',label=None,
 Example(example_id='c33377fe1c83dd9ddf2dd9cf9f83c481',image_path='gs://skai-project/skai-source_storage/c33377fe1c83dd9ddf2dd9cf9f83c481.png',image_name='c33377fe1c83dd9ddf2dd9cf9f83c481.png',label=None,
 Example(example_id='6bedaea56bf9ffd4aff3783f084ea2f8',image_path='gs://skai-project/skai-source_storage/6bedaea56bf9ffd4aff3783f084ea2f8.png',image_name='6bedaea56bf9ffd4aff3783f0

In [42]:
intern_task_manager.get_remote_image_paths()

['gs://skai-project/skai-source_storage/4ec2f03040470970a07931e1e0167225.png',
 'gs://skai-project/skai-source_storage/dce01ef4824ed6098cd2936e872d4297.png',
 'gs://skai-project/skai-source_storage/6e4a1da94c1e7c72315b437606fa77e1.png',
 'gs://skai-project/skai-source_storage/c33377fe1c83dd9ddf2dd9cf9f83c481.png',
 'gs://skai-project/skai-source_storage/6bedaea56bf9ffd4aff3783f084ea2f8.png',
 'gs://skai-project/skai-source_storage/3f2799b9a90acb00ab130946898d157b.png',
 'gs://skai-project/skai-source_storage/6eb7d3e7140621c19c81ff6ce81c8d07.png',
 'gs://skai-project/skai-source_storage/d8691e96ae7d0a6b1d09ba533a3857a8.png',
 'gs://skai-project/skai-source_storage/1bbad5bae6241cf6846923c022d1f3a7.png',
 'gs://skai-project/skai-source_storage/e41693b91f189f22cfaf3a1466faf166.png']

In [46]:
intern_task_manager.get_label('4ec2f03040470970a07931e1e0167225.png')

# Annotations Display UI

In [47]:
#@markdown
from collections import defaultdict
from ipywidgets import Layout
from ipywidgets import IntSlider
from ipywidgets import FloatSlider
from ipywidgets import Button
from ipywidgets import RadioButtons
from ipywidgets import VBox
from ipywidgets import HBox
from ipywidgets import interact
import matplotlib.pyplot as plt
from PIL import Image

def update_label(selected_label):
  """Observer for the label radio buttons.

  Stores the newly selected value as the label of the image currently shown,
  unless that value equals `default_image_label`.
  """
  chosen_label = selected_label['new']
  if chosen_label == default_image_label:
    return
  current_image = images[image_index.value]
  intern_task_manager.update_label(
      image_filename=current_image.split('/')[-1],
      new_label=chosen_label,
  )

def load_images(local_images_path):
    """Return the paths of the files in `local_images_path`, sorted by name.

    Sub-directories (for example `.ipynb_checkpoints`) are skipped. Sorting
    keeps the index-to-image mapping stable between runs. Paths are joined
    with '/' because the callers split on '/' to recover the file name.
    """
    return [
        f'{local_images_path}/{name}'
        for name in sorted(os.listdir(local_images_path))
        if os.path.isfile(os.path.join(local_images_path, name))
    ]

def display_image(image_index, size):
    """Draw the image at position `image_index` in a square figure.

    Args:
        image_index: Position of the image in the `images` list.
        size: Width and height passed to matplotlib's `figsize`.
    """
    image_path = images[image_index]
    plt.figure(figsize=(size, size))
    image = Image.open(image_path)
    plt.imshow(image)
    plt.axis('off')
    plt.show()

def get_current_label(index):
  """Select in the radio buttons the label stored for the image at `index`."""
  image_filename = images[index].split('/')[-1]
  stored_label = intern_task_manager.get_label(image_filename)
  # A value that is not one of the options cannot be selected, so fall back
  # to "no selection" in that case.
  label_radio_buttons.value = stored_label if stored_label in label_radio_buttons.options else None

def on_image_index_change(change):
  """Keep the radio buttons in sync whenever the displayed image changes."""
  get_current_label(change['new'])

def on_next_button_click(b):
    """Move to the next image unless the last one is already shown."""
    if image_index.value < len(images) - 1:
        image_index.value += 1

def on_previous_button_click(b):
    """Move to the previous image unless the first one is already shown."""
    if image_index.value > 0:
        image_index.value -= 1

# Define default values and widgets
default_image_label = 'unlabeled'
example_id_to_label = defaultdict(lambda: default_image_label)
# `local_images_path` comes from the "Set Local Project Configs" cell.
images = load_images(local_images_path)
assert images, f"No images found in {local_images_path!r}; download the assigned images first"
image_index = IntSlider(min=0, max=len(images)-1, step=1, value=0)
size_slider = FloatSlider(value=10.0, min=1.0, max=20.0, step=0.1, description='Image Size:', continuous_update=False)
next_button = Button(description="Next")
previous_button = Button(description="Previous")
label_radio_buttons = RadioButtons(value=None, options=config_project_labels, layout={'width': 'max-content'}, description='String Label: \n', disabled=False)
export_annotations = Button(description='Export All Annotations', disabled=False, button_style='success', tooltip='Description', icon='check', layout=Layout(flex_flow='column'))

# Show the label already stored for the first image. This runs before the
# label observer is attached so that it does not write anything back.
get_current_label(image_index.value)

# Define widget interactions
next_button.on_click(on_next_button_click)
previous_button.on_click(on_previous_button_click)
# Observing the slider covers the buttons as well as direct slider moves.
image_index.observe(on_image_index_change, names='value')
label_radio_buttons.observe(update_label, names='value')

# Display UI
interact(display_image, image_index=image_index, size=size_slider)
navigation_buttons = HBox([previous_button, next_button])
ui_elements = HBox([label_radio_buttons, VBox([navigation_buttons])])
display(VBox([ui_elements, export_annotations]))


interactive(children=(IntSlider(value=0, description='image_index', max=9), FloatSlider(value=10.0, continuous…

VBox(children=(HBox(children=(RadioButtons(description='String Label: \n', layout=Layout(width='max-content'),…

In [None]:
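# TODO: Retrieve the examples actually assigned to this annotator instead of the first ten.
# TODO: Implement the export of annotations (the "Export All Annotations" button has no handler yet).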