In [1]:
%pip install -r requirements.txt
# Restart the Python process after the install; pip's own output notes that a
# restart may be needed before updated packages can be used.
dbutils.library.restartPython()

Collecting git+https://github.com/BlitzBricksterYY-db/ultralytics.git@main (from -r requirements.txt (line 3))
  Cloning https://github.com/BlitzBricksterYY-db/ultralytics.git (to revision main) to /private/var/folders/mc/phm326xs6zq_qh7j4ffqc72m0000gp/T/pip-req-build-eq3k_7x4
  Running command git clone --filter=blob:none --quiet https://github.com/BlitzBricksterYY-db/ultralytics.git /private/var/folders/mc/phm326xs6zq_qh7j4ffqc72m0000gp/T/pip-req-build-eq3k_7x4
  Resolved https://github.com/BlitzBricksterYY-db/ultralytics.git to commit e53def376017f77fb6704ba194b4b2ea034988d4
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Note: you may need to restart the kernel to use updated packages.


In [12]:
import yaml 
import os 

In [13]:
# Parse the workspace settings from ws_config.yaml (path is relative to the
# current working directory).
with open("ws_config.yaml") as file:
    config = yaml.safe_load(file)

# Pull out the two top-level sections used by the cells below.
env_vars = config["environment_variables"]
unity_catalog_vars = config["unity_catalog"]

In [14]:
# Set ENV vars using yaml config.
# os.environ only accepts str values, while YAML parses unquoted scalars (for
# example `1`, or an all-digit workspace id) as int. Coerce with str() so the
# assignment cannot fail with "TypeError: str expected, not int"; values that
# are already strings pass through unchanged.
os.environ["CUDA_LAUNCH_BLOCKING"] = str(env_vars['CUDA_LAUNCH_BLOCKING'])
os.environ['DATABRICKS_HOST'] = str(env_vars['DATABRICKS_HOST'])
os.environ['DATABRICKS_WORKSPACE_ID'] = str(env_vars['DATABRICKS_WORKSPACE_ID'])

# Get UC vars from config
catalog_name = unity_catalog_vars['catalog']
schema_name = unity_catalog_vars['schema']
volume_name = unity_catalog_vars['volume']

# Get Databricks secrets information: the config stores only the secret's
# scope and key, never the token value itself.
databricks_token_scope = env_vars['DATABRICKS_TOKEN']['scope']
databricks_token_key = env_vars['DATABRICKS_TOKEN']['key']

# Set Databricks token using Databricks secrets.
# The token is kept both in the environment and in `db_token`; do not print or
# display either, so the secret never ends up in saved cell output.
os.environ['DATABRICKS_TOKEN'] = db_token = dbutils.secrets.get(scope=databricks_token_scope, key=databricks_token_key)

In [15]:
# Make sure the catalog, schema and volume named in the config exist.

try:
    spark.sql(f"CREATE CATALOG IF NOT EXISTS {catalog_name}")
except Exception as e:
    # Only an error whose message mentions "quota" is tolerated (reported and
    # skipped); every other failure is re-raised untouched.
    if "quota" not in str(e).lower():
        raise
    print("Quota limit reached for catalog creation.")
    print(f"Full error: {str(e)} \n")

# A catalog quota error does not stop the schema and volume from being created.
# Failures in the statements below are not handled here and simply propagate.
schema_fqn = f"{catalog_name}.{schema_name}"
volume_fqn = f"{schema_fqn}.{volume_name}"

spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema_fqn}")
spark.sql(f"CREATE VOLUME IF NOT EXISTS {volume_fqn}")
print(f"Successfully created schema: {schema_name}, and volume: {volume_name} in catalog: {catalog_name}")

Successfully created schema: computer_vision, and volume: yolo_data in catalog: will_smith


In [20]:
# Use dbutils.fs.mkdirs instead of os.makedirs to create the directories
# due to limitations with Databricks Connect
# otherwise may receive: PermissionError: [Errno 13] Permission denied

# Root of the project inside the Unity Catalog volume (ends with "/").
project_path = f"/Volumes/{catalog_name}/{schema_name}/{volume_name}/"

# Project sub-directories. project_path already ends with "/", so names are
# appended directly to avoid a doubled "//". The previous training_path also
# carried a stray "'" inside the string, which made mkdirs create a directory
# literally named "'" under training_runs/.
training_path = f"{project_path}training_runs/"
result_path = f"{project_path}training_results/"
data_path = f"{project_path}data/"
raw_model_path = f"{project_path}raw_model/"

# for cache related to ultralytics
os.environ['ULTRALYTICS_CACHE_DIR'] = raw_model_path

for directory in (training_path, result_path, data_path, raw_model_path):
    dbutils.fs.mkdirs(directory)

# Last expression of the cell: list the project root to confirm the layout.
dbutils.fs.ls(project_path)

[FileInfo(path='/Volumes/will_smith/computer_vision/yolo_data/data/', name='', size=None, modificationTime=None),
 FileInfo(path='/Volumes/will_smith/computer_vision/yolo_data/raw_model/', name='', size=None, modificationTime=None),
 FileInfo(path='/Volumes/will_smith/computer_vision/yolo_data/training_results/', name='', size=None, modificationTime=None),
 FileInfo(path='/Volumes/will_smith/computer_vision/yolo_data/training_runs/', name='', size=None, modificationTime=None)]

### Optional DBFS-based code:

##### More "traditional" way: set up a folder under DBFS.
##### Alternative location: `dbfs_project_location = '/dbfs/FileStore/cv_project_location/yolo/'`
```python
dbfs_project_location = '/dbfs/tmp/cv_project_location/yolo/'
os.makedirs(dbfs_project_location, exist_ok=True)
```

##### Ephemeral `/tmp/` project location on the VM
```python
tmp_project_location = "/tmp/training_results/"
os.makedirs(tmp_project_location, exist_ok=True)
```