# S3 📂

In [19]:
%reload_ext autoreload
%autoreload 2

import common.constants as constants
import common.aws.s3 as s3_utils



def save_training_tif_to_s3(tif_path, suffix, job_name):
    
    file_name = f'{job_name}_{suffix}.tif'
    object_key = f'training/{job_name}/{file_name}'
    href = f'https://data.smartcarte.earth/{object_key}'
    
    print(f'uploading {tif_path} to s3://{constants.S3_DATA_BUCKET}/{object_key}')
    s3_utils.put_item(tif_path, constants.S3_DATA_BUCKET, object_key)
    
    return href


In [13]:
import glob
import os
import shelve


state_path = "./data/trainingState"
with shelve.open(state_path) as state:
    jobs_paths = glob.glob("./data/20*")
    for path in jobs_paths:
        job_name = path.split('/')[-1]

        all_tif_path = f'{path}/all_uint16_cog.tif'
        rgb_tif_path = f'{path}/rgb_byte_cog.tif'

        all_cog_href = save_training_tif_to_s3(all_tif_path, "all_uint16_cog", job_name)        
        rgb_cog_href = save_training_tif_to_s3(rgb_tif_path, "rgb_byte_cog", job_name)
        
        state[job_name] = {
            'all_cog_href': all_cog_href,
            'rgb_cog_href': rgb_cog_href
        }

        
        
print("done uploading TIFs to S3...")

uploading ./data/20200701_20200901_central/all_uint16_cog.tif to s3://smartcarte-data/training/20200701_20200901_central/20200701_20200901_central_all_uint16_cog.tif
uploading ./data/20200701_20200901_central/rgb_byte_cog.tif to s3://smartcarte-data/training/20200701_20200901_central/20200701_20200901_central_rgb_byte_cog.tif
uploading ./data/20211001_20211201_south/all_uint16_cog.tif to s3://smartcarte-data/training/20211001_20211201_south/20211001_20211201_south_all_uint16_cog.tif
uploading ./data/20211001_20211201_south/rgb_byte_cog.tif to s3://smartcarte-data/training/20211001_20211201_south/20211001_20211201_south_rgb_byte_cog.tif
uploading ./data/20210101_20210301_south/all_uint16_cog.tif to s3://smartcarte-data/training/20210101_20210301_south/20210101_20210301_south_all_uint16_cog.tif
uploading ./data/20210101_20210301_south/rgb_byte_cog.tif to s3://smartcarte-data/training/20210101_20210301_south/20210101_20210301_south_rgb_byte_cog.tif
uploading ./data/20220101_20220301_centr

# Labelbox 📝

In [21]:
%%capture
%pip install labelbox

from common.sagemaker_env import LABELBOX_API_KEY


In [43]:
import datetime
import json
from labelbox import Client, Dataset, DataRow
import os
import shelve
from uuid import uuid4


client = Client(api_key=LABELBOX_API_KEY)
project = client.get_project("clcrkcvn9281j07xybmww69pn")

today = datetime.datetime.today().strftime('%Y%m%d')

central_dataset_name = f"{today} Central Sector"
southern_dataset_name = f"{today} Southern Sector"

central_dataset = client.get_datasets(where=(Dataset.name==central_dataset_name)).get_one()
if central_dataset is None:
    central_dataset = client.create_dataset(name=central_dataset_name)
else:
    raise ValueError('central dataset already exists')
    
southern_dataset = client.get_datasets(where=(Dataset.name==southern_dataset_name)).get_one()
if southern_dataset is None:
    southern_dataset = client.create_dataset(name=southern_dataset_name)
else:
    raise ValueError('southern dataset already exists')

central_payload, southern_payload = [], []

state_path = "./data/trainingState"
with shelve.open(state_path) as state:

    for job_name in state.keys():
        sector = job_name.split('_')[-1]
                
        row =   {
            "min_zoom": 10,
            "max_zoom": 16,
            "tile_layer_url": state[job_name]["rgb_cog_href"]
        }
        
        if sector == "central":
            central_payload.append(row)
        elif sector == "south":
            southern_payload.append(row)
        else:
            raise ValueError("Unknown sector: " + sector)
       

central_payload = [{DataRow.row_data: row} for row in central_payload]
southern_payload = [{DataRow.row_data: row} for row in southern_payload]

central_task = central_dataset.create_data_rows(central_payload)
central_task.wait_till_done()
print('central done')

southern_task = southern_dataset.create_data_rows(southern_payload)
southern_task.wait_till_done()
print('southern done')
        

ValueError: central dataset already exists