# S3 📂

In [None]:
%reload_ext autoreload
%autoreload 2

import common.constants as constants
import common.aws.s3 as s3_utils



def save_training_tif_to_s3(tif_path, suffix, job_name):
    """Upload a training TIF to the S3 data bucket and return its href.

    The object key is ``training/<job_name>/<job_name>_<suffix>.tif``.

    Args:
        tif_path: Path handed to ``s3_utils.put_item`` as the item to upload.
        suffix: Label appended to the job name to form the object's file name.
        job_name: Job identifier; used as both the key's folder and name prefix.

    Returns:
        The ``https://data.smartcarte.earth/...`` href built from the object key.
    """
    bucket = constants.S3_DATA_BUCKET
    key = f'training/{job_name}/{job_name}_{suffix}.tif'

    print(f'uploading {tif_path} to s3://{bucket}/{key}')
    s3_utils.put_item(tif_path, bucket, key)

    return f'https://data.smartcarte.earth/{key}'


In [None]:
import glob
import os
import shelve


# Shelf that records, per job name, the hrefs of the TIFs uploaded to S3.
state_path = "./data/trainingState"
with shelve.open(state_path) as state:
    # glob returns matches in arbitrary order; sort so uploads (and the log
    # printed below) happen in a stable, reproducible order.
    jobs_paths = sorted(glob.glob("./data/20*"))
    for path in jobs_paths:
        # Only job directories contain the TIFs; skip any stray file whose
        # name happens to match the "20*" pattern.
        if not os.path.isdir(path):
            continue

        # The job name is the directory name; basename does not depend on the
        # path separator the way split('/') does.
        job_name = os.path.basename(path)

        composite_tif_path = f'{path}/composite.tif'
        rgb_tif_path = f'{path}/rgb_byte.tif'

        composite_cog_href = save_training_tif_to_s3(composite_tif_path, "composite_float32_cog", job_name)
        rgb_cog_href = save_training_tif_to_s3(rgb_tif_path, "rgb_byte_cog", job_name)

        # Persist both hrefs under the job name for later cells to read.
        state[job_name] = {
            'all_cog_href': composite_cog_href,
            'rgb_cog_href': rgb_cog_href
        }


print("done uploading TIFs to S3...")

uploading ./data/20200101_20200301_central/composite.tif to s3://smartcarte-data/training/20200101_20200301_central/20200101_20200301_central_composite_float32_cog.tif
uploading ./data/20200101_20200301_central/rgb_byte.tif to s3://smartcarte-data/training/20200101_20200301_central/20200101_20200301_central_rgb_byte_cog.tif
uploading ./data/20200101_20200301_south/composite.tif to s3://smartcarte-data/training/20200101_20200301_south/20200101_20200301_south_composite_float32_cog.tif
uploading ./data/20200101_20200301_south/rgb_byte.tif to s3://smartcarte-data/training/20200101_20200301_south/20200101_20200301_south_rgb_byte_cog.tif
uploading ./data/20200401_20200601_central/composite.tif to s3://smartcarte-data/training/20200401_20200601_central/20200401_20200601_central_composite_float32_cog.tif
uploading ./data/20200401_20200601_central/rgb_byte.tif to s3://smartcarte-data/training/20200401_20200601_central/20200401_20200601_central_rgb_byte_cog.tif
uploading ./data/20200401_20200601

# Labelbox 📝

In [1]:
%%capture
%pip install labelbox

import os

# Resolve the Labelbox API key: prefer the project's sagemaker_env module and
# fall back to the LABELBOX_API_KEY environment variable when that module (or
# the name inside it) cannot be imported.
try:
    from common.sagemaker_env import LABELBOX_API_KEY
except ImportError:
    # Catch only import failures so KeyboardInterrupt/SystemExit and unrelated
    # errors are not silently swallowed. A missing variable raises KeyError.
    LABELBOX_API_KEY = os.environ['LABELBOX_API_KEY']


In [16]:
import datetime
import glob
import json
from labelbox import Client, Dataset, DataRow
import os
import shelve
from uuid import uuid4


# Create one Labelbox dataset per sector (named with today's date) and fill
# each with tiled-imagery data rows pointing at the RGB COG hrefs recorded in
# the training-state shelf.
#
# All validation (reading the shelf, sector parsing, dataset-name checks) runs
# BEFORE any dataset is created, so a failure does not leave behind an empty
# dataset that would then block a same-day re-run with "already exists".
client = Client(api_key=LABELBOX_API_KEY)
project = client.get_project("clcrkcvn9281j07xybmww69pn")

today = datetime.datetime.today().strftime('%Y%m%d')

central_dataset_name = f"{today} Central Sector"
southern_dataset_name = f"{today} Southern Sector"

# --- 1. Build the per-sector payloads from the shelf -----------------------
central_payload, southern_payload = [], []

state_path = "./data/trainingState"
# Open read-only: this cell only consumes the shelf, and a missing state file
# should fail here rather than silently yield an empty shelf.
with shelve.open(state_path, flag='r') as state:

    for job_name in state.keys():
        # The sector is the last "_"-separated token of the job name.
        sector = job_name.split('_')[-1]

        row = {
            "min_zoom": 10,
            "max_zoom": 16,
            "tile_layer_url": state[job_name]["rgb_cog_href"]
        }

        if sector == "central":
            central_payload.append(row)
        elif sector == "south":
            southern_payload.append(row)
        else:
            raise ValueError("unknown sector: " + sector)

# Wrap each row in the {DataRow.row_data: ...} form passed to create_data_rows.
central_payload = [{DataRow.row_data: row} for row in central_payload]
southern_payload = [{DataRow.row_data: row} for row in southern_payload]

# --- 2. Refuse to proceed if either dataset name is already taken ----------
central_dataset = client.get_datasets(where=(Dataset.name==central_dataset_name)).get_one()
if central_dataset is not None:
    raise ValueError('central dataset already exists')

southern_dataset = client.get_datasets(where=(Dataset.name==southern_dataset_name)).get_one()
if southern_dataset is not None:
    raise ValueError('southern dataset already exists')

# --- 3. Create the datasets and upload the rows ----------------------------
central_dataset = client.create_dataset(name=central_dataset_name)
southern_dataset = client.create_dataset(name=southern_dataset_name)

central_task = central_dataset.create_data_rows(central_payload)
central_task.wait_till_done()
print('central done')

southern_task = southern_dataset.create_data_rows(southern_payload)
southern_task.wait_till_done()
print('southern done')
        

central done
southern done


In [17]:
%%bash

# Print the metadata (CRS, origin, pixel size, corner coordinates) of the
# south-sector RGB byte TIF.
gdalinfo ./data/20200101_20200301_south/rgb_byte.tif

Driver: GTiff/GeoTIFF
Files: ./data/20200101_20200301_south/rgb_byte.tif
Size is 5265, 4283
Coordinate System is:
GEOGCRS["WGS 84",
    DATUM["World Geodetic System 1984",
        ELLIPSOID["WGS 84",6378137,298.257223563,
            LENGTHUNIT["metre",1]]],
    PRIMEM["Greenwich",0,
        ANGLEUNIT["degree",0.0174532925199433]],
    CS[ellipsoidal,2],
        AXIS["geodetic latitude (Lat)",north,
            ORDER[1],
            ANGLEUNIT["degree",0.0174532925199433]],
        AXIS["geodetic longitude (Lon)",east,
            ORDER[2],
            ANGLEUNIT["degree",0.0174532925199433]],
    ID["EPSG",4326]]
Data axis to CRS axis mapping: 2,1
Origin = (29.296298000000000,-1.197520000000000)
Pixel Size = (0.000089837606838,-0.000089830259164)
Metadata:
  AREA_OR_POINT=Area
Image Structure Metadata:
  INTERLEAVE=PIXEL
Corner Coordinates:
Upper Left  (  29.2962980,  -1.1975200) ( 29d17'46.67"E,  1d11'51.07"S)
Lower Left  (  29.2962980,  -1.5822630) ( 29d17'46.67"E,  1d34'56.15"S)
Uppe