# STAC Challenge automatized
<br><br>
This notebook assumes that the challenge folder is organized as follows: <br><br>

stac_challenge_folder / accra / accra_1 / accra_1_drains.geojson <br><br> 
stac_challenge_folder / accra / accra_1 / accra_1_roads.geojson <br><br> 
stac_challenge_folder / accra / accra_1 / accra_1_buildings.geojson <br><br> 
stac_challenge_folder / accra / accra_2 / accra_2_drains.geojson <br><br> 
...<br><br>

It also assumes that the URL of the GeoTIFF image for each area of interest (e.g. `accra_1`) is listed in the `tiffiles` dictionary (see below).

In [1]:
#import the required libraries:
from satstac import Catalog
from satstac import Collection
from satstac import Item
import rasterio
import shapely
import sys,os,os.path
import json

In [2]:
# Locations used throughout the notebook: the challenge root folder, the root
# catalog file inside it, and the GeoTIFF URL of each area of interest
# (keyed by "<area>_<number>").
stac_challenge_folder = './STAC_Challenge_3'
catalog_address = '/'.join((stac_challenge_folder, 'catalog.json'))
tiffiles = dict(
    accra_1="https://oin-hotosm.s3.amazonaws.com/5b694a0f4b87366cc0f0fa70/0/5b694a0f4b8736ebfff0fa71.tif",
    accra_2="https://oin-hotosm.s3.amazonaws.com/5bb9323e9ed15b0006d24f33/0/5bb9323e9ed15b0006d24f34.tif",
    monrovia_1='https://oin-hotosm.s3.amazonaws.com/5c08c2ec6918390006b7a8a1/0/5c08c2ec6918390006b7a8a2.tif',
    monrovia_2='https://oin-hotosm.s3.amazonaws.com/5b83a514c8e197000a93403e/0/5b83a514c8e197000a93403f.tif',
)

### Info for the root catalog (hence generic info about the challenge):

In [3]:
# Metadata of the root catalog, which describes the challenge as a whole.
stac_version = "0.6.2"
catalog_id = 'challenge_catalog'
catalog_title = 'Challenge OpenML'
catalog_description = 'Data for the ML challenge, in the STAC format'
# The "self" and "root" links carry the same href: the catalog address
# prefixed with '../'.
catalog_links = [
    {"rel": rel, "href": '../' + catalog_address}
    for rel in ("self", "root")
]

### Generic Info for the collections (Info that will be in common for all of them)

In [4]:
# Values shared by every collection; get_tags_collection() combines them
# with the name of the area folder.
license_col = "CC-BY-4.0"  # open, proprietary?
version_col = "1.0"
providers_col = [
    dict(
        name="WB/OCA",
        roles=["producer", "licensor"],
        url="https://opencitiesproject.org",
    ),
]
basic_keywords_col = ["challenge", "world bank"]
# Default extent: four spatial bounds plus a temporal range whose end is
# left open (None).
extent_col = dict(
    spatial=[-180.0, -56.0, 180.0, 83.0],
    temporal=["2015-06-23T00:00:00Z", None],
)
# Prefixes completed with the area folder name.
id_collection_basic = "challenge_collection_"
basic_description_col = "The data available for the challenge formatted in a STAC collection for the "
basic_title_col = "Data for the challenge "

### Generic Info for the items (Info that will be in common for all of them)

In [5]:
# Values shared by every item. get_tags_items() starts from this properties
# template and overrides the title, description and label type per file.
type_item = "Feature"
properties_item = {
    "datetime": "2019-02-26T00:00:00Z",
    "td:title": "test",
    "td:description": "test",
    "td:label_type": "segmentation",
    "td:classes": ['buildings', 'drains', 'roads'],
}
basic_keywords_items = ["challenge", "world bank"]

## Create the Catalog


## Functions to create the collections



In [6]:
def get_tags_collection(foldername):
    """Build the metadata fields for the collection of one area folder.

    Args:
        foldername: Name of the area folder under ``stac_challenge_folder``
            (e.g. ``"accra"``). It is appended to the keywords and used to
            derive the collection id, title, description and "self" link.

    Returns:
        A tuple ``(stac_version, id_collection, title, description, keywords,
        version, license, providers, extent, links)``, in the positional
        order expected by ``create_corresponding_collection``.
    """
    keywords = basic_keywords_col + [foldername]

    # Hand out a fresh copy of the default extent: callers overwrite
    # extent["spatial"] in place, which must not leak into the module-level
    # default or into other collections.
    extent = {
        "spatial": list(extent_col["spatial"]),
        "temporal": list(extent_col["temporal"]),
    }

    id_collection = id_collection_basic + foldername
    # Derive the address from the argument rather than from whatever loop
    # variable the caller happens to define at module level.
    collection_address = stac_challenge_folder + '/' + id_collection + '/' + 'catalog.json'
    links = [
        {"rel": "self", "href": collection_address},
        {"rel": "root", "href": '../catalog.json'},
    ]

    title = basic_title_col + foldername + ' s AoI'
    description = basic_description_col + foldername + ' AoI'
    return (stac_version, id_collection, title, description, keywords,
            version_col, license_col, providers_col, extent, links)

In [7]:
def create_corresponding_collection(stac_version, id_collection, title, description, keywords, version, license, providers, extent, links):
    """Wrap the given metadata in a satstac ``Collection`` and return it.

    The arguments are stored under the JSON keys listed in ``field_names``;
    ``version`` is written twice, under both "collection version" and
    "version". The id of the new collection is printed as progress output.
    """
    field_names = (
        "stac_version",
        "id",
        "title",
        "description",
        "collection version",
        "keywords",
        "license",
        "version",
        "providers",
        "extent",
        "links",
    )
    field_values = (
        stac_version,
        id_collection,
        title,
        description,
        version,
        keywords,
        license,
        version,
        providers,
        extent,
        links,
    )
    col = Collection(dict(zip(field_names, field_values)))
    print(col.id)
    return col

## Functions to create the items

In [8]:
#get the appropriate extent for a geojson file:
def get_bbox(directory, filename):
    """Return the bounding box of every feature in a GeoJSON file.

    Coordinates are gathered by walking the nested ``coordinates`` arrays
    down to the individual positions, so Polygon, MultiPolygon, LineString,
    MultiLineString (and Point/MultiPoint) features are all handled, even
    when mixed in one file. Features whose geometry is null or has no
    ``coordinates`` member are ignored.

    Args:
        directory: Folder that contains the file.
        filename: Name of the GeoJSON file inside ``directory``.

    Returns:
        ``[min_x, min_y, max_x, max_y]``, where x and y are the first and
        second value of each position.

    Raises:
        ValueError: If the file contains no coordinates at all.
    """
    with open(directory + '/' + filename, 'r') as geojson_file:
        data = json.load(geojson_file)

    xs = []
    ys = []

    def _collect(coords):
        # A position is a list that starts with a number; any other list is
        # one more level of nesting (ring, line, polygon, ...).
        if not coords:
            return
        if isinstance(coords[0], (int, float)):
            xs.append(coords[0])
            ys.append(coords[1])
            return
        for part in coords:
            _collect(part)

    for feature in data["features"]:
        geometry = feature.get("geometry")
        if geometry:
            _collect(geometry.get("coordinates", []))

    if not xs:
        raise ValueError(
            "no coordinates found in " + directory + '/' + filename
        )
    return [min(xs), min(ys), max(xs), max(ys)]

In [9]:
#get the appropriate tags for the items
def get_tags_items(collection,surfoldername, foldername, filename, bbox):
    """Build the fields of the STAC item describing one GeoJSON label file.

    The file name is expected to look like ``<area>_<number>_<layer>.geojson``
    (for example ``accra_1_roads.geojson``): ``<area>_<number>`` selects the
    raster URL in ``tiffiles`` and ``<layer>`` names the label layer.

    Args:
        collection: Unused; kept so existing callers keep working.
        surfoldername: Area folder under ``stac_challenge_folder``.
        foldername: Sub-folder of ``surfoldername`` that holds the file.
        filename: Name of the GeoJSON file, including its extension.
        bbox: Bounding box stored unchanged as the item's ``bbox``.

    Returns:
        A tuple ``(id_item, type_item, geometry_item, bbox_item, prop,
        links_item, assets_item)``, in the positional order expected by
        ``create_corresponding_item``.

    Raises:
        ValueError: If the file name does not have three ``_``-separated
            parts, or the file contains no features.
        KeyError: If ``tiffiles`` has no entry for ``<area>_<number>``.
    """
    item_stem = filename.split('.')[0]
    name_parts = item_stem.split('_')
    if len(name_parts) < 3:
        raise ValueError(
            "expected a file name like <area>_<number>_<layer>.geojson, got "
            + repr(filename)
        )
    area_key = name_parts[0] + '_' + name_parts[1]
    area_label = name_parts[0] + ' ' + name_parts[1]
    layer = name_parts[2]

    id_item = item_stem
    links_item = [
        {
            "rel": "self",
            "href": stac_challenge_folder + "/" + item_stem + "-item.json"
        },
        {
            "rel": "root",
            "href": '../../catalog.json'
        },
    ]

    assets_item = {
        "raster": {
            "title": "image",
            "href": tiffiles[area_key],
            "type": "image/vnd.stac.geotiff; cloud-optimized=true"
        },
        "vector": {
            "title": item_stem,
            "href": '../../' + surfoldername + '/' + foldername + '/' + filename,
            "type": "application/geo+json"
        }
    }

    directory = stac_challenge_folder + '/' + surfoldername + '/' + foldername
    with open(directory + '/' + filename, 'r') as geojson_file:
        data = json.load(geojson_file)
    features = data["features"]
    if not features:
        raise ValueError("no features found in " + directory + '/' + filename)

    # Merge all features into a single Multi* geometry. Multi* features
    # contribute each of their parts; single-part features contribute
    # themselves, so nothing is dropped and the nesting depth stays valid.
    geom = []
    for feature in features:
        geometry = feature["geometry"]
        if geometry["type"].startswith("Multi"):
            geom.extend(geometry["coordinates"])
        else:
            geom.append(geometry["coordinates"])

    # The first feature decides the item's geometry type.
    # NOTE(review): assumes a file holds only polygons or only lines; confirm.
    if "Polygon" in features[0]["geometry"]["type"]:
        geometry_type = "MultiPolygon"
    else:
        geometry_type = "MultiLineString"
    geometry_item = {
        "type": geometry_type,
        "coordinates": geom
    }

    bbox_item = bbox

    # Work on a copy so items do not share, and overwrite, a single
    # module-level properties dict.
    prop = dict(properties_item)
    prop['collection'] = area_label
    prop['td:title'] = area_label
    prop['td:label_type'] = layer
    prop['td:description'] = layer + ' for ' + area_label
    return id_item, type_item, geometry_item, bbox_item, prop, links_item, assets_item

In [10]:
# create an Item object with JSON
def create_corresponding_item(id_item, type_item, geometry_item, bbox_item, properties_item, links_item,assets_item):
    """Assemble the given fields into an item dict and wrap it in a satstac ``Item``.

    Each argument is stored under the matching key: id, type, geometry,
    bbox, properties, links and assets.
    """
    return Item(dict(
        id=id_item,
        type=type_item,
        geometry=geometry_item,
        bbox=bbox_item,
        properties=properties_item,
        links=links_item,
        assets=assets_item,
    ))

## Create the items and collections


In [11]:
# Root catalog metadata used by the catalog-creation cell that follows.
# stac_version is rebound to itself, so it must already be defined.
stac_version = stac_version
catalog_id = 'challenge_catalog'
catalog_title = 'Challenge OpenML'
catalog_description = 'Data for the ML challenge, in the STAC format'
# The "self" and "root" links carry the same href: the catalog address
# prefixed with '../'.
catalog_links = [
    {"rel": rel, "href": '../' + catalog_address}
    for rel in ("self", "root")
]

In [12]:
# Assemble the root catalog description from the metadata defined above.
catalog_json = {
    "stac_version": stac_version,
    "id": catalog_id,
    "title": catalog_title,
    "description": catalog_description,
    "links": catalog_links   
}
catalog = Catalog(catalog_json)
# Print the id before saving ...
print(catalog.id)
# Save as a root catalog at catalog_address.
catalog.save_as(catalog_address)
# ... and again afterwards, together with the file name and folder that the
# catalog reports once it has been saved.
print(catalog.id)
print(catalog.filename)
print(catalog.path)
# Last expression of the cell: the notebook displays the catalog's raw dict.
catalog.data

challenge_catalog
challenge_catalog
./STAC_Challenge_3/catalog.json
./STAC_Challenge_3


{'stac_version': '0.6.2',
 'id': 'challenge_catalog',
 'title': 'Challenge OpenML',
 'description': 'Data for the ML challenge, in the STAC format',
 'links': [{'rel': 'self', 'href': '.././STAC_Challenge_3/catalog.json'},
  {'rel': 'root', 'href': '.././STAC_Challenge_3/catalog.json'}]}

In [13]:
# Build one collection per area folder and one item per GeoJSON file, then
# save the root catalog again so that it lists every collection.
for sur_folder in os.listdir(stac_challenge_folder):
    sur_folder_path = stac_challenge_folder + '/' + sur_folder
    # Skip stray files (.DS_Store, *.json) and the collection folders that
    # this loop itself writes into the challenge folder, so that re-running
    # the cell does not treat generated output as input data.
    if (sur_folder == '.DS_Store'
            or "json" in sur_folder
            or sur_folder.startswith(id_collection_basic)
            or not os.path.isdir(sur_folder_path)):
        continue

    collection = create_corresponding_collection(*get_tags_collection(sur_folder))
    collection_address = stac_challenge_folder + '/' + id_collection_basic + sur_folder + '/' + 'catalog.json'
    # The collection is saved once before items are added and again at the
    # end of the iteration, after its extent has been updated.
    collection.save_as(collection_address)
    catalog.add_catalog(collection)

    # Bounding boxes of every item in this collection, across all of its
    # sub-folders; their union becomes the collection's spatial extent.
    item_bboxes = []

    for folder in os.listdir(sur_folder_path):
        folder_path = sur_folder_path + '/' + folder
        if (folder == '.DS_Store'
                or "json" in folder
                or not os.path.isdir(folder_path)):
            continue
        print('folder:', folder)

        for geojson_name in os.listdir(folder_path):
            # Only GeoJSON label files become items; anything else found in
            # the folder (e.g. .DS_Store) is ignored.
            if not geojson_name.lower().endswith('.geojson'):
                continue
            print('file:', geojson_name)

            bbox = get_bbox(folder_path, geojson_name)
            item_bboxes.append(bbox)

            # Pass the tags straight through so the module-level defaults
            # (type_item, properties_item) are not rebound on every file.
            item = create_corresponding_item(
                *get_tags_items(collection, sur_folder, folder, geojson_name, bbox)
            )
            item_filename = geojson_name.split('.')[0] + '-item'
            item_address = (stac_challenge_folder + '/' + id_collection_basic + sur_folder
                            + '/' + folder + '/' + item_filename + '.json')
            item.save_as(item_address)
            collection.add_item(item, path=folder, filename=item_filename)

    # Keep the default extent when the area folder contained no items.
    if item_bboxes:
        collection.data['extent']['spatial'] = [
            min(box[0] for box in item_bboxes),
            min(box[1] for box in item_bboxes),
            max(box[2] for box in item_bboxes),
            max(box[3] for box in item_bboxes),
        ]
    collection.save_as(collection_address)


catalog.save_as(catalog_address)

challenge_collection_accra
folder: accra_1
file: accra_1_roads.geojson
file: accra_1_buildings.geojson
file: accra_1_drains.geojson
folder: accra_2
file: accra_2_roads.geojson
file: accra_2_drains.geojson
file: accra_2_buildings.geojson
challenge_collection_monrovia
folder: monrovia_2
file: monrovia_2_buildings.geojson
file: monrovia_2_drains.geojson
file: monrovia_2_roads.geojson
folder: monrovia_1
file: monrovia_1_drains.geojson
file: monrovia_1_buildings.geojson
file: monrovia_1_roads.geojson


challenge_catalog