# STAC Challenge automatized
<br><br>
All of this is made assuming that the structure of the folder would be: <br><br>

stac_challenge_folder / accra_1 / accra_1_drains.geojson <br><br> 
stac_challenge_folder / accra_1 / accra_1_roads.geojson <br><br> 
stac_challenge_folder / accra_1 / accra_1_buildings.geojson <br><br> 
stac_challenge_folder / accra_2 / accra_2_drains.geojson <br><br> 
...<br><br>

And that the links to the corresponding tiffiles would be listed in tiffiles (see below)

In [1]:
from satstac import Catalog
from satstac import Collection
from satstac import Item
import rasterio
import shapely
import sys,os,os.path
import json

In [2]:
#Where to store the collection and the items:
stac_challenge_folder = './STAC_Challenge_2'
catalog_address = stac_challenge_folder + '/' + 'catalog.json'
tiffiles = {"accra_1": "https://oin-hotosm.s3.amazonaws.com/5b694a0f4b87366cc0f0fa70/0/5b694a0f4b8736ebfff0fa71.tif",
           "accra_2":"https://oin-hotosm.s3.amazonaws.com/5bb9323e9ed15b0006d24f33/0/5bb9323e9ed15b0006d24f34.tif",
           'monrovia_1':'https://oin-hotosm.s3.amazonaws.com/5b83a514c8e197000a93403e/0/5b83a514c8e197000a93403f.tif',
           'monrovia_2':'https://oin-hotosm.s3.amazonaws.com/5b83a514c8e197000a93403e/0/5b83a514c8e197000a93403f.tif'}

### Info for the root catalog (hence generic info about the challenge):

In [3]:
stac_version = "??"
catalog_id = 'challenge_catalog'
catalog_title = 'Challenge OpenML'
catalog_description = 'Data for the ML challenge, in the STAC format'
catalog_links = [ {
      "rel": "self",
      "href": '../'+catalog_address
    },
        {
      "rel": "root",
      "href": '../'+catalog_address
    },
]

### Generic Info for the collections (Info that will be in common for all of them)

In [4]:
license = "open" #open, proprietary?
version = "1.0"
providers = [
    {
      "name": "WB/OCA",
      "roles": [
        "producer",
        "licensor"
      ],
      "url": "https://opencitiesproject.org"
    }
  ]
basic_keywords = ["challenge", "world bank"]
extent = {
    "spatial": [
      -180.0,
      -56.0,
      180.0,
      83.0
    ],
    "temporal": [
      "2015-06-23T00:00:00Z",
        None
    ]
  }
id_collection_basic = "challenge_collection_for_"
basic_description = "The data available for the challenge formatted in a STAC collection for "
basic_title = "Data for the challenge "

### Generic Info for the items (Info that will be in common for all of them)

In [5]:
type_item = "Feature"
properties_item = {
    "datetime": "2019-02-26T00:00:00Z",
    "td:title": "test",
    "td:description": "test",
    "td:label_type": "test",
    "td:classes": [
      'buildings', 'drains', 'roads'
    ]
  }

## Create the Catalog


In [6]:
catalog_json = {
    "stac_version": stac_version,
    "id": catalog_id,
    "title": catalog_title,
    "description": catalog_description,
    "links": catalog_links   
}
catalog = Catalog(catalog_json)
print(catalog.id)
# save as a root catalog
catalog.save_as(catalog_address)
print(catalog.id)
print(catalog.filename)
print(catalog.path)

challenge_catalog
challenge_catalog
./STAC_Challenge_2/catalog.json
./STAC_Challenge_2


## Functions to create the collections



In [7]:
def get_tags_collection(foldername):
    key = basic_keywords.copy()
    key.append(foldername)
    key.append(foldername.split('_')[0]+' '+foldername.split('_')[1])
    collection_address = stac_challenge_folder +'/' +id_collection_basic+foldername+'/'+ foldername + '_collection.json'
    links =[ {
          "rel": "self",
          "href": collection_address
        },
            {
          "rel": "root",
          "href": '../catalog.json'#catalog_address
        }]
    title = basic_title + foldername
    description = basic_description + foldername
    id_collection = id_collection_basic + foldername
    return stac_version, id_collection, title, description, key, version, license, providers, extent, links

In [8]:
def create_corresponding_collection(stac_version, id_collection, title, description, keywords, version, license, providers, extent, links):
    # create a Collection object with JSON
    collection_json = {
    "stac_version": "??",
    "id": id_collection,
    "title": title ,
    "description": description,
    "collection version": version,
    "keywords":keywords,
    "license": license,
    "version": version,
    "providers":providers,
    "extent":extent,
    "links": links
    }
    col = Collection(collection_json)
    print(col.id)
    return col

## Functions to create the items

In [9]:
def get_bbox(directory, filename):
    with open(directory + '/'+ filename, 'r') as f:
        data = json.load(f)
        c1 = [];
        c2 = [];
        if("Polygon" in data['features'][0]["geometry"]["type"]):
            for f in data["features"]:
                c_partial_1 = f["geometry"]["coordinates"]
                #print(c_partial_1)
                for c_partial_2 in c_partial_1:
                    for c_partial in c_partial_2:
                        for x in c_partial:
                            #print('x0:',x[0])
                            c1.append(x[0]) # first coordinate
                            c2.append(x[1])     
        else:
            for f in data["features"]:
                c_partial_1 = f["geometry"]["coordinates"]
                #print(c_partial_1)
                for c_partial in c_partial_1:
                    for x in c_partial:
                        #print('x0:',x[0])
                        c1.append(x[0]) # first coordinate
                        c2.append(x[1])
        bbox = [[min(c1),min(c2)],[min(c1),max(c2)],[max(c1),max(c2)],[max(c1),min(c2)],[min(c1),min(c2)]]
    return bbox

In [10]:
def get_tags_items(collection, foldername, filename, bbox):
    filename_split_1 = filename.split('.');
    filename_split = filename_split_1[0]
    id_item = filename_split
    links_item = [    {
      "rel": "self",
      "href": stac_challenge_folder+"/"+filename_split+"-item.json"
    },
    {
      "rel": "root",
      "href": collection.filename
    },]
    
    assets_item = {
    "raster": {
        "title": "image",
        "href": tiffiles[filename.split('_')[0]+'_'+filename.split('_')[1]],
        "type": "image/vnd.stac.geotiff; cloud-optimized=true"
    },
    "vector": {
      "title": filename_split,
      "href": '../'+foldername +'/'+filename,
      "type": "application/geo+json"
    }
    }
    key = basic_keywords.copy()
    key.append(filename_split[0]+'_'+filename_split[1])
    key.append(filename_split[2])
    collection_address = stac_challenge_folder +'/' + filename
    directory = stac_challenge_folder + '/'+ foldername
    
    with open(directory + '/'+ filename, 'r') as f:
        data = json.load(f)
        geom = []
        for f in data["features"]:
            geom.append(f["geometry"]["coordinates"][0])
        if("Polygon" in data['features'][0]["geometry"]["type"]):    
            geometry_item = {
                   "type": "MultiPolygon",
                    "coordinates": geom
                 }
        else:
            geometry_item = {
                   "type": "MultiLineString",
                   "coordinates": geom
                 }
    bbox_item = bbox
    prop = properties_item;
    prop['collection']=filename_split.split('_')[0]+' '+filename_split.split('_')[1]
    prop['td:title']=filename_split.split('_')[0]+' '+filename_split.split('_')[1]
    prop['td:label_type']=filename_split.split('_')[2]
    prop['td:description']=filename_split.split('_')[2]+' for '+filename_split.split('_')[0]+' '+filename_split.split('_')[1]
    return id_item, type_item, geometry_item, bbox_item, prop, links_item,assets_item

In [11]:
# create an Item object with JSON
def create_corresponding_item(id_item, type_item, geometry_item, bbox_item, properties_item, links_item,assets_item):
    item_json = {
        "id": id_item,
        "type": type_item,
        "geometry": geometry_item,
        "bbox": bbox_item,
        "properties": properties_item,
        "links": links_item,
        "assets":assets_item,
    }
    it= Item(item_json)
    return it

## Create the items and collections


In [12]:
catalog.data

{'stac_version': '??',
 'id': 'challenge_catalog',
 'title': 'Challenge OpenML',
 'description': 'Data for the ML challenge, in the STAC format',
 'links': [{'rel': 'self', 'href': '.././STAC_Challenge_2/catalog.json'},
  {'rel': 'root', 'href': '.././STAC_Challenge_2/catalog.json'}]}

In [13]:
for folder in os.listdir(stac_challenge_folder):
    if(folder!= '.DS_Store' and "json" not in folder):
        print(folder)
        
        stac_version, id_collection, title, description, keywords, version, license, providers, extent, links = get_tags_collection(folder)
        collection = create_corresponding_collection(stac_version, id_collection, title, description, keywords, version, license, providers, extent, links)
        #collection_address = stac_challenge_folder +'/' + folder + '_collection.json'
        collection_address = stac_challenge_folder +'/' +id_collection_basic+folder+'/'+ folder + '_collection.json'
        print(collection_address)
        collection.save_as(collection_address)
        catalog.add_catalog(collection)
        
        for file in os.listdir(stac_challenge_folder+'/'+folder):
            print(file)
            
            bbox = get_bbox(stac_challenge_folder + '/'+ folder, file)
            id_item, type_item, geometry_item, bbox_item, properties_item, links_item,assets_item = get_tags_items(collection,folder, file, bbox)
            item = create_corresponding_item(id_item, type_item, geometry_item, bbox_item, properties_item, links_item,assets_item)
            item_address = stac_challenge_folder +'/' +id_collection_basic+folder+'/'+ file.split('.')[0] + '-item.json'
            item.save_as(item_address)
            collection.add_item(item)
            
        #collection_address = stac_challenge_folder +'/' + folder + '_collection.json'
        collection.save_as(collection_address)
catalog.save_as(catalog_address)

monrovia_2
challenge_collection_for_monrovia_2
./STAC_Challenge_2/challenge_collection_for_monrovia_2/monrovia_2_collection.json
monrovia_2_buildings.geojson
monrovia_2_drains.geojson
monrovia_2_roads.geojson
accra_1
challenge_collection_for_accra_1
./STAC_Challenge_2/challenge_collection_for_accra_1/accra_1_collection.json
accra_1_roads.geojson
accra_1_buildings.geojson
accra_1_drains.geojson
monrovia_1
challenge_collection_for_monrovia_1
./STAC_Challenge_2/challenge_collection_for_monrovia_1/monrovia_1_collection.json
monrovia_1_drains.geojson
monrovia_1_buildings.geojson
monrovia_1_roads.geojson
accra_2
challenge_collection_for_accra_2
./STAC_Challenge_2/challenge_collection_for_accra_2/accra_2_collection.json
accra_2_roads.geojson
accra_2_drains.geojson
accra_2_buildings.geojson


challenge_catalog