In [None]:
# Imports
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from pprint import pprint
# import solaris as sol
from pathlib import Path
import rasterio
from rasterio.windows import Window
import geopandas as gpd
from pystac import (Catalog, CatalogType, Item, Asset, LabelItem, Collection)
from rasterio.transform import from_bounds
from shapely.geometry import Polygon
from shapely.ops import cascaded_union
from rio_tiler import main as rt_main
import skimage
from tqdm import tqdm
import os
# Point curl-based HTTPS clients at the system CA bundle, but only when that
# bundle actually exists on this machine: exporting a path to a missing file
# would break certificate verification instead of fixing it.
_ca_bundle = "/etc/ssl/certs/ca-certificates.crt"
if os.path.isfile(_ca_bundle):
    os.environ["CURL_CA_BUNDLE"] = _ca_bundle

# We have to add this workaround for STAC_IO so pystac can read catalogs over http(s):
# (https://pystac.readthedocs.io/en/latest/concepts.html#using-stac-io)
from urllib.parse import urlparse
import requests
from pystac import STAC_IO
def my_read_method(uri):
    """Return the text found at ``uri``, fetching http(s) URIs with ``requests``.

    Args:
        uri: Location of the document to read. URIs whose scheme starts with
            ``http`` are downloaded; anything else is delegated to
            ``STAC_IO.default_read_text_method``.

    Returns:
        The document contents as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    parsed = urlparse(uri)
    if parsed.scheme.startswith('http'):
        # A timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(uri, timeout=60)
        # Fail loudly on HTTP errors instead of returning the error page body
        # as if it were the requested document.
        response.raise_for_status()
        return response.text
    return STAC_IO.default_read_text_method(uri)
# Install the custom reader as STAC_IO's text-read hook.
STAC_IO.read_text_method = my_read_method

In [None]:
# Folder Setup: a local 'data' directory holding the 'images-256' and
# 'masks-256' sub-folders. Existing folders are left as they are.
data_dir = Path('data')
img_path, mask_path = (data_dir / sub for sub in ('images-256', 'masks-256'))

# The parent has to exist before its children can be created.
data_dir.mkdir(exist_ok=True)
img_path.mkdir(exist_ok=True)
mask_path.mkdir(exist_ok=True)

In [None]:
# Load the STAC catalogs for both training tiers and for the test set
train1_cat = Catalog.from_file(
    'https://drivendata-competition-building-segmentation.s3-us-west-1.amazonaws.com/train_tier_1/catalog.json')
train2_cat = Catalog.from_file(
    'https://drivendata-competition-building-segmentation.s3-us-west-1.amazonaws.com/train_tier_2/catalog.json')
test_cat = Catalog.from_file(
    'https://drivendata-competition-building-segmentation.s3-us-west-1.amazonaws.com/test/catalog.json')

# Index the tier-1 catalog's children by their id
cols = dict((child.id, child) for child in train1_cat.get_children())

In [None]:
# Show every item found under the 'acc' entry
[item for item in cols['acc'].get_all_items()]

In [None]:
# Get a list of the possible areas ('scenes') as (collection id, item id, next item id)
# NOTE(review): assumes get_all_items() yields items as consecutive pairs
# (presumably image followed by its label) — TODO confirm against the listing.
areas = []
for c in cols:
    itms = list(cols[c].get_all_items())
    # Walk the items two at a time; a trailing unpaired item is skipped.
    # (Using range here also avoids binding a loop variable named `id`,
    # which would shadow the builtin for the rest of the notebook.)
    for i in range(0, len(itms) - 1, 2):
        areas.append((c, itms[i].id, itms[i+1].id))
print(areas)

In [None]:
# Print the id of each item under 'acc', one per line
acc_items = cols['acc'].get_all_items()
for item in acc_items:
    print(item.id)

In [None]:
# For every item under 'acc': print its id and properties, then either plot the
# label GeoJSON with geopandas (ids containing 'label') or print the raster
# metadata with rasterio (all other items).

for i in cols['acc'].get_all_items():
    print(i.id, '\n----------------------------')
    pprint(i.properties)
    if 'label' in i.id:

        gdf = gpd.read_file(
            i.make_asset_hrefs_absolute().assets['labels'].href)
        gdf.plot()
        plt.show()
    else:
        print('raster metadata:')
        # Open inside a context manager so the dataset handle is closed again
        # after reading the metadata, instead of leaking one per raster item.
        with rasterio.open(
                i.make_asset_hrefs_absolute().assets['image'].href) as src:
            pprint(src.meta)
    print('\n----------------------------')

In [None]:
# open one image item
SCENE_ID = 'ca041a'

one_item = cols['acc'].get_item(id=SCENE_ID)
# get_item gives back None for an unknown id; stop here with a clear message
# rather than failing later with an AttributeError on None.
if one_item is None:
    raise ValueError("No item with id {!r} found under 'acc'".format(SCENE_ID))
one_item.to_dict()

In [None]:
# load raster for this item
# Resolve the asset href to an absolute one here (same call the item-inspection
# loop uses) so this cell does not silently depend on that loop having run first.
# `rst` is left open on purpose: the following cells read from it.
rst = rasterio.open(one_item.make_asset_hrefs_absolute().assets['image'].href)
rst.meta

In [None]:
# check raster resolution (the `res` attribute of the dataset opened as `rst`)
rst.res

In [None]:
# Windowed read: take a square patch whose top-left corner sits at the raster's
# centre, then move the leading axis to the end so the result is a 3-d
# (rows, cols, channels) array that matplotlib can display
# (presumably RGB + alpha channels).
# more on windowed reads with rasterio: https://rasterio.readthedocs.io/en/stable/topics/windowed-rw.html#windows

win_sz = 1024  # window edge length in pixels

col_off = rst.meta['width'] // 2
row_off = rst.meta['height'] // 2
window = Window(col_off, row_off, win_sz, win_sz)
win_arr = np.moveaxis(rst.read(window=window), 0, 2)
plt.figure(figsize=(10, 10))
plt.imshow(win_arr)

In [None]:
# Data directory used further down. Set the OPENCITIES_DATA_PATH environment
# variable to point at a copy on another machine; the original hard-coded
# location stays as the default so existing setups keep working.
DATA_PATH = os.environ.get(
    "OPENCITIES_DATA_PATH", "/home/zakirov/datasets/opencities/train_tier_1/")