# Given a list of orphaned tiles, map their spatial footprint

In [1]:
import subprocess
import json
from shapely.geometry import box
import geopandas as gpd
import pandas as pd
from pprint import pprint
from deafrica_tools.classification import HiddenPrints

In [2]:
# Text file listing the location of each orphaned tile.
text_file = "landsat_orphans_T1.txt"

# The listing has no header row, so the single column is named explicitly.
data = pd.read_csv(text_file, sep=" ", header=None)
data = data.rename(columns={0: "file_location"})
len(data)

36

## Loop through files and find bbox of data

The suffix depends on the product. For Landsat, the file we read the bbox info from ends in `_SR_stac.json`.

In [3]:
# Filename suffix of the STAC metadata file that holds the bbox.
suffix = "_SR_stac.json"

In [4]:
# For every orphaned tile: download its JSON metadata from S3, read the
# bounding box from the STAC file, and store it as a one-row GeoDataFrame.
# All rows are merged into the GeoDataFrame `x` at the end.
results = []
n_tiles = len(data)
for i, f in enumerate(data["file_location"]):

    print(" Tile {:04}/{:04}\r".format(i + 1, n_tiles), end="")

    # The STAC file is looked up under the name of the tile's folder, i.e.
    # the last path component of the location. Taking it by splitting works
    # with or without a trailing slash and for any scene-ID length
    # (previously a fixed 40-character slice).
    scene_id = f.rstrip("/").rsplit("/", 1)[-1]

    try:
        # sync json from s3; an argument list is used instead of a shell
        # string so the location read from the text file is never
        # interpreted by a shell
        subprocess.run(
            [
                "aws", "s3", "sync", f, "tmp",
                "--exclude", "*",
                "--include", "*.json",
                "--no-sign-request",
            ],
            stdout=subprocess.DEVNULL,
        )

        with open("tmp/" + scene_id + suffix) as fp:
            j = json.load(fp)
    finally:
        # delete data, also when the download or the parsing failed, so a
        # stale tmp/ never leaks into the next tile
        subprocess.run(["rm", "-r", "-f", "tmp/"])

    # get bbox; STAC item bboxes are given in lon/lat (WGS 84), which is why
    # the footprint is tagged EPSG:4326 and the item's projected CRS is not
    # needed here
    bb = j["bbox"]
    b = box(bb[0], bb[1], bb[2], bb[3])

    # Convert bbox to a GeoDataFrame
    d = {"file": [f], "geometry": [b]}
    gdf = gpd.GeoDataFrame(d, crs="epsg:4326")

    # add to our results
    results.append(gdf)

# concatenate all results together
x = pd.concat(results).reset_index(drop=True)

 Tile 0036/0036

### plot

In [5]:
# Show the combined tile footprints collected in `x`
# (GeoDataFrame.explore draws them on an interactive map).
x.explore()