In [None]:
%load_ext autoreload
%autoreload 2

import dask
import numpy as np
import pandas as pd
import xarray as xr

from carbonplan_trace.tiles import tiles
from carbonplan_trace.v1 import biomass_rollup

import fsspec
import h5py
import regionmask
from rasterio.session import AWSSession
from rasterio.warp import Resampling

from carbonplan_trace.v1 import utils
from collections import defaultdict
from carbonplan_trace.v1.landsat_preprocess import access_credentials, find_months_of_interest, make_datestamps
from carbonplan_trace.v0.core import compute_grid_area
import rioxarray as rio 
import geopandas as gpd
from s3fs import S3FileSystem

In [None]:
# Activate the matplotlib theme shipped with carbonplan_styles before any
# figure is drawn (presumably sets global plotting defaults; confirm).
from carbonplan_styles.mpl import set_theme

set_theme()

In [None]:
from carbonplan_trace.v1.landsat_preprocess import access_credentials

# Build a requester-pays S3 filesystem authenticated with the key pair
# returned by the project's credential helper. (An fsspec-based client,
# fsspec.get_filesystem_class("s3"), was used here previously.)
access_key_id, secret_access_key = access_credentials()
fs = S3FileSystem(
    requester_pays=True,
    key=access_key_id,
    secret=secret_access_key,
)

In [None]:
# Load the WRS-2 descending scene footprints (zipped shapefile) straight from
# the public URL. Later cells rely on its "PATH" and "ROW" columns and on the
# footprint geometries for spatial selection.
gdf = gpd.read_file(
    "https://prd-wret.s3-us-west-2.amazonaws.com/assets/"
    "palladium/production/s3fs-public/atoms/files/"
    "WRS2_descending_0.zip"
)

In [None]:
import boto3
from rasterio.session import AWSSession
from s3fs import S3FileSystem
from carbonplan_trace.v1.landsat_preprocess import scene_seasonal_average

In [None]:
"""
iso3_country	CarbonPlan	alpha3	country_area	minx	miny	maxx	maxy
193	SJM	0.0	SJM	25.043923	-9.076563	70.824997	36.807816	82.537498  --> Svalbard, probably don't have biomass
106	ISL	0.0	ISL	19.424745	-27.980730	60.002083	-12.050521	66.702080  --> Iceland, 3 scenes available, but none in the right month
194	SLB	0.0	SLB	2.345975	155.392502	-12.308334	170.192505	-4.445220 --> Solomon Island, no scenes available
71	FJI	0.0	FJI	1.613079	-180.000000	-21.042500	180.000000	-12.461724 --> Fiji, no scenes available 
191	SGS	0.0	SGS	0.570622	-41.815617	-59.484280	-26.229315	-53.646881 --> South Georgia and South Sandwich islands near antartica, no scenes and no biomass
49	CPV	0.0	CPV	0.345344	-25.361803	14.801805	-22.656805	17.205416 --> Cape Verde, no scenes 
181	PYF	0.0	PYF	0.342648	-154.727295	-27.900627	-134.451111	-7.894929 --> French Polynesia, no scenes 
74	FRO	0.0	FRO	0.247089	-7.683333	61.393749	-6.245833	62.391666
239	WSM	0.0	WSM	0.238464	-172.804123	-14.077221	-171.397705	-13.439809
183	REU	0.0	REU	0.218469	55.216251	-21.389860	55.837360	-20.871805
"""

# Bounding boxes, as (min_lon, min_lat, max_lon, max_lat), of the regions that
# were investigated for missing data. Keeping them in one lookup table replaces
# the previous pattern of overwriting / commenting-out assignments, where the
# first assignment was silently discarded.
REGION_BOUNDS = {
    "iceland": (-27.980730, 60.002083, -12.050521, 66.702080),
    "solomon_islands": (155.392502, -12.308334, 170.192505, -4.445220),
    # Only the 177..180 degree strip is used for Fiji, as in the original selection.
    "fiji": (177.000000, -21.042500, 180.000000, -12.461724),
    "south_georgia_and_south_sandwich": (-41.815617, -59.484280, -26.229315, -53.646881),
    "cape_verde": (-25.361803, 14.801805, -22.656805, 17.205416),
    "french_polynesia": (-154.727295, -27.900627, -134.451111, -7.894929),
}

# Region to inspect; change this key to look at another bounding box.
REGION = "solomon_islands"

min_lon, min_lat, max_lon, max_lat = REGION_BOUNDS[REGION]

# (PATH, ROW) pairs of every WRS-2 footprint intersecting the selected box.
scenes_in_tile = gdf.cx[min_lon:max_lon, min_lat:max_lat][["PATH", "ROW"]].values

In [None]:
# S3 prefix template for one scene-year: filled with (year, path, row), the
# latter two zero-padded to three digits.
landsat_bucket = "s3://usgs-landsat/collection02/level-2/standard/etm/{}/{:03d}/{:03d}/"

# Spot-check only the first few scenes (use `scenes_in_tile` for a full scan).
# Slicing instead of indexing with range(5) avoids an IndexError when fewer
# than five scenes intersect the bounding box.
for path, row in scenes_in_tile[:5]:
    for year in np.arange(2014, 2021):
        try:
            scene_stores = fs.ls(landsat_bucket.format(year, path, row))
        except FileNotFoundError:
            # s3fs raises for a prefix with no objects instead of returning
            # an empty listing; treat that as "no scenes for this year".
            scene_stores = []
        if len(scene_stores) > 0:
            print(path, row, year)
            print(scene_stores)

In [None]:
# Collect the (PATH, ROW) pairs of all WRS-2 footprints that intersect the
# bounding box of each tile. Pairs shared by several tiles appear repeatedly.
all_scenes = []

for tile in tiles:
    lat, lon = utils.get_lat_lon_tags_from_tile_path(tile)
    min_lat, max_lat, min_lon, max_lon = utils.parse_bounding_box_from_lat_lon_tags(lat, lon)
    scenes_for_tile = gdf.cx[min_lon:max_lon, min_lat:max_lat]
    all_scenes += list(scenes_for_tile[["PATH", "ROW"]].values)

In [None]:
# Total (PATH, ROW) entries gathered across tiles; duplicates are not removed here.
len(all_scenes)

In [None]:
# For every candidate scene, list its yearly S3 prefixes and keep the scene if
# at least one stored file name contains a datestamp of interest.
scenes_with_valid_data = []
for path, row in all_scenes:
    months = find_months_of_interest(row)
    valid_files = []
    for year in np.arange(2014, 2021):
        try:
            scene_stores = fs.ls(landsat_bucket.format(year, path, row))
        except FileNotFoundError:
            # s3fs raises for a prefix with no objects; a missing year must not
            # abort the scan of the remaining years and scenes.
            continue
        datestamps = make_datestamps(months, year)
        # Record each matching file once, even if several datestamps match it.
        valid_files.extend(
            scene_store
            for scene_store in scene_stores
            if any(datestamp in scene_store for datestamp in datestamps)
        )

    if valid_files:
        scenes_with_valid_data.append([path, row])

In [None]:
# Number of [path, row] entries that had at least one matching file.
len(scenes_with_valid_data)

In [None]:
# Peek at the first two [path, row] entries.
scenes_with_valid_data[:2]

In [None]:
# Distinct scene identifiers of the form "PPP/RRR" (zero-padded path / row).
valid_scene_ids = list({f"{path:03d}/{row:03d}" for path, row in scenes_with_valid_data})

In [None]:
# Count of distinct scene ids after de-duplication.
len(valid_scene_ids)

In [None]:
# Tag every WRS-2 footprint with its zero-padded "PPP/RRR" scene identifier.
gdf["scene_id"] = [f"{path:03d}/{row:03d}" for path, row in zip(gdf["PATH"], gdf["ROW"])]

In [None]:
# Restrict the footprints to those whose scene id was found to have valid data.
# get all scenes in our tiles
# do len checking
has_valid_data = gdf["scene_id"].isin(valid_scene_ids)
sub = gdf.loc[has_valid_data]

In [None]:
# Number of footprint rows whose scene id is in valid_scene_ids.
len(sub)

In [None]:
# Merge all selected footprints into a single geometry. Note that `sub` is
# rebound here: from this point on it is the dissolved frame, not the
# per-scene selection.
sub = sub.dissolve()

In [None]:
# Landsat 8 launched in 2013, the same year Landsat 5 was decommissioned

In [None]:
# Quick visual check of the dissolved footprint. Plot through the GeoSeries:
# indexing with [0] yields a bare shapely geometry, which has no .plot() method.
sub.geometry.plot()

In [None]:
# Label the dissolved geometry so the exported file carries a name attribute.
sub["name"] = "valid_landsat"

In [None]:
# Write the mask (name + geometry only) to a shapefile in the working directory.
# NOTE(review): a later cell reads `valid_landsat.shp.zip` from S3; the
# zip/upload step is not part of the code shown here — confirm how it is done.
sub[["name", "geometry"]].to_file("valid_landsat.shp")

In [None]:
# Re-create the filesystem without explicit keys (this replaces the `fs`
# defined earlier) and load the published mask from the project bucket.
fs = S3FileSystem(requester_pays=True)

with fs.open("s3://carbonplan-climatetrace/v1.2/masks/valid_landsat.shp.zip") as mask_file:
    df = gpd.read_file(mask_file)

In [None]:
import regionmask

In [None]:
# Natural Earth land regions (the `land_110` set) from regionmask.
# NOTE(review): newer regionmask releases expose these under versioned
# namespaces (e.g. `natural_earth_v5_0_0`) — confirm against the installed version.
land = regionmask.defined_regions.natural_earth.land_110

In [None]:
# Draw the land regions as a quick reference map.
land.plot()

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Country outlines used as the basemap for the mask figure.
# NOTE(review): `gpd.datasets` is deprecated in recent geopandas releases and
# absent from 1.0+ — confirm the installed version or switch to a local copy.
world = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres"))

In [None]:
# Overlay the valid-Landsat mask (red hatched outline) on the world basemap,
# save the figure to disk, then display and release it.
fig, ax = plt.subplots(figsize=(16, 8))
world.plot(ax=ax)
df.plot(ax=ax, facecolor="none", edgecolor="red", hatch=r"//")
ax.set_ylabel("Latitude")
ax.set_xlabel("Longitude")
fig.tight_layout()
fig.savefig("landsat_mask.png")
plt.show()
plt.close(fig)