In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
import random
InteractiveShell.ast_node_interactivity = "all"
import os
from datetime import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import glob
from tqdm import tqdm

import plotly.figure_factory as ff
import plotly.express as px
import numpy as np
import requests
import json
from datetime import datetime
import tarfile
pd.set_option('display.max_rows', 100)

In [2]:
from src.appeears_tools import (
    get_appeears_token, list_appeears_tasks, get_appeears_files, download_appeears_file,
    request_appeears_task, get_dates_df
)
from src.config import south_america_coordinates, REGIONS, SOUTH_AMERICA

## Login
https://appeears.earthdatacloud.nasa.gov/api/#introduction

The API leverages the same NASA Earthdata Login as the AρρEEARS user interface.

Whenever a request is made to one of the secure API endpoints, HTTP Basic Authentication credentials are required. Authentication first requires a call to the login service using the NASA Earthdata Login username and password to generate a Bearer token. This Bearer token will be then used for all subsequent calls to secure endpoints. **This token will expire approximately 48 hours after being acquired.** All programs should check for an HTTP 403 Forbidden status code to successfully handle token expiration by acquiring a new token.

Create

In [3]:
token = get_appeears_token()
token

'clwwuoibJBxQWHl5fUzOGstNyacf9Sc24X0L1bUg29IUEVJNWRAJlXzB0ltmI8uSOUwyiIMprauSC0HiusGoww'

# List tasks

In [4]:
tasks = list_appeears_tasks(token)
tasks[["task_name", "task_id", "created", "completed", "estimate", "status"]].head(5)

Unnamed: 0,task_name,task_id,created,completed,estimate,status
0,sa-2020-12,74e22449-02d0-4e07-9ff9-0f62c9ac4b06,2023-05-14T11:23:07.429356,2023-05-14T13:25:22.411030,"{'request_size': 18658935783.429977, 'request_...",done
1,sa-2020-11,d5d7afd4-1c29-468c-b011-1c47cd6db002,2023-05-14T11:23:06.685921,2023-05-14T13:25:01.757892,"{'request_size': 18658935783.429977, 'request_...",done
2,sa-2020-10,6d988dc0-63ae-4244-a8fc-a63e9c36473c,2023-05-14T11:23:05.315305,2023-05-14T13:23:31.614518,"{'request_size': 18658935783.429977, 'request_...",done
3,sa-2020-09,6840ff49-0272-41e9-ae4c-9b978e59f3da,2023-05-14T11:23:04.302555,2023-05-14T13:21:40.325700,"{'request_size': 18658935783.429977, 'request_...",done
4,sa-2020-08,846ff314-e371-49bc-a762-c06fe7eed7e7,2023-05-14T11:23:03.676877,2023-05-14T13:24:11.975166,"{'request_size': 18658935783.429977, 'request_...",done


In [10]:
done = tasks[tasks.status == "done"]

done = done[done.task_name.str.startswith(SOUTH_AMERICA)]

done = done.sort_values(by='completed', ascending=False)
# done['y'] = done.task_name.apply(lambda s: int(s.split('-')[1]))
# done = done[done.y >= 2013]

# done = done[done.task_name.str.startswith('amazon_patch_v2')]
# done = done.head(1)

# done = done.head(2)

In [11]:
done.head(2)
done.tail(2)
done.shape

Unnamed: 0,task_name,task_id,created,completed,estimate,status
11,sa-2020-01,88044542-79b6-4bcd-9f42-87db0624beac,2023-05-14T11:22:58.406026,2023-05-14T13:35:59.888497,"{'request_size': 18658935783.429977, 'request_...",done
0,sa-2020-12,74e22449-02d0-4e07-9ff9-0f62c9ac4b06,2023-05-14T11:23:07.429356,2023-05-14T13:25:22.411030,"{'request_size': 18658935783.429977, 'request_...",done


Unnamed: 0,task_name,task_id,created,completed,estimate,status
93,sa-2013-03,e4e22d61-f81a-42ca-a98e-afa7ea116a9a,2023-05-14T11:22:03.695796,2023-05-14T12:16:06.681754,"{'request_size': 18658935783.429977, 'request_...",done
94,sa-2013-02,adc0789f-6159-4aab-9c88-6f90424f9fbf,2023-05-14T11:22:02.887531,2023-05-14T12:14:51.556806,"{'request_size': 18658935783.429977, 'request_...",done


(96, 6)

In [12]:
# drop duplicated tasks keep latest
done = done.drop_duplicates(subset='task_name', keep='first')

# List files

In [13]:
task_id = done.task_id.values[0]
task_id

'88044542-79b6-4bcd-9f42-87db0624beac'

In [14]:
files = get_appeears_files(token, task_id)
files.head()
files.shape

Unnamed: 0,sha256,file_id,file_name,file_size,file_type,s3_url
2,7be1e192d3d6f1b5cd7585e3d2eb5fb83fe7f9d572b1cc...,747bb4e9-e6c0-4e8f-98d3-ba462dd9f897,VNP13A1.001_500m_aid0001.nc,2825794868,nc,s3://appeears-output/88044542-79b6-4bcd-9f42-8...
1,0a6b7680ac7c4f1fa8d8a836bbe3be08b9c94fae505878...,6f81b366-ccbe-4d73-8c49-fb2cdb5425a6,VNP09H1.001_500m_aid0001.nc,857585155,nc,s3://appeears-output/88044542-79b6-4bcd-9f42-8...
0,357211e1c82deb9d486ca04606fd073032a14b4a1114b5...,7b7e6d6c-fae4-4ebc-850d-1ed4c289bbcb,MCD15A2H.061_500m_aid0001.nc,170971383,nc,s3://appeears-output/88044542-79b6-4bcd-9f42-8...
18,09a01e852a2ee0d239a46f4950eee07d0fb495f07f9857...,99f58cc7-8a77-49da-a343-5ad375ff3e5c,sa-2020-01-granule-list.txt,27646,txt,s3://appeears-output/88044542-79b6-4bcd-9f42-8...
23,ca394fb8dfa8d78e365121cfa5b38abd8d8737e61dc81c...,42210052-b401-4594-bc3e-0051b61fdb8e,README.md,26697,txt,s3://appeears-output/88044542-79b6-4bcd-9f42-8...


(24, 6)

# Download all files

In [15]:
os.makedirs("appeears_data", exist_ok=True)

In [16]:
all_files = []
for task_name, task_id in tqdm(done[['task_name', 'task_id']].values):
    files = get_appeears_files(token, task_id)
    for file_id, file_name in files[['file_id', 'file_name']].values[:3]:  # 3 largest files for satellite
        all_files.append([
            task_name, task_id, file_name, file_id
        ])
    
files_to_download = pd.DataFrame(
    all_files,
    columns=['task_name', 'task_id', 'file_name', 'file_id']
)

100%|██████████| 96/96 [00:13<00:00,  7.38it/s]


In [17]:
files_to_download.shape
files_to_download.head(2)
files_to_download.tail(2)


(288, 4)

Unnamed: 0,task_name,task_id,file_name,file_id
0,sa-2020-01,88044542-79b6-4bcd-9f42-87db0624beac,VNP13A1.001_500m_aid0001.nc,747bb4e9-e6c0-4e8f-98d3-ba462dd9f897
1,sa-2020-01,88044542-79b6-4bcd-9f42-87db0624beac,VNP09H1.001_500m_aid0001.nc,6f81b366-ccbe-4d73-8c49-fb2cdb5425a6


Unnamed: 0,task_name,task_id,file_name,file_id
286,sa-2013-02,adc0789f-6159-4aab-9c88-6f90424f9fbf,VNP09H1.001_500m_aid0001.nc,8d9432f6-b51d-40b5-af09-e8a88d7c40b1
287,sa-2013-02,adc0789f-6159-4aab-9c88-6f90424f9fbf,MCD15A2H.061_500m_aid0001.nc,fb760d1a-9897-436d-92b4-a0a74bbfa08d


In [18]:
DOWNLOAD_FLAG = True
for task_name, task_id, file_name, file_id in tqdm(files_to_download.values):
    dest_dir = f"appeears_data/{task_name}/"
    filepath = os.path.join(dest_dir, file_name)
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    if DOWNLOAD_FLAG and (not os.path.exists(filepath)) and (not os.path.exists(f"maps/{task_name}.npz")):
        download_appeears_file(token, task_id, file_id, filepath)

100%|██████████| 288/288 [2:04:48<00:00, 26.00s/it]  


# Send requests for satellite data

In [None]:
requested = list_appeears_tasks(token)
requested = requested[requested.created > "2023-05"]
requested[["task_name", "task_id", "created", "completed", "estimate", "status"]].head(2)
requested[["task_name", "task_id", "created", "completed", "estimate", "status"]].tail(5)
requested.shape

In [None]:
dates_df = get_dates_df('amazon_patch_v2')
dates_df

In [None]:
# dates_df = pd.read_csv('satellite_dates.csv', parse_dates=['s_d', 'l_d', 'v_d'])

In [None]:
dates_df['start_date'] = dates_df[['s_d', 'l_d', 'v_d']].min(axis=1)
dates_df['end_date'] = dates_df[['s_d', 'l_d', 'v_d']].max(axis=1)
dates_df.start_date = dates_df.start_date.dt.strftime("%m-%d-%Y")
dates_df.end_date = dates_df.end_date.dt.strftime("%m-%d-%Y")
dates_df

In [None]:
# dates_df = dates_df.tail(1)
# requested = requested[requested.task_name == "RM"]

In [None]:
SEND = False

In [None]:
for start_date, end_date, ym in dates_df[['start_date', 'end_date', 'm']].values:
    task_name = f"sa-{ym}"
    if SEND and (task_name not in requested.task_name.values):
        task_response = request_appeears_task(
            token,
            start_date,
            end_date,
            south_america_coordinates,
            task_name
        )
        print(ym, task_response)

In [None]:
tasks = list_appeears_tasks(token)
tasks[["task_name", "task_id", "created", "completed", "estimate", "status"]].head(2)

# FIRE CCI


wget -e robots=off --mirror --no-parent -r https://dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2020/


In [64]:
!ls firecci

dap.ceda.ac.uk


In [58]:
gzs=glob.glob("./firecci/**/*.gz", recursive=True)

In [59]:
# Keep South America
for f in gzs:
    if "AREA_2" not in f:
        os.remove(f)
    else:
        pass

In [60]:
len(gzs)

99

In [63]:
gzs

['./firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015/20150801-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz',
 './firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015/20151001-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz',
 './firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015/20150401-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz',
 './firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015/20150301-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz',
 './firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015/20151201-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz',
 './firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015/20150701-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz',
 './firecci/dap.ceda.ac.uk/neodc/esacci/fire/data/burned_area/MODIS/pixel/v5.1/compressed/2015

In [66]:
for f in tqdm(gzs):
    tar = tarfile.open(f, "r:gz")
    tar.extractall()
    tar.close()

100%|██████████| 99/99 [00:18<00:00,  5.31it/s]


In [62]:
!ls firecci

dap.ceda.ac.uk


In [26]:
rm firecci/20200101-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz

rm: cannot remove 'firecci/20200101-ESACCI-L3S_FIRE-BA-MODIS-AREA_2-fv5.1.tar.gz': No such file or directory
