In [58]:
from pyproj import Proj, transform
import math
import requests
from bs4 import BeautifulSoup
import os
import h5py
from pyhdf.SD import SD, SDC
from osgeo import gdal, osr
import warnings

# Ignore every FutureWarning for the rest of this kernel session.
# NOTE(review): presumably added to hide the deprecation warning pyproj emits
# for `transform` (used by latlon_to_modis_tile) — confirm. The filter is
# global, so FutureWarnings from all other libraries are hidden as well.
warnings.simplefilter(action="ignore", category=FutureWarning)


# Lazily-built WGS84 lon/lat -> MODIS sinusoidal transformer, shared by every
# call so the projection pipeline is constructed once instead of per lookup.
_MODIS_TRANSFORMER = None


def _modis_transformer():
    """Return the cached WGS84 (lon, lat) -> MODIS sinusoidal transformer."""
    global _MODIS_TRANSFORMER
    if _MODIS_TRANSFORMER is None:
        # Local import: the file-level import only brings in Proj/transform.
        from pyproj import Transformer

        # MODIS Sinusoidal Projection (same definitions the original used)
        modis_sinu = Proj("+proj=sinu +R=6371007.181 +nadgrids=@null +wktext")
        wgs84 = Proj(proj="latlong", datum="WGS84")
        # always_xy=True keeps the (lon, lat) -> (x, y) argument order explicit.
        _MODIS_TRANSFORMER = Transformer.from_proj(wgs84, modis_sinu, always_xy=True)
    return _MODIS_TRANSFORMER


def latlon_to_modis_tile(lat, lon):
    """Return the MODIS sinusoidal grid tile ``(h, v)`` containing a point.

    Uses ``pyproj.Transformer`` rather than the deprecated module-level
    ``pyproj.transform`` function, and reuses one transformer across calls.

    Args:
        lat: Latitude in decimal degrees (WGS84).
        lon: Longitude in decimal degrees (WGS84).

    Returns:
        Tuple ``(h, v)`` of integer tile indices: ``h`` counts tiles eastward
        from the western edge of the grid, ``v`` counts tiles southward from
        the northern edge.
    """
    # Convert lat/lon to MODIS Sinusoidal coordinates (metres)
    x, y = _modis_transformer().transform(lon, lat)

    # MODIS Grid specifics
    tile_size_meters = 1111950.5196666666  # Size of each MODIS tile in meters
    x_origin = -20015109.354  # Westernmost coordinate
    y_origin = 10007554.677  # Northernmost coordinate

    # The grid spans 36 x 18 tiles (grid extent divided by the tile size).
    h_tiles, v_tiles = 36, 18

    h = int((x - x_origin) / tile_size_meters)
    v = int((y_origin - y) / tile_size_meters)

    # A point exactly on the eastern/southern edge (lon=180 at the equator,
    # lat=-90) would otherwise yield the non-existent tile h36 / v18.
    h = min(max(h, 0), h_tiles - 1)
    v = min(max(v, 0), v_tiles - 1)

    return h, v


def generate_filename(lat, lon, template="GLASS01E01.V60.A2002001.h00v08.2022010.hdf"):
    """Build a GLASS file name for the MODIS tile that contains a point.

    The tile token in ``template`` (any ``hNNvNN`` pattern, not only the
    literal ``h00v08`` of the default template) is replaced by the tile that
    contains ``(lat, lon)``. With the default template the result is the same
    as the earlier literal replacement.

    Args:
        lat: Latitude in decimal degrees.
        lon: Longitude in decimal degrees.
        template: File name containing an ``hNNvNN`` tile token.

    Returns:
        The template with every ``hNNvNN`` token replaced by the point's tile.
        A template without such a token is returned unchanged.
    """
    import re  # local import keeps this block self-contained

    h, v = latlon_to_modis_tile(lat, lon)
    hv_str = f"h{h:02d}v{v:02d}"
    # Match the token by shape so custom templates with another tile work too.
    return re.sub(r"h\d{2}v\d{2}", hv_str, template)


def get_hdf_files(url, timeout=30):
    """List the ``.hdf`` links found on an HTML directory page.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the whole download run.

    Returns:
        The ``href`` values of all anchors ending in ``.hdf``; an empty list
        when the page cannot be retrieved (network error or non-200 status).
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Treat connection problems like HTTP failures: report and return [].
        print(f"Failed to retrieve the webpage. Error: {exc}")
        return []

    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")
    return [
        a["href"]
        for a in soup.find_all("a", href=True)
        if a["href"].endswith(".hdf")
    ]


def filter_files_by_tile(hdf_files, h, v):
    """Select the file names that mention a given MODIS tile.

    Args:
        hdf_files: Iterable of file names (strings).
        h: Horizontal tile index, formatted as two digits.
        v: Vertical tile index, formatted as two digits.

    Returns:
        A new list, in input order, of the names containing ``hHHvVV``.
    """
    tile_token = "h{:02d}v{:02d}".format(h, v)
    matches = []
    for candidate in hdf_files:
        if tile_token in candidate:
            matches.append(candidate)
    return matches


def download_file(url, file_name, output_directory, timeout=60):
    """Stream ``url + file_name`` into ``output_directory``.

    Args:
        url: Base address; ``file_name`` is appended to it verbatim.
        file_name: Name of the remote file, also used as the local name.
        output_directory: Existing directory that receives the file.
        timeout: Seconds to wait on the server before giving up.

    Returns:
        ``True`` when the file was written completely, ``False`` when the
        request failed (non-200 status or network error). Earlier versions
        returned ``None``; callers that ignore the result are unaffected.
    """
    file_url = url + file_name
    file_path = os.path.join(output_directory, file_name)
    started_writing = False
    try:
        # The context manager releases the streamed connection even on error.
        with requests.get(file_url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Failed to download file. Status code: {response.status_code}")
                return False
            with open(file_path, "wb") as file:
                started_writing = True
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
    except requests.RequestException as exc:
        # Do not leave a truncated file behind for later steps to pick up.
        if started_writing and os.path.exists(file_path):
            os.remove(file_path)
        print(f"Failed to download file. Error: {exc}")
        return False
    return True


def inspect_hdf_file(file_path):
    """Print a summary of the top-level entries of a file opened with h5py.

    For each top-level key the object itself is printed; datasets additionally
    print their shape, dtype and full contents, groups print their child keys.

    NOTE(review): h5py reads HDF5, while convert_projection opens these files
    through GDAL's HDF4_EOS driver — confirm this helper is meant for the same
    files.

    Args:
        file_path: Path of the file to open read-only.
    """
    with h5py.File(file_path, "r") as hdf:
        # List all groups
        print("Keys: %s" % hdf.keys())
        for entry_name in hdf.keys():
            print(f"\nContent in '{entry_name}':")
            node = hdf[entry_name]
            print(node)

            if isinstance(node, h5py.Dataset):
                print(f"Dataset '{entry_name}' shape: {node.shape}")
                print(f"Dataset '{entry_name}' dtype: {node.dtype}")
                # Reads the whole dataset into memory before printing it.
                print(node[:])
                continue

            if isinstance(node, h5py.Group):
                print(f"Group '{entry_name}' contains: {list(node.keys())}")


def get_directories(url, timeout=30):
    """List the sub-directory links found on an HTML directory page.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The ``href`` values of all anchors ending in ``/``. An empty list is
        returned (after printing the error) when the page cannot be fetched,
        so callers can always take ``len()`` of, or iterate over, the result.
    """
    try:
        # Send a GET request to the URL
        response = requests.get(url, timeout=timeout)
        # An error page must not be parsed as if it were a listing.
        response.raise_for_status()
    except requests.RequestException as e:
        print(f"Error: {e}")
        return []

    # Parse the response text with BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")

    # Keep only the links that represent directories
    return [
        a["href"]
        for a in soup.find_all("a", href=True)
        if a["href"].endswith("/")
    ]


def convert_projection(file_path):
    """Warp the LAI_500M subdataset of a GLASS HDF file to EPSG:4326 GeoTIFF.

    The output is written next to the input as ``<stem>_geographic.tif``.
    Failures are reported with ``print`` rather than raised, as before.

    Args:
        file_path: Path of the HDF file holding the ``GLASS01E01`` grid.

    Returns:
        The output file path on success, ``None`` on failure. Earlier versions
        always returned ``None``; callers that ignore the result are unaffected.
    """
    lai_sds_ds = None
    warped_ds = None
    try:
        # Access the LAI_500M subdataset
        lai_sds_path = 'HDF4_EOS:EOS_GRID:"' + file_path + '":GLASS01E01:LAI_500M'
        lai_sds_ds = gdal.Open(lai_sds_path, gdal.GA_ReadOnly)
        # Without gdal.UseExceptions(), a failed open returns None, not an error.
        if lai_sds_ds is None:
            raise RuntimeError(f"GDAL could not open {lai_sds_path}")

        # Spatial reference for the geographic output projection
        srs = osr.SpatialReference()
        srs.ImportFromEPSG(4326)  # WGS84

        # Create the output filename
        output_file = os.path.splitext(file_path)[0] + "_geographic.tif"

        # Use gdal.Warp to convert the projection
        warped_ds = gdal.Warp(output_file, lai_sds_ds, dstSRS=srs)
        if warped_ds is None:
            raise RuntimeError(f"GDAL could not warp {lai_sds_path} to {output_file}")

        return output_file
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Dropping the references flushes the output and closes the source,
        # so the caller can safely delete the HDF file afterwards.
        warped_ds = None
        lai_sds_ds = None

In [57]:
import pandas as pd

# Load the site coordinate tables of both flux networks.
ameriflux_coords = pd.read_csv("../data/Ameriflux_coords.csv")
fluxnet_coords = pd.read_csv("../data/Fluxnet_coords.csv")

# Stack the two tables, keep the first row for each distinct value of the
# first column, renumber the rows from 0 and rename "Name" to "name".
merged_coords = (
    pd.concat([ameriflux_coords, fluxnet_coords], ignore_index=True)
    .pipe(lambda stacked: stacked.drop_duplicates(subset=stacked.columns[0]))
    .reset_index(drop=True)
    .rename(columns={"Name": "name"})
)
merged_coords

Unnamed: 0,name,Lat,Lon
0,AR-TF1,-54.9733,-66.7335
1,BR-CST,-7.9682,-38.3842
2,BR-Npw,-16.4980,-56.4120
3,CA-ARB,52.6950,-83.9452
4,CA-ARF,52.7008,-83.9550
...,...,...,...
382,US-Uaf,64.8663,-147.8555
383,US-WCr,45.8059,-90.0799
384,US-Wi2,46.6869,-91.1528
385,ZA-Kru,-25.0197,31.4969


In [88]:
# Download, reproject and clean up one GLASS LAI tile per site, year and
# day-of-year directory.
# NOTE(review): `base_url` is not defined in any cell visible here — it must be
# set in a cell above for Restart & Run All to work.
site_names = merged_coords["name"]
site_lat = merged_coords["Lat"]
site_lon = merged_coords["Lon"]

# Remote listings do not depend on the site, so fetch each one only once
# instead of once per site. Only non-empty results are cached, which means a
# failed fetch is retried the next time it is needed.
directory_cache = {}
listing_cache = {}

for i, (name, lat, lon) in enumerate(zip(site_names, site_lat, site_lon)):
    # The tile depends only on the site, not on the year or directory.
    h, v = latlon_to_modis_tile(lat, lon)

    for year in range(2002, 2021):
        output_directory = f"../outputs/Glass/{name}/{str(year)}"
        os.makedirs(output_directory, exist_ok=True)

        year_url = base_url + str(year) + "/"
        directories = directory_cache.get(year_url)
        if not directories:
            # `or []` guards against a None result when the listing fails.
            directories = get_directories(year_url) or []
            if directories:
                directory_cache[year_url] = directories

        # The first link is skipped, as before.
        # NOTE(review): presumably it is the parent-directory link — confirm.
        for directory in directories[1:]:
            url = year_url + directory
            print(f"index {i}, name {name}, year {year}, directory {directory}")

            hdf_files = listing_cache.get(url)
            if not hdf_files:
                hdf_files = get_hdf_files(url)
                if hdf_files:
                    listing_cache[url] = hdf_files

            filtered_files = filter_files_by_tile(hdf_files, h, v)
            if not filtered_files:
                print(
                    f"No .hdf files found for MODIS, name {name}, year {year}, directory {directory}."
                )
                continue

            file_to_download = filtered_files[0]
            download_file(url, file_to_download, output_directory)
            # Same path download_file writes to.
            file_path = os.path.join(output_directory, file_to_download)
            if not os.path.exists(file_path):
                # Download failed (already reported); nothing to convert/delete.
                continue
            convert_projection(file_path)
            os.remove(file_path)

index 0, name AR-TF1, year 2002, directory 001/
index 0, name AR-TF1, year 2002, directory 009/
index 0, name AR-TF1, year 2002, directory 017/
index 0, name AR-TF1, year 2002, directory 025/
index 0, name AR-TF1, year 2002, directory 033/
index 0, name AR-TF1, year 2002, directory 041/
index 0, name AR-TF1, year 2002, directory 049/
index 0, name AR-TF1, year 2002, directory 057/
index 0, name AR-TF1, year 2002, directory 065/
index 0, name AR-TF1, year 2003, directory 001/
index 0, name AR-TF1, year 2003, directory 009/
index 0, name AR-TF1, year 2003, directory 017/
index 0, name AR-TF1, year 2003, directory 025/
index 0, name AR-TF1, year 2003, directory 033/
index 0, name AR-TF1, year 2003, directory 041/
index 0, name AR-TF1, year 2003, directory 049/
index 0, name AR-TF1, year 2003, directory 057/
index 0, name AR-TF1, year 2003, directory 065/
index 1, name BR-CST, year 2002, directory 001/
index 1, name BR-CST, year 2002, directory 009/
index 1, name BR-CST, year 2002, directo