In [12]:
# Install dependencies with %pip so they land in the running kernel's environment.
# NOTE(review): neither install is version-pinned, and docstring-generator is not
# referenced by any code visible in this notebook — confirm it is still needed.
%pip install earthengine-api --upgrade
%pip install docstring-generator

Note: you may need to restart the kernel to use updated packages.
Collecting docstring-generator
  Downloading docstring_generator-0.2.0-py3-none-any.whl (9.5 kB)
Collecting strongtyping (from docstring-generator)
  Obtaining dependency information for strongtyping from https://files.pythonhosted.org/packages/35/e6/8efdc2719e2fe368d4ff1887912b484a252e85cbb80dcf88f6ca1de138fb/strongtyping-3.11.4-py3-none-any.whl.metadata
  Downloading strongtyping-3.11.4-py3-none-any.whl.metadata (2.0 kB)
Downloading strongtyping-3.11.4-py3-none-any.whl (23 kB)
Installing collected packages: strongtyping, docstring-generator
Successfully installed docstring-generator-0.2.0 strongtyping-3.11.4
Note: you may need to restart the kernel to use updated packages.


In [1]:
import ee
import numpy as np
import pandas as pd
from concurrent.futures import ProcessPoolExecutor
import os

In [2]:
# Trigger the interactive authentication flow (prompts for a verification
# code and saves an authorization token).
ee.Authenticate()

# Initialize the Earth Engine client library so the `ee` calls below can run.
ee.Initialize()

Enter verification code:  <redacted>



Successfully saved authorization token.


In [None]:
# Work from the project data directory: the relative "./" paths used by the
# functions in this cell (input CSVs and the smap_*.csv outputs) resolve against it.
# NOTE(review): hard-coded absolute path — adjust for the machine running this.
file_dir = "/home/ubuntu/gridmet_test_run"
os.chdir(file_dir)

def smap_getter(lat_long:list)->pd.DataFrame:
    """Download daily-mean SMAP L4 values for one site from Earth Engine.

    For each configured time window the ``NASA/SMAP/SPL4SMGP/007`` collection
    is filtered by date, reduced to the selected bands and sampled at the site
    with ``getRegion``. The returned rows are averaged per calendar day and the
    windows are concatenated into a single frame.

    Parameters
    ----------
    lat_long : list
        Two-element sequence ``[latitude, longitude]``.

    Returns
    -------
    pd.DataFrame
        Columns ``date`` (``YYYY-MM-DD`` string), one column per selected
        band (daily mean), plus ``lat`` and ``lon`` copied from ``lat_long``.
    """
    site_lat, site_lon = lat_long[0], lat_long[1]

    # filterDate treats the end date as exclusive, so each window has to start
    # exactly where the previous one ends; a one-day gap silently drops that day.
    # NOTE(review): the split into two windows presumably keeps each getInfo()
    # response small — confirm before merging them.
    time_periods = [("2017-10-01", "2018-01-01"),
                    ("2018-01-01", "2018-07-02")]
    selected_variables = ["heat_flux_ground", "net_downward_shortwave_flux",
                          "snow_mass", "snow_depth", "snow_melt_flux", "surface_temp"]
    site_point = ee.Geometry.Point([site_lon, site_lat])  # Point takes [lon, lat]

    smap_by_time = []
    for start_time, end_time in time_periods:
        smap_data = (
            ee.ImageCollection("NASA/SMAP/SPL4SMGP/007")
            .filterDate(start_time, end_time)
            .select(selected_variables)
            .getRegion(site_point, 11000)
            .getInfo()  # materialise the result client-side as a list of rows
        )
        # The first row is the header; it is skipped and replaced with explicit
        # names (the leading id column is parsed as the timestamp below).
        data = pd.DataFrame(smap_data[1:], columns=["date", "lon", "lat", "time"] + selected_variables)
        data = data.drop(columns=["lon", "lat", "time"])
        # Missing band values come back as None; coerce to numeric so a band
        # that is entirely missing does not break the mean aggregation.
        data[selected_variables] = data[selected_variables].apply(pd.to_numeric, errors="coerce")
        data["date"] = pd.to_datetime(data["date"], format = "%Y%m%d_%H%M").dt.date.astype(str)
        data = data.groupby("date", as_index=False).mean()
        smap_by_time.append(data)

    grabbed_data = pd.concat(smap_by_time, ignore_index=True)
    grabbed_data["lat"] = site_lat
    grabbed_data["lon"] = site_lon

    return grabbed_data


def parallel_processing(lat_lon_tuple, name, max_workers: int = 14) -> pd.DataFrame:
    """Run ``smap_getter`` for many sites in parallel and save the combined result.

    Parameters
    ----------
    lat_lon_tuple : iterable
        Iterable of ``[latitude, longitude]`` pairs, one per site.
    name : str
        Label used in the output file name ``./smap_{name}.csv``.
    max_workers : int, optional
        Number of worker processes; defaults to 14.

    Returns
    -------
    pd.DataFrame
        All per-site frames concatenated row-wise. An empty frame is returned
        (and still written to disk) when no sites are supplied.
    """
    sites = list(lat_lon_tuple)

    if sites:
        with ProcessPoolExecutor(max_workers=max_workers) as pool:
            results = list(pool.map(smap_getter, sites))
        all_data = pd.concat(results, ignore_index=True)
    else:
        # pd.concat raises on an empty list, so skip the pool entirely
        all_data = pd.DataFrame()

    all_data.to_csv(f"./smap_{name}.csv", index=False)
    return all_data

def main():
    """Fetch SMAP data for the training and testing site lists.

    Reads both CSVs from the current working directory, keeps the unique
    coordinate pairs from each, and hands them to ``parallel_processing``,
    training sites first and testing sites second.
    """
    testing_csv = "./testing_all_ready.csv_hackweek_subset.csv"
    training_csv = "./final_merged_data_3yrs_cleaned_v3_hackweek_subset.csv"

    # The two files name their coordinate columns differently.
    testing_sites = pd.read_csv(testing_csv)[["Latitude", "Longitude"]].drop_duplicates()
    training_sites = pd.read_csv(training_csv)[["lat", "lon"]].drop_duplicates()

    parallel_processing(training_sites.values.tolist(), "training")
    parallel_processing(testing_sites.values.tolist(), "testing")
    print("finished running.")
# Run the extraction for both site lists.

main()