In [1]:
# Import Libraries
import time
import requests
import json
import csv
import pandas as pd
import matplotlib.pyplot as plt
from arcgis.gis import GIS

In [2]:
test = 'Get Statistics'
timestamp = time.strftime("%Y%m%d-%H%M%S")

base_url = 'https://gis.earthdata.nasa.gov/image/rest/services/POWER/POWER_901_ANNUAL_METEOROLOGY_UTC/ImageServer'
stats_url = base_url + '/statistics'
multi_url = base_url + '/multidimensionalInfo?f=pjson'

multi_get = requests.get(multi_url)
multi_content = multi_get.json()
variables = multi_content['multidimensionalInfo']['variables']
names = [var.get('name') for var in variables]

start_time = time.time()

for var in names:
    params = {
        'variable': var,
        'f': 'json'
    }

    # Run the request
    response = requests.get(stats_url, params)
    stat_content = response.json()
    print(stat_content)
            
# Calculate the cell run time
end_time = time.time()
runtime = end_time - start_time
        
length = len(names)

# Export details to a CSV file
with open('stats_logs.csv', mode='a', newline='') as file:
    writer = csv.writer(file)
    # Check if the file is empty to write headers
    if file.tell() == 0:
        writer.writerow(['TimeStamp', 'Test', 'Num Variables', 'Service Name', 'Cell Run Time'])
    writer.writerow([timestamp, test, length, 'POWER_901_ANNUAL_METEOROLOGY_UTC', runtime])

{'statistics': [{'min': 0, 'max': 8972.0625, 'mean': 2226.154531946176, 'standardDeviation': 2508.575092466757, 'median': 598.1375, 'mode': 0, 'skipX': 1, 'skipY': 1, 'count': 10916640}]}
{'statistics': [{'min': 0, 'max': 5943.0625, 'mean': 902.1668471995546, 'standardDeviation': 1262.9922955858558, 'median': 0, 'mode': 0, 'skipX': 1, 'skipY': 1, 'count': 10916640}]}
{'statistics': [{'min': 0, 'max': 23.078125, 'mean': 0.7215633083989065, 'standardDeviation': 2.985524884598739, 'median': 0, 'mode': 0, 'skipX': 1, 'skipY': 1, 'count': 10916640}]}
{'statistics': [{'min': 0, 'max': 9.918212890625e-05, 'mean': 1.7389695579997767e-05, 'standardDeviation': 1.359583727354395e-05, 'median': 1.4780081954656863e-05, 'mode': 7.390040977328431e-06, 'skipX': 1, 'skipY': 1, 'count': 3089016}]}
{'statistics': [{'min': 0, 'max': 116.390625, 'mean': 12.83871389416638, 'standardDeviation': 18.863950329165817, 'median': 3.651470588235294, 'mode': 0, 'skipX': 1, 'skipY': 1, 'count': 3088862}]}
{'statistic

In [3]:
portal_url = 'https://gis.earthdata.nasa.gov/portal'
gis = GIS(portal_url) #anonymous user
search_result = gis.content.search(query="",item_type="Imagery Layer", max_items=1000)

print("Number of Image Servers:", len(search_result),"\n")

# Extract the URLs of the ImageServer layers
image_server_urls = []
image_server_name = []
for item in search_result:
    if item.url:
        if 'Server' in item.url:
            image_server_name.append(item.title)
            image_server_urls.append(item.url)
combined_list = list(zip(image_server_urls, image_server_name))

# convert combined list to data frame
image_server_df = pd.DataFrame(combined_list, columns=['URL', 'Name'])
image_server_df

# extract to search for POWER in df name
power_df = image_server_df[image_server_df['Name'].str.contains("POWER")]
power_df
print("Number of Image Servers with POWER in the name:", len(power_df),"\n")

Number of Image Servers: 175 

Number of Image Servers with POWER in the name: 14 



In [4]:
def get_samples(base_url, name):
    '''
        TODO: Write function summary
        Inputs:
        Outputs:
    '''
    test = 'Get Statistics'
    timestamp = time.strftime("%Y%m%d-%H%M%S")

    stats_url = base_url + '/statistics'
    multi_url = base_url + '/multidimensionalInfo?f=pjson'

    try:
        multi_get = requests.get(multi_url)
        multi_content = multi_get.json()
        variables = multi_content['multidimensionalInfo']['variables']
        names = [var.get('name') for var in variables]
        length = len(names)

        start_time = time.time()

        for var in names:
            params = {
                'variable': var,
                'f': 'json'
            }

            # Run the request
            response = requests.get(stats_url, params)
            stat_content = response.json()
            print(stat_content)
            
        # Calculate the cell run time
        end_time = time.time()
        runtime = end_time - start_time
        
        print(f'Fishing running get statistics for {name}')
        
        # Export details to a CSV file
        with open('stats_logs.csv', mode='a', newline='') as file:
            writer = csv.writer(file)
            # Check if the file is empty to write headers
            if file.tell() == 0:
                writer.writerow(['Start Time', 'Test', 'Num Variables', 'Service Name', 'Cell Run Time'])
            writer.writerow([start_time, test, length, name, runtime])
    
    except(json.JSONDecoder, KeyError) as e:
        print(f"Error decoding JSON or accessing key: {e} for {name}")
        print('Starting single variable statistics retrieval')
        
        # Reset the start time for single variable histogram retrieval
        start_time = time.time()
        
        # Set the file output to JSON
        params = {
            'f': 'json'
        }
        
        # Run request
        response = requests.get(stats_url, params)
        
        # calculate the cell run time
        end_time = time.time()
        runtime = end_time - start_time # Total cell run time
        
        # Export details to a CSV file
        with open('stats_logs.csv', mode='a', newline='') as file:
            writer = csv.writer(file)
            # Check if the file is empty to write headers
            if file.tell() == 0:
                writer.writerow(['Start Time', 'Test', 'Num Variables', 'Base URL', 'Cell Run Time'])
            writer.writerow([start_time, test, '1', name, runtime])
            
        print(f'Finished running single histogram request for {stats_url}')
        return
    
    print('Finished returning all histograms')
    
    return 'done'

In [5]:
for url, name in combined_list:
    get_samples(base_url=url, name=name)

TypeError: catching classes that do not inherit from BaseException is not allowed