In [18]:
import pandas as pd
import requests
import os
import neonutilities as nu
import requests
import json
import re

# Base URL of the NEON data API (v0).
SERVER = 'http://data.neonscience.org/api/v0/'

# Folder holding the per-site, per-year CSV files that find_csv_files_by_site scans.
input_csv_directory = r"C:\Users\allen\OneDrive\Desktop\Work\Scripts\IceSat2\sites_by_year"

def find_csv_files_by_site(directory, site):
    """Find the per-year CSV files for one site in ``directory``.

    A file matches when its name starts with ``"<site>_"``, ends with
    ``.csv`` and, once the extension is removed and the name is split on
    ``_``, has at least five fields with an integer in the fifth one
    (e.g. ``DEJU_terrain_vs_canopy_2019_ats10.csv`` -> year 2019).
    Files whose fifth field is not an integer are skipped instead of
    aborting the whole scan.

    Args:
        directory: Folder to scan (not recursive).
        site: Site code the filename must start with.

    Returns:
        A list of ``(filepath, file_site, year)`` tuples, with ``year`` as an
        ``int``, ordered by filename.

    Raises:
        OSError: If ``directory`` cannot be listed.
    """
    matching_files = []
    prefix = f"{site}_"
    extension = '.csv'

    # os.listdir returns entries in arbitrary order; sort for reproducible results.
    for filename in sorted(os.listdir(directory)):
        if not (filename.endswith(extension) and filename.startswith(prefix)):
            continue

        # Strip the extension before splitting so that a year in the last
        # field is read as "2019" rather than "2019.csv".
        parts = filename[:-len(extension)].split('_')

        # Ensure the filename has at least 5 fields to match the structure
        if len(parts) < 5:
            continue

        file_site = parts[0]  # First field is the site
        try:
            file_year = int(parts[4])  # Fifth field is the year
        except ValueError:
            # Not a year-stamped file (e.g. "..._final_v2.csv"); ignore it.
            continue

        filepath = os.path.join(directory, filename)
        matching_files.append((filepath, file_site, file_year))

    return matching_files

def process_csv_file(filepath, site, year):
    """Request a PhenoCam image download for every date listed in a CSV file.

    Args:
        filepath: Path of the CSV file to read; it must have a ``date`` column.
        site: Site identifier forwarded to the download function.
        year: Year associated with the file; only used in the progress message.

    Raises:
        ValueError: If the CSV has no ``date`` column.
    """
    print(f"Processing file: {filepath} for site: {site} in year: {year}")
    # pandas is imported once at the top of the notebook; no local import needed.
    data = pd.read_csv(filepath)

    # Check for a 'date' column
    if 'date' not in data.columns:
        raise ValueError(f"File {filepath} does not contain a 'date' column.")

    # Blank cells are read back as NaN; skip them rather than asking for an
    # image for a missing date.
    for date in data['date'].dropna():
        # NOTE(review): download_phenocam_image is not defined in the visible
        # part of this notebook -- confirm it exists before running this.
        download_phenocam_image(site, date)

def download_phenocam_images_from_results(results):
    """Run process_csv_file on each ``(filepath, site, year)`` entry in turn.

    Processing is best-effort: an exception raised for one file is printed
    and the loop moves on to the next entry.

    Args:
        results: Iterable of ``(filepath, site, year)`` triples.
    """
    for entry in results:
        filepath, site, year = entry
        try:
            process_csv_file(filepath, site, year)
        except Exception as err:
            # Report the failure and keep going with the remaining files.
            print(f"Error processing file {filepath}: {err}")

In [33]:
# Fetch the full image list for one PhenoCam site and save it to a text file,
# one image URL per line.

# Test parameters
phenology = "DP1.00033.001"  # NEON Phenology product code
site_id  = "DEJU"  # Site ID
year = '2019'
month = '05'
day = '16'
save_dir = "test_phenocam_images"  # Temporary directory for testing

sitename = "NEON.D19.DEJU.DP1.00033"
sitename_2 = "canadaojp"
# Construct the specific URL using neonUtilities or manually
url = f"https://phenocam.nau.edu/api/siteimagelist/{sitename_2}/"
print(f"Accessing URL: {url}")
# Without a timeout, requests waits indefinitely on an unresponsive server
# and the cell never returns.
response = requests.get(url, headers={"accept": "application/json"}, timeout=10)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Extract the imagelist array
    imagelist = data.get("imagelist", [])

    if imagelist:
        # Save the imagelist to a file
        # NOTE(review): the file is named after `sitename`, but the list was
        # fetched for `sitename_2` -- confirm which site is intended.
        output_file = f"imagelist_{sitename.replace('.', '_')}.txt"
        with open(output_file, "w") as file:
            for image_url in imagelist:
                file.write(image_url + "\n")
        print(f"Image list saved to {output_file}")
    else:
        print("No images found in the imagelist.")
else:
    print(f"Failed to retrieve data. HTTP Status Code: {response.status_code}")

Accessing URL: https://phenocam.nau.edu/api/siteimagelist/canadaojp/
Image list saved to imagelist_NEON_D19_DEJU_DP1_00033.txt


In [None]:
# Fetch the midday-image records for one PhenoCam site and save their
# `imgpath` values to a text file, one per line.

# Test parameters
phenology = "DP1.00033.001"  # NEON Phenology product code
site_id  = "DEJU"  # Site ID
year = '2019'
month = '05'
day = '16'
save_dir = "test_phenocam_images"  # Temporary directory for testing

sitename = "NEON.D19.DEJU.DP1.00033"
sitename_2 = "canadaojp"
# Construct the specific URL using neonUtilities or manually
url = f"https://phenocam.nau.edu/api/middayimages/{sitename_2}/"
print(f"Accessing URL: {url}")
# Without a timeout, requests waits indefinitely on an unresponsive server
# and the cell never returns.
response = requests.get(url, headers={"accept": "application/json"}, timeout=10)

# Check if the request was successful
if response.status_code == 200:
    # Parse the JSON response
    data = response.json()

    # Ensure the data is valid and not empty
    if isinstance(data, list) and data:
        # Extract imgpath values from each dictionary. Non-dict entries are
        # skipped: on a string, `'imgpath' in entry` would be a substring test
        # and the lookup that follows would raise TypeError.
        image_paths = [
            entry['imgpath']
            for entry in data
            if isinstance(entry, dict) and 'imgpath' in entry
        ]

        if image_paths:
            # Save the image paths to a file
            # NOTE(review): the file is named after `sitename`, but the data
            # was fetched for `sitename_2` -- confirm which site is intended.
            output_file = f"midday_images_{sitename}.txt"
            with open(output_file, "w") as file:
                for path in image_paths:
                    file.write(path + "\n")
            print(f"Midday image paths saved to {output_file}")
        else:
            print("No image paths found in the data.")
    else:
        print("No valid data found in the response.")
else:
    print(f"Failed to retrieve data. HTTP Status Code: {response.status_code}")

In [28]:
from tqdm import tqdm

# Scrape the PhenoCam browse pages of one site to collect every archived image
# URL, then download the first image found as a smoke test.
snow_train = "canadaojp"
root = "https://phenocam.nau.edu"

# Fetch the site's index page; an HTTP error status is treated like a failed
# request instead of being parsed as if it were a valid page.
try:
    resp = requests.get(
        f"https://phenocam.nau.edu/webcam/browse/{snow_train}/",
        timeout=5
    )
    resp.raise_for_status()
except requests.exceptions.RequestException as e:
    raise SystemExit(e)
content = resp.content.decode()
year_tags = re.findall(r"<a name=\"[0-9]{4}\">", content)

years = [int(re.search(r"\d+", yt).group()) for yt in year_tags]
if not years:
    # min()/max() on an empty list would fail with an unhelpful message.
    raise ValueError(f"No year anchors found on the browse page for site '{snow_train}'.")
dates = pd.date_range(f"{min(years)}-01-01", f"{max(years)}-12-31").strftime("%Y/%m/%d")

print(dates)

# Escape the site name so any regex metacharacters in it match literally.
site_re = re.escape(snow_train)
pattern = re.compile(
    rf"/data/archive/{site_re}/[0-9]{{4}}/[0-9]{{2}}/"
    rf"{site_re}_[0-9]{{4}}_[0-9]{{2}}_[0-9]{{2}}_[0-9]{{6}}\.jpg"
)
all_photos = []
failed_dates = []

# Loop through all dates. This issues one request per calendar day, so a
# single Session is used to reuse the connection between requests.
with requests.Session() as session:
    for d in tqdm(dates):
        try:
            resp = session.get(
                f"https://phenocam.nau.edu/webcam/browse/{snow_train}/{d}/",
                timeout=5
            )
        except requests.exceptions.RequestException:
            # Keep going, but remember the date so the gap is visible afterwards.
            failed_dates.append(d)
            continue
        if resp.ok:
            content = resp.content.decode()
            all_photos.extend(f"{root}{m.group()}" for m in pattern.finditer(content))

if failed_dates:
    print(f"Skipped {len(failed_dates)} date page(s) because the request failed.")

# Ensure all_photos list has images
if all_photos:
    first_image_url = all_photos[0]  # Get the first image URL
    output_directory = "downloads"  # Directory to save the image
    os.makedirs(output_directory, exist_ok=True)  # Create directory if it doesn't exist

    # File name based on the URL
    file_name = os.path.join(output_directory, first_image_url.split("/")[-1])

    # Download the image; the `with` block releases the streamed connection
    # even when the status is not 200 or writing fails.
    try:
        with requests.get(first_image_url, stream=True, timeout=10) as response:
            if response.status_code == 200:
                with open(file_name, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
                print(f"Image downloaded successfully: {file_name}")
            else:
                print(f"Failed to download image: {response.status_code}")
    except requests.exceptions.RequestException as e:
        print(f"Error occurred while downloading: {e}")
else:
    print("No images found in the all_photos list.")

Index(['2014/01/01', '2014/01/02', '2014/01/03', '2014/01/04', '2014/01/05',
       '2014/01/06', '2014/01/07', '2014/01/08', '2014/01/09', '2014/01/10',
       ...
       '2024/12/22', '2024/12/23', '2024/12/24', '2024/12/25', '2024/12/26',
       '2024/12/27', '2024/12/28', '2024/12/29', '2024/12/30', '2024/12/31'],
      dtype='object', length=4018)


  8%|▊         | 304/4018 [03:39<44:40,  1.39it/s]


KeyboardInterrupt: 

In [2]:
results = find_csv_files_by_site(input_csv_directory, "DEJU")
for filepath, site, year in results:
    print(f"File: {filepath}, Site: {site}, Year: {year}")

File: C:\Users\allen\OneDrive\Desktop\Work\Scripts\IceSat2\sites_by_year\DEJU_terrain_vs_canopy_2019_ats10.csv, Site: DEJU, Year: 2019
File: C:\Users\allen\OneDrive\Desktop\Work\Scripts\IceSat2\sites_by_year\DEJU_terrain_vs_canopy_2020_ats10.csv, Site: DEJU, Year: 2020
File: C:\Users\allen\OneDrive\Desktop\Work\Scripts\IceSat2\sites_by_year\DEJU_terrain_vs_canopy_2021_ats10.csv, Site: DEJU, Year: 2021
File: C:\Users\allen\OneDrive\Desktop\Work\Scripts\IceSat2\sites_by_year\DEJU_terrain_vs_canopy_2022_ats10.csv, Site: DEJU, Year: 2022
File: C:\Users\allen\OneDrive\Desktop\Work\Scripts\IceSat2\sites_by_year\DEJU_terrain_vs_canopy_2023_ats10.csv, Site: DEJU, Year: 2023
