### Brief Description

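This notebook downloads daily weather data from the NASA POWER API for each configured location: precipitation, maximum and minimum temperature, wind speed, solar radiation, and relative humidity. It then merges the per-parameter files into one dataset per location.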

#### Imports

In [93]:
import requests
import pandas as pd
from pathlib import Path
from datetime import date


#### Configs

In [94]:
## Config

# Bounding boxes (decimal degrees) sent to the regional endpoint, keyed by the
# label used in output file names.
# NOTE(review): each box spans a few degrees, which looks much larger than a
# single county -- confirm the extents are intentional.
LOCATIONS = {
    "Story_County_IA": {
        "lat_min": 41.85,
        "lat_max": 44.20,
        "lon_min": -93.81,
        "lon_max": -90.19,
    },
    "McLean_County_IL": {
        "lat_min": 40.21,
        "lat_max": 42.85,
        "lon_min": -89.23,
        "lon_max": -86.47,
    },
}

# NASA POWER parameter codes; each one is requested and saved separately.
PARAMS = [
    'PRECTOTCORR',        # precipitation (corrected)
    'T2M_MAX',            # maximum temperature at 2 m
    'T2M_MIN',            # minimum temperature at 2 m
    'WS10M',              # wind speed at 10 m
    'ALLSKY_SFC_SW_DWN',  # all-sky surface shortwave downward irradiance
    'RH2M',               # relative humidity at 2 m
]

# Year bounds consumed by the download cell.
START_YEAR = 2014
END_YEAR = 2025

# Endpoint for daily data over a latitude/longitude box.
BASE_URL = 'https://power.larc.nasa.gov/api/temporal/daily/regional'

# Raw per-parameter CSVs land in OUTPUT_DIR; merged per-location CSVs in INTERIM_DIR.
OUTPUT_DIR = Path("C:/Users/Arnold/OneDrive/Desktop/CAPSTONE PROJECT/farming_risk_regions/data/raw/weather_data")
INTERIM_DIR = Path("C:/Users/Arnold/OneDrive/Desktop/CAPSTONE PROJECT/farming_risk_regions/data/interim/weather_data")

# Create both directories up front so later cells can write without checking.
for _dir in (OUTPUT_DIR, INTERIM_DIR):
    _dir.mkdir(parents=True, exist_ok=True)


#### Downloading the Data

In [95]:
## Looping through parameters and locations to download data
#
# For every (parameter, location) pair, request one year at a time from the
# NASA POWER regional endpoint, collect the daily values into a flat list of
# records, and write a single CSV per pair into OUTPUT_DIR. A failed year is
# reported and skipped, so one bad request does not abort the whole download.

for param in PARAMS:

    for location, coords in LOCATIONS.items():

        all_records = []

        output_file = OUTPUT_DIR / f"{location}_{param}_data.csv"

        # Looping over years. END_YEAR is inclusive: the range must reach it,
        # otherwise the partial-year branch below can never run and the final
        # year is silently missing from the output.
        for year in range(START_YEAR, END_YEAR + 1):

            year_start = date(year, 1, 1)
            # The final year is only requested through 31 August.
            year_end = date(year, 12, 31) if year < END_YEAR else date(year, 8, 31)

            try:
                # Construct API request. Refer to https://power.larc.nasa.gov/docs/v1/api/
                params = {
                    "parameters": param,
                    "community": "AG", # Agricultural community
                    "longitude-min": coords['lon_min'],
                    "longitude-max": coords['lon_max'],
                    "latitude-min": coords['lat_min'],
                    "latitude-max": coords['lat_max'],
                    "start": year_start.strftime("%Y%m%d"),
                    "end": year_end.strftime("%Y%m%d"),
                    "format": "JSON"
                }

                response = requests.get(BASE_URL, params=params, timeout=10)
                print(f"Request URL: {response.url}")

                response.raise_for_status()
                data = response.json()

                # Parsing and storing the data

                if 'features' in data and data['features']:
                    # NOTE(review): only the first feature is read. If the
                    # response carries one feature per grid cell, the rest are
                    # discarded -- confirm this is intended.
                    param_data = data["features"][0]["properties"]["parameter"].get(param, {})
                    year_records = [
                        # Keys are YYYYMMDD strings; -999 is treated as missing.
                        {"date": f"{timestamp[:4]}-{timestamp[4:6]}-{timestamp[6:8]}",
                         param: value if value != -999 else None}
                        for timestamp, value in param_data.items()
                    ]
                    all_records.extend(year_records)
                else:
                    print(f"Error: Missing expected data fields in response for {location}, param {param}, year {year}.")
                    print(f"Response: {str(data)[:200]}")

            except requests.exceptions.HTTPError as e:
                # Non-2xx status raised by raise_for_status(); skip this year.
                print(f"Error fetching data for {location}, param {param}, year {year}: {e}")

            except requests.exceptions.RequestException as e:
                # Any other failure raised by requests; skip this year.
                print(f"Unexpected error for {location}, param {param}, year {year}: {e}")

            except (KeyError, IndexError) as e:
                # JSON was received but lacks the expected nesting.
                print(f"Error: Missing expected data fields in response for {location}, param {param}, year {year}.")
                print(f"Response: {str(data)[:200]}")

        print(f"{location} {param}: total records for all years = {len(all_records)}")
        print(f"Intended output file: {output_file}")

        # Save to CSV
        if all_records:
            df = pd.DataFrame(all_records)
            df.to_csv(output_file, index=False)
            print(f"Saved {len(df)} records for {location} ({param}) to {output_file}")
            print(df.head())
        else:
            print(f"No data to save for {location} ({param}).")

Request URL: https://power.larc.nasa.gov/api/temporal/daily/regional?parameters=PRECTOTCORR&community=AG&longitude-min=-93.81&longitude-max=-90.19&latitude-min=41.85&latitude-max=44.2&start=20140101&end=20141231&format=JSON
Request URL: https://power.larc.nasa.gov/api/temporal/daily/regional?parameters=PRECTOTCORR&community=AG&longitude-min=-93.81&longitude-max=-90.19&latitude-min=41.85&latitude-max=44.2&start=20150101&end=20151231&format=JSON
Request URL: https://power.larc.nasa.gov/api/temporal/daily/regional?parameters=PRECTOTCORR&community=AG&longitude-min=-93.81&longitude-max=-90.19&latitude-min=41.85&latitude-max=44.2&start=20150101&end=20151231&format=JSON
Request URL: https://power.larc.nasa.gov/api/temporal/daily/regional?parameters=PRECTOTCORR&community=AG&longitude-min=-93.81&longitude-max=-90.19&latitude-min=41.85&latitude-max=44.2&start=20160101&end=20161231&format=JSON
Request URL: https://power.larc.nasa.gov/api/temporal/daily/regional?parameters=PRECTOTCORR&community=AG

#### Merging the dataset for each location

In [96]:
# For each location, outer-join every per-parameter CSV found in OUTPUT_DIR on
# the "date" column and write the combined table to INTERIM_DIR.
for location in LOCATIONS.keys():
    pattern = f"{location}_*_data.csv"
    files = sorted(OUTPUT_DIR.glob(pattern))

    # Test the list itself: `file` is not defined until the loop below runs,
    # so checking it here raises NameError on a fresh kernel.
    if not files:
        print(f"No files found for location {location} with pattern {pattern}")
        continue

    print(f"\nFound {len(files)} files for location {location}: {[file.name for file in files]}")

    merge_df = None

    for file in files:
        df = pd.read_csv(file)

        if merge_df is None:
            # The first file seeds the merged frame.
            merge_df = df
        else:
            # Outer join keeps dates that appear in only some parameter files.
            merge_df = pd.merge(merge_df, df, on="date", how="outer")

    if merge_df is not None:
        merged_output_file = INTERIM_DIR / f"{location}_all_params.csv"
        merge_df.to_csv(merged_output_file, index=False)
        print(f"\nSaved merged data for {location} to {merged_output_file}")
        print("\n", merge_df.head())



Found 6 files for location Story_County_IA: ['Story_County_IA_ALLSKY_SFC_SW_DWN_data.csv', 'Story_County_IA_PRECTOTCORR_data.csv', 'Story_County_IA_RH2M_data.csv', 'Story_County_IA_T2M_MAX_data.csv', 'Story_County_IA_T2M_MIN_data.csv', 'Story_County_IA_WS10M_data.csv']

Saved merged data for Story_County_IA to C:\Users\Arnold\OneDrive\Desktop\CAPSTONE PROJECT\farming_risk_regions\data\interim\weather_data\Story_County_IA_all_params.csv

          date  ALLSKY_SFC_SW_DWN  PRECTOTCORR   RH2M  T2M_MAX  T2M_MIN  WS10M
0  2014-01-01               3.42         1.20  77.96   -13.07   -17.98   6.41
1  2014-01-02               8.17         0.00  77.08   -11.63   -18.55   4.37
2  2014-01-03               4.86         0.00  84.33    -2.26   -18.78   8.83
3  2014-01-04               3.20         0.01  72.45    -1.70   -12.34   8.15
4  2014-01-05               6.04         0.00  56.36   -13.14   -23.80  10.53

Found 6 files for location McLean_County_IL: ['McLean_County_IL_ALLSKY_SFC_SW_DWN_data.c