In [8]:
#pip install earthengine-api google-auth
#pip install earthengine-api geopandas pandas
#pip install earthengine-api google-auth google-auth-oauthlib

Note: you may need to restart the kernel to use updated packages.


### Environment Setup
This section handles the installation of required packages and imports essential modules. These modules include those for accessing Google Earth Engine, managing credentials, manipulating data, and handling dates.


- **`import ee`**: Imports Google Earth Engine, which allows access to satellite and geospatial data.
- **`import os`**: The `os` module is used for interacting with the operating system, such as reading environment variables. This helps manage file paths and sensitive information like credentials.
- **`import json`**: The `json` library is used to handle JSON files. In this script, it’s used to read and parse the credentials needed for Google Earth Engine authentication.
- **`from google.oauth2 import service_account`**: This import allows for creating credentials that can be used to authenticate to Google services, including Google Earth Engine.
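- **`import google.auth.transport.requests`**: This library provides the HTTP transport used to refresh the credentials during the authentication process.
- **`import pandas as pd`**: pandas is used to turn each batch of results into a DataFrame and to append it to the output CSV file.
- **`from datetime import datetime, timedelta`**: These are used to parse the start and end dates and to step through the date range in batches.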

In [None]:
import ee
import os
import json
from google.oauth2 import service_account
import google.auth.transport.requests
import pandas as pd
from datetime import datetime, timedelta

### Load Credentials and Setup Authentication
This section loads the service account key from a JSON file, creates the credentials, and refreshes them to get a valid access token. It then initializes the Google Earth Engine API with these credentials. If the authentication is successful, a confirmation message is printed.


In [None]:
# Path to your service account key file.
# Set the EE_SERVICE_ACCOUNT_KEY environment variable to point at a different
# key file; when it is not set, the original default location below is used.
key_path = os.environ.get(
    'EE_SERVICE_ACCOUNT_KEY',
    r'C:\FWI Python\Weather_data\fwiweather-0f2d02e48d16.json',
)

# Load the service account key
with open(key_path, 'r') as f:
    credentials_dict = json.load(f)

# Create credentials using the service account key
credentials = service_account.Credentials.from_service_account_info(
    credentials_dict,
    scopes=["https://www.googleapis.com/auth/cloud-platform"]
)

# Refresh the credentials to get a valid access token
auth_req = google.auth.transport.requests.Request()
credentials.refresh(auth_req)

# Initialize the Earth Engine module with the credentials.
# Every later cell needs an initialized Earth Engine session, so after
# reporting the problem the exception is re-raised instead of letting the
# notebook continue in a broken state.
try:
    ee.Initialize(credentials)
except ee.EEException as e:
    print(f'Failed to authenticate: {e}')
    raise
except Exception as e:
    print(f'An error occurred: {e}')
    raise
else:
    print('Successfully authenticated with Google Earth Engine!')

### Function: Get Hourly Weather Data
This function retrieves hourly weather data from Google Earth Engine for a specified point (latitude and longitude) and date range. It returns the raw result as a list of rows: the first row holds the column names and each following row is one record.


In [None]:
def get_hourly_weather_data(lat, lon, start_date, end_date, scale=1000):
    """Fetch ERA5-Land hourly records at a single point from Google Earth Engine.

    Parameters
    ----------
    lat, lon : float
        Latitude and longitude of the point of interest. Note that the
        Earth Engine point is built as ``(lon, lat)``.
    start_date, end_date : str
        Bounds handed to ``ImageCollection.filterDate``. Per the Earth Engine
        API the start is inclusive and the end is exclusive.
    scale : int or float, optional
        Scale passed to ``getRegion`` when sampling the images. Defaults to
        1000, the value this function has always used.

    Returns
    -------
    list
        The client-side result of ``getRegion(...).getInfo()``: a list of
        rows in which the first row contains the column names.
    """
    # Define the location (Earth Engine expects longitude first)
    point = ee.Geometry.Point(lon, lat)

    # Restrict the hourly collection to the requested period and location
    collection = (
        ee.ImageCollection('ECMWF/ERA5_LAND/HOURLY')
        .filterDate(start_date, end_date)
        .filterBounds(point)
    )

    # getInfo() performs the actual request and brings the values client-side
    return collection.getRegion(point, scale).getInfo()

### Function: Process Batch Data
This function processes the raw batch data retrieved from Google Earth Engine. It converts the data into a pandas DataFrame and formats the timestamp to a datetime object for easier manipulation.


In [None]:
def process_batch(batch_data):
    """Turn one raw batch (header row followed by data rows) into a DataFrame.

    The first element of ``batch_data`` supplies the column names and the
    remaining elements supply the rows. The ``time`` column, given in epoch
    milliseconds, is converted to pandas datetimes.
    """
    frame = pd.DataFrame(batch_data[1:], columns=batch_data[0])
    # Replace the raw millisecond values with proper timestamps
    return frame.assign(time=pd.to_datetime(frame['time'], unit='ms'))

### Function: Fetch and Save Data
This function fetches weather data for a given pond in weekly batches and saves it to a CSV file. It processes the data iteratively, writing headers first and appending new data batches until the entire date range is covered.


In [None]:
def fetch_and_save_data(lat, lon, start_date, end_date, pond_id, output_file, batch_size='1W'):
    """Download hourly weather data for one pond in batches and save it as CSV.

    The range from ``start_date`` to ``end_date`` is walked in consecutive
    windows of ``batch_size``. Each window is fetched with
    ``get_hourly_weather_data``, converted with ``process_batch``, tagged with
    the pond's coordinates and ID, and appended to ``output_file``.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the pond; also written to the ``latitude`` and
        ``longitude`` columns of every row.
    start_date, end_date : str
        Range to download, formatted as ``'YYYY-MM-DD'``.
    pond_id : str
        Identifier written to the ``Pond ID`` column.
    output_file : str
        Path of the CSV file to create. An existing file is overwritten.
    batch_size : str, optional
        Length of each fetch window as a pandas timedelta string (for example
        ``'1W'`` or ``'3D'``). It must be a positive whole number of days
        because window boundaries are passed on as plain dates.

    Raises
    ------
    ValueError
        If a date does not match ``'%Y-%m-%d'`` or ``batch_size`` is not a
        positive whole number of days.
    """
    start = datetime.strptime(start_date, '%Y-%m-%d')
    end = datetime.strptime(end_date, '%Y-%m-%d')

    # Validate the window length before touching the output file. A step that
    # is zero or negative would loop forever; a fractional-day step would be
    # lost when the boundaries are formatted as dates below.
    step = pd.Timedelta(batch_size).to_pytimedelta()
    one_day = timedelta(days=1)
    if step < one_day or step % one_day:
        raise ValueError(
            f'batch_size must be a positive whole number of days, got {batch_size!r}'
        )

    # Write header to CSV
    header = ['time', 'dewpoint_temperature_2m', 'temperature_2m', 'skin_temperature',
              'soil_temperature_level_1', 'soil_temperature_level_2', 'soil_temperature_level_3',
              'soil_temperature_level_4', 'latitude', 'longitude', 'Pond ID']
    with open(output_file, 'w') as f:
        f.write(','.join(header) + '\n')

    while start < end:
        batch_end = min(start + step, end)  # one full window, or less if near the end

        print(f'Fetching data for {pond_id} from {start.strftime("%Y-%m-%d")} to {batch_end.strftime("%Y-%m-%d")}')

        batch_data = get_hourly_weather_data(lat, lon, start.strftime('%Y-%m-%d'), batch_end.strftime('%Y-%m-%d'))
        df_batch = process_batch(batch_data)

        df_batch['latitude'] = lat
        df_batch['longitude'] = lon
        df_batch['Pond ID'] = pond_id

        # The fetched frame can hold more columns than the header (and in a
        # different order). Keep exactly the header's columns, in header order,
        # so every appended row lines up with the header written above.
        df_batch = df_batch.reindex(columns=header)

        # Append to CSV
        df_batch.to_csv(output_file, mode='a', header=False, index=False)

        start = batch_end

### Pond Data Collection and Processing
This section defines the location information for each pond, including coordinates and date ranges. It then iterates over each pond and calls the `fetch_and_save_data` function to process and save weather data for each pond into individual CSV files.


In [3]:
# Pond registry: each Pond ID maps to its coordinates and the date range to download
ponds_info = {
    'WG-AJU1': dict(lat=16.6330, lon=81.1494, start_date='2022-02-24', end_date='2024-05-15'),
    'WG-CRP1': dict(lat=16.6306, lon=81.1502, start_date='2021-11-18', end_date='2024-05-05'),
    'WG-VMS1': dict(lat=16.6173, lon=81.1349, start_date='2022-06-08', end_date='2024-05-15'),
}

# Download every pond into its own CSV file, named after the Pond ID.
# The keys of each entry match the parameter names of fetch_and_save_data,
# so the entry can be unpacked straight into the call.
for pond_id, info in ponds_info.items():
    output_file = 'hourly_weather_data_' + pond_id + '.csv'
    fetch_and_save_data(pond_id=pond_id, output_file=output_file, **info)

print("All data has been processed and saved.")

Successfully authenticated with Google Earth Engine!
Fetching data for WG-AJU1 from 2022-02-24 to 2022-03-03
Fetching data for WG-AJU1 from 2022-03-03 to 2022-03-10
Fetching data for WG-AJU1 from 2022-03-10 to 2022-03-17
Fetching data for WG-AJU1 from 2022-03-17 to 2022-03-24
Fetching data for WG-AJU1 from 2022-03-24 to 2022-03-31
Fetching data for WG-AJU1 from 2022-03-31 to 2022-04-07
Fetching data for WG-AJU1 from 2022-04-07 to 2022-04-14
Fetching data for WG-AJU1 from 2022-04-14 to 2022-04-21
Fetching data for WG-AJU1 from 2022-04-21 to 2022-04-28
Fetching data for WG-AJU1 from 2022-04-28 to 2022-05-05
Fetching data for WG-AJU1 from 2022-05-05 to 2022-05-12
Fetching data for WG-AJU1 from 2022-05-12 to 2022-05-19
Fetching data for WG-AJU1 from 2022-05-19 to 2022-05-26
Fetching data for WG-AJU1 from 2022-05-26 to 2022-06-02
Fetching data for WG-AJU1 from 2022-06-02 to 2022-06-09
Fetching data for WG-AJU1 from 2022-06-09 to 2022-06-16
Fetching data for WG-AJU1 from 2022-06-16 to 2022-0

Fetching data for WG-CRP1 from 2022-06-16 to 2022-06-23
Fetching data for WG-CRP1 from 2022-06-23 to 2022-06-30
Fetching data for WG-CRP1 from 2022-06-30 to 2022-07-07
Fetching data for WG-CRP1 from 2022-07-07 to 2022-07-14
Fetching data for WG-CRP1 from 2022-07-14 to 2022-07-21
Fetching data for WG-CRP1 from 2022-07-21 to 2022-07-28
Fetching data for WG-CRP1 from 2022-07-28 to 2022-08-04
Fetching data for WG-CRP1 from 2022-08-04 to 2022-08-11
Fetching data for WG-CRP1 from 2022-08-11 to 2022-08-18
Fetching data for WG-CRP1 from 2022-08-18 to 2022-08-25
Fetching data for WG-CRP1 from 2022-08-25 to 2022-09-01
Fetching data for WG-CRP1 from 2022-09-01 to 2022-09-08
Fetching data for WG-CRP1 from 2022-09-08 to 2022-09-15
Fetching data for WG-CRP1 from 2022-09-15 to 2022-09-22
Fetching data for WG-CRP1 from 2022-09-22 to 2022-09-29
Fetching data for WG-CRP1 from 2022-09-29 to 2022-10-06
Fetching data for WG-CRP1 from 2022-10-06 to 2022-10-13
Fetching data for WG-CRP1 from 2022-10-13 to 202

Fetching data for WG-VMS1 from 2023-05-10 to 2023-05-17
Fetching data for WG-VMS1 from 2023-05-17 to 2023-05-24
Fetching data for WG-VMS1 from 2023-05-24 to 2023-05-31
Fetching data for WG-VMS1 from 2023-05-31 to 2023-06-07
Fetching data for WG-VMS1 from 2023-06-07 to 2023-06-14
Fetching data for WG-VMS1 from 2023-06-14 to 2023-06-21
Fetching data for WG-VMS1 from 2023-06-21 to 2023-06-28
Fetching data for WG-VMS1 from 2023-06-28 to 2023-07-05
Fetching data for WG-VMS1 from 2023-07-05 to 2023-07-12
Fetching data for WG-VMS1 from 2023-07-12 to 2023-07-19
Fetching data for WG-VMS1 from 2023-07-19 to 2023-07-26
Fetching data for WG-VMS1 from 2023-07-26 to 2023-08-02
Fetching data for WG-VMS1 from 2023-08-02 to 2023-08-09
Fetching data for WG-VMS1 from 2023-08-09 to 2023-08-16
Fetching data for WG-VMS1 from 2023-08-16 to 2023-08-23
Fetching data for WG-VMS1 from 2023-08-23 to 2023-08-30
Fetching data for WG-VMS1 from 2023-08-30 to 2023-09-06
Fetching data for WG-VMS1 from 2023-09-06 to 202