# Anameka South - ACCESS CM2 Daily Data Extraction

This notebook extracts daily time series data from NetCDF files for a specific coordinate.

## Variables: tasmax, tasmin, pr
## Scenarios: SSP585, SSP245


In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import os
from pathlib import Path
import glob
from tqdm import tqdm
import time

# Configuration
SSP585_DIR = r"C:\Users\ibian\Desktop\ClimAdapt\CMIP6\ACCESS CM2 SSP585"
SSP245_DIR = r"C:\Users\ibian\Desktop\ClimAdapt\CMIP6\ACCESS CM2 SSP245"
OUTPUT_DIR = r"C:\Users\ibian\Desktop\ClimAdapt\Anameka"

# Variables to process
VARIABLES = ['tasmax', 'tasmin', 'pr']
SCENARIOS = ['SSP585', 'SSP245']

# Coordinate matching tolerance (degrees)
COORD_TOLERANCE = 0.01  # Approximately 1.1 km

# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Configuration loaded:")
print(f"  - SSP585 directory: {SSP585_DIR}")
print(f"  - SSP245 directory: {SSP245_DIR}")
print(f"  - Output directory: {OUTPUT_DIR}")
print(f"  - Variables: {', '.join(VARIABLES)}")
print(f"  - Scenarios: {', '.join(SCENARIOS)}")


Configuration loaded:
  - SSP585 directory: C:\Users\ibian\Desktop\ClimAdapt\CMIP6\ACCESS CM2 SSP585
  - SSP245 directory: C:\Users\ibian\Desktop\ClimAdapt\CMIP6\ACCESS CM2 SSP245
  - Output directory: C:\Users\ibian\Desktop\ClimAdapt\Anameka
  - Variables: tasmax, tasmin, pr
  - Scenarios: SSP585, SSP245


## 2. Specify Target Coordinate

**Enter the latitude and longitude for the grid point you want to extract:**


In [2]:
# TARGET COORDINATE
TARGET_LAT = -31.75  # Latitude (degrees)
TARGET_LON = 117.5999984741211   # Longitude (degrees)

print(f"Target coordinate:")
print(f"  Latitude: {TARGET_LAT}")
print(f"  Longitude: {TARGET_LON}")
print(f"  Tolerance: {COORD_TOLERANCE} degrees")

Target coordinate:
  Latitude: -31.75
  Longitude: 117.5999984741211
  Tolerance: 0.01 degrees


In [3]:
def extract_daily_data_from_netcdf(netcdf_dir, variable, target_lat, target_lon, tolerance=0.01):
    """
    Extract daily time series data for a specific coordinate from NetCDF files.
    Optimized version with progress reporting and cached coordinate indices.
    
    Parameters:
    -----------
    netcdf_dir : str
        Directory containing NetCDF files for the variable
    variable : str
        Variable name (tasmax, tasmin, or pr)
    target_lat : float
        Target latitude
    target_lon : float
        Target longitude
    tolerance : float
        Coordinate matching tolerance in degrees
    
    Returns:
    --------
    pd.DataFrame
        DataFrame with columns: date, value
    """
    start_time = time.time()
    
    # Find all NetCDF files in the directory
    nc_files = sorted(glob.glob(os.path.join(netcdf_dir, f"*{variable}*.nc")))
    
    if len(nc_files) == 0:
        print(f"  ERROR: No NetCDF files found in {netcdf_dir}")
        return None
    
    print(f"  Found {len(nc_files)} NetCDF files")
    
    # Cache coordinate information from first file
    lat_name = None
    lon_name = None
    time_name = None
    lat_idx = None
    lon_idx = None
    actual_lat = None
    actual_lon = None
    var_name = None
    
    # List to store daily data
    all_data = []
    
    # Process first file to get coordinate structure
    if len(nc_files) > 0:
        try:
            ds_sample = xr.open_dataset(nc_files[0], decode_times=False)
            
            # Get variable name
            for v in ds_sample.data_vars:
                if variable in v.lower() or v.lower() in variable.lower():
                    var_name = v
                    break
            
            if var_name is None:
                possible_names = [variable, variable.upper(), f'{variable}_day']
                for name in possible_names:
                    if name in ds_sample.data_vars:
                        var_name = name
                        break
            
            # Get coordinate names
            for coord in ds_sample.coords:
                coord_lower = coord.lower()
                if 'lat' in coord_lower:
                    lat_name = coord
                elif 'lon' in coord_lower:
                    lon_name = coord
                elif 'time' in coord_lower:
                    time_name = coord
            
            if lat_name and lon_name:
                # Find nearest grid point (cache indices)
                lat_idx = np.abs(ds_sample[lat_name].values - target_lat).argmin()
                lon_idx = np.abs(ds_sample[lon_name].values - target_lon).argmin()
                
                actual_lat = float(ds_sample[lat_name].values[lat_idx])
                actual_lon = float(ds_sample[lon_name].values[lon_idx])
                
                # Check if within tolerance
                if abs(actual_lat - target_lat) > tolerance or abs(actual_lon - target_lon) > tolerance:
                    print(f"  Warning: Nearest point ({actual_lat:.4f}, {actual_lon:.4f}) is outside tolerance")
                else:
                    print(f"  Using grid point: ({actual_lat:.4f}, {actual_lon:.4f})")
            
            ds_sample.close()
            
        except Exception as e:
            print(f"  Warning: Could not read sample file: {e}")
    
    if var_name is None or lat_idx is None or lon_idx is None:
        print(f"  ERROR: Could not determine coordinate structure")
        return None
    
    # Process all files with progress bar
    print(f"  Processing files...")
    for nc_file in tqdm(nc_files, desc=f"  {variable}", unit="file"):
        try:
            # Open NetCDF file with minimal decoding for speed
            ds = xr.open_dataset(nc_file, decode_times=False)
            
            # Extract data using cached indices
            data = ds[var_name].isel({lat_name: lat_idx, lon_name: lon_idx})
            
            # Get time values - extract year from filename for simplicity and speed
            import re
            year_match = re.search(r'(\d{4})', os.path.basename(nc_file))
            if year_match:
                year = int(year_match.group(1))
                # Create daily dates for the year (handles leap years automatically)
                time_values = pd.date_range(start=f'{year}-01-01', end=f'{year}-12-31', freq='D')
            else:
                # Fallback: use index if year not found
                time_values = pd.date_range(start='2000-01-01', periods=len(data.values), freq='D')
            
            # Convert to numpy array (load into memory)
            values = data.values
            if values.ndim > 1:
                values = values.flatten()
            
            # Create DataFrame for this file
            if len(values) == len(time_values):
                df_file = pd.DataFrame({
                    'date': time_values[:len(values)],
                    'value': values
                })
                all_data.append(df_file)
            
            ds.close()
            
        except Exception as e:
            tqdm.write(f"    Error processing {os.path.basename(nc_file)}: {e}")
            continue
    
    if len(all_data) == 0:
        print(f"  ERROR: No data extracted")
        return None
    
    # Combine all data
    print(f"  Combining data from {len(all_data)} files...")
    combined_df = pd.concat(all_data, ignore_index=True)
    
    # Sort by date
    combined_df = combined_df.sort_values('date').reset_index(drop=True)
    
    # Remove duplicate dates (keep first occurrence)
    combined_df = combined_df.drop_duplicates(subset='date', keep='first')
    
    elapsed_time = time.time() - start_time
    print(f"  ✓ Extracted {len(combined_df):,} daily records in {elapsed_time:.1f} seconds")
    print(f"  Date range: {combined_df['date'].min()} to {combined_df['date'].max()}")
    
    return combined_df


In [4]:
# Process each scenario and variable
results_summary = {}
total_start_time = time.time()

print(f"\n{'='*70}")
print(f"STARTING EXTRACTION PROCESS")
print(f"{'='*70}")
print(f"Total tasks: {len(SCENARIOS) * len(VARIABLES)} (2 scenarios × 3 variables)")
print(f"Target coordinate: ({TARGET_LAT}, {TARGET_LON})")
print(f"{'='*70}\n")

task_num = 0
total_tasks = len(SCENARIOS) * len(VARIABLES)

for scenario in SCENARIOS:
    print(f"\n{'='*70}")
    print(f"Processing Scenario: {scenario}")
    print(f"{'='*70}")
    
    # Select directory based on scenario
    if scenario == 'SSP585':
        base_dir = SSP585_DIR
    elif scenario == 'SSP245':
        base_dir = SSP245_DIR
    else:
        print(f"  ERROR: Unknown scenario {scenario}")
        continue
    
    for variable in VARIABLES:
        task_num += 1
        print(f"\n{'-'*70}")
        print(f"Task {task_num}/{total_tasks}: Processing {variable} ({scenario})")
        print(f"{'-'*70}")
        
        task_start_time = time.time()
        
        # Construct directory path for this variable
        var_dir = os.path.join(base_dir, f"{variable}_ACCESS CM2 {scenario}")
        
        if not os.path.exists(var_dir):
            print(f"  ERROR: Directory not found: {var_dir}")
            continue
        
        # Extract daily data
        daily_data = extract_daily_data_from_netcdf(
            var_dir,
            variable,
            TARGET_LAT,
            TARGET_LON,
            tolerance=COORD_TOLERANCE
        )
        
        if daily_data is None or len(daily_data) == 0:
            print(f"  WARNING: No data extracted for {variable} ({scenario})")
            continue
        
        # Prepare output filename
        output_filename = f"Anameka South_ACCESS CM2_{variable}_{scenario}.csv"
        output_path = os.path.join(OUTPUT_DIR, output_filename)
        
        # Export to CSV (keep only date and value columns for tidy format)
        print(f"  Exporting to CSV...")
        output_df = daily_data[['date', 'value']].copy()
        output_df.to_csv(
            output_path,
            index=False,
            encoding='utf-8',
            float_format='%.6f'
        )
        
        task_elapsed = time.time() - task_start_time
        print(f"  ✓ Exported to CSV: {os.path.basename(output_path)}")
        print(f"  ✓ Rows: {len(output_df):,} | Time: {task_elapsed:.1f}s")
        
        # Store summary
        key = f"{variable}_{scenario}"
        results_summary[key] = {
            'rows': len(output_df),
            'date_range': f"{output_df['date'].min()} to {output_df['date'].max()}",
            'output_file': output_filename,
            'time_seconds': task_elapsed
        }

total_elapsed = time.time() - total_start_time
print(f"\n{'='*70}")
print(f"ALL TASKS COMPLETED")
print(f"{'='*70}")
print(f"Total processing time: {total_elapsed:.1f} seconds ({total_elapsed/60:.1f} minutes)")
print(f"{'='*70}")



STARTING EXTRACTION PROCESS
Total tasks: 6 (2 scenarios × 3 variables)
Target coordinate: (-31.75, 117.5999984741211)


Processing Scenario: SSP585

----------------------------------------------------------------------
Task 1/6: Processing tasmax (SSP585)
----------------------------------------------------------------------
  Found 30 NetCDF files


  Using grid point: (-31.7500, 117.6000)
  Processing files...


  tasmax:   0%|          | 0/30 [00:00<?, ?file/s]

  tasmax:   3%|▎         | 1/30 [00:04<02:24,  4.97s/file]

  tasmax:   7%|▋         | 2/30 [00:09<02:14,  4.82s/file]

  tasmax:  10%|█         | 3/30 [00:14<02:08,  4.74s/file]

  tasmax:  13%|█▎        | 4/30 [00:19<02:02,  4.72s/file]

  tasmax:  17%|█▋        | 5/30 [00:23<01:54,  4.57s/file]

  tasmax:  20%|██        | 6/30 [00:25<01:28,  3.67s/file]

  tasmax:  23%|██▎       | 7/30 [00:29<01:28,  3.87s/file]

  tasmax:  27%|██▋       | 8/30 [00:34<01:29,  4.08s/file]

  tasmax:  30%|███       | 9/30 [00:38<01:29,  4.24s/file]

  tasmax:  33%|███▎      | 10/30 [00:43<01:26,  4.33s/file]

  tasmax:  37%|███▋      | 11/30 [00:47<01:24,  4.46s/file]

  tasmax:  40%|████      | 12/30 [00:52<01:21,  4.54s/file]

  tasmax:  43%|████▎     | 13/30 [00:57<01:18,  4.65s/file]

  tasmax:  47%|████▋     | 14/30 [01:02<01:14,  4.67s/file]

  tasmax:  50%|█████     | 15/30 [01:07<01:10,  4.73s/file]

  tasmax:  53%|█████▎    | 16/30 [01:11<01:06,  4.73s/file]

  tasmax:  57%|█████▋    | 17/30 [01:16<01:01,  4.76s/file]

  tasmax:  60%|██████    | 18/30 [01:21<00:57,  4.80s/file]

  tasmax:  63%|██████▎   | 19/30 [01:26<00:52,  4.81s/file]

  tasmax:  67%|██████▋   | 20/30 [01:31<00:48,  4.83s/file]

  tasmax:  70%|███████   | 21/30 [01:36<00:43,  4.83s/file]

  tasmax:  73%|███████▎  | 22/30 [01:41<00:38,  4.87s/file]

  tasmax:  77%|███████▋  | 23/30 [01:45<00:33,  4.85s/file]

  tasmax:  80%|████████  | 24/30 [01:50<00:29,  4.85s/file]

  tasmax:  83%|████████▎ | 25/30 [01:55<00:24,  4.84s/file]

  tasmax:  87%|████████▋ | 26/30 [02:00<00:18,  4.74s/file]

  tasmax:  90%|█████████ | 27/30 [02:04<00:14,  4.72s/file]

  tasmax:  93%|█████████▎| 28/30 [02:09<00:09,  4.70s/file]

  tasmax:  97%|█████████▋| 29/30 [02:13<00:04,  4.66s/file]

  tasmax: 100%|██████████| 30/30 [02:18<00:00,  4.67s/file]

  tasmax: 100%|██████████| 30/30 [02:18<00:00,  4.62s/file]




  Combining data from 29 files...
  ✓ Extracted 10,592 daily records in 140.2 seconds
  Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
  Exporting to CSV...
  ✓ Exported to CSV: Anameka South_ACCESS CM2_tasmax_SSP585.csv
  ✓ Rows: 10,592 | Time: 140.3s

----------------------------------------------------------------------
Task 2/6: Processing tasmin (SSP585)
----------------------------------------------------------------------
  Found 30 NetCDF files
  Using grid point: (-31.7500, 117.6000)
  Processing files...


  tasmin:   0%|          | 0/30 [00:00<?, ?file/s]

  tasmin:   3%|▎         | 1/30 [00:04<02:16,  4.72s/file]

  tasmin:   7%|▋         | 2/30 [00:09<02:15,  4.83s/file]

  tasmin:  10%|█         | 3/30 [00:14<02:10,  4.82s/file]

  tasmin:  13%|█▎        | 4/30 [00:18<02:02,  4.70s/file]

  tasmin:  17%|█▋        | 5/30 [00:20<01:32,  3.71s/file]

  tasmin:  20%|██        | 6/30 [00:22<01:14,  3.12s/file]

  tasmin:  23%|██▎       | 7/30 [00:24<01:02,  2.73s/file]

  tasmin:  27%|██▋       | 8/30 [00:26<00:54,  2.49s/file]

  tasmin:  30%|███       | 9/30 [00:28<00:49,  2.33s/file]

  tasmin:  33%|███▎      | 10/30 [00:30<00:44,  2.22s/file]

  tasmin:  37%|███▋      | 11/30 [00:32<00:40,  2.14s/file]

  tasmin:  40%|████      | 12/30 [00:34<00:37,  2.08s/file]

  tasmin:  43%|████▎     | 13/30 [00:36<00:34,  2.05s/file]

  tasmin:  47%|████▋     | 14/30 [00:38<00:32,  2.04s/file]

  tasmin:  50%|█████     | 15/30 [00:40<00:30,  2.03s/file]

  tasmin:  53%|█████▎    | 16/30 [00:42<00:28,  2.02s/file]

  tasmin:  57%|█████▋    | 17/30 [00:44<00:25,  2.00s/file]

  tasmin:  60%|██████    | 18/30 [00:46<00:24,  2.01s/file]

  tasmin:  63%|██████▎   | 19/30 [00:48<00:22,  2.03s/file]

  tasmin:  67%|██████▋   | 20/30 [00:50<00:20,  2.03s/file]

  tasmin:  70%|███████   | 21/30 [00:52<00:18,  2.04s/file]

  tasmin:  73%|███████▎  | 22/30 [00:54<00:16,  2.04s/file]

  tasmin:  77%|███████▋  | 23/30 [00:56<00:14,  2.03s/file]

  tasmin:  80%|████████  | 24/30 [00:58<00:12,  2.01s/file]

  tasmin:  83%|████████▎ | 25/30 [01:00<00:10,  2.01s/file]

  tasmin:  87%|████████▋ | 26/30 [01:02<00:08,  2.00s/file]

  tasmin:  90%|█████████ | 27/30 [01:04<00:06,  2.01s/file]

  tasmin:  97%|█████████▋| 29/30 [01:04<00:01,  1.11s/file]

  tasmin: 100%|██████████| 30/30 [01:05<00:00,  2.17s/file]




  Combining data from 26 files...
  ✓ Extracted 9,497 daily records in 65.1 seconds
  Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
  Exporting to CSV...
  ✓ Exported to CSV: Anameka South_ACCESS CM2_tasmin_SSP585.csv
  ✓ Rows: 9,497 | Time: 65.1s

----------------------------------------------------------------------
Task 3/6: Processing pr (SSP585)
----------------------------------------------------------------------
  Found 28 NetCDF files
  Using grid point: (-31.7500, 117.6000)
  Processing files...


  pr:   0%|          | 0/28 [00:00<?, ?file/s]

  pr:   4%|▎         | 1/28 [00:02<00:55,  2.05s/file]

  pr:   7%|▋         | 2/28 [00:04<00:53,  2.07s/file]

  pr:  11%|█         | 3/28 [00:06<00:51,  2.07s/file]

  pr:  14%|█▍        | 4/28 [00:08<00:49,  2.08s/file]

  pr:  18%|█▊        | 5/28 [00:10<00:48,  2.10s/file]

  pr:  21%|██▏       | 6/28 [00:12<00:46,  2.11s/file]

  pr:  25%|██▌       | 7/28 [00:14<00:44,  2.10s/file]

  pr:  29%|██▊       | 8/28 [00:16<00:42,  2.13s/file]

  pr:  32%|███▏      | 9/28 [00:19<00:40,  2.15s/file]

  pr:  36%|███▌      | 10/28 [00:21<00:38,  2.11s/file]

  pr:  39%|███▉      | 11/28 [00:23<00:36,  2.12s/file]

  pr:  43%|████▎     | 12/28 [00:25<00:33,  2.11s/file]

  pr:  46%|████▋     | 13/28 [00:27<00:31,  2.12s/file]

  pr:  50%|█████     | 14/28 [00:29<00:29,  2.13s/file]

  pr:  54%|█████▎    | 15/28 [00:31<00:27,  2.12s/file]

  pr:  57%|█████▋    | 16/28 [00:33<00:25,  2.14s/file]

  pr:  61%|██████    | 17/28 [00:35<00:23,  2.11s/file]

  pr:  64%|██████▍   | 18/28 [00:38<00:23,  2.34s/file]

  pr:  68%|██████▊   | 19/28 [00:43<00:28,  3.16s/file]

  pr:  71%|███████▏  | 20/28 [00:48<00:29,  3.75s/file]

  pr:  75%|███████▌  | 21/28 [00:54<00:29,  4.15s/file]

  pr:  79%|███████▊  | 22/28 [00:59<00:26,  4.49s/file]

  pr:  82%|████████▏ | 23/28 [01:04<00:23,  4.65s/file]

  pr:  86%|████████▌ | 24/28 [01:09<00:19,  4.87s/file]

  pr:  89%|████████▉ | 25/28 [01:14<00:14,  4.99s/file]

  pr:  93%|█████████▎| 26/28 [01:20<00:10,  5.02s/file]

  pr:  96%|█████████▋| 27/28 [01:25<00:05,  5.03s/file]

  pr: 100%|██████████| 28/28 [01:30<00:00,  5.09s/file]

  pr: 100%|██████████| 28/28 [01:30<00:00,  3.23s/file]




  Combining data from 27 files...
  ✓ Extracted 9,862 daily records in 90.4 seconds
  Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
  Exporting to CSV...
  ✓ Exported to CSV: Anameka South_ACCESS CM2_pr_SSP585.csv
  ✓ Rows: 9,862 | Time: 90.4s

Processing Scenario: SSP245

----------------------------------------------------------------------
Task 4/6: Processing tasmax (SSP245)
----------------------------------------------------------------------
  Found 30 NetCDF files
  Using grid point: (-31.7500, 117.6000)
  Processing files...


  tasmax:   0%|          | 0/30 [00:00<?, ?file/s]

  tasmax:   3%|▎         | 1/30 [00:04<02:19,  4.81s/file]

  tasmax:   7%|▋         | 2/30 [00:09<02:14,  4.80s/file]

  tasmax:  10%|█         | 3/30 [00:14<02:09,  4.81s/file]

  tasmax:  13%|█▎        | 4/30 [00:19<02:05,  4.82s/file]

  tasmax:  17%|█▋        | 5/30 [00:24<02:02,  4.89s/file]

  tasmax:  20%|██        | 6/30 [00:28<01:55,  4.82s/file]

  tasmax:  23%|██▎       | 7/30 [00:33<01:51,  4.86s/file]

  tasmax:  27%|██▋       | 8/30 [00:38<01:48,  4.91s/file]

  tasmax:  30%|███       | 9/30 [00:43<01:43,  4.92s/file]

  tasmax:  33%|███▎      | 10/30 [00:48<01:38,  4.93s/file]

  tasmax:  37%|███▋      | 11/30 [00:53<01:34,  4.95s/file]

  tasmax:  40%|████      | 12/30 [00:58<01:30,  5.02s/file]

  tasmax:  43%|████▎     | 13/30 [01:03<01:23,  4.94s/file]

  tasmax:  47%|████▋     | 14/30 [01:08<01:18,  4.91s/file]

  tasmax:  50%|█████     | 15/30 [01:13<01:12,  4.82s/file]

  tasmax:  53%|█████▎    | 16/30 [01:17<01:06,  4.78s/file]

  tasmax:  57%|█████▋    | 17/30 [01:22<01:01,  4.74s/file]

  tasmax:  60%|██████    | 18/30 [01:27<00:56,  4.75s/file]

  tasmax:  63%|██████▎   | 19/30 [01:32<00:52,  4.76s/file]

  tasmax:  67%|██████▋   | 20/30 [01:36<00:47,  4.79s/file]

  tasmax:  70%|███████   | 21/30 [01:42<00:43,  4.87s/file]

  tasmax:  73%|███████▎  | 22/30 [01:46<00:39,  4.89s/file]

  tasmax:  77%|███████▋  | 23/30 [01:51<00:33,  4.83s/file]

  tasmax:  80%|████████  | 24/30 [01:56<00:29,  4.84s/file]

  tasmax:  83%|████████▎ | 25/30 [02:01<00:24,  4.83s/file]

  tasmax:  87%|████████▋ | 26/30 [02:06<00:19,  4.81s/file]

  tasmax:  90%|█████████ | 27/30 [02:10<00:14,  4.74s/file]

  tasmax:  93%|█████████▎| 28/30 [02:15<00:09,  4.72s/file]

  tasmax:  97%|█████████▋| 29/30 [02:19<00:04,  4.67s/file]

  tasmax: 100%|██████████| 30/30 [02:24<00:00,  4.74s/file]

  tasmax: 100%|██████████| 30/30 [02:24<00:00,  4.83s/file]




  Combining data from 29 files...
  ✓ Extracted 10,592 daily records in 144.8 seconds
  Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
  Exporting to CSV...
  ✓ Exported to CSV: Anameka South_ACCESS CM2_tasmax_SSP245.csv
  ✓ Rows: 10,592 | Time: 144.9s

----------------------------------------------------------------------
Task 5/6: Processing tasmin (SSP245)
----------------------------------------------------------------------
  Found 30 NetCDF files
  Using grid point: (-31.7500, 117.6000)
  Processing files...


  tasmin:   0%|          | 0/30 [00:00<?, ?file/s]

  tasmin:   3%|▎         | 1/30 [00:04<02:21,  4.87s/file]

  tasmin:   7%|▋         | 2/30 [00:09<02:15,  4.85s/file]

  tasmin:  10%|█         | 3/30 [00:14<02:10,  4.82s/file]

  tasmin:  13%|█▎        | 4/30 [00:19<02:04,  4.81s/file]

  tasmin:  17%|█▋        | 5/30 [00:24<02:01,  4.87s/file]

  tasmin:  20%|██        | 6/30 [00:29<01:58,  4.95s/file]

  tasmin:  23%|██▎       | 7/30 [00:34<01:54,  4.99s/file]

  tasmin:  27%|██▋       | 8/30 [00:39<01:49,  4.98s/file]

  tasmin:  30%|███       | 9/30 [00:44<01:43,  4.94s/file]

  tasmin:  33%|███▎      | 10/30 [00:49<01:38,  4.92s/file]

  tasmin:  37%|███▋      | 11/30 [00:53<01:33,  4.90s/file]

  tasmin:  40%|████      | 12/30 [00:58<01:27,  4.85s/file]

  tasmin:  43%|████▎     | 13/30 [01:03<01:22,  4.85s/file]

  tasmin:  47%|████▋     | 14/30 [01:08<01:17,  4.82s/file]

  tasmin:  50%|█████     | 15/30 [01:12<01:10,  4.70s/file]

  tasmin:  53%|█████▎    | 16/30 [01:17<01:06,  4.74s/file]

  tasmin:  57%|█████▋    | 17/30 [01:22<01:03,  4.85s/file]

  tasmin:  60%|██████    | 18/30 [01:27<00:58,  4.92s/file]

  tasmin:  63%|██████▎   | 19/30 [01:32<00:54,  4.95s/file]

  tasmin:  67%|██████▋   | 20/30 [01:37<00:49,  4.99s/file]

  tasmin:  70%|███████   | 21/30 [01:42<00:45,  5.00s/file]

  tasmin:  73%|███████▎  | 22/30 [01:48<00:40,  5.05s/file]

  tasmin:  77%|███████▋  | 23/30 [01:53<00:35,  5.05s/file]

  tasmin:  80%|████████  | 24/30 [01:58<00:30,  5.08s/file]

  tasmin:  83%|████████▎ | 25/30 [02:03<00:25,  5.08s/file]

  tasmin:  87%|████████▋ | 26/30 [02:08<00:20,  5.10s/file]

  tasmin:  90%|█████████ | 27/30 [02:13<00:15,  5.09s/file]

  tasmin:  93%|█████████▎| 28/30 [02:18<00:10,  5.10s/file]

  tasmin:  97%|█████████▋| 29/30 [02:23<00:05,  5.10s/file]

  tasmin: 100%|██████████| 30/30 [02:28<00:00,  5.10s/file]

  tasmin: 100%|██████████| 30/30 [02:28<00:00,  4.96s/file]




  Combining data from 29 files...
  ✓ Extracted 10,592 daily records in 148.9 seconds
  Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
  Exporting to CSV...
  ✓ Exported to CSV: Anameka South_ACCESS CM2_tasmin_SSP245.csv
  ✓ Rows: 10,592 | Time: 148.9s

----------------------------------------------------------------------
Task 6/6: Processing pr (SSP245)
----------------------------------------------------------------------
  Found 30 NetCDF files
  Using grid point: (-31.7500, 117.6000)
  Processing files...


  pr:   0%|          | 0/30 [00:00<?, ?file/s]

  pr:   3%|▎         | 1/30 [00:04<02:20,  4.84s/file]

  pr:   7%|▋         | 2/30 [00:08<01:59,  4.26s/file]

  pr:  10%|█         | 3/30 [00:10<01:26,  3.19s/file]

  pr:  13%|█▎        | 4/30 [00:12<01:09,  2.69s/file]

  pr:  17%|█▋        | 5/30 [00:14<01:01,  2.45s/file]

  pr:  20%|██        | 6/30 [00:16<00:54,  2.27s/file]

  pr:  23%|██▎       | 7/30 [00:18<00:49,  2.14s/file]

  pr:  27%|██▋       | 8/30 [00:20<00:45,  2.09s/file]

  pr:  30%|███       | 9/30 [00:22<00:42,  2.04s/file]

  pr:  33%|███▎      | 10/30 [00:24<00:39,  2.00s/file]

  pr:  37%|███▋      | 11/30 [00:26<00:37,  1.99s/file]

  pr:  40%|████      | 12/30 [00:28<00:35,  1.98s/file]

  pr:  43%|████▎     | 13/30 [00:30<00:33,  1.96s/file]

  pr:  47%|████▋     | 14/30 [00:32<00:31,  1.97s/file]

  pr:  50%|█████     | 15/30 [00:34<00:29,  1.99s/file]

  pr:  53%|█████▎    | 16/30 [00:36<00:28,  2.03s/file]

  pr:  57%|█████▋    | 17/30 [00:39<00:33,  2.57s/file]

  pr:  60%|██████    | 18/30 [00:44<00:39,  3.27s/file]

  pr:  63%|██████▎   | 19/30 [00:49<00:40,  3.72s/file]

  pr:  67%|██████▋   | 20/30 [00:54<00:40,  4.01s/file]

  pr:  70%|███████   | 21/30 [00:59<00:39,  4.35s/file]

  pr:  73%|███████▎  | 22/30 [01:04<00:36,  4.59s/file]

  pr:  77%|███████▋  | 23/30 [01:09<00:32,  4.71s/file]

  pr:  80%|████████  | 24/30 [01:14<00:28,  4.80s/file]

  pr:  83%|████████▎ | 25/30 [01:19<00:24,  4.90s/file]

  pr:  87%|████████▋ | 26/30 [01:25<00:20,  5.03s/file]

  pr:  90%|█████████ | 27/30 [01:30<00:15,  5.16s/file]

  pr:  93%|█████████▎| 28/30 [01:34<00:09,  4.84s/file]

  pr:  97%|█████████▋| 29/30 [01:39<00:04,  4.98s/file]

  pr: 100%|██████████| 30/30 [01:44<00:00,  4.97s/file]

  pr: 100%|██████████| 30/30 [01:44<00:00,  3.50s/file]

  Combining data from 29 files...
  ✓ Extracted 10,592 daily records in 104.9 seconds
  Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
  Exporting to CSV...
  ✓ Exported to CSV: Anameka South_ACCESS CM2_pr_SSP245.csv
  ✓ Rows: 10,592 | Time: 105.0s

ALL TASKS COMPLETED
Total processing time: 694.6 seconds (11.6 minutes)





## 5. Summary Statistics


In [5]:
print("\n" + "="*70)
print("EXTRACTION SUMMARY")
print("="*70)
print(f"\nTarget coordinate: ({TARGET_LAT}, {TARGET_LON})")
print(f"\nFiles processed:")
for key, summary in results_summary.items():
    var, scen = key.split('_', 1)
    print(f"\n  {var.upper()} ({scen}):")
    print(f"      Rows: {summary['rows']}")
    print(f"      Date range: {summary['date_range']}")
    print(f"      Output file: {summary['output_file']}")

print(f"\nOutput directory: {OUTPUT_DIR}")
print("\nAll CSV files exported successfully!")



EXTRACTION SUMMARY

Target coordinate: (-31.75, 117.5999984741211)

Files processed:

  TASMAX (SSP585):
      Rows: 10592
      Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
      Output file: Anameka South_ACCESS CM2_tasmax_SSP585.csv

  TASMIN (SSP585):
      Rows: 9497
      Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
      Output file: Anameka South_ACCESS CM2_tasmin_SSP585.csv

  PR (SSP585):
      Rows: 9862
      Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
      Output file: Anameka South_ACCESS CM2_pr_SSP585.csv

  TASMAX (SSP245):
      Rows: 10592
      Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
      Output file: Anameka South_ACCESS CM2_tasmax_SSP245.csv

  TASMIN (SSP245):
      Rows: 10592
      Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
      Output file: Anameka South_ACCESS CM2_tasmin_SSP245.csv

  PR (SSP245):
      Rows: 10592
      Date range: 2035-01-01 00:00:00 to 2063-12-31 00:00:00
      Output file: Aname