# Anameka South - ACCESS CM2 SSP585 CMIP6 Data Extraction

This notebook extracts time series data for a filtered set of grid points from the per-variable CMIP6 Excel workbooks of one climate model and writes one CSV file per variable.

## Model: ACCESS_CM2_SSP585
## Variables: tasmax, tasmin, pr


## 1. Imports and Configuration


In [1]:
import pandas as pd
import json
import os
from pathlib import Path

# Configuration
# Each path defaults to the original workstation location, but can be redirected
# without editing the notebook by setting the matching environment variable
# before the kernel starts. An unset or empty variable falls back to the default.
COORDINATES_FILE = os.environ.get("CLIMADAPT_COORDINATES_FILE") or (
    r"C:\Users\ibian\Desktop\ClimAdapt\Anameka\Grid\filtered_grid_points.json"
)
DATA_DIR = os.environ.get("CLIMADAPT_DATA_DIR") or (
    r"C:\Users\ibian\Desktop\ClimAdapt\CMIP6\ACCESS CM2 xlsx\ACCESS_CM2_SSP585_merged"
)
OUTPUT_DIR = os.environ.get("CLIMADAPT_OUTPUT_DIR") or (
    r"C:\Users\ibian\Desktop\ClimAdapt\Anameka\Anameka_South_ACCESS_CM2_SSP585"
)

# Prefix of the input workbook names and part of the output CSV names.
MODEL_NAME = "ACCESS_CM2_SSP585"

# Maximum absolute difference, in degrees, for a data row to count as the same
# grid point as a target coordinate (0.001 degrees of latitude is about 111 m).
COORD_TOLERANCE = 0.001

# Variables to process (one workbook is expected per variable)
VARIABLES = ['tasmax', 'tasmin', 'pr']

# Ensure output directory exists
os.makedirs(OUTPUT_DIR, exist_ok=True)

print("Configuration loaded:")
print(f"  - Coordinates file: {COORDINATES_FILE}")
print(f"  - Data directory: {DATA_DIR}")
print(f"  - Output directory: {OUTPUT_DIR}")
print(f"  - Coordinate tolerance: {COORD_TOLERANCE} degrees")
print(f"  - Variables: {', '.join(VARIABLES)}")


Configuration loaded:
  - Coordinates file: C:\Users\ibian\Desktop\ClimAdapt\Anameka\Grid\filtered_grid_points.json
  - Data directory: C:\Users\ibian\Desktop\ClimAdapt\CMIP6\ACCESS CM2 xlsx\ACCESS_CM2_SSP585_merged
  - Output directory: C:\Users\ibian\Desktop\ClimAdapt\Anameka\Anameka_South_ACCESS_CM2_SSP585
  - Coordinate tolerance: 0.001 degrees
  - Variables: tasmax, tasmin, pr


## 2. Load Filtered Coordinates


In [2]:
# Load filtered coordinates from JSON
with open(COORDINATES_FILE, 'r', encoding='utf-8') as f:
    filtered_coords = json.load(f)

# Convert to DataFrame for easier handling
coords_df = pd.DataFrame(filtered_coords)

# Fail fast with a readable message: every later cell indexes coords_df by
# 'lat' and 'lon', and an empty or malformed file would otherwise only surface
# as a bare KeyError further down.
if coords_df.empty:
    raise ValueError(f"No grid points found in {COORDINATES_FILE}")
missing_columns = [name for name in ('lat', 'lon') if name not in coords_df.columns]
if missing_columns:
    raise ValueError(
        f"{COORDINATES_FILE} is missing required field(s): {missing_columns}"
    )

print(f"Loaded {len(filtered_coords)} filtered grid points")
print("\nFirst few coordinates:")
print(coords_df.head())
print("\nCoordinate range:")
print(f"  Latitude: {coords_df['lat'].min():.4f} to {coords_df['lat'].max():.4f}")
print(f"  Longitude: {coords_df['lon'].min():.4f} to {coords_df['lon'].max():.4f}")


Loaded 46 filtered grid points

First few coordinates:
     lat         lon
0 -31.75  117.400002
1 -31.75  117.449997
2 -31.75  117.500000
3 -31.75  117.550003
4 -31.75  117.599998

Coordinate range:
  Latitude: -31.7500 to -31.4500
  Longitude: 117.4000 to 117.7000


## 3. Data Extraction Function


In [3]:
def _match_coordinate_rows(df, target_coords_df, tolerance):
    """Return index labels of rows in ``df`` within ``tolerance`` of any target point."""
    # Nothing to look for: mirror the "no matches" result without touching columns.
    if len(target_coords_df) == 0:
        return []

    # Report a missing coordinate column by name instead of a bare KeyError: 'lat'.
    for label, frame in (("Data", df), ("Target coordinates", target_coords_df)):
        missing = [col for col in ('lat', 'lon') if col not in frame.columns]
        if missing:
            raise KeyError(f"{label} is missing required column(s): {missing}")

    # Column lookups do not depend on the target, so do them once.
    data_lat = df['lat']
    data_lon = df['lon']

    matched_indices = []
    for _, target_row in target_coords_df.iterrows():
        target_lat = target_row['lat']
        target_lon = target_row['lon']

        # A row matches when BOTH coordinates are strictly within the tolerance.
        within = (abs(data_lat - target_lat) < tolerance) & (abs(data_lon - target_lon) < tolerance)
        # Read the labels from the mask itself rather than building a filtered
        # copy of the whole (wide) frame for every target point.
        matches = within[within].index.tolist()

        if len(matches) > 0:
            matched_indices.extend(matches)
            if len(matches) > 1:
                print(f"    Warning: {len(matches)} rows matched for ({target_lat}, {target_lon})")
        else:
            print(f"    Warning: No match found for ({target_lat}, {target_lon})")

    # Remove duplicates while preserving first-seen order
    return list(dict.fromkeys(matched_indices))


def extract_data_for_coordinates(excel_file_path, target_coords_df, tolerance=0.001):
    """
    Extract time series data for specific coordinates from Excel file.

    The workbook is read with ``pd.read_excel`` and every row whose 'lat' and
    'lon' both differ from a target coordinate by strictly less than
    ``tolerance`` is kept. Rows are returned in the order in which the target
    coordinates first match them; a row matched by several targets appears once.
    Targets with no match, or with more than one match, are reported with a
    printed warning and do not raise.

    Parameters:
    -----------
    excel_file_path : str
        Path to the Excel file containing climate data
    target_coords_df : pd.DataFrame
        DataFrame with 'lat' and 'lon' columns for target coordinates
    tolerance : float
        Coordinate matching tolerance in degrees (default: 0.001)

    Returns:
    --------
    pd.DataFrame
        Copy of the matching rows with all columns of the workbook; empty when
        nothing matches.

    Raises:
    -------
    KeyError
        If the workbook or ``target_coords_df`` (when non-empty) lacks a 'lat'
        or 'lon' column.
    """
    # Load Excel file
    print(f"  Loading data from: {os.path.basename(excel_file_path)}")
    df = pd.read_excel(excel_file_path)

    print(f"  Total rows in dataset: {len(df):,}")

    # Find matching rows using approximate coordinate matching
    matched_indices = _match_coordinate_rows(df, target_coords_df, tolerance)

    # Extract matched rows; copy so callers can modify the result freely
    filtered_df = df.loc[matched_indices].copy()

    print(f"  Matched rows: {len(filtered_df)}")

    return filtered_df


## 4. Process Each Variable and Export to CSV


In [4]:
# Run the extraction once per configured variable and collect a small
# per-variable record (row count, column count, output file name).
results_summary = {}

for variable in VARIABLES:
    divider = "=" * 60
    print(f"\n{divider}")
    print(f"Processing variable: {variable}")
    print(divider)

    # Workbooks are named <MODEL_NAME>_<variable>.xlsx; look in the merged
    # directory first.
    workbook_name = f"{MODEL_NAME}_{variable}.xlsx"
    excel_file = os.path.join(DATA_DIR, workbook_name)

    if not os.path.exists(excel_file):
        # Fall back to the directory one level up (used for the pr file).
        parent_dir = os.path.dirname(DATA_DIR)
        excel_file_alt = os.path.join(parent_dir, workbook_name)
        if not os.path.exists(excel_file_alt):
            # Neither location has the workbook: report both and move on.
            print("  ERROR: File not found in either location:")
            print(f"    - {excel_file}")
            print(f"    - {excel_file_alt}")
            continue
        excel_file = excel_file_alt
        print(f"  Found file in parent directory: {excel_file}")

    # Keep only the rows that belong to the filtered grid points.
    filtered_data = extract_data_for_coordinates(excel_file, coords_df, tolerance=COORD_TOLERANCE)

    if not len(filtered_data):
        print(f"  WARNING: No data extracted for {variable}")
        continue

    row_count = len(filtered_data)
    column_count = len(filtered_data.columns)

    # One CSV per variable, written into the output directory.
    output_filename = f"Anameka_South_{MODEL_NAME}_{variable}.csv"
    output_path = os.path.join(OUTPUT_DIR, output_filename)

    # Six decimal places for every float column (coordinates and data values).
    filtered_data.to_csv(output_path, index=False, encoding='utf-8', float_format='%.6f')
    print(f"  Exported to CSV: {output_path}")
    print(f"  Rows: {row_count}, Columns: {column_count}")
    print("  Format: CSV (UTF-8 encoding)")

    # Record what was written so the summary cell can report it.
    results_summary[variable] = dict(
        rows=row_count,
        columns=column_count,
        output_file=output_filename,
    )



Processing variable: tasmax
  Loading data from: ACCESS_CM2_SSP585_tasmax.xlsx


  Total rows in dataset: 612,226


  Matched rows: 46
  Exported to CSV: C:\Users\ibian\Desktop\ClimAdapt\Anameka\Anameka_South_ACCESS_CM2_SSP585\Anameka_South_ACCESS_CM2_SSP585_tasmax.csv
  Rows: 46, Columns: 32
  Format: CSV (UTF-8 encoding)

Processing variable: tasmin
  Loading data from: ACCESS_CM2_SSP585_tasmin.xlsx


  Total rows in dataset: 612,226


  Matched rows: 46
  Exported to CSV: C:\Users\ibian\Desktop\ClimAdapt\Anameka\Anameka_South_ACCESS_CM2_SSP585\Anameka_South_ACCESS_CM2_SSP585_tasmin.csv
  Rows: 46, Columns: 32
  Format: CSV (UTF-8 encoding)

Processing variable: pr
  Loading data from: ACCESS_CM2_SSP585_pr.xlsx


  Total rows in dataset: 612,226


  Matched rows: 46
  Exported to CSV: C:\Users\ibian\Desktop\ClimAdapt\Anameka\Anameka_South_ACCESS_CM2_SSP585\Anameka_South_ACCESS_CM2_SSP585_pr.csv
  Rows: 46, Columns: 30
  Format: CSV (UTF-8 encoding)


## 5. Extraction Summary


In [5]:
# Report what the processing cell exported: one entry per variable that was
# successfully written, followed by the output location.
rule = "=" * 60
print("\n" + rule, "EXTRACTION SUMMARY", rule, sep="\n")
print(f"\nFiltered coordinates: {len(filtered_coords)}", "\nVariables processed:", sep="\n")

for variable, summary in results_summary.items():
    print(f"  - {variable}:")
    # (label, key in the summary record, trailing text) for each detail line
    for label, key, suffix in (
        ("Rows extracted", 'rows', ""),
        ("Columns", 'columns', ""),
        ("Output file", 'output_file', " (CSV format)"),
    ):
        print(f"      {label}: {summary[key]}{suffix}")

print(
    f"\nOutput directory: {OUTPUT_DIR}",
    "\nAll files exported as CSV format (UTF-8 encoding)",
    "CSV files are ready for analysis!",
    sep="\n",
)



EXTRACTION SUMMARY

Filtered coordinates: 46

Variables processed:
  - tasmax:
      Rows extracted: 46
      Columns: 32
      Output file: Anameka_South_ACCESS_CM2_SSP585_tasmax.csv (CSV format)
  - tasmin:
      Rows extracted: 46
      Columns: 32
      Output file: Anameka_South_ACCESS_CM2_SSP585_tasmin.csv (CSV format)
  - pr:
      Rows extracted: 46
      Columns: 30
      Output file: Anameka_South_ACCESS_CM2_SSP585_pr.csv (CSV format)

Output directory: C:\Users\ibian\Desktop\ClimAdapt\Anameka\Anameka_South_ACCESS_CM2_SSP585

All files exported as CSV format (UTF-8 encoding)
CSV files are ready for analysis!
