## extract and clip precipitation rasters to buffered angola

In [3]:
## extract and clip precipitation rasters to buffered angola

import os
import glob
import rioxarray as rxr
import geopandas as gpd

# -----------------------------
# File paths
# -----------------------------
# Already downloaded
# Folders holding the precipitation rasters, one per decade of the download.
precip1950_59_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/precipdata/wc2.1_cruts4.09_2.5m_prec_1950-1959"
precip1960_69_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/precipdata/wc2.1_cruts4.09_2.5m_prec_1960-1969"

# Boundary layer used as the clip mask, and the folder receiving clipped rasters.
angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/buffered1km_angola_adm0.gpkg"
output_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/gis_features_updated/precip_masked"

# Create the destination up front; no error if it already exists.
os.makedirs(output_folder, exist_ok=True)

# -----------------------------
# Load Angola vector
# -----------------------------
angola = gpd.read_file(angola_gpkg)
# Print the boundary CRS; rasters in a different CRS are reprojected to it below.
print("Angola CRS:", angola.crs)

# -----------------------------
# Function to clip rasters
# -----------------------------
def clip_precip_to_angola(folder_path, angola_shape, output_folder):
    """Clip every ``*.tif`` in *folder_path* to the boundary and save the result.

    A raster is skipped when a file with the same name is already present in
    *output_folder* (the listing is taken once, before processing starts).
    Rasters whose CRS differs from the boundary CRS are reprojected to it
    before clipping.

    Args:
        folder_path: Folder searched (non-recursively) for ``*.tif`` files.
        angola_shape: Boundary layer; its ``geometry`` and ``crs`` are used.
        output_folder: Existing folder that receives the clipped rasters,
            written under their original file names.
    """
    sources = glob.glob(os.path.join(folder_path, "*.tif"))

    # Names already present in the destination count as "done".
    done_names = set(os.listdir(output_folder))

    for src_path in sources:
        name = os.path.basename(src_path)

        if name in done_names:
            print(f"Skipping {name}, already exists.")
            continue

        print("Processing:", name)
        raster = rxr.open_rasterio(src_path, masked=True)

        # Bring the raster into the boundary CRS when the two differ.
        boundary_crs = angola_shape.crs
        if raster.rio.crs != boundary_crs:
            raster = raster.rio.reproject(boundary_crs)

        # Mask with the boundary polygons, then drop length-1 dimensions.
        clipped = raster.rio.clip(
            angola_shape.geometry, boundary_crs, drop=True, invert=False
        ).squeeze()

        destination = os.path.join(output_folder, name)
        clipped.rio.to_raster(destination)
        print("Saved:", destination)

# -----------------------------
# Run for each decade
# -----------------------------
# Already downloaded 

# Clip the 1950-1959 folder; files already present in output_folder are skipped.
clip_precip_to_angola(precip1950_59_path, angola, output_folder)
# The 1960-1969 run is disabled here; uncomment to process that folder as well.
# clip_precip_to_angola(precip1960_69_path, angola, output_folder)

print("All new precipitation rasters clipped to Angola!")


Angola CRS: EPSG:32733
Skipping wc2.1_cruts4.09_2.5m_prec_1951-01.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-02.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-03.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-04.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-05.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-06.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-07.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-08.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-09.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-10.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-11.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1951-12.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1952-01.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1952-02.tif, already exists.
Skipping wc2.1_cruts4.09_2.5m_prec_1952-03.tif, already exists.
Skipping wc2.1_cr

Precipitation sum per year

In [1]:
import os
import re
import glob
import rioxarray as rxr
import numpy as np
import xarray as xr 

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# Change these to your actual folders:
# Folder of clipped monthly precipitation rasters (output of the clipping cell).
precip_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/gis_features_updated/precip_masked"

# Parent folder for the annual rasters; a "precip" subfolder is created below it.
output_base   = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum"

# Create the parent folder; no error if it already exists.
os.makedirs(output_base, exist_ok=True)

import os
import re
import glob
import rioxarray as rxr
import numpy as np
import xarray as xr   # <-- ADD THIS LINE

# ---------------------------------------------------------
# Helper function to extract year from filename
# ---------------------------------------------------------
def extract_year(fname):
    """Return the first four-digit run in a raster file name as a string.

    The last path component is searched first, so digits in parent folder
    names (for example a ``..._1950-1959`` decade folder) are not mistaken
    for the year of the file. When the file name itself has no four-digit
    run, the whole string is searched, which keeps the previous behaviour
    for such inputs.

    Args:
        fname: File name or path.

    Returns:
        The four matched characters, or None when no four-digit run exists.
    """
    match = re.search(r"(\d{4})", os.path.basename(fname)) or re.search(r"(\d{4})", fname)
    return match.group(1) if match else None

# ---------------------------------------------------------
# Generic function to compute annual aggregation
# ---------------------------------------------------------
def annual_aggregate(input_folder, output_folder, variable):
    """Aggregate monthly rasters in *input_folder* into one raster per year.

    Files are grouped by the year found in their file name. For
    ``variable == "precip"`` the months are summed; for any other value they
    are averaged. Each result is written to
    ``<output_folder>/<variable>_<year>_annual.tif``.

    Args:
        input_folder: Folder searched (non-recursively) for ``*.tif`` files.
        output_folder: Existing folder that receives the annual rasters.
        variable: Label used in messages and output names; ``"precip"``
            selects summation, anything else selects the mean.
    """
    tif_files = sorted(glob.glob(os.path.join(input_folder, "*.tif")))
    yearly_groups = {}

    # Group files by year. Only the file name is parsed, so digits in the
    # folder path cannot be picked up as the year.
    for tif in tif_files:
        year = extract_year(os.path.basename(tif))
        if not year:
            continue
        yearly_groups.setdefault(year, []).append(tif)

    for year, files in yearly_groups.items():
        print(f"\nProcessing {variable} for {year} ({len(files)} monthly files)...")
        if len(files) != 12:
            # A partial year still runs, but its total/mean is not a full-year value.
            print(f"Warning: expected 12 monthly files for {year}, found {len(files)}.")

        # Load and stack all months for this year.
        arrays = [rxr.open_rasterio(f, masked=True).squeeze() for f in files]
        stacked = xr.concat(arrays, dim="month")

        if variable == "precip":
            # skipna=False keeps masked (NaN) pixels as NaN. The default
            # NaN-skipping sum would turn all-NaN pixels into 0, i.e. write
            # "0 precipitation" outside the clipped boundary.
            annual = stacked.sum(dim="month", skipna=False)
        else:
            annual = stacked.mean(dim="month")

        out_path = os.path.join(output_folder, f"{variable}_{year}_annual.tif")
        annual.rio.to_raster(out_path)
        print(f"Saved: {out_path}")


# ---------------------------------------------------------
# Run for each variable
# ---------------------------------------------------------
# Annual precipitation rasters go into a "precip" subfolder of output_base.
os.makedirs(os.path.join(output_base, "precip"), exist_ok=True)

# "precip" selects summation (rather than the mean) inside annual_aggregate.
annual_aggregate(precip_folder, os.path.join(output_base, "precip"), "precip")

print("\n‚úÖ Annual climate rasters created for precip.")



Processing precip for 1951 (12 monthly files)...
Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip/precip_1951_annual.tif

Processing precip for 1952 (12 monthly files)...
Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip/precip_1952_annual.tif

Processing precip for 1953 (12 monthly files)...
Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip/precip_1953_annual.tif

Processing precip for 1954 (12 monthly files)...
Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip/precip_1954_annual.tif

Processing precip for 1955 (12 monthly files)...
Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip/precip_1955_annual.tif

Processing precip for 1956 (12 monthly files)...
Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip/pre

In [4]:
# =========================================================
# Average Precipitation (1950–1969)
# =========================================================
import os
import glob
import re
import rasterio
import numpy as np

# --- Paths ---
# Folder of annual precipitation sums (precip_YYYY_annual.tif) read by this cell.
annual_precip_sums = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_precip_sum/precip"
# Folder that receives the multi-year average raster.
output_base = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/average_total_precip"
os.makedirs(output_base, exist_ok=True)

def average_total_precip(annual_precip_sums, output_folder, start_year=1950, end_year=1969):
    """
    Compute the average of annual precipitation sum rasters (precip_YYYY_annual.tif)
    over a given year range and save the result as a GeoTIFF.

    Each pixel is averaged over the rasters in which it is valid (neither NaN
    nor that raster's own nodata value). Pixels valid in no raster are written
    as NaN, and NaN is declared as the nodata value of the output.

    Args:
        annual_precip_sums: Folder containing ``precip_YYYY_annual.tif`` files.
        output_folder: Existing folder that receives
            ``avg_annual_precip_sum_<start>_<end>.tif``.
        start_year: First year included (inclusive).
        end_year: Last year included (inclusive).

    Raises:
        FileNotFoundError: No matching raster falls inside the year range.
        ValueError: A raster's grid shape differs from the first raster's.
    """
    # --- Find all .tif files ---
    all_tifs = sorted(glob.glob(os.path.join(annual_precip_sums, "precip_*.tif")))

    # --- Filter by year range ---
    year_pattern = re.compile(r"precip_(\d{4})_annual\.tif$")
    selected_tifs = []
    for f in all_tifs:
        match = year_pattern.search(os.path.basename(f))
        if match and start_year <= int(match.group(1)) <= end_year:
            selected_tifs.append(f)

    if not selected_tifs:
        raise FileNotFoundError(f"No raster files found between {start_year}‚Äì{end_year} in {annual_precip_sums}")

    print(f"Found {len(selected_tifs)} rasters for {start_year}‚Äì{end_year}.")

    # --- Initialize arrays from the first raster's grid ---
    with rasterio.open(selected_tifs[0]) as src0:
        profile = src0.profile
        grid_shape = src0.shape
    # One float32 band; NaN is what gets written for "no valid year", so
    # declare it as nodata instead of inheriting the source value.
    profile.update(dtype=rasterio.float32, count=1, compress='lzw', nodata=np.nan)
    data_sum = np.zeros(grid_shape, dtype=np.float64)
    valid_count = np.zeros(grid_shape, dtype=np.int32)

    # --- Accumulate all rasters ---
    for tif in selected_tifs:
        with rasterio.open(tif) as src:
            if src.shape != grid_shape:
                raise ValueError(
                    f"Grid shape {src.shape} of {os.path.basename(tif)} does not match {grid_shape}"
                )
            data = src.read(1).astype(np.float64)
            nodata_val = src.nodata  # each file's own nodata, not the first file's

        mask = np.isnan(data)
        if nodata_val is not None and not np.isnan(nodata_val):
            mask |= data == nodata_val
        valid = ~mask
        data_sum[valid] += data[valid]
        valid_count[valid] += 1
        print(f" ‚Üí Added {os.path.basename(tif)}")

    # --- Compute average (NaN where no raster contributed) ---
    with np.errstate(divide='ignore', invalid='ignore'):
        avg_data = np.where(valid_count > 0, data_sum / valid_count, np.nan)

    # --- Save result ---
    output_path = os.path.join(output_folder, f"avg_annual_precip_sum_{start_year}_{end_year}.tif")
    with rasterio.open(output_path, 'w', **profile) as dst:
        dst.write(avg_data.astype(rasterio.float32), 1)

    print(f"‚úÖ Saved average raster: {output_path}")

# --- Run function ---
# Average every annual sum whose year lies in 1950..1969 (both inclusive).
average_total_precip(annual_precip_sums, output_base, 1950, 1969)


Found 19 rasters for 1950‚Äì1969.
 ‚Üí Added precip_1951_annual.tif
 ‚Üí Added precip_1952_annual.tif
 ‚Üí Added precip_1953_annual.tif
 ‚Üí Added precip_1954_annual.tif
 ‚Üí Added precip_1955_annual.tif
 ‚Üí Added precip_1956_annual.tif
 ‚Üí Added precip_1957_annual.tif
 ‚Üí Added precip_1958_annual.tif
 ‚Üí Added precip_1959_annual.tif
 ‚Üí Added precip_1960_annual.tif
 ‚Üí Added precip_1961_annual.tif
 ‚Üí Added precip_1962_annual.tif
 ‚Üí Added precip_1963_annual.tif
 ‚Üí Added precip_1964_annual.tif
 ‚Üí Added precip_1965_annual.tif
 ‚Üí Added precip_1966_annual.tif
 ‚Üí Added precip_1967_annual.tif
 ‚Üí Added precip_1968_annual.tif
 ‚Üí Added precip_1969_annual.tif
‚úÖ Saved average raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/average_total_precip/avg_annual_precip_sum_1950_1969.tif


## tmin clipped to angola buffer and extracted 2.5m res 


In [9]:
## ENDED UP RUNNING IN QGIS --TMAX_EXTRACT.PY

# =========================================================
# Clip and Reproject Tmin rasters (1950–1969) to Angola buffer (EPSG:32733)
# =========================================================

import os
import glob
import rioxarray as rxr
import geopandas as gpd
from rasterio.enums import Resampling  # ‚úÖ FIXED import

# -----------------------------
# File paths
# -----------------------------
# Folders holding the Tmin rasters, one per decade; searched recursively below.
tmin1950_59_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_2.5m_tmin_1950-1959"
tmin1960_69_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_2.5m_tmin_1960-1969"

# Boundary layer used as the clip mask, and the folder receiving the outputs.
angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/gis_features_updated/processed_data/buffered1km_angola_adm0.gpkg"
output_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin"
os.makedirs(output_folder, exist_ok=True)

# -----------------------------
# Load Angola boundary (EPSG:32733)
# -----------------------------
angola = gpd.read_file(angola_gpkg)
# Print the CRS actually read from the file so it can be checked against the header.
print("‚úÖ Angola CRS:", angola.crs)

# -----------------------------
# Function: reproject → clip → save
# -----------------------------
def reproject_and_clip_tmin(folder_path, angola_shape, output_folder):
    """Reproject Tmin rasters to EPSG:32733 at 1 km and clip them to the boundary.

    Rasters named ``wc2.1_cruts4.09_2.5m_tmin_*.tif`` are searched recursively
    under *folder_path*. Each one is first cropped to the boundary's bounding
    box (in the raster's own CRS, with a small margin), then reprojected, then
    clipped to the boundary polygons and written as
    ``tmin_<year-month>_angola_utm33s.tif``. Existing outputs are skipped.

    Cropping before reprojecting avoids warping the entire source raster to a
    1 km UTM grid only to discard almost all of it.
    NOTE(review): the output grid origin now derives from the cropped extent,
    so pixel alignment may differ from files produced before this change.

    Args:
        folder_path: Root folder searched for the monthly rasters.
        angola_shape: Boundary layer; its ``geometry`` and ``crs`` are used.
        output_folder: Destination folder (created if missing).
    """
    os.makedirs(output_folder, exist_ok=True)

    # Recursively find all tmin rasters
    tif_files = sorted(glob.glob(os.path.join(folder_path, "**", "wc2.1_cruts4.09_2.5m_tmin_*.tif"), recursive=True))

    if not tif_files:
        print(f"‚ö†Ô∏è No .tif files found in {folder_path}")
        return

    print(f"üîé Found {len(tif_files)} Tmin rasters in {folder_path}")

    for tif in tif_files:
        filename = os.path.basename(tif)
        # The year-month token is the first "_"-separated part containing "-".
        ym_part = [p for p in filename.split("_") if "-" in p]
        if ym_part:
            year_month = ym_part[0].replace(".tif", "")
        else:
            # Fall back to the file stem so unrelated files cannot all
            # collide on one shared "unknown" output name.
            year_month = os.path.splitext(filename)[0]

        out_name = f"tmin_{year_month}_angola_utm33s.tif"
        out_path = os.path.join(output_folder, out_name)

        if os.path.exists(out_path):
            print(f"‚è© Skipping {out_name}, already exists.")
            continue

        print(f"Processing: {filename}")
        tmin = rxr.open_rasterio(tif, masked=True).squeeze()

        # --- Step 1: assign EPSG:4326 when the file carries no CRS ---
        if not tmin.rio.crs:
            tmin = tmin.rio.write_crs("EPSG:4326")

        # --- Step 2: crop to the boundary's bounding box in the raster CRS ---
        # A 10-pixel margin keeps enough neighbours for bilinear resampling
        # at the edge of the boundary.
        minx, miny, maxx, maxy = angola_shape.to_crs(tmin.rio.crs).total_bounds
        pad = 10 * max(abs(r) for r in tmin.rio.resolution())
        tmin = tmin.rio.clip_box(minx - pad, miny - pad, maxx + pad, maxy + pad)

        # --- Step 3: Reproject raster to EPSG:32733 (meters) ---
        tmin_utm = tmin.rio.reproject(
            dst_crs="EPSG:32733",
            resolution=1000,  # target 1 km pixel size
            resampling=Resampling.bilinear
        )

        # --- Step 4: Clip using the boundary polygons ---
        # The geometry CRS is passed explicitly alongside the geometries.
        tmin_clipped = tmin_utm.rio.clip(
            angola_shape.geometry.values,
            angola_shape.crs,
            drop=True,
            invert=False
        )

        # --- Step 5: Save output raster ---
        tmin_clipped.rio.to_raster(out_path)
        print(f"‚úÖ Saved clipped raster: {out_path}")

# -----------------------------
# Run for both decades
# -----------------------------
# Both decades write into the same output folder; existing outputs are skipped.
reproject_and_clip_tmin(tmin1950_59_path, angola, output_folder)
reproject_and_clip_tmin(tmin1960_69_path, angola, output_folder)

print("üéâ All Tmin rasters (1950‚Äì1969) reprojected & clipped to Angola (EPSG:32733)!")


‚úÖ Angola CRS: EPSG:32733
üîé Found 108 Tmin rasters in /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_2.5m_tmin_1950-1959
Processing: wc2.1_cruts4.09_2.5m_tmin_1951-01.tif
‚úÖ Saved clipped raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin/tmin_1951-01_angola_utm33s.tif
Processing: wc2.1_cruts4.09_2.5m_tmin_1951-02.tif
‚úÖ Saved clipped raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin/tmin_1951-02_angola_utm33s.tif
Processing: wc2.1_cruts4.09_2.5m_tmin_1951-03.tif
‚úÖ Saved clipped raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin/tmin_1951-03_angola_utm33s.tif
Processing: wc2.1_cruts4.09_2.5m_tmin_1951-04.tif
‚úÖ Saved clipped raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin/tmin_1951-04_angola_utm33s.tif
Processing: wc2.1_cruts4.09_2.5m_tmin_1951-05.tif
‚úÖ Saved clipped raster: /Volumes/One_Touch/angola_soils_thesis/GI

: 

## tmin months averaged by year

In [2]:
# =========================================================
# Compute Annual Mean Tmin from Monthly Rasters (1951–1969)
# =========================================================
import os
import re
import glob
import rioxarray as rxr
import xarray as xr
import numpy as np

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# Base folder searched for per-year subfolders named "tmin_*_reprojected".
# NOTE(review): the clipping cell above writes to .../data_processed/tmin, not
# to this folder; presumably this input came from the QGIS run — confirm.
tmin_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin_reprojected"
# Folder that receives one annual-mean raster per year.
output_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean"
os.makedirs(output_folder, exist_ok=True)

# ---------------------------------------------------------
# Helper function to extract year from filename
# ---------------------------------------------------------
def extract_year(fname):
    """Return the first four-digit run in a raster file name as a string.

    The last path component is searched first, so digits in parent folder
    names are not mistaken for the year of the file. When the file name
    itself has no four-digit run, the whole string is searched, which keeps
    the previous behaviour for such inputs.

    Args:
        fname: File name or path.

    Returns:
        The four matched characters, or None when no four-digit run exists.
    """
    match = re.search(r"(\d{4})", os.path.basename(fname)) or re.search(r"(\d{4})", fname)
    return match.group(1) if match else None

# ---------------------------------------------------------
# Aggregate monthly rasters into annual means
# ---------------------------------------------------------
def annual_mean_temperature(input_base, output_folder, variable_name="tmin"):
    """Average the monthly rasters of each yearly subfolder into one raster.

    Subfolders of *input_base* named ``<variable_name>_*_reprojected`` are
    processed in sorted order. The year is read from the subfolder's own name
    (not from the full path, so digits in parent folders cannot be mistaken
    for it). All rasters of a year are aligned to the first one with
    ``reproject_match`` and averaged, ignoring NaN. The result is written to
    ``<output_folder>/<variable_name>_<year>_annual_mean.tif``.

    Args:
        input_base: Folder containing the yearly subfolders.
        output_folder: Existing folder that receives the annual rasters.
        variable_name: Prefix of subfolder and output names.

    Raises:
        FileNotFoundError: No matching yearly subfolder exists.
    """
    # List all subfolders (each should correspond to a year)
    year_folders = sorted(
        f for f in glob.glob(os.path.join(input_base, f"{variable_name}_*_reprojected")) if os.path.isdir(f)
    )

    if not year_folders:
        raise FileNotFoundError(f"No yearly subfolders found in {input_base}")

    for year_folder in year_folders:
        folder_name = os.path.basename(os.path.normpath(year_folder))
        year_match = re.search(r"(\d{4})", folder_name)
        if not year_match:
            # Previously this case crashed on `.group` of None.
            print(f"Warning: no four-digit year in folder name {folder_name}, skipping.")
            continue
        year = year_match.group(1)

        tif_files = sorted(glob.glob(os.path.join(year_folder, "*.tif")))

        if not tif_files:
            print(f"‚ö†Ô∏è No TIFF files found for {year}, skipping.")
            continue

        print(f"\nProcessing {variable_name} for {year} ({len(tif_files)} monthly rasters)...")

        # --- Load all monthly rasters (best effort: unreadable files are reported and skipped) ---
        arrays = []
        for f in tif_files:
            try:
                arr = rxr.open_rasterio(f, masked=True).squeeze()
                arrays.append(arr)
            except Exception as e:
                print(f"‚ö†Ô∏è Could not read {os.path.basename(f)}: {e}")

        if not arrays:
            print(f"‚ö†Ô∏è No valid rasters loaded for {year}, skipping.")
            continue

        # --- Ensure common CRS & shape: everything is matched to the first raster ---
        reference = arrays[0]
        crs = reference.rio.crs
        transform = reference.rio.transform()
        arrays = [a.rio.reproject_match(reference) for a in arrays]

        # --- Stack and compute mean ---
        stacked = xr.concat(arrays, dim="month")
        annual_mean = stacked.mean(dim="month", skipna=True)

        # --- Write metadata and save ---
        annual_mean.rio.write_crs(crs, inplace=True)
        annual_mean.rio.write_transform(transform, inplace=True)

        out_path = os.path.join(output_folder, f"{variable_name}_{year}_annual_mean.tif")
        annual_mean.rio.to_raster(out_path, compress="lzw")
        print(f"‚úÖ Saved annual mean raster: {out_path}")

    print(f"\nüéâ All annual {variable_name.upper()} rasters created successfully!")

# ---------------------------------------------------------
# RUN
# ---------------------------------------------------------
# Build one annual-mean Tmin raster per "tmin_*_reprojected" subfolder.
annual_mean_temperature(tmin_folder, output_folder, "tmin")



Processing tmin for 1951 (12 monthly rasters)...
‚úÖ Saved annual mean raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean/tmin_1951_annual_mean.tif

Processing tmin for 1952 (12 monthly rasters)...
‚úÖ Saved annual mean raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean/tmin_1952_annual_mean.tif

Processing tmin for 1953 (12 monthly rasters)...
‚úÖ Saved annual mean raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean/tmin_1953_annual_mean.tif

Processing tmin for 1954 (12 monthly rasters)...
‚úÖ Saved annual mean raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean/tmin_1954_annual_mean.tif

Processing tmin for 1955 (12 monthly rasters)...
‚úÖ Saved annual mean raster: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean/tmin_1955_annual_mean.tif

Processing tmin for 1956 (12 monthly rasters)...
‚úÖ Saved 

In [3]:
##tmin avg

# =========================================================
# Compute Multi-Year Mean Tmin (1951–1969)
# =========================================================
import os
import glob
import rioxarray as rxr
import xarray as xr

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# Folder of annual-mean Tmin rasters, and the single output file of this cell.
annual_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmin_mean"
output_path = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin_1951_1969_mean.tif"

# ---------------------------------------------------------
# LOAD AND STACK ANNUAL MEAN RASTERS
# ---------------------------------------------------------
# Every file matching the pattern is used; no year-range filter is applied here.
tif_files = sorted(glob.glob(os.path.join(annual_folder, "tmin_*_annual_mean.tif")))

if not tif_files:
    raise FileNotFoundError(f"No annual mean rasters found in {annual_folder}")

print(f"Found {len(tif_files)} annual mean rasters (expected ~19 years).")

# masked=True is requested so missing data can be skipped by the mean below.
arrays = [rxr.open_rasterio(f, masked=True).squeeze() for f in tif_files]

# --- Ensure same grid and CRS ---
# The first raster (in sorted order) defines the reference grid.
base = arrays[0]
arrays = [a.rio.reproject_match(base) for a in arrays]

# --- Stack along a new dimension (year) ---
stacked = xr.concat(arrays, dim="year")

# --- Compute mean across all years ---
multi_year_mean = stacked.mean(dim="year", skipna=True)

# --- Write CRS and transform ---
# Georeferencing is copied from the reference raster onto the result.
multi_year_mean.rio.write_crs(base.rio.crs, inplace=True)
multi_year_mean.rio.write_transform(base.rio.transform(), inplace=True)

# --- Save output raster ---
multi_year_mean.rio.to_raster(output_path, compress="lzw")

print(f"‚úÖ Saved multi-year average Tmin raster:\n{output_path}")
print("üéâ Tmin 1951‚Äì1969 climatology created successfully!")


Found 19 annual mean rasters (expected ~19 years).
‚úÖ Saved multi-year average Tmin raster:
/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmin_1951_1969_mean.tif
üéâ Tmin 1951‚Äì1969 climatology created successfully!


## tmax years and months into one averaged raster

In [4]:
## tmax years and months into one averaged raster
# =========================================================
# Compute Annual and Multi-Year Mean Tmax (1951–1969)
# =========================================================
import os
import re
import glob
import rioxarray as rxr
import xarray as xr
import numpy as np

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# Paths to monthly Tmax folders
# Two kinds of entry are accepted by compute_annual_means: a folder that is
# itself one year of rasters (first entry) and a container of
# "tmax_*_reprojected" yearly subfolders (second entry).
tmax_base_folders = [
    "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmax_1951_reprojected",
    "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmax_reprojected"
]

# Output folders
# annual_output is a folder (one raster per year); multi_output is a single file path.
annual_output = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean"
multi_output = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmax_1951_1969_mean.tif"
os.makedirs(annual_output, exist_ok=True)

# ---------------------------------------------------------
# STEP 1 — Compute Annual Means
# ---------------------------------------------------------
def compute_annual_means(base_folders, output_folder, variable_name="tmax"):
    """Average the monthly rasters of every yearly folder into one raster per year.

    Yearly folders are collected from two sources: subfolders of each base
    folder named ``<variable_name>_*_reprojected``, and any base folder whose
    own name starts with ``<variable_name>_``. The year is read from the
    folder's own name; folders without a four-digit run in their name (for
    example a plain ``tmax_reprojected`` container) are ignored. Reading the
    name rather than the full path prevents digits in parent folders from
    being taken as the year.

    Args:
        base_folders: Iterable of folder paths to inspect.
        output_folder: Existing folder that receives
            ``<variable_name>_<year>_annual_mean.tif`` files.
        variable_name: Prefix of folder and output names.

    Raises:
        FileNotFoundError: No candidate yearly folder was found.
    """
    # Collect all yearly subfolders.
    year_folders = []
    for base_dir in base_folders:
        for sub in sorted(glob.glob(os.path.join(base_dir, f"{variable_name}_*_reprojected"))):
            if os.path.isdir(sub):
                year_folders.append(sub)

    # Include single-year folders passed directly (e.g., tmax_1951_reprojected).
    for base_dir in base_folders:
        if os.path.basename(base_dir).startswith(f"{variable_name}_") and os.path.isdir(base_dir):
            year_folders.append(base_dir)

    if not year_folders:
        raise FileNotFoundError("No yearly Tmax folders found.")

    # set() drops folders listed twice so no year is processed more than once.
    for year_folder in sorted(set(year_folders)):
        folder_name = os.path.basename(os.path.normpath(year_folder))
        year_match = re.search(r"(\d{4})", folder_name)
        if not year_match:
            continue
        year = year_match.group(1)

        tif_files = sorted(glob.glob(os.path.join(year_folder, "*.tif")))
        if not tif_files:
            print(f"‚ö†Ô∏è No monthly Tmax rasters found for {year}, skipping.")
            continue

        print(f"\nProcessing Tmax for {year} ({len(tif_files)} monthly rasters)...")

        # Load the monthly rasters (best effort: unreadable files are reported and skipped).
        arrays = []
        for f in tif_files:
            try:
                arr = rxr.open_rasterio(f, masked=True).squeeze()
                arrays.append(arr)
            except Exception as e:
                print(f"‚ö†Ô∏è Could not read {os.path.basename(f)}: {e}")

        if not arrays:
            continue

        # Align every month to the first raster's grid before stacking.
        reference = arrays[0]
        crs = reference.rio.crs
        transform = reference.rio.transform()
        arrays = [a.rio.reproject_match(reference) for a in arrays]

        stacked = xr.concat(arrays, dim="month")
        annual_mean = stacked.mean(dim="month", skipna=True)
        annual_mean.rio.write_crs(crs, inplace=True)
        annual_mean.rio.write_transform(transform, inplace=True)

        out_path = os.path.join(output_folder, f"{variable_name}_{year}_annual_mean.tif")
        annual_mean.rio.to_raster(out_path, compress="lzw")
        print(f"‚úÖ Saved: {out_path}")

    print(f"\nüéâ All annual {variable_name.upper()} rasters created successfully!")

# ---------------------------------------------------------
# STEP 2 — Compute Multi-Year Mean (1951–1969)
# ---------------------------------------------------------
def compute_multi_year_mean(annual_folder, output_path, variable_name="tmax"):
    """Average all annual-mean rasters in *annual_folder* into a single raster.

    Every ``<variable_name>_*_annual_mean.tif`` file is loaded, matched to the
    grid of the first file (sorted order), stacked along a ``year`` dimension
    and averaged with NaN values skipped. The result is written to
    *output_path* with LZW compression.

    Args:
        annual_folder: Folder containing the annual-mean rasters.
        output_path: Path of the GeoTIFF to write.
        variable_name: File-name prefix; also used (upper-cased) in messages.

    Raises:
        FileNotFoundError: No annual-mean raster matches the pattern.
    """
    pattern = os.path.join(annual_folder, f"{variable_name}_*_annual_mean.tif")
    annual_paths = sorted(glob.glob(pattern))
    if len(annual_paths) == 0:
        raise FileNotFoundError(f"No annual mean rasters found in {annual_folder}")

    print(f"\nFound {len(annual_paths)} annual mean rasters for {variable_name}.")

    yearly = []
    for path in annual_paths:
        yearly.append(rxr.open_rasterio(path, masked=True).squeeze())

    # The first raster defines the grid every other year is matched to.
    reference = yearly[0]
    aligned = [layer.rio.reproject_match(reference) for layer in yearly]

    climatology = xr.concat(aligned, dim="year").mean(dim="year", skipna=True)

    # Copy georeferencing from the reference raster onto the result.
    climatology.rio.write_crs(reference.rio.crs, inplace=True)
    climatology.rio.write_transform(reference.rio.transform(), inplace=True)
    climatology.rio.to_raster(output_path, compress="lzw")

    print(f"‚úÖ Saved multi-year average {variable_name.upper()} raster:")
    print(f"   {output_path}")

# ---------------------------------------------------------
# RUN BOTH STEPS
# ---------------------------------------------------------
# Step 1 writes the annual rasters that step 2 then reads from annual_output.
compute_annual_means(tmax_base_folders, annual_output, "tmax")
compute_multi_year_mean(annual_output, multi_output, "tmax")

print("\nüå°Ô∏è All Tmax annual and multi-year means computed successfully!")



Processing Tmax for 1951 (12 monthly rasters)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1951_annual_mean.tif

Processing Tmax for 1952 (12 monthly rasters)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1952_annual_mean.tif

Processing Tmax for 1953 (12 monthly rasters)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1953_annual_mean.tif

Processing Tmax for 1954 (12 monthly rasters)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1954_annual_mean.tif

Processing Tmax for 1955 (12 monthly rasters)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1955_annual_mean.tif

Processing Tmax for 1956 (12 monthly rasters)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1956_an

In [3]:
##tmax months averaged by year

import os
import re
import glob
import rioxarray as rxr
import xarray as xr

# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
# Update these paths to your system
# Flat folder of monthly Tmax rasters; files are grouped by the year in their name.
tmax_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/tmax"
# Folder that receives one annual-mean raster per year.
output_folder = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean"

os.makedirs(output_folder, exist_ok=True)

# ---------------------------------------------------------
# Helper function to extract year from filename
# ---------------------------------------------------------
def extract_year(fname):
    """Return the first four-digit run in a raster file name as a string.

    The last path component is searched first, so digits in parent folder
    names are not mistaken for the year of the file. When the file name
    itself has no four-digit run, the whole string is searched, which keeps
    the previous behaviour for such inputs.

    Args:
        fname: File name or path.

    Returns:
        The four matched characters, or None when no four-digit run exists.
    """
    match = re.search(r"(\d{4})", os.path.basename(fname)) or re.search(r"(\d{4})", fname)
    return match.group(1) if match else None

# ---------------------------------------------------------
# Aggregate monthly rasters into annual means
# ---------------------------------------------------------
def annual_mean_temperature(input_folder, output_folder, variable_name="tmax"):
    """Average monthly rasters from a flat folder into one raster per year.

    ``*.tif`` files in *input_folder* are grouped by the year found in their
    file name; files without one are ignored. Each group is stacked along a
    ``month`` dimension and averaged, and the result is written to
    ``<output_folder>/<variable_name>_<year>_annual_mean.tif``.

    Args:
        input_folder: Folder searched (non-recursively) for ``*.tif`` files.
        output_folder: Existing folder that receives the annual rasters.
        variable_name: Prefix used in output names and progress messages.
    """
    tif_files = sorted(glob.glob(os.path.join(input_folder, "*.tif")))
    yearly_groups = {}

    # Group files by year. Only the file name is parsed, so digits in the
    # folder path cannot be picked up as the year.
    for tif in tif_files:
        year = extract_year(os.path.basename(tif))
        if not year:
            continue
        yearly_groups.setdefault(year, []).append(tif)

    # Process each year.
    for year, files in yearly_groups.items():
        print(f"\nProcessing {variable_name} for {year} ({len(files)} monthly files)...")

        # Load every monthly raster found for this year.
        arrays = [rxr.open_rasterio(f, masked=True).squeeze() for f in files]

        # Stamp the first raster's CRS on all of them. This only rewrites
        # metadata; it does not resample, so the inputs must already share
        # one grid for the stack below to line up.
        reference_crs = arrays[0].rio.crs
        arrays = [arr.rio.write_crs(reference_crs, inplace=False) for arr in arrays]

        # Stack along a new dimension (month) and average over it.
        stacked = xr.concat(arrays, dim="month")
        annual_mean = stacked.mean(dim="month")

        # Carry georeferencing over from the first raster.
        annual_mean.rio.write_crs(reference_crs, inplace=True)
        annual_mean.rio.write_transform(arrays[0].rio.transform(), inplace=True)

        out_path = os.path.join(output_folder, f"{variable_name}_{year}_annual_mean.tif")
        annual_mean.rio.to_raster(out_path)
        print(f"‚úÖ Saved: {out_path}")

# ---------------------------------------------------------
# RUN
# ---------------------------------------------------------
# Write one annual-mean Tmax raster per year found in tmax_folder.
annual_mean_temperature(tmax_folder, output_folder, "tmax")

print("\nüéâ Annual Tmax rasters created successfully.")



Processing tmax for 1951 (12 monthly files)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1951_annual_mean.tif

Processing tmax for 1952 (12 monthly files)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1952_annual_mean.tif

Processing tmax for 1953 (12 monthly files)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1953_annual_mean.tif

Processing tmax for 1954 (12 monthly files)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1954_annual_mean.tif

Processing tmax for 1955 (12 monthly files)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1955_annual_mean.tif

Processing tmax for 1956 (12 monthly files)...
‚úÖ Saved: /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/annual_tmax_mean/tmax_1956_annual_mean.ti

In [3]:
import os
import glob
import zipfile
import rioxarray as rxr
import geopandas as gpd

# -----------------------------
# File paths
# -----------------------------
# Zipped Tmin downloads, one archive per decade.
# NOTE(review): the recorded output of this cell lists "..._10m_..." archives,
# so these paths appear to have been edited after the last run — confirm.
tmin1950_59_zip = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_2.5m_tmin_1950-1959.zip"
tmin1960_69_zip = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_2.5m_tmin_1960-1969.zip"

# Boundary layer used as the clip mask, and the folder receiving clipped rasters.
angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
output_folder = "/Volumes/One_Touch/angola_soils_thesis/data_processed/tmin_masked"
os.makedirs(output_folder, exist_ok=True)

# Folders to extract zips
tmin1950_59_folder = "/Volumes/One_Touch/angola_soils_thesis/data_raw/tmin1950_59"
tmin1960_69_folder = "/Volumes/One_Touch/angola_soils_thesis/data_raw/tmin1960_69"
os.makedirs(tmin1950_59_folder, exist_ok=True)
os.makedirs(tmin1960_69_folder, exist_ok=True)

# -----------------------------
# Extract ZIP files
# -----------------------------
# Each archive is unpacked in full on every run of this cell.
for zip_file, extract_folder in [(tmin1950_59_zip, tmin1950_59_folder),
                                 (tmin1960_69_zip, tmin1960_69_folder)]:
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extract_folder)
    print(f"Extracted {zip_file} to {extract_folder}")

# -----------------------------
# Load Angola vector
# -----------------------------
angola = gpd.read_file(angola_gpkg)
# Print the boundary CRS; rasters in a different CRS are reprojected to it below.
print("Angola CRS:", angola.crs)

# -----------------------------
# Function to clip Tmin rasters
# -----------------------------
def tmin_clipped_to_angola(folder_path, angola_shape, output_folder, *, overwrite=True):
    """Clip every GeoTIFF in ``folder_path`` to the Angola boundary and save it.

    Each ``*.tif`` found directly inside ``folder_path`` is opened, reprojected
    to the CRS of ``angola_shape`` when the two differ, masked to the polygon
    geometry, and written to ``output_folder`` under its original file name.

    Args:
        folder_path: Folder holding the extracted Tmin rasters.
        angola_shape: GeoDataFrame providing the clip geometry; its ``crs`` is
            the CRS of the rasters that get written.
        output_folder: Destination folder (created if it does not exist).
        overwrite: When True (the default, matching the previous behaviour)
            existing outputs are regenerated. When False, rasters whose output
            file already exists are skipped so an interrupted run can resume.

    Returns:
        list: Paths of the rasters written by this call, in processing order.
    """
    os.makedirs(output_folder, exist_ok=True)

    # glob returns files in arbitrary order; sort so runs and logs are reproducible.
    tif_files = sorted(glob.glob(os.path.join(folder_path, "*.tif")))
    if not tif_files:
        # Make an empty/mistyped input folder visible instead of silently doing nothing.
        print(f"No .tif files found in {folder_path}")
        return []

    written = []
    for tif in tif_files:
        filename = os.path.basename(tif)
        out_path = os.path.join(output_folder, filename)

        if not overwrite and os.path.exists(out_path):
            print(f"Skipping {filename}, already exists.")
            continue

        print("Processing:", filename)
        # The context manager closes the underlying file handle after each
        # raster, so handles do not pile up across a long list of files.
        with rxr.open_rasterio(tif, masked=True) as source:
            tmin = source

            # Reproject raster to Angola CRS if needed
            if tmin.rio.crs != angola_shape.crs:
                tmin = tmin.rio.reproject(angola_shape.crs)

            # Clip to Angola polygon (vector mask)
            tmin_clipped = tmin.rio.clip(
                angola_shape.geometry, angola_shape.crs, drop=True, invert=False
            )

            # Squeeze to remove extra dimensions (single-band)
            tmin_clipped = tmin_clipped.squeeze()

            # Save clipped raster while the source is still open
            tmin_clipped.rio.to_raster(out_path)

        print("Saved:", out_path)
        written.append(out_path)

    return written

# -----------------------------
# Clip both decades, oldest first
# -----------------------------
for decade_folder in (tmin1950_59_folder, tmin1960_69_folder):
    tmin_clipped_to_angola(decade_folder, angola, output_folder)

print("All Tmin rasters clipped to Angola!")


Extracted /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_10m_tmin_1950-1959.zip to /Volumes/One_Touch/angola_soils_thesis/data_raw/tmin1950_59
Extracted /Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_10m_tmin_1960-1969.zip to /Volumes/One_Touch/angola_soils_thesis/data_raw/tmin1960_69
Angola CRS: EPSG:32733
Processing: wc2.1_cruts4.09_10m_tmin_1951-01.tif
Saved: /Volumes/One_Touch/angola_soils_thesis/data_processed/tmin_masked/wc2.1_cruts4.09_10m_tmin_1951-01.tif
Processing: wc2.1_cruts4.09_10m_tmin_1951-02.tif
Saved: /Volumes/One_Touch/angola_soils_thesis/data_processed/tmin_masked/wc2.1_cruts4.09_10m_tmin_1951-02.tif
Processing: wc2.1_cruts4.09_10m_tmin_1951-03.tif
Saved: /Volumes/One_Touch/angola_soils_thesis/data_processed/tmin_masked/wc2.1_cruts4.09_10m_tmin_1951-03.tif
Processing: wc2.1_cruts4.09_10m_tmin_1951-04.tif
Saved: /Volumes/One_Touch/angola_soils_thesis/data_processed/tmin

In [None]:
## TODO: need to extract tmean! (the code below is still the tmin cell, copied as a starting point)

import os
import glob
import zipfile
import rioxarray as rxr
import geopandas as gpd

# -----------------------------
# Inputs, outputs and working folders
# -----------------------------
# Zipped tmin archives, one per decade.
tmin1950_59_zip = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_10m_tmin_1950-1959.zip"
tmin1960_69_zip = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_raw/climaticdata/tmindata/wc2.1_cruts4.09_10m_tmin_1960-1969.zip"

# Folder each archive is unpacked into.
tmin1950_59_folder = "/Volumes/One_Touch/angola_soils_thesis/data_raw/tmin1950_59"
tmin1960_69_folder = "/Volumes/One_Touch/angola_soils_thesis/data_raw/tmin1960_69"

# Boundary layer used as the clip mask, and the destination for clipped rasters.
angola_gpkg = "/Volumes/One_Touch/angola_soils_thesis/GIS_Angola/data_processed/angola_soil_gpkg_stuff/angola_boundaries_32733.gpkg"
output_folder = "/Volumes/One_Touch/angola_soils_thesis/data_processed/tmin_masked"

# Create every folder up front; exist_ok keeps re-running the cell harmless.
for needed_dir in (output_folder, tmin1950_59_folder, tmin1960_69_folder):
    os.makedirs(needed_dir, exist_ok=True)

# -----------------------------
# Unpack both decade archives
# -----------------------------
decade_archives = (
    (tmin1950_59_zip, tmin1950_59_folder),
    (tmin1960_69_zip, tmin1960_69_folder),
)
for zip_file, extract_folder in decade_archives:
    with zipfile.ZipFile(zip_file, mode="r") as archive:
        archive.extractall(path=extract_folder)
    print(f"Extracted {zip_file} to {extract_folder}")

# -----------------------------
# Read the Angola boundary layer
# -----------------------------
angola = gpd.read_file(angola_gpkg)
print("Angola CRS:", angola.crs)

# -----------------------------
# Function to clip Tmin rasters
# -----------------------------
def tmin_clipped_to_angola(folder_path, angola_shape, output_folder, *, overwrite=True):
    """Clip every GeoTIFF in ``folder_path`` to the Angola boundary and save it.

    Each ``*.tif`` found directly inside ``folder_path`` is opened, reprojected
    to the CRS of ``angola_shape`` when the two differ, masked to the polygon
    geometry, and written to ``output_folder`` under its original file name.

    Args:
        folder_path: Folder holding the extracted Tmin rasters.
        angola_shape: GeoDataFrame providing the clip geometry; its ``crs`` is
            the CRS of the rasters that get written.
        output_folder: Destination folder (created if it does not exist).
        overwrite: When True (the default, matching the previous behaviour)
            existing outputs are regenerated. When False, rasters whose output
            file already exists are skipped so an interrupted run can resume.

    Returns:
        list: Paths of the rasters written by this call, in processing order.
    """
    os.makedirs(output_folder, exist_ok=True)

    # glob returns files in arbitrary order; sort so runs and logs are reproducible.
    tif_files = sorted(glob.glob(os.path.join(folder_path, "*.tif")))
    if not tif_files:
        # Make an empty/mistyped input folder visible instead of silently doing nothing.
        print(f"No .tif files found in {folder_path}")
        return []

    written = []
    for tif in tif_files:
        filename = os.path.basename(tif)
        out_path = os.path.join(output_folder, filename)

        if not overwrite and os.path.exists(out_path):
            print(f"Skipping {filename}, already exists.")
            continue

        print("Processing:", filename)
        # The context manager closes the underlying file handle after each
        # raster, so handles do not pile up across a long list of files.
        with rxr.open_rasterio(tif, masked=True) as source:
            tmin = source

            # Reproject raster to Angola CRS if needed
            if tmin.rio.crs != angola_shape.crs:
                tmin = tmin.rio.reproject(angola_shape.crs)

            # Clip to Angola polygon (vector mask)
            tmin_clipped = tmin.rio.clip(
                angola_shape.geometry, angola_shape.crs, drop=True, invert=False
            )

            # Squeeze to remove extra dimensions (single-band)
            tmin_clipped = tmin_clipped.squeeze()

            # Save clipped raster while the source is still open
            tmin_clipped.rio.to_raster(out_path)

        print("Saved:", out_path)
        written.append(out_path)

    return written

# -----------------------------
# Clip both decades, oldest first
# -----------------------------
for decade_folder in (tmin1950_59_folder, tmin1960_69_folder):
    tmin_clipped_to_angola(decade_folder, angola, output_folder)

print("All Tmin rasters clipped to Angola!")
