In [None]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
# Root directory that main() walks recursively (os.walk) looking for VIIRS files.
VIIRS_BASE_DIR = r"C:\Tropos\Working\T-re_fires\pixeldir"
# Directory listed (non-recursively, os.listdir) for IMERG ".HDF5" files.
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
# CSV written by main() when at least one result was produced.
OUTPUT_CSV = "final_precipitation_results.csv"
# Text log appended to by log_message(); main() truncates it at start-up.
LOG_FILE = "processing_log.txt"
# VIIRS file names: captures an 8-digit date and a 4-digit time
# (parsed downstream as %Y%m%d and %H%M).
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.+?\.txt")
# IMERG file names: captures the 8-digit date, the 6-digit start time after
# "-S", the 6-digit end time after "-E" and a trailing numeric field.
# NOTE: only names whose version tag is "V" + two digits + "B" match.
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d+)\.V\d{2}B\.HDF5")

def log_message(message):
    """Print *message* with a timestamp prefix and append it to LOG_FILE.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] message``, using
    the local time returned by ``datetime.now()``.
    """
    stamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    entry = f"[{stamp}] {message}"

    # Console first, then the log file (opened in append mode on each call).
    print(entry)
    with open(LOG_FILE, "a") as log_handle:
        log_handle.write(entry + "\n")

def parse_viirs_datetime(filename):
    """Return the datetime encoded in a VIIRS file name, or None.

    The name must match VIIRS_PATTERN from its first character; the captured
    date (YYYYMMDD) and time (HHMM) are combined into one naive datetime.
    None is returned when the name does not match, or when the digits do not
    form a valid date/time (that case is also logged).
    """
    found = VIIRS_PATTERN.match(filename)
    if not found:
        return None

    date_part, time_part = found.groups()
    try:
        parsed = datetime.strptime(f"{date_part}{time_part}", "%Y%m%d%H%M")
    except ValueError as err:
        log_message(f"Invalid VIIRS datetime in {filename}: {str(err)}")
        return None
    return parsed

def parse_imerg_datetime(filename):
    """Return the midpoint of the time window encoded in an IMERG file name.

    The name must match IMERG_PATTERN. Only the hour and minute digits of
    the start and end times are used (seconds are dropped), and both are
    combined with the single date found in the name. Returns None when the
    name does not match or when the digits are not a valid date/time (the
    latter is logged).
    """
    found = IMERG_PATTERN.match(filename)
    if found is None:
        return None

    day, begin, finish = found.group(1, 2, 3)
    stamp_format = "%Y%m%d%H%M"
    try:
        window_start = datetime.strptime(day + begin[:4], stamp_format)
        window_end = datetime.strptime(day + finish[:4], stamp_format)
    except ValueError as err:
        log_message(f"Error parsing {filename}: {str(err)}")
        return None

    half_window = (window_end - window_start) / 2
    return window_start + half_window

def find_matching_imerg(viirs_time, imerg_files):
    """Return the IMERG file name closest in time to *viirs_time*, or None.

    Each candidate's time is obtained from parse_imerg_datetime(). A
    candidate is accepted only if it is strictly closer than the best so
    far, starting from a limit of 45 minutes; on equal distance the earlier
    entry in *imerg_files* wins. Names that cannot be parsed are ignored.
    """
    closest_name = None
    smallest_gap = timedelta(minutes=45)

    for candidate in imerg_files:
        candidate_time = parse_imerg_datetime(candidate)
        if not candidate_time:
            continue

        gap = abs(candidate_time - viirs_time)
        if gap < smallest_gap:
            smallest_gap, closest_name = gap, candidate

    return closest_name

def extract_coordinates(file_path):
    """Read latitude/longitude pairs from a whitespace-delimited text file.

    Each line is split on whitespace; lines with fewer than 29 fields are
    ignored. Field 27 (0-based) is read as the latitude and field 28 as the
    longitude.

    A row contributes to the result only when BOTH values parse as floats,
    so the two returned lists always have the same length and stay
    index-aligned.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A ``(lats, lons)`` tuple of lists of floats. ``([], [])`` is
        returned (and the error logged) if the file cannot be read.
    """
    lats, lons = [], []
    try:
        with open(file_path, 'r') as f:
            for line in f:
                parts = line.split()
                if len(parts) < 29:
                    continue
                # Parse both values before storing either one: a malformed
                # longitude must not leave an orphan latitude behind.
                try:
                    lat = float(parts[27])
                    lon = float(parts[28])
                except ValueError:
                    continue
                lats.append(lat)
                lons.append(lon)
    except Exception as e:
        # Best effort: any read failure discards partial data for this file.
        log_message(f"File read error: {str(e)}")
        return [], []
    return lats, lons

def process_imerg(imerg_path, lats, lons):
    """Summarise IMERG precipitation over the bounding box of the given points.

    The 'Grid' group of the HDF5 file is opened with xarray and the
    'precipitation' variable is subset to the latitude/longitude range
    spanned by *lats* and *lons*. If that range is so small that it contains
    no grid coordinate, the grid cell nearest to the centre of the range is
    used instead of failing on an empty selection.

    Args:
        imerg_path: Path of the IMERG HDF5 file.
        lats: Non-empty sequence of latitudes.
        lons: Non-empty sequence of longitudes.

    Returns:
        dict with keys 'average', 'total', 'max' (floats computed over the
        non-NaN values), 'values' (list of the non-NaN values) and 'count'
        (their number). When every selected value is NaN, 'average' and
        'max' are NaN, 'total' is 0.0 and 'count' is 0.

    Raises:
        Exception: Any error raised while opening or reading the file is
            logged and then re-raised unchanged.
    """
    try:
        lat_lo, lat_hi = min(lats), max(lats)
        lon_lo, lon_hi = min(lons), max(lons)

        # The context manager closes the underlying file even on error; the
        # original left one handle open per processed VIIRS file.
        with xr.open_dataset(imerg_path, engine='h5netcdf', group='Grid') as ds:
            lat_values = ds.lat.values
            lon_values = ds.lon.values

            # Label-based slices must run in the same direction as the
            # coordinate axis, otherwise they select nothing.
            if lat_values[0] > lat_values[-1]:
                lat_slice = slice(lat_hi, lat_lo)
            else:
                lat_slice = slice(lat_lo, lat_hi)

            if lon_values[0] > lon_values[-1]:
                lon_slice = slice(lon_hi, lon_lo)
            else:
                lon_slice = slice(lon_lo, lon_hi)

            precipitation = ds['precipitation']
            precip_data = precipitation.sel(lat=lat_slice, lon=lon_slice)

            if precip_data.size == 0:
                # Bounding box falls between grid coordinates (e.g. a single
                # pixel or a very small cluster): use the nearest cell.
                precip_data = precipitation.sel(
                    lat=(lat_lo + lat_hi) / 2,
                    lon=(lon_lo + lon_hi) / 2,
                    method='nearest',
                )

            # .values loads the data into memory, so it must be read while
            # the file is still open.
            raw_values = np.atleast_1d(precip_data.values)

        valid_values = raw_values[~np.isnan(raw_values)]

        if valid_values.size:
            average = float(valid_values.mean())
            total = float(valid_values.sum())
            maximum = float(valid_values.max())
        else:
            # Same results the NaN-aware reductions give for all-NaN data.
            average, total, maximum = float('nan'), 0.0, float('nan')

        return {
            'average': average,
            'total': total,
            'max': maximum,
            'values': valid_values.tolist(),
            'count': len(valid_values)
        }
    except Exception as e:
        log_message(f"IMERG processing error: {str(e)}")
        raise

def create_result_entry(file, imerg_file, folder, time, status, avg, total, max_val, values, lats, lons):
    """Build one output row (a dict) describing a processed VIIRS file.

    The row carries the file names, the folder (full path plus its last
    path component), the observation time formatted as ``YYYY-MM-DD HH:MM``,
    the coordinate extent and the precipitation statistics, all rounded to
    two decimals. Statistics passed as None stay None; an empty *lats* /
    *lons* yields None for the matching extent columns; an empty *values*
    yields None for 'Pixel_Values', otherwise the values are joined with
    ';' using two decimals each.
    """
    def _two_decimals(number):
        # None marks a missing statistic and is passed through unchanged.
        return None if number is None else round(number, 2)

    def _extent(coords):
        # (min, max) rounded to two decimals, or (None, None) when empty.
        if not coords:
            return None, None
        return round(min(coords), 2), round(max(coords), 2)

    lat_low, lat_high = _extent(lats)
    lon_low, lon_high = _extent(lons)

    if values:
        pixel_text = ';'.join(f"{v:.2f}" for v in values)
    else:
        pixel_text = None

    entry = {}
    entry['VIIRS_File'] = file
    entry['IMERG_File'] = imerg_file
    entry['Folder_Full'] = folder
    entry['Folder_Name'] = os.path.basename(folder)
    entry['Observation_Time'] = time.strftime("%Y-%m-%d %H:%M")
    entry['Min_Lat'] = lat_low
    entry['Max_Lat'] = lat_high
    entry['Min_Lon'] = lon_low
    entry['Max_Lon'] = lon_high
    entry['Average_Precip_mmh'] = _two_decimals(avg)
    entry['Total_Precip_mm'] = _two_decimals(total)
    entry['Max_Precip_mmh'] = _two_decimals(max_val)
    entry['Pixel_Values'] = pixel_text
    entry['Status'] = status
    return entry

def main():
    """Match every VIIRS file with an IMERG file and export the statistics.

    Steps:
      1. Truncate LOG_FILE and list the ".HDF5" files in IMERG_DIR.
      2. Walk VIIRS_BASE_DIR (skipping dot-directories) and, for each file
         whose name starts with "VIIRS", parse its time, find the closest
         IMERG file, read its coordinates and compute the precipitation
         statistics over their bounding box.
      3. Write all successful rows to OUTPUT_CSV and log a summary.

    Every file that is dropped along the way is logged together with the
    reason, so the difference between the "processed" and "saved" counts
    can be traced in the log.
    """
    # Start every run with an empty log file.
    with open(LOG_FILE, 'w'):
        pass

    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    log_message(f"Found {len(imerg_files)} IMERG files")

    results = []
    processed = matched = 0

    for root, dirs, files in os.walk(VIIRS_BASE_DIR):
        # In-place edit so os.walk does not descend into hidden directories.
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        for file in files:
            # Names starting with '.' or '~' can never start with 'VIIRS',
            # so the prefix test alone filters hidden/temporary files too.
            if not file.startswith('VIIRS'):
                continue

            processed += 1
            file_path = os.path.join(root, file)
            log_message(f"\nProcessing: {file}")

            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                log_message(f"Skipped {file}: could not parse a date/time from the file name")
                continue

            imerg_match = find_matching_imerg(viirs_time, imerg_files)
            if not imerg_match:
                log_message(f"Skipped {file}: no matching IMERG file for {viirs_time:%Y-%m-%d %H:%M}")
                continue

            matched += 1
            imerg_path = os.path.join(IMERG_DIR, imerg_match)

            lats, lons = extract_coordinates(file_path)
            if not lats or not lons:
                log_message(f"Skipped {file}: no coordinates could be read")
                continue

            try:
                precip = process_imerg(imerg_path, lats, lons)
                results.append(create_result_entry(
                    file, imerg_match, root, viirs_time, "Success",
                    precip['average'], precip['total'], precip['max'],
                    precip['values'], lats, lons
                ))
            except Exception as e:
                # One bad file must not abort the whole batch.
                log_message(f"Processing failed: {str(e)}")

    if results:
        df = pd.DataFrame(results)
        df.to_csv(OUTPUT_CSV, index=False)
        log_message(f"\nSaved {len(results)} results to {OUTPUT_CSV}")
    else:
        log_message("\nNo valid results to save")

    successful = sum(1 for r in results if r['Status'] == 'Success')
    log_message("\n=== Processing Summary ===")
    log_message(f"Total VIIRS files processed: {processed}")
    log_message(f"VIIRS files with valid matches: {matched}")
    log_message(f"Successfully processed files: {successful}")

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Processing complete. Results saved to final_precipitation_results_final.csv
