In [9]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
VIIRS_BASE_DIR = r"C:/Tropos/Working/T-re_fires/pixeldir"
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
OUTPUT_CSV = "final_precipitation_results.csv"
#VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.*\.txt")
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.*?\.txt")

IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d{4})\.V06B\.HDF5")

def parse_viirs_datetime(filename):
    """Extract datetime from VIIRS filename"""
    match = VIIRS_PATTERN.match(filename)
    if match:
        date_str, time_str = match.groups()
        return datetime.strptime(f"{date_str} {time_str}", "%Y%m%d %H%M")
    return None

def parse_imerg_datetime(filename):
    """Extract datetime from IMERG filename"""
    match = IMERG_PATTERN.match(filename)
    if match:
        date_str, start_time, end_time, product_time = match.groups()
        return datetime.strptime(date_str + start_time[:4], "%Y%m%d%H%M")
    return None

def find_matching_imerg(viirs_time, imerg_files):
    """Find IMERG file closest to VIIRS observation time"""
    best_match = None
    min_diff = timedelta.max
    for f in imerg_files:
        imerg_time = parse_imerg_datetime(f)
        if imerg_time:
            time_diff = abs(viirs_time - imerg_time)
            if time_diff < min_diff:
                min_diff = time_diff
                best_match = f
    return best_match if min_diff < timedelta(minutes=30) else None

def main():
    # Get all IMERG files
    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    print(imerg_files)
    
    # Process VIIRS files and find matches
    results = []
    for root, _, files in os.walk(VIIRS_BASE_DIR):
        for file in files:
            if not VIIRS_PATTERN.match(file):
                continue
                
            viirs_path = os.path.join(root, file)
            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                continue
                
            # Find matching IMERG file
            imerg_match = find_matching_imerg(viirs_time, imerg_files)
            if not imerg_match:
                continue
                
            # Process coordinates and precipitation
            try:
                lats, lons = extract_coordinates(viirs_path)
                if not lats or not lons:
                    raise ValueError("No valid coordinates found")
                
                precip_data = process_imerg(
                    os.path.join(IMERG_DIR, imerg_match),
                    lats, lons
                )
                
                results.append({
                    'VIIRS_File': file,
                    'IMERG_File': imerg_match,
                    'Folder': root,
                    'Observation_Time': viirs_time.strftime("%Y-%m-%d %H:%M"),
                    'Average_Precip_mmh': round(precip_data['average'], 4),
                    'Total_Precip_mm': round(precip_data['total'], 4),
                    'Max_Precip_mmh': round(precip_data['max'], 4),
                    'Pixel_Values': ';'.join(map(str, precip_data['values'])),
                    'Status': 'Success'
                })
                
            except Exception as e:
                results.append({
                    'VIIRS_File': file,
                    'IMERG_File': imerg_match or 'N/A',
                    'Folder': root,
                    'Status': f"Error: {str(e)}"
                })

    # Save results
    pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)
    print(f"Processing complete. Results saved to {OUTPUT_CSV}")

# Keep the existing extract_coordinates, process_imerg, and helper functions from previous code

if __name__ == "__main__":
    main()

['3B-HHR.MS.MRG.3IMERG.20170101-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170101-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170102-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170102-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170103-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170103-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170104-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170104-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170105-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170105-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170106-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170106-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170107-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170107-S173000-E175959.1050.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170108-S170000-E172959.1020.V07B.HDF5', '3B-HHR.MS.MRG.3IMERG.20170108-S173000-

In [5]:
imerg_files

NameError: name 'imerg_files' is not defined

In [3]:
ls

 Volume in drive C is Windows
 Volume Serial Number is 966C-A36B

 Directory of c:\Users\chinthaparthy\AppData\Local\Programs\Microsoft VS Code

03/21/2025  11:13 AM    <DIR>          .
03/13/2025  04:30 PM    <DIR>          ..
03/13/2025  06:05 PM    <DIR>          bin
03/12/2025  02:01 PM           147,398 chrome_100_percent.pak
03/12/2025  02:01 PM           219,772 chrome_200_percent.pak
03/12/2025  02:32 PM       182,525,000 Code.exe
03/12/2025  02:00 PM               367 Code.VisualElementsManifest.xml
03/12/2025  02:29 PM         4,927,168 d3dcompiler_47.dll
03/12/2025  02:32 PM         2,832,928 ffmpeg.dll
03/21/2025  11:13 AM                 2 final_precipitation_results.csv
03/12/2025  02:01 PM        10,464,144 icudtl.dat
03/12/2025  02:32 PM           503,864 libEGL.dll
03/12/2025  02:29 PM         8,122,936 libGLESv2.dll
03/12/2025  02:01 PM        10,984,370 LICENSES.chromium.html
03/13/2025  06:05 PM    <DIR>          locales
03/13/2025  06:05 PM    <DIR>          polici

In [4]:
pwd

'c:\\Users\\chinthaparthy\\AppData\\Local\\Programs\\Microsoft VS Code'

In [3]:
VIIRS_BASE_DIR = r"C:/Tropos/Working/T-re_fires/pixeldir"


In [8]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
VIIRS_BASE_DIR = r"C:\Tropos\Working\T-re_fires\pixeldir"
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
OUTPUT_CSV = "final_precipitation_results.csv"
LOG_FILE = "processing_log.txt"
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.*?\.txt")
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d{4})\.V06B\.HDF5")

def log_message(message):
    print(message)
    with open(LOG_FILE, "a") as f:
        f.write(f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S')} - {message}\n")

def parse_viirs_datetime(filename):
    """Parse datetime from VIIRS filename with spaces in coordinates"""
    match = VIIRS_PATTERN.match(filename)
    if match:
        date_str, time_str = match.groups()
        try:
            return datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
        except ValueError:
            log_message(f"Invalid datetime in {filename}: {date_str} {time_str}")
    return None

def main():
    # Initialize logging
    open(LOG_FILE, 'w').close()
    
    # Get IMERG files
    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    log_message(f"Found {len(imerg_files)} IMERG files")
    
    results = []
    viirs_count = 0
    
    for root, dirs, files in os.walk(VIIRS_BASE_DIR):
        # Skip hidden directories
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        
        for file in files:
            # Skip non-VIIRS files and hidden files
            if not file.startswith('VIIRS') or file.startswith('.'):
                continue
                
            viirs_count += 1
            file_path = os.path.join(root, file)
            log_message(f"\nProcessing: {file}")
            
            # Parse datetime
            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                log_message("  → Skipped: Invalid datetime format")
                continue
                
            # Find IMERG match (keep your existing matching logic)
            # Add coordinate extraction and precipitation processing
            # ...
            
    log_message(f"\nTotal VIIRS files processed: {viirs_count}")
    log_message(f"Final results count: {len(results)}")
    
if __name__ == "__main__":
    main()

Found 2190 IMERG files

Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.01.txt

Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.02.txt

Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.03.txt

Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.04.txt

Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.01.txt

Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.02.txt

Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.03.txt

Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.04.txt

Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.05.txt

Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.01.txt

Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.02.txt

Processing: VIIRS_20170213_1722_ 289S_ 5962W_TRe.01.txt

Processing: VIIRS_20170213_1722_ 289S_ 5962W_TRe.02.txt

Processing: VIIRS_20170213_1722_ 289S_ 5962W_TRe.03.txt

Processing: VIIRS_20170223_1734_ 295S_ 5968W_TRe.01.txt

Processing: VIIRS_20170223_1734_ 295S_ 5968W_TRe.02.txt

Processing: VIIRS_20170223_1734_ 295S_ 5968W_TRe.03.txt

Process

In [10]:
# Updated configuration
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.+?\.txt")
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.\d+\.V06B\.HDF5")

def parse_imerg_datetime(filename):
    """Improved IMERG time parsing"""
    match = IMERG_PATTERN.match(filename)
    if match:
        date_str, start_time, end_time, _ = match.groups()
        try:
            # Use middle of the observation period
            start_dt = datetime.strptime(date_str + start_time[:4], "%Y%m%d%H%M")
            end_dt = datetime.strptime(date_str + end_time[:4], "%Y%m%d%H%M")
            return start_dt + (end_dt - start_dt)/2
        except ValueError:
            return None
    return None

def find_matching_imerg(viirs_time, imerg_files):
    """Match using IMERG observation window"""
    for f in imerg_files:
        imerg_dt = parse_imerg_datetime(f)
        if imerg_dt and (abs(imerg_dt - viirs_time) <= timedelta(minutes=45)):
            return f
    return None

In [12]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
VIIRS_BASE_DIR = r"C:\Tropos\Working\T-re_fires\pixeldir"
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
OUTPUT_CSV = "final_precipitation_results.csv"
LOG_FILE = "processing_log.txt"
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.+?\.txt")
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.\d+\.V\d{2}B\.HDF5")



IMERG_PATTERN = re.compile(
    r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d+)\.V\d{2}B\.HDF5"
)

def parse_imerg_datetime(filename):
    """Extract precise datetime from IMERG filename with midpoint calculation"""
    match = IMERG_PATTERN.match(filename)
    if not match:
        return None
    
    # Extract components (now gets 4 groups)
    date_str, start_time, end_time, _ = match.groups()
    
    try:
        # Parse start and end times
        start_dt = datetime.strptime(f"{date_str}{start_time[:4]}", "%Y%m%d%H%M")
        end_dt = datetime.strptime(f"{date_str}{end_time[:4]}", "%Y%m%d%H%M")
        
        # Calculate midpoint time
        return start_dt + (end_dt - start_dt)/2
    except ValueError as e:
        log_message(f"Error parsing {filename}: {str(e)}")
        return None


def log_message(message):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    full_msg = f"[{timestamp}] {message}"
    print(full_msg)
    with open(LOG_FILE, "a") as f:
        f.write(full_msg + "\n")

def parse_viirs_datetime(filename):
    """Extract datetime from VIIRS filename with flexible pattern matching"""
    match = VIIRS_PATTERN.match(filename)
    if match:
        date_str, time_str = match.groups()
        try:
            return datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
        except ValueError as e:
            log_message(f"Invalid VIIRS datetime in {filename}: {str(e)}")
    return None



def find_matching_imerg(viirs_time, imerg_files):
    """Find best IMERG match within 45-minute window"""
    best_match = None
    min_diff = timedelta(minutes=45)  # Maximum allowed difference
    
    for f in imerg_files:
        imerg_time = parse_imerg_datetime(f)
        if not imerg_time:
            continue
            
        time_diff = abs(viirs_time - imerg_time)
        if time_diff < min_diff:
            min_diff = time_diff
            best_match = f
    
    return best_match

def extract_coordinates(file_path):
    """Extract lat/lon from VIIRS file with error handling"""
    lats, lons = [], []
    try:
        with open(file_path, 'r') as f:
            for line_number, line in enumerate(f, 1):
                parts = line.strip().split()
                if len(parts) < 29:
                    continue
                try:
                    lat = float(parts[27])
                    lon = float(parts[28])
                    lats.append(lat)
                    lons.append(lon)
                except (ValueError, IndexError) as e:
                    log_message(f"Line {line_number}: {str(e)}")
        return lats, lons
    except Exception as e:
        log_message(f"File read error: {str(e)}")
        return [], []

def process_imerg(imerg_path, lats, lons):
    """Process IMERG data with comprehensive stats"""
    try:
        ds = xr.open_dataset(imerg_path, engine='h5netcdf', group='Grid')
        
        # Handle latitude/longitude orientation
        lat_slice = slice(max(lats), min(lats)) if ds.lat[0] > ds.lat[-1] else slice(min(lats), max(lats))
        lon_slice = slice(max(lons), min(lons)) if ds.lon[0] > ds.lon[-1] else slice(min(lons), max(lons))
        
        precip_data = ds['precipitation'].sel(lat=lat_slice, lon=lon_slice)
        valid_values = precip_data.values[~np.isnan(precip_data.values)]
        
        return {
            'average': np.nanmean(precip_data),
            'total': np.nansum(precip_data),
            'max': np.nanmax(precip_data),
            'values': valid_values.tolist(),
            'count': len(valid_values)
        }
    except Exception as e:
        log_message(f"IMERG processing error: {str(e)}")
        raise

def main():
    open(LOG_FILE, 'w').close()  # Reset log file
    
    # Load IMERG files
    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    log_message(f"Found {len(imerg_files)} IMERG files")
    
    results = []
    processed = matched = 0
    
    for root, dirs, files in os.walk(VIIRS_BASE_DIR):
        # Skip hidden directories
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        
        for file in files:
            # Skip non-VIIRS files and system files
            if not file.startswith('VIIRS') or file.startswith(('.', '~')):
                continue
            
            processed += 1
            file_path = os.path.join(root, file)
            log_message(f"\nProcessing: {file}")
            
            # Parse VIIRS time
            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                log_message("  → Skipped: Invalid datetime format")
                continue
                
            # Find IMERG match
            imerg_match = find_matching_imerg(viirs_time, imerg_files)
            if not imerg_match:
                log_message("  → No IMERG match found")
                continue
                
            matched += 1
            imerg_path = os.path.join(IMERG_DIR, imerg_match)
            log_message(f"  → Matched with: {imerg_match}")
            
            # Extract coordinates
            lats, lons = extract_coordinates(file_path)
            if not lats or not lons:
                log_message("  → Skipped: No valid coordinates")
                results.append(create_result_entry(file, imerg_match, root, viirs_time, "No valid coordinates"))
                continue
                
            # Process precipitation
            try:
                precip = process_imerg(imerg_path, lats, lons)
                results.append(create_result_entry(
                    file, imerg_match, root, viirs_time, "Success",
                    precip['average'], precip['total'], precip['max'], precip['values']
                ))
                log_message(f"  → Success: {precip['count']} valid pixels")
            except Exception as e:
                log_message(f"  → Processing failed: {str(e)}")
                results.append(create_result_entry(file, imerg_match, root, viirs_time, str(e)))

    # Save results
    if results:
        pd.DataFrame(results).to_csv(OUTPUT_CSV, index=False)
        log_message(f"\nSaved {len(results)} results to {OUTPUT_CSV}")
    else:
        log_message("\nNo valid results to save")
        
    # Final report
    log_message("\n=== Processing Summary ===")
    log_message(f"Total VIIRS files processed: {processed}")
    log_message(f"VIIRS files with valid matches: {matched}")
    log_message(f"Successfully processed files: {len([r for r in results if r['Status'] == 'Success'])}")

def create_result_entry(file, imerg_file, folder, time, status, avg=None, total=None, max_val=None, values=None):
    return {
        'VIIRS_File': file,
        'IMERG_File': imerg_file,
        'Folder': folder,
        'Observation_Time': time.strftime("%Y-%m-%d %H:%M"),
        'Average_Precip_mmh': round(avg, 4) if avg is not None else None,
        'Total_Precip_mm': round(total, 4) if total is not None else None,
        'Max_Precip_mmh': round(max_val, 4) if max_val is not None else None,
        'Pixel_Values': ';'.join(map(str, values)) if values else None,
        'Status': status
    }

if __name__ == "__main__":
    main()

[2025-03-21 11:59:38] Found 2190 IMERG files
[2025-03-21 11:59:38] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.01.txt
[2025-03-21 11:59:38]   → Matched with: 3B-HHR.MS.MRG.3IMERG.20170202-S170000-E172959.1020.V07B.HDF5
[2025-03-21 11:59:38] Line 1: could not convert string to float: 'lat'
[2025-03-21 11:59:38]   → Success: 6 valid pixels
[2025-03-21 11:59:38] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.02.txt
[2025-03-21 11:59:38]   → Matched with: 3B-HHR.MS.MRG.3IMERG.20170202-S170000-E172959.1020.V07B.HDF5
[2025-03-21 11:59:38] Line 1: could not convert string to float: 'lat'
[2025-03-21 11:59:38]   → Success: 20 valid pixels
[2025-03-21 11:59:38] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.03.txt
[2025-03-21 11:59:38]   → Matched with: 3B-HHR.MS.MRG.3IMERG.20170202-S170000-E172959.1020.V07B.HDF5
[2025-03-21 11:59:38] Line 1: could not convert string to float: 'lat'
[2025-03-21 11:59:38]   → Success: 16 valid pixels
[2025-03-21 11:59:38] 
Processing: VIIRS_20170202_

In [16]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
VIIRS_BASE_DIR = r"C:\Tropos\Working\T-re_fires\pixeldir"
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
OUTPUT_CSV = "final_precipitation_results.csv"
LOG_FILE = "processing_log.txt"
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.+?\.txt")
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d+)\.V\d{2}B\.HDF5")

def log_message(message):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    full_msg = f"[{timestamp}] {message}"
    print(full_msg)
    with open(LOG_FILE, "a") as f:
        f.write(full_msg + "\n")

def parse_viirs_datetime(filename):
    match = VIIRS_PATTERN.match(filename)
    if match:
        date_str, time_str = match.groups()
        try:
            return datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
        except ValueError as e:
            log_message(f"Invalid VIIRS datetime in {filename}: {str(e)}")
    return None

def parse_imerg_datetime(filename):
    match = IMERG_PATTERN.match(filename)
    if not match:
        return None
    
    date_str, start_time, end_time, _ = match.groups()
    try:
        start_dt = datetime.strptime(f"{date_str}{start_time[:4]}", "%Y%m%d%H%M")
        end_dt = datetime.strptime(f"{date_str}{end_time[:4]}", "%Y%m%d%H%M")
        return start_dt + (end_dt - start_dt)/2
    except ValueError as e:
        log_message(f"Error parsing {filename}: {str(e)}")
        return None

def find_matching_imerg(viirs_time, imerg_files):
    best_match = None
    min_diff = timedelta(minutes=45)
    
    for f in imerg_files:
        imerg_time = parse_imerg_datetime(f)
        if imerg_time and (abs(imerg_time - viirs_time) < min_diff):
            min_diff = abs(imerg_time - viirs_time)
            best_match = f
    return best_match

def extract_coordinates(file_path):
    lats, lons = [], []
    try:
        with open(file_path, 'r') as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) >= 29:
                    try:
                        lats.append(float(parts[27]))
                        lons.append(float(parts[28]))
                    except (ValueError, IndexError):
                        continue
        return lats, lons
    except Exception as e:
        log_message(f"File read error: {str(e)}")
        return [], []

def process_imerg(imerg_path, lats, lons):
    try:
        ds = xr.open_dataset(imerg_path, engine='h5netcdf', group='Grid')
        
        # Convert to numpy arrays and get first/last elements
        lat_values = ds.lat.values
        lon_values = ds.lon.values
        
        # Handle latitude direction
        if lat_values[0] > lat_values[-1]:
            lat_slice = slice(max(lats), min(lats))
        else:
            lat_slice = slice(min(lats), max(lats))
            
        # Handle longitude direction
        if lon_values[0] > lon_values[-1]:
            lon_slice = slice(max(lons), min(lons))
        else:
            lon_slice = slice(min(lons), max(lons))
        
        precip_data = ds['precipitation'].sel(lat=lat_slice, lon=lon_slice)
        valid_values = precip_data.values[~np.isnan(precip_data.values)]
        
        return {
            'average': float(np.nanmean(precip_data)),
            'total': float(np.nansum(precip_data)),
            'max': float(np.nanmax(precip_data)),
            'values': valid_values.tolist(),
            'count': len(valid_values)
        }
    except Exception as e:
        log_message(f"IMERG processing error: {str(e)}")
        raise

def create_result_entry(file, imerg_file, folder, time, status, avg, total, max_val, values, lats, lons):
    """Create result entry with full folder path and subfolder name"""
    folder_name = os.path.basename(folder)
    min_lat = round(min(lats), 2) if lats else None
    max_lat = round(max(lats), 2) if lats else None
    min_lon = round(min(lons), 2) if lons else None
    max_lon = round(max(lons), 2) if lons else None
    
    return {
        'VIIRS_File': file,
        'IMERG_File': imerg_file,
        'Folder_Full': folder,
        'Folder_Name': folder_name,
        'Observation_Time': time.strftime("%Y-%m-%d %H:%M"),
        'Min_Lat': min_lat,
        'Max_Lat': max_lat,
        'Min_Lon': min_lon,
        'Max_Lon': max_lon,
        'Average_Precip_mmh': round(avg, 2) if avg is not None else None,
        'Total_Precip_mm': round(total, 2) if total is not None else None,
        'Max_Precip_mmh': round(max_val, 2) if max_val is not None else None,
        'Pixel_Values': ';'.join([f"{v:.2f}" for v in values]) if values else None,
        'Status': status
    }

def main():
    open(LOG_FILE, 'w').close()
    
    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    log_message(f"Found {len(imerg_files)} IMERG files")
    
    results = []
    processed = matched = 0
    
    for root, dirs, files in os.walk(VIIRS_BASE_DIR):
        dirs[:] = [d for d in dirs if not d.startswith('.')]
        
        for file in files:
            if not file.startswith('VIIRS') or file.startswith(('.', '~')):
                continue
            
            processed += 1
            file_path = os.path.join(root, file)
            log_message(f"\nProcessing: {file}")
            
            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                continue
                
            imerg_match = find_matching_imerg(viirs_time, imerg_files)
            if not imerg_match:
                continue
                
            matched += 1
            imerg_path = os.path.join(IMERG_DIR, imerg_match)
            
            lats, lons = extract_coordinates(file_path)
            if not lats or not lons:
                continue
                
            try:
                precip = process_imerg(imerg_path, lats, lons)
                results.append(create_result_entry(
                    file, imerg_match, root, viirs_time, "Success",
                    precip['average'], precip['total'], precip['max'], 
                    precip['values'], lats, lons
                ))
            except Exception as e:
                log_message(f"Processing failed: {str(e)}")

    if results:
        df = pd.DataFrame(results)
        df.to_csv(OUTPUT_CSV, index=False)
        log_message(f"\nSaved {len(results)} results to {OUTPUT_CSV}")
    else:
        log_message("\nNo valid results to save")
        
    log_message("\n=== Processing Summary ===")
    log_message(f"Total VIIRS files processed: {processed}")
    log_message(f"VIIRS files with valid matches: {matched}")
    log_message(f"Successfully processed files: {len([r for r in results if r['Status'] == 'Success'])}")

if __name__ == "__main__":
    main()

[2025-03-21 12:22:13] Found 2190 IMERG files
[2025-03-21 12:22:13] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.01.txt
[2025-03-21 12:22:13] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.02.txt
[2025-03-21 12:22:13] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.03.txt
[2025-03-21 12:22:13] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.04.txt
[2025-03-21 12:22:13] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.01.txt
[2025-03-21 12:22:13] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.02.txt
[2025-03-21 12:22:13] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.03.txt
[2025-03-21 12:22:14] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.04.txt
[2025-03-21 12:22:14] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.05.txt
[2025-03-21 12:22:14] 
Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.01.txt
[2025-03-21 12:22:14] 
Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.02.txt
[2025-03-21 12:22:14] 
Processing: VIIRS_20170213_1722_ 289S_ 5962W_TRe.01.txt
[2025-0

In [1]:
import csv
from collections import defaultdict

# Path to input and output CSV files
input_csv_path = r"C:\Users\chinthaparthy\Desktop\final_precipitation_results.csv"
output_csv_path = r"C:\Users\chinthaparthy\Desktop\aggregated_precipitation_results.csv"

# Initialize a dictionary to hold aggregated data
daily_data = defaultdict(lambda: {'total_precip': 0.0, 'max_precip': 0.0})

# Read and process the CSV file
with open(input_csv_path, newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    header = next(reader)  # Skip header
    
    for row in reader:
        if len(row) < 14:
            continue  # Skip invalid rows
        
        folder_name = row[3]
        date = folder_name.split('_')[0]
        
        try:
            total_precip = float(row[10])
            max_precip = float(row[11])
        except ValueError:
            continue  # Skip rows with invalid numeric values
        
        daily_data[date]['total_precip'] += total_precip
        if max_precip > daily_data[date]['max_precip']:
            daily_data[date]['max_precip'] = max_precip

# Write the aggregated results to a new CSV file
with open(output_csv_path, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Date", "Total_Total_Precip_mm", "Max_Max_Precip_mmh"])
    
    for date in sorted(daily_data.keys()):
        writer.writerow([date, f"{daily_data[date]['total_precip']:.2f}", f"{daily_data[date]['max_precip']:.2f}"])

print(f"Aggregated data saved to {output_csv_path}")

Aggregated data saved to C:\Users\chinthaparthy\Desktop\aggregated_precipitation_results.csv


In [4]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
VIIRS_BASE_DIR = r"C:\Tropos\Working\T-re_fires\pixeldir"
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
OUTPUT_CSV = "final_precipitation_results2.csv"
LOG_FILE = "processing_log2.txt"
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.+?\.txt")
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d+)\.V\d{2}B\.HDF5")

# Define the latitude and longitude box
LAT_BOX = (-3.47, -0.85)
LON_BOX = (-60.93, -57.35)

def log_message(message):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    full_msg = f"[{timestamp}] {message}"
    print(full_msg)
    with open(LOG_FILE, "a") as f:
        f.write(full_msg + "\n")

def parse_viirs_datetime(filename):
    match = VIIRS_PATTERN.match(filename)
    if match:
        date_str, time_str = match.groups()
        try:
            return datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
        except ValueError as e:
            log_message(f"Invalid VIIRS datetime in {filename}: {str(e)}")
    return None

def parse_imerg_datetime(filename):
    match = IMERG_PATTERN.match(filename)
    if not match:
        return None

    date_str, start_time, end_time, _ = match.groups()
    try:
        start_dt = datetime.strptime(f"{date_str}{start_time[:4]}", "%Y%m%d%H%M")
        end_dt = datetime.strptime(f"{date_str}{end_time[:4]}", "%Y%m%d%H%M")
        return start_dt + (end_dt - start_dt)/2
    except ValueError as e:
        log_message(f"Error parsing {filename}: {str(e)}")
        return None

def find_matching_imerg(viirs_time, imerg_files):
    best_match = None
    min_diff = timedelta(minutes=45)

    for f in imerg_files:
        imerg_time = parse_imerg_datetime(f)
        if imerg_time and (abs(imerg_time - viirs_time) < min_diff):
            min_diff = abs(imerg_time - viirs_time)
            best_match = f
    return best_match

def process_imerg(imerg_path):
    try:
        ds = xr.open_dataset(imerg_path, engine='h5netcdf', group='Grid')

        # Extract the latitude and longitude values
        lat_values = ds.lat.values
        lon_values = ds.lon.values

        # Define the latitude and longitude slices
        lat_slice = slice(LAT_BOX[0], LAT_BOX[1])
        lon_slice = slice(LON_BOX[0], LON_BOX[1])

        # Select the precipitation data within the box
        precip_data = ds['precipitation'].sel(lat=lat_slice, lon=lon_slice)
        valid_values = precip_data.values[~np.isnan(precip_data.values)]

        return {
            'max': float(np.nanmax(precip_data)),
            'total': float(np.nansum(precip_data))
        }
    except Exception as e:
        log_message(f"IMERG processing error: {str(e)}")
        raise

def create_result_entry(file, imerg_file, folder, time, status, max_val, total):
    """Create result entry with full folder path and subfolder name"""
    folder_name = os.path.basename(folder)

    return {
        'VIIRS_File': file,
        'IMERG_File': imerg_file,
        'Folder_Full': folder,
        'Folder_Name': folder_name,
        'Observation_Time': time.strftime("%Y-%m-%d %H:%M"),
        'Max_Precip_mmh': round(max_val, 2) if max_val is not None else None,
        'Total_Precip_mm': round(total, 2) if total is not None else None,
        'Status': status
    }

def main():
    open(LOG_FILE, 'w').close()

    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    log_message(f"Found {len(imerg_files)} IMERG files")

    results = []
    processed = matched = 0

    for root, dirs, files in os.walk(VIIRS_BASE_DIR):
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        for file in files:
            if not file.startswith('VIIRS') or file.startswith(('.', '~')):
                continue

            processed += 1
            file_path = os.path.join(root, file)
            log_message(f"\nProcessing: {file}")

            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                continue

            imerg_match = find_matching_imerg(viirs_time, imerg_files)
            if not imerg_match:
                continue

            matched += 1
            imerg_path = os.path.join(IMERG_DIR, imerg_match)

            try:
                precip = process_imerg(imerg_path)
                results.append(create_result_entry(
                    file, imerg_match, root, viirs_time, "Success",
                    precip['max'], precip['total']
                ))
            except Exception as e:
                log_message(f"Processing failed: {str(e)}")

    if results:
        df = pd.DataFrame(results)
        df.to_csv(OUTPUT_CSV, index=False)
        log_message(f"\nSaved {len(results)} results to {OUTPUT_CSV}")
    else:
        log_message("\nNo valid results to save")

    log_message("\n=== Processing Summary ===")
    log_message(f"Total VIIRS files processed: {processed}")
    log_message(f"VIIRS files with valid matches: {matched}")
    log_message(f"Successfully processed files: {len([r for r in results if r['Status'] == 'Success'])}")

if __name__ == "__main__":
    main()


[2025-03-24 11:20:33] Found 2190 IMERG files
[2025-03-24 11:20:33] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.01.txt
[2025-03-24 11:20:33] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.02.txt
[2025-03-24 11:20:33] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.03.txt
[2025-03-24 11:20:33] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.04.txt
[2025-03-24 11:20:33] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.01.txt
[2025-03-24 11:20:33] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.02.txt
[2025-03-24 11:20:33] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.03.txt
[2025-03-24 11:20:34] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.04.txt
[2025-03-24 11:20:34] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.05.txt
[2025-03-24 11:20:34] 
Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.01.txt
[2025-03-24 11:20:34] 
Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.02.txt
[2025-03-24 11:20:34] 
Processing: VIIRS_20170213_1722_ 289S_ 5962W_TRe.01.txt
[2025-0

In [6]:
import os
import re
import pandas as pd
import xarray as xr
import numpy as np
from datetime import datetime, timedelta

# Configuration
VIIRS_BASE_DIR = r"C:\Tropos\Working\T-re_fires\pixeldir"
IMERG_DIR = r"c:/Tropos/Working/gpm/hdffiles/"
OUTPUT_CSV = "final_precipitation_results.csv"
LOG_FILE = "processing_log.txt"
VIIRS_PATTERN = re.compile(r"VIIRS_(\d{8})_(\d{4})_.+?\.txt")
IMERG_PATTERN = re.compile(r"3B-HHR\.MS\.MRG\.3IMERG\.(\d{8})-S(\d{6})-E(\d{6})\.(\d+)\.V\d{2}B\.HDF5")

# Define the latitude and longitude box
LAT_BOX = (-3.47, -0.85)
LON_BOX = (-60.93, -57.35)

def log_message(message):
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    full_msg = f"[{timestamp}] {message}"
    print(full_msg)
    with open(LOG_FILE, "a") as f:
        f.write(full_msg + "\n")

def parse_viirs_datetime(filename):
    match = VIIRS_PATTERN.match(filename)
    if match:
        date_str, time_str = match.groups()
        try:
            return datetime.strptime(f"{date_str}{time_str}", "%Y%m%d%H%M")
        except ValueError as e:
            log_message(f"Invalid VIIRS datetime in {filename}: {str(e)}")
    return None

def parse_imerg_datetime(filename):
    match = IMERG_PATTERN.match(filename)
    if not match:
        return None

    date_str, start_time, end_time, _ = match.groups()
    try:
        start_dt = datetime.strptime(f"{date_str}{start_time[:4]}", "%Y%m%d%H%M")
        end_dt = datetime.strptime(f"{date_str}{end_time[:4]}", "%Y%m%d%H%M")
        return start_dt + (end_dt - start_dt)/2
    except ValueError as e:
        log_message(f"Error parsing {filename}: {str(e)}")
        return None

def find_matching_imerg(viirs_time, imerg_files):
    best_match = None
    min_diff = timedelta(minutes=45)

    for f in imerg_files:
        imerg_time = parse_imerg_datetime(f)
        if imerg_time and (abs(imerg_time - viirs_time) < min_diff):
            min_diff = abs(imerg_time - viirs_time)
            best_match = f
    return best_match

def process_imerg(imerg_path):
    try:
        ds = xr.open_dataset(imerg_path, engine='h5netcdf', group='Grid')

        # Extract the latitude and longitude values
        lat_values = ds.lat.values
        lon_values = ds.lon.values

        # Define the latitude and longitude slices
        lat_slice = slice(LAT_BOX[0], LAT_BOX[1])
        lon_slice = slice(LON_BOX[0], LON_BOX[1])

        # Select the precipitation data within the box
        precip_data = ds['precipitation'].sel(lat=lat_slice, lon=lon_slice)
        valid_values = precip_data.values[~np.isnan(precip_data.values)]

        return {
            'max': float(np.nanmax(precip_data)),
            'total': float(np.nansum(precip_data))
        }
    except Exception as e:
        log_message(f"IMERG processing error: {str(e)}")
        raise

def main():
    open(LOG_FILE, 'w').close()

    imerg_files = [f for f in os.listdir(IMERG_DIR) if f.endswith(".HDF5")]
    log_message(f"Found {len(imerg_files)} IMERG files")

    results = []
    processed_dates = set()
    processed = matched = 0

    for root, dirs, files in os.walk(VIIRS_BASE_DIR):
        dirs[:] = [d for d in dirs if not d.startswith('.')]

        for file in files:
            if not file.startswith('VIIRS') or file.startswith(('.', '~')):
                continue

            processed += 1
            file_path = os.path.join(root, file)
            log_message(f"\nProcessing: {file}")

            viirs_time = parse_viirs_datetime(file)
            if not viirs_time:
                continue

            date_key = viirs_time.strftime("%Y-%m-%d")
            if date_key in processed_dates:
                continue

            imerg_match = find_matching_imerg(viirs_time, imerg_files)
            if not imerg_match:
                continue

            matched += 1
            imerg_path = os.path.join(IMERG_DIR, imerg_match)

            try:
                precip = process_imerg(imerg_path)
                results.append({
                    'Date': date_key,
                    'Max_Precip_mmh': round(precip['max'], 2),
                    'Total_Precip_mm': round(precip['total'], 2)
                })
                processed_dates.add(date_key)
            except Exception as e:
                log_message(f"Processing failed: {str(e)}")

    if results:
        df = pd.DataFrame(results)
        df.to_csv(OUTPUT_CSV, index=False)
        log_message(f"\nSaved {len(results)} results to {OUTPUT_CSV}")
    else:
        log_message("\nNo valid results to save")

    log_message("\n=== Processing Summary ===")
    log_message(f"Total VIIRS files processed: {processed}")
    log_message(f"VIIRS files with valid matches: {matched}")
    log_message(f"Successfully processed files: {len(results)}")

if __name__ == "__main__":
    main()


[2025-03-24 11:35:43] Found 2190 IMERG files
[2025-03-24 11:35:43] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.01.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.02.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.03.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170202_1726_ 305S_ 5970W_TRe.04.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.01.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.02.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.03.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.04.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170208_1712_ 283S_ 5953W_TRe.05.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.01.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170212_1739_ 313S_ 5974W_TRe.02.txt
[2025-03-24 11:35:43] 
Processing: VIIRS_20170213_1722_ 289S_ 5962W_TRe.01.txt
[2025-0