In [4]:
import os
import glob
import pandas as pd

# Directory containing the text files
data_path = '/Users/anastringer_1/Desktop/G10015'

# Dictionary to store processed and aggregated data for each borehole
borehole_data = {}

# Create output directory for GIPL input files
output_path = 'gipl_inputs/'
os.makedirs(output_path, exist_ok=True)

# Function to extract borehole code and log date from the file name
def extract_borehole_and_date(file_name):
    base_name = os.path.basename(file_name)
    
    # Extract borehole code (first 3 letters)
    borehole_code = base_name[:3]
    
    # Extract log date (from 4th to 11th character, assuming format XYZ_YYMMMDD)
    log_date_str = base_name[4:11]
    
    # Parse the date into a standard format
    log_date = pd.to_datetime(log_date_str, format='%y%b%d', errors='coerce')
    
    return borehole_code, log_date

# Get all text files in the directory
all_files = glob.glob(f'{data_path}/*.txt')

# Loop through each file and group by borehole code
for file in all_files:
    borehole_code, log_date = extract_borehole_and_date(file)
    
    if borehole_code not in borehole_data:
        borehole_data[borehole_code] = []

    try:
        # Read the file (skip first 10 rows if headers; adjust as needed)
        df = pd.read_csv(file, sep='\s+', skiprows=10, header=None, usecols=[0, 1], 
                         names=["Depth", "Temperature"], engine='python')

        # Convert columns to numeric, drop NaNs
        df["Depth"] = pd.to_numeric(df["Depth"], errors='coerce')
        df["Temperature"] = pd.to_numeric(df["Temperature"], errors='coerce')
        df.dropna(inplace=True)

        # Add log date
        df["Log_Date"] = log_date

        # Append the processed data
        borehole_data[borehole_code].append(df)

    except Exception as e:
        print(f"Error reading {file}: {e}")

# Aggregate data for each borehole
for borehole_code, dataframes in borehole_data.items():
    if dataframes:
        # Combine all dataframes for the borehole
        combined_df = pd.concat(dataframes, ignore_index=True)

        # Group by Depth and calculate mean temperature
        aggregated_df = combined_df.groupby("Depth", as_index=False).agg({
            "Temperature": "mean",
            "Log_Date": "first"  # Keep the first log date for each depth
        })

        # Write to GIPL-compatible format
        output_file = f"{output_path}/{borehole_code}_gipl_input.txt"
        aggregated_df.to_csv(output_file, index=False, sep=' ', header=False)
        print(f"GIPL input file created for borehole {borehole_code} at {output_file}")

print("All borehole data processed for GIPL input.")

GIPL input file created for borehole NIN at gipl_inputs//NIN_gipl_input.txt
GIPL input file created for borehole ESN at gipl_inputs//ESN_gipl_input.txt
GIPL input file created for borehole LBN at gipl_inputs//LBN_gipl_input.txt
GIPL input file created for borehole ETK at gipl_inputs//ETK_gipl_input.txt
GIPL input file created for borehole JWD at gipl_inputs//JWD_gipl_input.txt
GIPL input file created for borehole SME at gipl_inputs//SME_gipl_input.txt
GIPL input file created for borehole err at gipl_inputs//err_gipl_input.txt
GIPL input file created for borehole DRP at gipl_inputs//DRP_gipl_input.txt
GIPL input file created for borehole TLK at gipl_inputs//TLK_gipl_input.txt
GIPL input file created for borehole IKP at gipl_inputs//IKP_gipl_input.txt
GIPL input file created for borehole KOL at gipl_inputs//KOL_gipl_input.txt
GIPL input file created for borehole KAG at gipl_inputs//KAG_gipl_input.txt
GIPL input file created for borehole WDS at gipl_inputs//WDS_gipl_input.txt
GIPL input f