In [None]:
import pandas as pd
import os

# Define the directory containing the CSV files and the output file path
directory_path = r'csv'  # Update this to your directory
output_file_path = r'rh_distributed.csv'   # Path for the output CSV file

# Function to find the elevation at which the given percentile of the cumulative amplitude is reached
def find_elevation_at_percentile(df, cumulative_percentile):
    row = df[df['Cumulative_Amplitude'] >= cumulative_percentile].iloc[0]
    return row['Elevation']

# List all CSV files in the directory
csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]

# Initialize a list to store the results
rh_percentiles_results = []

# Process each CSV file
for csv_file in csv_files:
    file_path = os.path.join(directory_path, csv_file)
    data = pd.read_csv(file_path)

    # Sort data by 'Elevation' in ascending order
    sorted_data = data.sort_values(by='Elevation', ascending=True)

    # Calculate cumulative sum normalized by its last element
    sorted_data['Cumulative_Amplitude'] = sorted_data['Rxwaveform'].cumsum()
    sorted_data['Cumulative_Amplitude'] /= sorted_data['Cumulative_Amplitude'].iloc[-1]

    # Define the percentiles
    percentiles = {'RH10': 0.10, 'RH20': 0.20, 'RH25': 0.25, 'RH30': 0.30, 'RH40': 0.40, 'RH50': 0.50, 'RH60': 0.60, 'RH70': 0.70, 'RH75': 0.75, 'RH80': 0.80, 'RH90': 0.90, 'RH95': 0.95, 'RH98': 0.98}
    rh_values = [csv_file.replace('.csv', '')]  # Start with the file name (without .csv)
    
    for name, percentile in percentiles.items():
        elevation = find_elevation_at_percentile(sorted_data, percentile)
        rh_values.append(elevation)
    
    # Append the results
    rh_percentiles_results.append(rh_values)

# Convert the results to a DataFrame
rh_percentiles_df = pd.DataFrame(rh_percentiles_results, columns=['FileName'] + list(percentiles.keys()))

# Save the DataFrame to a new CSV file
rh_percentiles_df.to_csv(output_file_path, index=False)

print(f"RH percentiles calculated and saved to {output_file_path}")


In [6]:
import pandas as pd

# Load the data from the CSV file
file_path = r"ABBY_001_2017_D.las.csv"  # Replace with the path to your actual CSV file
data = pd.read_csv(file_path)

# Sort the DataFrame based on Amplitude_Percent in descending order
sorted_data = data.sort_values(by='Amplitude_Percent', ascending=False)

# Calculate the cumulative sum of Amplitude_Percent
sorted_data['Cumulative_Amplitude'] = sorted_data['Amplitude_Percent'].cumsum()

# Display the sorted data with cumulative sum to manually inspect
print(sorted_data.head(30))  # You can adjust the number of rows to display as needed

# Define a function to find the elevation at which the given percentile of the cumulative amplitude is reached
def find_elevation_at_percentile(df, percentile):
    threshold = df['Cumulative_Amplitude'].iloc[-1] * (percentile / 100.0)
    return df[df['Cumulative_Amplitude'] >= threshold]['Elevation'].iloc[0]

# Calculate and print the elevations for each percentile
percentiles = [10, 25, 50, 75, 90, 95, 98]
for percentile in percentiles:
    elevation = find_elevation_at_percentile(sorted_data, percentile)
    print(f'RH{percentile}: {elevation}')


     Rxwaveform  Amplitude_Percent  Elevation  Cumulative_Amplitude
488    0.057138         100.000000   4.399994            100.000000
489    0.057113          99.956428   4.249994            199.956428
487    0.056678          99.194905   4.549994            299.151333
490    0.056601          99.059913   4.099994            398.211246
486    0.055751          97.572014   4.699994            495.783260
491    0.055615          97.333364   3.949994            593.116623
485    0.054386          95.182351   4.849994            688.298974
492    0.054185          94.831032   3.799994            783.130006
484    0.052620          92.093037   4.999994            875.223043
493    0.052359          91.635929   3.649994            966.858972
483    0.050499          88.380934   5.149994           1055.239906
494    0.050195          87.848105   3.499994           1143.088011
482    0.048071          84.130636   5.299994           1227.218647
495    0.047767          83.598655   3.349994   