In [1]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from plotly.subplots import make_subplots


### Exploratory analysis

In [2]:
# Wells selected as priority for this analysis; the caliper and conductivity
# data loaded below are both filtered against this list.
priority_wells = [
    'AW5D', 'AW5O', 'AW5S',
    'AW6D', 'AW6O', 'AW6S',
    'LRS69D', 'LRS69O', 'LRS69S',
    'LRS70D', 'LRS70S',
]

#### Caliper

In [3]:
# Load the concatenated caliper logs; the file name is resolved relative to the
# notebook's working directory.
# Previously referenced as r'notebooks\sandbox\concatenate_caliper_all.csv'
# (presumably the location relative to the repository root - TODO confirm).
caliper = pd.read_csv(filepath_or_buffer='concatenate_caliper_all.csv')

# Preview five randomly chosen rows
caliper.sample(n=5)

Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m]
5371,5371,44.2734,5.52087,AW6D_caliper_20210910.LAS,6,6.904963,17.538606,15.08272,-13.494532
20268,20268,75.0836,6.64295,LRS65D_caliper_20210910.LAS,6,8.077783,20.517569,34.629717,-22.885481
780,780,7.08319,5.61566,AW1D_caliper_20210910.LAS,6,7.00404,17.79026,16.733992,-2.158956
9681,9681,10.8709,7.11409,BW10D_caliper_20211020.LAS,6,8.570228,21.768378,42.837128,-3.31345
5707,5707,10.7712,6.76066,AW6D_caliper_20210910.LAS,6,8.200816,20.830072,36.680263,-3.283062


In [4]:
# The well ID is the part of 'source_file' that precedes the first underscore;
# store it in a new 'Well' column on the full caliper table.
caliper['Well'] = caliper['source_file'].str.extract(r'^([^_]+)', expand=False)

# Keep an independent copy holding only the rows of the priority wells
is_priority = caliper['Well'].isin(priority_wells)
caliper_priority = caliper.loc[is_priority].copy()
caliper_priority.sample(n=5)


Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m],Well
5400,5400,41.3819,5.12829,AW6D_caliper_20210910.LAS,6,6.494631,16.496363,8.243849,-12.613203,AW6D
5366,5366,44.772,5.61761,AW6D_caliper_20210910.LAS,6,7.006078,17.795437,16.767962,-13.646506,AW6D
6660,6660,5.78569,4.53552,AW6S_caliper_20210910.LAS,6,5.875056,14.922643,-2.082396,-1.763478,AW6S
4734,4734,9.3765,2.84499,AW5O_caliper_20210910.LAS,4,4.108081,10.434527,2.702036,-2.857957,AW5O
6167,6167,40.7951,2.75691,AW6O_caliper_20210910.LAS,4,4.016019,10.200687,0.400463,-12.434346,AW6O


In [5]:
# List the caliper columns; 'Well' is present only after the well-ID extraction
# cell above has been run.
caliper.columns

Index(['Unnamed: 0', 'Depth_ft', 'Caliper_in', 'source_file',
       'Diameter_auger_in', 'calibrated_in', 'calibrated_cm',
       'Well_Diameter_Deviation_Percentage', 'Depth [m]', 'Well'],
      dtype='object')

In [6]:
# Report the number of caliper rows per priority well, then compare how many
# priority wells were requested with how many are left after filtering.
priority_summary = (
    f"Row count for each well:\n{caliper_priority['Well'].value_counts()}\n\n"
    f"Total priority wells defined: {len(priority_wells)}\n"
    f"Total priority wells after filtering: {caliper_priority['Well'].nunique()}"
)
print(priority_summary)


Row count for each well:
Well
LRS70D    1160
AW5D       900
AW6D       898
LRS69D     865
AW6O       753
AW5O       343
LRS69S     245
LRS70S     203
AW6S       147
AW5S        95
Name: count, dtype: int64

Total priority wells defined: 11
Total priority wells after filtering: 10


In [7]:
# LRS69O looks absent from the caliper data: list the well IDs of every row whose
# source file name begins with 'LRS69' to see which of those wells really exist.
is_lrs69 = caliper['source_file'].str.startswith('LRS69')
caliper.loc[is_lrs69, 'Well'].unique()


array(['LRS69D', 'LRS69S'], dtype=object)

#### Conductivity

In [8]:
## Standardizing column names across CSVs
# Union of the header names found in the priority-well files
column_names = set()

# Folder with the raw CSV exports. glob() yields files in filesystem order, so the
# paths are sorted to make every run visit (and later concatenate) them identically.
csv_folder = Path("../../data/raw")
csv_paths = sorted(csv_folder.glob("*.csv"))

# Inspect only the files that belong to priority wells
for path in csv_paths:
    well_id = path.stem.split("_")[0]  # well ID = file-name prefix before the first underscore
    if well_id not in priority_wells:
        continue
    try:
        # nrows=0 parses the header line only; no data row is read
        header = pd.read_csv(path, nrows=0)
    except (OSError, ValueError) as e:
        # Best effort: report unreadable or malformed files and keep scanning.
        # pandas' ParserError/EmptyDataError and UnicodeDecodeError are ValueError subclasses.
        print(f"{path.name}: ❌ Error - {e}")
        continue
    print(f"{path.name}: {len(header.columns)} columns")
    column_names.update(header.columns)

# Printed sorted because set iteration order changes between kernel sessions
print(sorted(column_names))

AW5D_YSI_20250225.csv: 20 columns
AW5O_YSI_20250225.csv: 20 columns
AW5S_YSI_20250225.csv: 23 columns
AW6D_YSI_20250226.csv: 20 columns
LRS69D_YSI_20250222R.csv: 21 columns
LRS69O_YSI_20250222R.csv: 21 columns
LRS69S_YSI_20250222R.csv: 27 columns
LRS70D_YSI_20250317.csv: 21 columns
LRS70S_YSI_20250317.csv: 21 columns
{'Unnamed: 20', 'Pressure psi a', 'pH', 'Unnamed: 21', 'Sal psu', 'ODO % sat', 'SpCond_muS/cm', 'Site Name', 'SpCond_muS/cm.1', 'Resistivity ohms-cm', 'ODO mg/L', 'ORP mV', 'pH mV', 'Vertical Position m', 'Cable Pwr V', 'Time (HH:mm:ss)', 'Unnamed: 23', 'Date (MM/DD/YYYY)', 'Time (HH:MM:SS)', 'Unnamed: 24', 'Cond_muS/cm', 'ODO % CB', 'nLF_Cond_muS/cm', 'Time (Fract. Sec)', 'Depth m.1', 'TDS mg/L', 'Temp_Celcius', 'Unnamed: 25', 'Depth m', 'Battery V'}


In [9]:
# Maps the header variants found across the CSV exports onto one common set of
# column names (original name -> standardized name).
rename_dict = dict([
    # Specific conductivity
    ('SpCond_muS/cm', 'SpCond µS/cm'),
    ('nLF_Cond_muS/cm', 'nLF Cond µS/cm'),
    ('Cond_muS/cm', 'Cond µS/cm'),
    # Disabled mapping: 'Corrected sp Cond [µS/cm]' -> 'SpCond_muS/cm'

    # Temperature
    ('Temp °C', 'Temperature_C'),
    ('Temp_Celcius', 'Temperature_C'),

    # Depth
    # Disabled mappings: 'Depth from GL (m)' -> 'Depth_m', 'Corrected Depth' -> 'Depth_m'

    # Time
    ('Time (HH:MM:SS)', 'Time (HH:mm:ss)'),
    ('Date (MM/DD/YYYY)', 'Date'),
])


In [10]:
# Load every priority-well CSV, tag its rows with the well ID, apply the shared
# column names and discard the columns whose name starts with 'Unnamed'.
dfs = []
for path in csv_paths:
    well_id = path.stem.split("_")[0]  # well ID = file-name prefix before the first underscore
    if well_id not in priority_wells:
        continue
    df = pd.read_csv(path)
    df["Well"] = well_id
    df = df.rename(columns=rename_dict)
    unnamed_cols = [col for col in df.columns if col.startswith('Unnamed')]
    df = df.drop(columns=unnamed_cols)
    dfs.append(df)

# Stop here when nothing was loaded. Continuing would either raise a NameError on
# `conductivity` (fresh kernel) or silently reuse a stale `conductivity` left over
# from an earlier run.
if not dfs:
    raise FileNotFoundError("⚠️ No matching CSV files from priority wells were found or loaded.")

# Combine all wells into a single DataFrame with a fresh 0..n-1 index
conductivity = pd.concat(dfs, ignore_index=True)

conductivity.sample(6)

Unnamed: 0,Date,Time (HH:mm:ss),Time (Fract. Sec),Site Name,Cond µS/cm,Depth m,ODO % sat,ODO mg/L,ORP mV,Pressure psi a,...,Temperature_C,Vertical Position m,Battery V,Cable Pwr V,Resistivity ohms-cm,Well,Depth m.1,nLF Cond µS/cm,ODO % CB,SpCond_muS/cm.1
106564,26/02/2025,12:54:21,0.0,Default Site,13125.7,11.487,1.1,0.08,438.8,16.378,...,25.662,11.489,2.53,1.1,,LRS70D,,12944.8,1.1,
11397,25/02/2025,14:12:25,0.0,Default Site,28774.5,21.288,0.0,0.0,-116.5,30.592,...,25.013,21.289,2.44,0.4,35.0,AW5D,,,,
48721,22/02/2025,13:33:04,0.5,Default Site,1168.3,5.57,54.3,4.39,399.4,7.9,...,26.058,5.572,2.76,1.1,,LRS69D,,1142.7,54.3,
64260,22/02/2025,14:37:49,0.25,Default Site,24676.4,14.246,1.4,0.11,-129.2,20.424,...,25.586,14.53,2.74,1.1,,LRS69D,,24375.0,1.4,
72947,22/02/2025,10:37:31,0.0,Default Site,1013.8,0.693,53.6,4.34,400.0,0.983,...,25.906,0.788,2.85,1.1,,LRS69O,,994.7,53.6,
96348,22/02/2025,10:04:21,0.5,Default Site,934.0,1.752,78.0,6.33,376.7,2.485,...,25.877,1.751,2.87,1.1,,LRS69S,1.752,917.0,78.0,918.6


### Comparison graph

In [11]:
# -----------------------------------------------
# Put both datasets on a common depth convention
# -----------------------------------------------
# The caliper table stores depth in "Depth [m]" as negative values, whereas the
# conductivity table stores it in "Vertical Position m" as positive values.
# Both tables get a shared "Depth_m" column with positive depth (downward).
conductivity["Depth_m"] = conductivity["Vertical Position m"]
caliper["Depth_m"] = caliper["Depth [m]"].abs()

# Only wells that appear in both datasets can be compared
wells_in_caliper = set(caliper["Well"])
wells_in_conductivity = set(conductivity["Well"])
available_wells = sorted(wells_in_caliper & wells_in_conductivity)

# Dropdown used to choose which well to plot
dropdown = widgets.Dropdown(
    description='Well:',
    options=available_wells,
    style={'description_width': 'initial'},
)


def plot_logs_separate_axes(well):
    """Show the conductivity and caliper logs of one well side by side.

    Reads the notebook-level ``caliper`` and ``conductivity`` DataFrames, which
    must already contain the ``Well`` and ``Depth_m`` columns. Both panels share
    one depth axis, drawn reversed so depth increases downward.

    Parameters
    ----------
    well
        Value matched against the ``Well`` column of both DataFrames.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    # Rows of the selected well in each dataset
    well_caliper = caliper[caliper["Well"] == well]
    well_conductivity = conductivity[conductivity["Well"] == well]

    # Left panel: conductivity against depth
    conductivity_trace = go.Scatter(
        x=well_conductivity["SpCond µS/cm"],
        y=well_conductivity["Depth_m"],
        mode="markers",
        name="Conductivity",
        line=dict(color="red"),
    )

    # Right panel: calibrated caliper diameter against depth
    caliper_trace = go.Scatter(
        x=well_caliper["calibrated_cm"],
        y=well_caliper["Depth_m"],
        mode="markers",
        name="Caliper",
        line=dict(color="blue"),
    )

    # One row, two columns, with the y-axis shared between the panels
    fig = make_subplots(
        rows=1,
        cols=2,
        shared_yaxes=True,
        horizontal_spacing=0.05,
        subplot_titles=("Conductivity (µS/cm)", "Caliper (cm)"),
    )
    fig.add_trace(conductivity_trace, row=1, col=1)
    fig.add_trace(caliper_trace, row=1, col=2)

    # Figure-level layout
    fig.update_layout(
        height=600,
        title_text=f"{well} – Caliper and Conductivity",
        yaxis={"title": "Depth (m)", "autorange": "reversed"},
        showlegend=False,
        margin={"t": 80, "l": 60, "r": 60, "b": 60},
    )

    # Unit label under each panel
    for panel_col, unit_label in ((1, "µS/cm"), (2, "cm")):
        fig.update_xaxes(title_text=unit_label, row=1, col=panel_col)

    fig.show()

# Connect the plotting function to the well dropdown so that choosing a well
# calls plot_logs_separate_axes with it; the trailing semicolon keeps the cell
# from echoing the call's return value.
widgets.interact(plot_logs_separate_axes, well=dropdown);


interactive(children=(Dropdown(description='Well:', options=('AW5D', 'AW5O', 'AW5S', 'AW6D', 'LRS69D', 'LRS69S…