In [1]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from plotly.subplots import make_subplots


### Exploratory analysis

In [2]:
# Wells selected as priority for this analysis, grouped one site per line.
priority_wells = [
    "AW5D", "AW5O", "AW5S",
    "AW6D", "AW6O", "AW6S",
    "LRS69D", "LRS69O", "LRS69S",
    "LRS70D", "LRS70S",
]

#### Caliper

In [3]:
# Load the concatenated caliper logs. The path is relative to the current
# working directory (a previously used location was
# notebooks\sandbox\concatenate_caliper_all.csv).
caliper_csv = 'concatenate_caliper_all.csv'
caliper = pd.read_csv(caliper_csv)

# Preview a handful of random rows
caliper.sample(5)

Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m]
26111,26111,74.6847,5.04609,LRS89D_caliper_20210910.LAS,6,6.408714,16.278133,6.811898,-22.763897
19807,19807,10.1729,8.22023,LRS33D_caliper_20210910.LAS,6,9.726387,24.705022,62.106445,-3.1007
8553,8553,27.3229,6.64966,BW10D_caliper_20210922.LAS,6,8.084796,20.535383,34.746607,-8.32802
7314,7314,22.2377,5.13612,AW7D_caliper_20210910.LAS,6,6.502815,16.51715,8.38025,-6.778051
25060,25060,76.1804,5.32738,LRS79D_caliper_20210910.LAS,6,6.702724,17.024918,11.712061,-23.219786


In [4]:
# Derive each row's well ID from its 'source_file' value (the text preceding
# the first underscore) and store it in a new 'Well' column.
caliper['Well'] = caliper['source_file'].str.extract(r'^([^_]+)')

# Keep only the rows whose well is in the priority list; copy so that later
# edits to the subset do not touch the full caliper frame.
is_priority = caliper['Well'].isin(priority_wells)
caliper_priority = caliper.loc[is_priority].copy()
caliper_priority.sample(5)


Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m],Well
22609,22609,79.9693,8.04184,LRS70D_caliper_20210910.LAS,8,9.53993,24.231422,19.249125,-24.374643,LRS70D
23143,23143,26.7246,7.48541,LRS70D_caliper_20210910.LAS,8,8.958339,22.75418,11.979232,-8.145658,LRS70D
22491,22491,91.735,10.9755,LRS70D_caliper_20210910.LAS,8,12.606248,32.019871,57.578105,-27.960828,LRS70D
22147,22147,11.2723,5.31675,LRS69S_caliper_20210910.LAS,6,6.691613,16.996697,11.526883,-3.435797,LRS69S
4854,4854,6.38394,5.07097,AW5S_caliper_20210910.LAS,6,6.434719,16.344186,7.245316,-1.945825,AW5S


In [5]:
# Inspect the column names currently present on the full caliper frame
caliper.columns

Index(['Unnamed: 0', 'Depth_ft', 'Caliper_in', 'source_file',
       'Diameter_auger_in', 'calibrated_in', 'calibrated_cm',
       'Well_Diameter_Deviation_Percentage', 'Depth [m]', 'Well'],
      dtype='object')

In [6]:
# Summarise the filter result: rows per well, and how many of the requested
# priority wells actually have caliper data.
rows_per_well = caliper_priority['Well'].value_counts()
n_wells_defined = len(priority_wells)
n_wells_found = caliper_priority['Well'].nunique()

print(f"Row count for each well:\n{rows_per_well}\n\n"
      f"Total priority wells defined: {n_wells_defined}\n"
      f"Total priority wells after filtering: {n_wells_found}")


Row count for each well:
Well
LRS70D    1160
AW5D       900
AW6D       898
LRS69D     865
AW6O       753
AW5O       343
LRS69S     245
LRS70S     203
AW6S       147
AW5S        95
Name: count, dtype: int64

Total priority wells defined: 11
Total priority wells after filtering: 10


In [7]:
# LRS69O appears to be missing from the caliper data: list every well whose
# source file name starts with 'LRS69' to see which ones are present.
from_lrs69_file = caliper['source_file'].str.startswith('LRS69')
caliper.loc[from_lrs69_file, 'Well'].unique()


array(['LRS69D', 'LRS69S'], dtype=object)

#### Conductivity

In [8]:
## Standardizing column names across CSVs
# Union of every column name seen in the priority wells' CSV headers
column_names = set()

# Path to CSV folder. The paths are sorted because glob yields them in
# filesystem order; sorting keeps this scan (and any later cell that reuses
# csv_paths) deterministic across machines.
csv_folder = Path("../../data/raw")
csv_paths = sorted(csv_folder.glob("*.csv"))

# Loop through only priority well files
for path in csv_paths:
    well_id = path.stem.split("_")[0]  # Well ID = file-name text before the first underscore
    if well_id not in priority_wells:
        continue
    try:
        # nrows=0 parses the header line only; no data rows are loaded
        header = pd.read_csv(path, nrows=0)
        print(f"{path.name}: {len(header.columns)} columns")
        column_names.update(header.columns)
    except Exception as e:
        # Report the unreadable file and keep scanning the remaining ones
        print(f"{path.name}: ❌ Error - {e}")

# Sorted so the printed inventory is stable between runs (set order is not)
print(sorted(column_names))

AW5D_YSI_20250225.csv: 20 columns
AW5O_YSI_20250225.csv: 20 columns
AW5S_YSI_20250225.csv: 23 columns
AW6D_YSI_20250226.csv: 20 columns
LRS69D_YSI_20250222R.csv: 21 columns
LRS69O_YSI_20250222R.csv: 21 columns
LRS69S_YSI_20250222R.csv: 27 columns
LRS70D_YSI_20250317.csv: 21 columns
LRS70S_YSI_20250317.csv: 21 columns
{'ORP mV', 'Time (HH:mm:ss)', 'Date (MM/DD/YYYY)', 'Unnamed: 23', 'Depth m', 'ODO mg/L', 'Temp_Celcius', 'Unnamed: 20', 'Time (Fract. Sec)', 'Depth m.1', 'ODO % CB', 'pH mV', 'Time (HH:MM:SS)', 'ODO % sat', 'Vertical Position m', 'pH', 'SpCond_muS/cm.1', 'Cable Pwr V', 'TDS mg/L', 'Site Name', 'Sal psu', 'Resistivity ohms-cm', 'Unnamed: 21', 'Cond_muS/cm', 'nLF_Cond_muS/cm', 'Pressure psi a', 'Unnamed: 24', 'SpCond_muS/cm', 'Battery V', 'Unnamed: 25'}


In [9]:
# Mapping from the raw column names found in the CSV headers to the
# standardized names used in the rest of the notebook.
rename_dict = {
    # Conductivity
    "SpCond_muS/cm": "SpCond µS/cm",
    "nLF_Cond_muS/cm": "nLF Cond µS/cm",
    "Cond_muS/cm": "Cond µS/cm",
    # (disabled) 'Corrected sp Cond [µS/cm]': 'SpCond_muS/cm'

    # Temperature
    "Temp °C": "Temperature_C",
    "Temp_Celcius": "Temperature_C",

    # Depth — no depth columns are renamed at the moment
    # (disabled) 'Depth from GL (m)': 'Depth_m'
    # (disabled) 'Corrected Depth': 'Depth_m'

    # Time and date
    "Time (HH:MM:SS)": "Time (HH:mm:ss)",
    "Date (MM/DD/YYYY)": "Date",
}


In [10]:
# Load every priority-well CSV, tag its rows with the well ID, standardize the
# column names and drop the header-less 'Unnamed*' columns.
dfs = []
for path in csv_paths:
    well_id = path.stem.split("_")[0]  # Well ID = file-name text before the first underscore
    if well_id not in priority_wells:
        continue
    df = pd.read_csv(path)
    df["Well"] = well_id
    df = df.rename(columns=rename_dict)
    unnamed_cols = [col for col in df.columns if col.startswith('Unnamed')]
    df = df.drop(columns=unnamed_cols)
    dfs.append(df)

# Fail with a clear message when nothing was loaded: the following lines need
# `conductivity` to exist, so merely printing a warning would only defer the
# failure to an unrelated-looking NameError.
if not dfs:
    raise FileNotFoundError("⚠️ No matching CSV files from priority wells were found or loaded.")

# Combine all into a single DataFrame
conductivity = pd.concat(dfs, ignore_index=True)

conductivity.sample(6)

Unnamed: 0,Date,Time (HH:mm:ss),Time (Fract. Sec),Site Name,Cond µS/cm,Depth m,ODO % sat,ODO mg/L,ORP mV,Pressure psi a,...,Temperature_C,Vertical Position m,Battery V,Cable Pwr V,Resistivity ohms-cm,Well,Depth m.1,nLF Cond µS/cm,ODO % CB,SpCond_muS/cm.1
67650,22/02/2025,14:51:56,0.75,Default Site,54707.7,24.031,1.8,0.12,-212.3,34.997,...,25.388,24.026,2.73,1.1,,LRS69D,,54263.6,1.8,
57192,22/02/2025,14:08:22,0.25,Default Site,6092.1,8.565,3.1,0.25,-96.8,12.174,...,25.796,8.565,2.75,1.1,,LRS69D,,5991.4,3.1,
41004,22/02/2025,13:00:55,0.25,Default Site,1152.7,2.556,56.1,4.53,369.4,3.625,...,26.076,2.556,2.78,1.1,,LRS69D,,1127.1,56.1,
95691,22/02/2025,09:58:53,0.0,Default Site,933.6,1.754,78.2,6.34,371.4,2.487,...,25.885,1.755,2.87,1.1,,LRS69S,1.754,916.5,78.2,918.1
74273,22/02/2025,10:43:02,0.5,Default Site,1020.0,1.558,53.7,4.35,403.0,2.209,...,25.966,1.557,2.85,1.1,,LRS69O,,999.5,53.7,
83538,22/02/2025,11:21:38,0.75,Default Site,1036.3,3.608,54.0,4.36,422.1,5.118,...,26.048,3.605,2.82,1.1,,LRS69O,,1013.9,54.0,


### Comparison graph

In [12]:
# ---------------------
# 🔁 Standardize Depth
# ---------------------

# Caliper and Conductivity dataframes use different depth conventions
# Caliper: "Depth [m]" (negative values), Conductivity: "Vertical Position m" (positive values)
# We'll standardize both to a common column: Depth_m, with positive depth (downward)

caliper["Depth_m"] = caliper["Depth [m]"].abs()
conductivity["Depth_m"] = conductivity["Vertical Position m"]

# Only wells present in BOTH datasets are plotted. Compute the list once so the
# trace loop and the visibility masks agree on how many wells there are.
common_wells = sorted(set(caliper["Well"]).intersection(conductivity["Well"]))
traces_per_well = 2  # one conductivity trace + one caliper trace per well
n_traces = traces_per_well * len(common_wells)

# Create the base figure with shared Y axis
fig = make_subplots(
    rows=1, cols=2,
    shared_yaxes=True,
    horizontal_spacing=0.05,
    subplot_titles=("Conductivity (µS/cm)", "Caliper")
)

# Store visibility masks and buttons
visibility = []
buttons = []

# Loop through wells and add their data; only the first well starts visible
for i, well in enumerate(common_wells):
    cal = caliper[caliper["Well"] == well]
    cond = conductivity[conductivity["Well"] == well]

    # Add conductivity trace (col 1)
    fig.add_trace(
        go.Scatter(
            x=cond["SpCond µS/cm"],
            y=cond["Depth_m"],
            mode="markers",
            name=f"{well} - Conductivity",
            marker=dict(color="red", size=4),
            visible=(i == 0)  # Only the first well is visible initially
        ),
        row=1, col=1
    )

    # Add caliper trace (col 2)
    fig.add_trace(
        go.Scatter(
            x=cal["calibrated_cm"],
            y=cal["Depth_m"],
            mode="markers",
            name=f"{well} - Caliper",
            marker=dict(color="blue", size=4),
            visible=(i == 0)
        ),
        row=1, col=2
    )

    # Visibility mask for this well: exactly one entry per trace in the figure,
    # True only for this well's two traces. (It was previously sized from the
    # number of wells in `caliper`, which includes wells that are never plotted.)
    vis = [trace_idx // traces_per_well == i for trace_idx in range(n_traces)]
    visibility.append(vis)

    # Add button for this well
    buttons.append(dict(
        label=well,
        method="update",
        args=[{"visible": vis},
              {"title": f"{well} – Caliper and Conductivity (Scatter View)"}]
    ))

# Add interactive menu to the figure
fig.update_layout(
    updatemenus=[dict(
        type="dropdown",
        direction="down",
        buttons=buttons,
        x=0.5,
        xanchor="center",
        y=1.15,
        yanchor="top"
    )],
    title="Caliper and Conductivity Logs",
    height=600,
    # Reversed axis so depth increases downward
    yaxis=dict(title="Depth (m)", autorange="reversed"),
    showlegend=False,
    margin=dict(t=100, l=60, r=60, b=60)
)

# Set axis titles
fig.update_xaxes(title_text="Conductivity (µS/cm)", row=1, col=1)
fig.update_xaxes(title_text="Caliper", row=1, col=2)

# Export to HTML
fig.write_html("caliper_conductivity_logs_interactive.html")
