In [38]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from plotly.subplots import make_subplots


### Exploratory analysis

In [39]:
priority_wells = ['AW5D', 'AW5O', 'AW5S','AW6D', 'AW6O', 'AW6S','BW3D', 'LRS69D', 'LRS69O', 'LRS69S','LRS69DR', 'LRS69OR', 'LRS69SR' ,'LRS70D', 'LRS70S']

#### Caliper

In [40]:
# path = r'notebooks\sandbox\concatenate_caliper_all.csv'
caliper= pd.read_csv('concatenate_caliper_all.csv')
caliper.sample(5)

Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m]
19334,19334,57.3353,5.03966,LRS33D_caliper_20210910.LAS,6,6.401993,16.261063,6.699886,-17.475799
2413,2413,45.3728,3.89018,AW2O_caliper_20210910.LAS,4,5.200534,13.209357,30.013358,-13.829629
23180,23180,23.0354,7.89365,LRS70D_caliper_20210910.LAS,8,9.385039,23.837999,17.312987,-7.02119
8818,8818,0.899942,7.76391,BW10D_caliper_20210922.LAS,6,9.249432,23.493558,54.157203,-0.274302
17854,17854,82.2639,4.80423,BW7D_caliper_20210910.LAS,6,6.155917,15.63603,2.598619,-25.074037


In [41]:
# Retain only the wells defined as priority for the current analysis
# Extract well name from 'source_file' by taking characters before the first underscore
caliper['Well'] = caliper['source_file'].str.extract(r'^([^_]+)')
caliper_priority = caliper[caliper['Well'].isin(priority_wells)].copy()
caliper_priority.sample(5)


Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m],Well
13648,13648,31.9069,7.05649,BW3D_caliper_20210910.LAS,6,8.510023,21.615458,41.833717,-9.725223,BW3D
22941,22941,46.8659,7.54357,LRS70D_caliper_20210910.LAS,8,9.019129,22.908586,12.739106,-14.284726,LRS70D
23516,23516,9.77406,7.53295,LRS70S_caliper_20210910.LAS,8,9.008028,22.880392,12.600353,-2.979133,LRS70S
4948,4948,86.4504,5.57735,AW6D_caliper_20210910.LAS,6,6.963997,17.688553,16.06662,-26.350082,AW6D
13764,13764,20.3407,8.70787,BW3D_caliper_20210910.LAS,6,10.236077,25.999637,70.601291,-6.199845,BW3D


In [42]:
caliper.columns

Index(['Unnamed: 0', 'Depth_ft', 'Caliper_in', 'source_file',
       'Diameter_auger_in', 'calibrated_in', 'calibrated_cm',
       'Well_Diameter_Deviation_Percentage', 'Depth [m]', 'Well'],
      dtype='object')

In [43]:
print(f"Row count for each well:\n{caliper_priority['Well'].value_counts()}\n\n"
      f"Total priority wells defined: {len(priority_wells)}\n"
      f"Total priority wells after filtering: {caliper_priority['Well'].nunique()}")


Row count for each well:
Well
LRS70D    1160
BW3D      1002
AW5D       900
AW6D       898
LRS69D     865
AW6O       753
AW5O       343
LRS69S     245
LRS70S     203
AW6S       147
AW5S        95
Name: count, dtype: int64

Total priority wells defined: 15
Total priority wells after filtering: 11


In [44]:
# It seems LRS69O is missing from the caliper data — let's check which LRS69 wells are actually present
caliper[caliper['source_file'].str.startswith('LRS70')]['Well'].unique()


array(['LRS70D', 'LRS70S'], dtype=object)

#### Conductivity

In [45]:
## Standarizing column names across csvs
# Set to collect column names
column_names = set()

# Path to CSV folder
csv_folder = Path("../../data/raw")
csv_paths = list(csv_folder.glob("*.csv"))

# Loop through only priority well files
for path in csv_paths:
    well_id = path.stem.split("_")[0]  # Get well ID from filename
    if well_id in priority_wells:
        try:
            df = pd.read_csv(path, nrows=1)  # Read only the header
            print(f"{path.name}: {len(df.columns)} columns")
            column_names.update(df.columns)
        except Exception as e:
            print(f"{path.name}: ❌ Error - {e}")
print(column_names)

AW5D_YSI_20250225.csv: 20 columns
AW5O_YSI_20250225.csv: 20 columns
AW5S_YSI_20250225.csv: 23 columns
AW6D_YSI_20250226.csv: 20 columns
AW6O_YSI_20250226.csv: 20 columns
AW6S_YSI_20250226.csv: 20 columns
BW3D_YSI_20250222.csv: 23 columns
LRS69DR_YSI_20250222R.csv: 21 columns
LRS69D_YSI_20250222.csv: 23 columns
LRS69OR_YSI_20250222R.csv: 21 columns
LRS69O_YSI_20250222.csv: 23 columns
LRS69SR_YSI_20250222R.csv: 27 columns
LRS69S_YSI_20250222.csv: 23 columns
LRS70D_YSI_20250317.csv: 21 columns
LRS70S_YSI_20250317.csv: 21 columns
{'pH', 'Time (HH:mm:ss)', 'ORP mV', 'Depth m', 'Time (Fract. Sec)', 'Temp °C', 'Density Sigma', 'Vertical Position m', 'SpCond_muS/cm', 'Unnamed: 24', 'Depth m.1', 'Temp_Celcius', 'Rhodamine WT ug/L', 'Density-T Sigma-T', 'Unnamed: 23', 'Unnamed: 20', 'ODO mg/L', 'Sal psu', 'TDS mg/L', 'Resistivity ohms-cm', 'Unnamed: 25', 'Rhodamine WT RFU', 'Cable Pwr V', 'Battery V', 'ODO % sat', 'nLF_Cond_muS/cm', 'Date (MM/DD/YYYY)', 'Site Name', 'Pressure psi a', 'Unnamed: 2

In [46]:
rename_dict = {
    # Conductividad específica
    'SpCond_muS/cm': 'SpCond µS/cm',
    'nLF_Cond_muS/cm': 'nLF Cond µS/cm',
    'Cond_muS/cm': 'Cond µS/cm',
    #'Corrected sp Cond [µS/cm]': 'SpCond_muS/cm',
   
    # Temperatura
    'Temp °C': 'Temperature_C',
    'Temp_Celcius': 'Temperature_C',

    # Profundidad
    #'Depth from GL (m)': 'Depth_m',
    #'Corrected Depth': 'Depth_m',

    # Tiempo
    'Time (HH:MM:SS)': 'Time (HH:mm:ss)',
    'Date (MM/DD/YYYY)': 'Date',


}


In [47]:
dfs=[]
for path in csv_paths:
    well_id = path.stem.split("_")[0]
    if well_id in priority_wells:
        df = pd.read_csv(path)
        df["Well"] = well_id
        df.rename(columns=rename_dict, inplace=True)
        df.drop(columns=[col for col in df.columns if col.startswith('Unnamed')], inplace=True)
        dfs.append(df)

# Combine all into a single DataFrame
if dfs:
    conductivity = pd.concat(dfs, ignore_index=True)
    
else:
    print("⚠️ No matching CSV files from priority wells were found or loaded.")

conductivity.sample(6)

Unnamed: 0,Date,Time (HH:mm:ss),Time (Fract. Sec),Site Name,Cond µS/cm,Depth m,ODO % sat,ODO mg/L,ORP mV,Pressure psi a,...,Resistivity ohms-cm,Well,Depth m.1,Rhodamine WT RFU,Rhodamine WT ug/L,Density Sigma,Density-T Sigma-T,nLF Cond µS/cm,ODO % CB,SpCond_muS/cm.1
55772,22/2/2025,18:41:48,0.5,Default Site,14566.5,14.634,-0.1,-0.01,-135.2,20.883,...,,BW3D,,-2.2,-21.97,3.4,3.3,,,
17092,25/02/2025,10:22:56,0.0,Default Site,658.8,6.81,10.2,0.84,95.1,9.659,...,1518.0,AW5O,,,,,,,,
147890,26/02/2025,13:39:03,0.0,Default Site,50920.2,13.693,1.1,0.07,256.8,19.897,...,,LRS70D,,,,,,50271.9,1.1,
42625,22/2/2025,16:52:15,0.0,Default Site,503.8,3.999,37.4,3.03,237.0,5.671,...,,BW3D,,-2.2,-22.02,-3.0,-3.0,,,
77401,22/02/2025,12:59:27,0.5,Default Site,1153.9,2.555,56.2,4.54,367.6,3.624,...,,LRS69DR,,,,,,1128.1,56.2,
35401,26/2/2025,15:05:28,0.0,Default Site,1371.5,10.789,25.7,2.11,108.6,15.308,...,729.0,AW6O,,,,,,,,


### Comparison graph

In [48]:
# ---------------------
# 🔁 Standardize Depth
# ---------------------

# Caliper and Conductivity dataframes use different depth conventions
# Caliper: "Depth [m]" (negative values), Conductivity: "Vertical Position m" (positive values)
# We'll standardize both to a common column: Depth_m, with positive depth (downward)

caliper["Depth_m"] = caliper["Depth [m]"].abs()
conductivity["Depth_m"] = conductivity["Vertical Position m"]

# Create the base figure with shared Y axis
fig = make_subplots(
    rows=1, cols=2,
    shared_yaxes=True,
    horizontal_spacing=0.05,
    subplot_titles=("Conductivity (µS/cm)", "Caliper")
)

# Store visibility masks and buttons
visibility = []
buttons = []

# Loop through wells and add their data as hidden traces by default
for i, well in enumerate(sorted(set(caliper["Well"]).intersection(conductivity["Well"]))):
    cal = caliper[caliper["Well"] == well]
    cond = conductivity[conductivity["Well"] == well]

    # Add conductivity trace (col 1)
    fig.add_trace(
        go.Scatter(
            x=cond["SpCond µS/cm"],
            y=cond["Depth_m"],
            mode="markers",
            name=f"{well} - Conductivity",
            marker=dict(color="red", size=4),
            visible=(i == 0)  # Only the first well is visible initially
        ),
        row=1, col=1
    )

    # Add caliper trace (col 2)
    fig.add_trace(
        go.Scatter(
            x=cal["calibrated_cm"],
            y=cal["Depth_m"],
            mode="markers",
            name=f"{well} - Caliper",
            marker=dict(color="blue", size=4),
            visible=(i == 0)
        ),
        row=1, col=2
    )

    # Build visibility array for this well (2 traces per well)
    vis = [False] * (2 * i) + [True, True] + [False] * (2 * (len(set(caliper["Well"])) - i - 1))
    visibility.append(vis)

    # Add button for this well
    buttons.append(dict(
        label=well,
        method="update",
        args=[{"visible": vis},
              {"title": f"{well} – Caliper and Conductivity (Scatter View)"}]
    ))

# Add interactive menu to the figure
fig.update_layout(
    updatemenus=[dict(
        type="dropdown",
        direction="down",
        buttons=buttons,
        x=0.5,
        xanchor="center",
        y=1.15,
        yanchor="top"
    )],
    title="Caliper and Conductivity Logs",
    height=600,
    yaxis=dict(title="Depth (m)", autorange="reversed"),
    showlegend=False,
    margin=dict(t=100, l=60, r=60, b=60)
)

# Set axis titles
fig.update_xaxes(title_text="Conductivity (µS/cm)", row=1, col=1)
fig.update_xaxes(title_text="Caliper", row=1, col=2)

# Export to HTML
fig.write_html("caliper_conductivity_logs_interactive.html")
