In [26]:
import pandas as pd
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from plotly.subplots import make_subplots


### Exploratory analysis

In [27]:
priority_wells = ['AW5D', 'AW5O', 'AW5S','AW6D', 'AW6O', 'AW6S','BW3D', 'LRS69D', 'LRS69O', 'LRS69S','LRS69DR', 'LRS69OR', 'LRS69SR' 'LRS70D', 'LRS70S']

#### Caliper

In [28]:
# path = r'notebooks\sandbox\concatenate_caliper_all.csv'
caliper= pd.read_csv('concatenate_caliper_all.csv')
caliper.sample(5)

Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m]
1799,1799,56.2385,4.70552,AW2D_caliper_20210910.LAS,6,6.052744,15.373969,0.879059,-17.141495
25425,25425,39.7865,5.81446,LRS79D_caliper_20210910.LAS,6,7.211829,18.318046,20.197153,-12.126925
9860,9860,1.69761,5.38414,BW10S_caliper_20210910.LAS,6,6.76205,17.175608,12.700839,-0.517432
26694,26694,16.5543,5.96098,LRS89D_caliper_20210910.LAS,6,7.364975,18.707036,22.749578,-5.045751
10359,10359,29.1189,5.77503,BW11D_caliper_20210910.LAS,6,7.170616,18.213365,19.510269,-8.875441


In [29]:
# Retain only the wells defined as priority for the current analysis
# Extract well name from 'source_file' by taking characters before the first underscore
caliper['Well'] = caliper['source_file'].str.extract(r'^([^_]+)')
caliper_priority = caliper[caliper['Well'].isin(priority_wells)].copy()
caliper_priority.sample(5)


Unnamed: 0.1,Unnamed: 0,Depth_ft,Caliper_in,source_file,Diameter_auger_in,calibrated_in,calibrated_cm,Well_Diameter_Deviation_Percentage,Depth [m],Well
6316,6316,25.9384,2.81955,AW6O_caliper_20210910.LAS,4,4.081491,10.366987,2.037276,-7.906024,AW6O
5083,5083,72.9897,5.12662,AW6D_caliper_20210910.LAS,6,6.492885,16.491929,8.214757,-22.247261,AW6D
6074,6074,50.0681,2.82346,AW6O_caliper_20210910.LAS,4,4.085578,10.377368,2.139446,-15.260757,AW6O
21630,21630,38.3931,5.59105,LRS69D_caliper_20210910.LAS,6,6.978317,17.724924,16.305278,-11.702217,LRS69D
5757,5757,5.78569,6.99246,AW6D_caliper_20210910.LAS,6,8.443098,21.445468,40.718294,-1.763478,AW6D


In [30]:
caliper.columns

Index(['Unnamed: 0', 'Depth_ft', 'Caliper_in', 'source_file',
       'Diameter_auger_in', 'calibrated_in', 'calibrated_cm',
       'Well_Diameter_Deviation_Percentage', 'Depth [m]', 'Well'],
      dtype='object')

In [31]:
print(f"Row count for each well:\n{caliper_priority['Well'].value_counts()}\n\n"
      f"Total priority wells defined: {len(priority_wells)}\n"
      f"Total priority wells after filtering: {caliper_priority['Well'].nunique()}")


Row count for each well:
Well
BW3D      1002
AW5D       900
AW6D       898
LRS69D     865
AW6O       753
AW5O       343
LRS69S     245
LRS70S     203
AW6S       147
AW5S        95
Name: count, dtype: int64

Total priority wells defined: 14
Total priority wells after filtering: 10


In [32]:
# It seems LRS69O is missing from the caliper data — let's check which LRS69 wells are actually present
caliper[caliper['source_file'].str.startswith('LRS69')]['Well'].unique()


array(['LRS69D', 'LRS69S'], dtype=object)

#### Conductivity

In [33]:
## Standarizing column names across csvs
# Set to collect column names
column_names = set()

# Path to CSV folder
csv_folder = Path("../../data/raw")
csv_paths = list(csv_folder.glob("*.csv"))

# Loop through only priority well files
for path in csv_paths:
    well_id = path.stem.split("_")[0]  # Get well ID from filename
    if well_id in priority_wells:
        try:
            df = pd.read_csv(path, nrows=1)  # Read only the header
            print(f"{path.name}: {len(df.columns)} columns")
            column_names.update(df.columns)
        except Exception as e:
            print(f"{path.name}: ❌ Error - {e}")
print(column_names)

AW5D_YSI_20250225.csv: 20 columns
AW5O_YSI_20250225.csv: 20 columns
AW5S_YSI_20250225.csv: 23 columns
AW6D_YSI_20250226.csv: 20 columns
AW6O_YSI_20250226.csv: 20 columns
AW6S_YSI_20250226.csv: 20 columns
BW3D_YSI_20250222.csv: 23 columns
LRS69DR_YSI_20250222R.csv: 21 columns
LRS69D_YSI_20250222.csv: 23 columns
LRS69OR_YSI_20250222R.csv: 21 columns
LRS69O_YSI_20250222.csv: 23 columns
LRS69S_YSI_20250222.csv: 23 columns
LRS70S_YSI_20250317.csv: 21 columns
{'pH', 'Time (HH:mm:ss)', 'ORP mV', 'Depth m', 'Time (Fract. Sec)', 'Temp °C', 'Density Sigma', 'Vertical Position m', 'SpCond_muS/cm', 'Depth m.1', 'Temp_Celcius', 'Rhodamine WT ug/L', 'Density-T Sigma-T', 'Unnamed: 20', 'ODO mg/L', 'Sal psu', 'TDS mg/L', 'Resistivity ohms-cm', 'Rhodamine WT RFU', 'Cable Pwr V', 'Battery V', 'ODO % sat', 'nLF_Cond_muS/cm', 'Date (MM/DD/YYYY)', 'Site Name', 'Pressure psi a', 'Unnamed: 21', 'pH mV', 'Cond µS/cm', 'Cond_muS/cm', 'ODO % CB', 'Time (HH:MM:SS)', 'SpCond µS/cm'}


In [34]:
rename_dict = {
    # Conductividad específica
    'SpCond_muS/cm': 'SpCond µS/cm',
    'nLF_Cond_muS/cm': 'nLF Cond µS/cm',
    'Cond_muS/cm': 'Cond µS/cm',
    #'Corrected sp Cond [µS/cm]': 'SpCond_muS/cm',
   
    # Temperatura
    'Temp °C': 'Temperature_C',
    'Temp_Celcius': 'Temperature_C',

    # Profundidad
    #'Depth from GL (m)': 'Depth_m',
    #'Corrected Depth': 'Depth_m',

    # Tiempo
    'Time (HH:MM:SS)': 'Time (HH:mm:ss)',
    'Date (MM/DD/YYYY)': 'Date',


}


In [35]:
dfs=[]
for path in csv_paths:
    well_id = path.stem.split("_")[0]
    if well_id in priority_wells:
        df = pd.read_csv(path)
        df["Well"] = well_id
        df.rename(columns=rename_dict, inplace=True)
        df.drop(columns=[col for col in df.columns if col.startswith('Unnamed')], inplace=True)
        dfs.append(df)

# Combine all into a single DataFrame
if dfs:
    conductivity = pd.concat(dfs, ignore_index=True)
    
else:
    print("⚠️ No matching CSV files from priority wells were found or loaded.")

conductivity.sample(6)

Unnamed: 0,Date,Time (HH:mm:ss),Time (Fract. Sec),Site Name,Cond µS/cm,Depth m,ODO % sat,ODO mg/L,ORP mV,Pressure psi a,...,Cable Pwr V,Resistivity ohms-cm,Well,Depth m.1,Rhodamine WT RFU,Rhodamine WT ug/L,Density Sigma,Density-T Sigma-T,nLF Cond µS/cm,ODO % CB
83871,22/02/2025,13:26:25,0.0,Default Site,1166.8,5.568,54.3,4.38,394.9,7.897,...,1.1,,LRS69DR,,,,,,1141.3,54.3
39463,22/2/2025,16:25:54,0.0,Default Site,502.7,1.008,26.8,2.16,214.4,1.429,...,0.8,,BW3D,,-2.21,-22.05,-3.1,-3.1,,
4700,25/02/2025,12:20:48,0.0,Default Site,699.1,9.713,1.1,0.09,147.3,13.775,...,0.5,1430.0,AW5D,,,,,,,
61118,22/2/2025,19:26:21,0.5,Default Site,32954.0,18.708,-0.3,-0.02,-169.7,26.937,...,0.5,,BW3D,,-2.2,-22.03,12.5,12.4,,
39868,22/2/2025,16:29:16,0.5,Default Site,501.0,1.729,26.1,2.11,216.6,2.452,...,0.8,,BW3D,,-2.2,-22.04,-3.1,-3.1,,
66744,22/2/2025,20:13:14,0.5,Default Site,52095.6,23.646,-0.4,-0.03,-266.8,34.396,...,0.5,,BW3D,,-2.2,-21.97,22.8,22.7,,


### Comparison graph

In [36]:
# ---------------------
# 🔁 Standardize Depth
# ---------------------

# Caliper and Conductivity dataframes use different depth conventions
# Caliper: "Depth [m]" (negative values), Conductivity: "Vertical Position m" (positive values)
# We'll standardize both to a common column: Depth_m, with positive depth (downward)

caliper["Depth_m"] = caliper["Depth [m]"].abs()
conductivity["Depth_m"] = conductivity["Vertical Position m"]

# Create the base figure with shared Y axis
fig = make_subplots(
    rows=1, cols=2,
    shared_yaxes=True,
    horizontal_spacing=0.05,
    subplot_titles=("Conductivity (µS/cm)", "Caliper")
)

# Store visibility masks and buttons
visibility = []
buttons = []

# Loop through wells and add their data as hidden traces by default
for i, well in enumerate(sorted(set(caliper["Well"]).intersection(conductivity["Well"]))):
    cal = caliper[caliper["Well"] == well]
    cond = conductivity[conductivity["Well"] == well]

    # Add conductivity trace (col 1)
    fig.add_trace(
        go.Scatter(
            x=cond["SpCond µS/cm"],
            y=cond["Depth_m"],
            mode="markers",
            name=f"{well} - Conductivity",
            marker=dict(color="red", size=4),
            visible=(i == 0)  # Only the first well is visible initially
        ),
        row=1, col=1
    )

    # Add caliper trace (col 2)
    fig.add_trace(
        go.Scatter(
            x=cal["calibrated_cm"],
            y=cal["Depth_m"],
            mode="markers",
            name=f"{well} - Caliper",
            marker=dict(color="blue", size=4),
            visible=(i == 0)
        ),
        row=1, col=2
    )

    # Build visibility array for this well (2 traces per well)
    vis = [False] * (2 * i) + [True, True] + [False] * (2 * (len(set(caliper["Well"])) - i - 1))
    visibility.append(vis)

    # Add button for this well
    buttons.append(dict(
        label=well,
        method="update",
        args=[{"visible": vis},
              {"title": f"{well} – Caliper and Conductivity (Scatter View)"}]
    ))

# Add interactive menu to the figure
fig.update_layout(
    updatemenus=[dict(
        type="dropdown",
        direction="down",
        buttons=buttons,
        x=0.5,
        xanchor="center",
        y=1.15,
        yanchor="top"
    )],
    title="Caliper and Conductivity Logs",
    height=600,
    yaxis=dict(title="Depth (m)", autorange="reversed"),
    showlegend=False,
    margin=dict(t=100, l=60, r=60, b=60)
)

# Set axis titles
fig.update_xaxes(title_text="Conductivity (µS/cm)", row=1, col=1)
fig.update_xaxes(title_text="Caliper", row=1, col=2)

# Export to HTML
fig.write_html("caliper_conductivity_logs_interactive.html")
