# 1. Load & Concatenate

In [None]:
import pandas as pd
import glob
from pathlib import Path

# 1. Discover the per-well CSVs in the current working directory.
# glob returns names in arbitrary, filesystem-dependent order; sorting makes
# the row order of the combined table reproducible between runs.
files = sorted(glob.glob('*_joined.csv'))
if not files:
    # Without this guard pd.concat([]) fails further down with the opaque
    # "No objects to concatenate".
    raise FileNotFoundError(
        f"No files matching '*_joined.csv' found in {Path.cwd()}"
    )

# 2. Read and tag each
suffix = '_joined'
df_list = []
for fp in files:
    stem = Path(fp).stem
    # Strip only the trailing suffix: str.replace would also delete any
    # '_joined' that happens to occur inside the well name itself.
    well = stem[:-len(suffix)] if stem.endswith(suffix) else stem
    df = pd.read_csv(fp, index_col=0)  # first CSV column becomes the index
    df['Well'] = well
    df_list.append(df)

# 3. Concatenate
# ignore_index=True renumbers the rows 0..n-1 in one step, so the per-file
# index read above is discarded rather than kept as a column.
all_wells = pd.concat(df_list, axis=0, ignore_index=True)

# Row count per well, as a quick sanity check on the load.
print(all_wells['Well'].value_counts())


# 2. Cross-Well Summary Statistics

In [None]:
# Collect every column carrying the 'Lab_' or 'Log_' prefix, keeping the
# order in which the columns appear in all_wells.
column_names = list(all_wells.columns)
lab_vars = [name for name in column_names if name.startswith('Lab_')]
log_vars = [name for name in column_names if name.startswith('Log_')]

# 1. Per-well mean, standard deviation and non-null count of each variable,
#    rounded to three decimals.
per_well = all_wells.groupby('Well')
raw_stats = per_well[lab_vars + log_vars].agg(['mean', 'std', 'count'])
summary = raw_stats.round(3)
summary


# 3. Compare Distributions (Facet Grids)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Density of a single log curve, drawn as one KDE facet per well.
kde_options = dict(x='Log_GR', col='Well', kind='kde', height=4, aspect=1)
sns.displot(data=all_wells, **kde_options)
# y > 1 places the title above the facet row.
plt.suptitle('Log_GR Distribution by Well', y=1.02)
plt.show()

# Box plots for the first four lab variables: reshape to long form so that
# each variable gets its own facet, with an independent y-axis per facet.
lab_long = all_wells.melt(id_vars='Well', value_vars=lab_vars[:4])
box_options = dict(
    x='Well', y='value', col='variable',
    kind='box', sharey=False,
    height=4, aspect=0.8,
)
sns.catplot(data=lab_long, **box_options)
plt.show()


# 4. Cross-Well Correlation Comparison

In [None]:
import numpy as np

# Build a dict mapping each well to its log-vs-lab correlation block:
# rows are the log variables, columns are the lab variables.
corrs = {
    well: df[log_vars + lab_vars].corr().loc[log_vars, lab_vars]
    for well, df in all_wells.groupby('Well')
}

# Plot them in a grid, one heatmap per well on a single row.
n = len(corrs)
if n == 0:
    # plt.subplots(1, 0) would fail with a far less helpful message.
    raise ValueError('No wells available to build correlation heatmaps')

# squeeze=False always returns a 2-D array of Axes. With the default, a
# single well yields a bare Axes object, which can be neither iterated by
# zip() nor indexed with axes[-1].
fig, axes = plt.subplots(1, n, figsize=(6*n, 6), squeeze=False)
axes = axes.ravel()
for i, (ax, (well, cm)) in enumerate(zip(axes, corrs.items())):
    sns.heatmap(cm, ax=ax, vmin=-1, vmax=1, cmap='RdYlGn',
                cbar=(i == n - 1),  # show colorbar only on last
                square=True)
    ax.set_title(f'{well} Correlation')
plt.tight_layout()
plt.show()

# 5. Advanced: Cluster Wells by Pattern

In [None]:
from sklearn.cluster import AgglomerativeClustering
from sklearn.preprocessing import StandardScaler

# 1. Flatten each well's correlation matrix into one feature vector.
wells = list(corrs)
if len(wells) < 2:
    # Two clusters cannot be formed from fewer than two samples.
    raise ValueError(
        f'Need at least 2 wells to form 2 clusters, got {len(wells)}'
    )
vecs = [corrs[well].values.flatten() for well in wells]

# 2. Scale & cluster
features = np.vstack(vecs)
# A correlation is NaN when it is undefined (e.g. a constant column or too
# few paired samples). StandardScaler passes NaN through, but
# AgglomerativeClustering rejects it, so undefined entries are treated here
# as "no correlation" (0.0).
undefined = np.isnan(features)
if undefined.any():
    print(f'Replacing {int(undefined.sum())} undefined correlation(s) with 0.0')
    features = np.where(undefined, 0.0, features)

X = StandardScaler().fit_transform(features)
cl = AgglomerativeClustering(n_clusters=2).fit(X)
cluster_df = pd.DataFrame({'Well': wells, 'Cluster': cl.labels_})
print(cluster_df)