In [None]:
# --- Set up directories ---
#
# Defines the folder paths used by the later cells, checks that every folder
# that is read from exists, and creates the output folder.

import os

# === Define input and output folders ===

folder_balldrop = r'C:\...\balldrop' #update the path to 'balldrop' folder
folder_AE = r'C:\...\AE' #update the path to 'AE' folder
folder_processed = r'C:\...\output' #update the path to 'output' folder
folder_mecha = r'C:\...\mecha' #update the path to 'mecha' folder

# ========================================

# Verify that input folders exist.
# folder_mecha is included because later cells read their CSV inputs from it;
# a wrong path should fail here with a clear message rather than inside
# pd.read_csv further down the notebook.
for folder in [folder_balldrop, folder_AE, folder_mecha]:
    if not os.path.exists(folder):
        raise FileNotFoundError(f"Input folder not found: {folder}")

# Create the output folder if it doesn't exist
os.makedirs(folder_processed, exist_ok=True)

# Confirm setup
print("Folder set-up completed.")


In [None]:
# --- Ball drop experiment: material and geometry properties ---
#
# Derives wave velocities, shear modulus and fault area from the inputs below.
# v_s, kappa, shear_modulus, fault_area and fault_area_mm are reused by later cells.

import math
import numpy as np

# === Input properties ===

density2 = 2650        # rock sample density (kg/m³)
E2 = 6.2e10            # Young's modulus (Pa)
nu2 = 0.3              # Poisson's ratio
d = 25                 # sample diameter (mm)
cut_deg = 30           # fault cut angle (degrees)
kappa = 2.34           # constant used to calculate source radius (Brune model)

# ========================

# S-wave velocity: sqrt(E / (2 * rho * (1 + nu)))
shear_term = 2 * density2 * (1 + nu2)
v_s = np.sqrt(E2 / shear_term)

# P-wave velocity: sqrt(E * (1 - nu) / (rho * (1 + nu) * (1 - 2 * nu)))
p_numerator = E2 * (1 - nu2)
p_denominator = density2 * (1 + nu2) * (1 - 2 * nu2)
v_p = np.sqrt(p_numerator / p_denominator)

# Shear modulus recovered from density and S-wave velocity
shear_modulus = density2 * (v_s ** 2)

# Fault area as pi * a * b with a = d/2 and b = (d / sin(cut angle)) / 2.
# d is in mm, so the product is in mm²; the 1e-6 factor converts it to m².
half_width = d / 2
half_length = (d / math.sin(math.radians(cut_deg))) / 2
fault_area = math.pi * half_width * half_length * 1e-6  # in m²
fault_area_mm = fault_area * 1e6  # in mm²

# Report the derived parameters
for report_line in (
    f"S-wave velocity (v_s): {v_s:.2f} m/s",
    f"P-wave velocity (v_p): {v_p:.2f} m/s",
    f"Brune Constant (K): {kappa:.2f}",
    f"Shear modulus (G): {shear_modulus:.2f} Pa",
    f"Fault Area: {fault_area_mm:.2f} mm²",
):
    print(report_line)


In [None]:
# --- Compute other AE Parameters and Generate Histograms ---
#
# This part joins the per-waveform tables, derives the additional source
# parameters and writes the combined table to the output folder.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# === File names ===

occtime_file_name = 'OccTime_SNR_PAmp_data.csv'
fitted_params_file_name = 'fitted_brune_parameters.csv'
moment_magnitude_file_name = 'moment_magnitude_results.csv'
SNR_file_name = 'SNR_results.csv'
output_file_name = 'waveform_parameters_export.csv'

# ====================

# Full paths: the occurrence-time table is read from the 'mecha' folder,
# every other table is read from (and the export written to) the output folder
occtime_file = os.path.join(folder_mecha, occtime_file_name)
fitted_params_file = os.path.join(folder_processed, fitted_params_file_name)
moment_magnitude_file = os.path.join(folder_processed, moment_magnitude_file_name)
SNR_file = os.path.join(folder_processed, SNR_file_name)
output_file_path = os.path.join(folder_processed, output_file_name)

# Read the four input tables
occ_time_df = pd.read_csv(occtime_file)
fitted_params_df = pd.read_csv(fitted_params_file)
moment_magnitude_df = pd.read_csv(moment_magnitude_file)
SNR_df = pd.read_csv(SNR_file)

# Inner-join everything on 'waveform_num', then drop rows with any missing value
merged_df = occ_time_df
for right_df in (fitted_params_df, moment_magnitude_df, SNR_df):
    merged_df = pd.merge(merged_df, right_df, on="waveform_num", how="inner")
merged_df = merged_df.dropna()

# Derived parameters (columns are added in this order)
moment_series = merged_df["mean_M0"]
radius_series = merged_df["source_radius"]

# stress drop = 7/16 * M0 / r^3
merged_df["stress_drop"] = (7 / 16) * moment_series * (radius_series ** -3)

# radiated energy taken as a fixed fraction (1.6e-5) of the moment
radiated_series = 1.6e-5 * moment_series
merged_df["radiated_energy"] = radiated_series

# V_a = M0^2 / (2 * G * E_R)
volume_series = (moment_series ** 2) / (2 * shear_modulus * radiated_series)
merged_df["V_a"] = volume_series

# radius of the sphere whose volume is V_a
merged_df["source_radius_alt"] = ((3 / (4 * np.pi)) * volume_series) ** (1 / 3)

# alternative moment: PAmp scaled by a fixed factor
merged_df["seismic_moment_alt"] = 2.4766946983546622e-05 * merged_df["PAmp"]

merged_df["source_radius_mm"] = radius_series * 1e3

# circular source area and its ratio to the fault area (fault_area is in m²)
area_series = np.pi * radius_series ** 2
merged_df["source_area"] = area_series
merged_df["source_area_ratio"] = area_series / fault_area

# Columns written to the export file, in output order
export_columns = [
    'waveform_num', 'OccTime', 'snr_spec', 'snr_spec_norm',
    'fitted_f0', 'source_radius', 'source_area', 'source_area_ratio',
    'mean_M0', 'moment_magnitude', 'stress_drop', 'radiated_energy',
    'V_a', 'source_radius_alt', 'seismic_moment_alt'
]
export_df = merged_df[export_columns]

# Save to CSV and show the first rows
export_df.to_csv(output_file_path, index=False)
print(f"Exported CSV file saved to: {output_file_path}")
print("\nCSV Header Preview:")
print(export_df.head())

# Plot Histograms

def _histogram_figure(column, bins, color, xlabel, title, xlim=None, log_bins=False):
    """Draw and show one 7x4 histogram of export_df[column].

    bins     -- number of bins; with log_bins=True it is the number of
                log-spaced bin edges spanning the positive values.
    xlim     -- optional callable mapping the plotted values to (left, right)
                x-limits; applied after the grid is drawn.
    log_bins -- keep only values > 0, use log-spaced bins and a log x-axis.

    Returns the plotted values and the bins argument passed to plt.hist.
    """
    plt.figure(figsize=(7, 4))
    values = export_df[column]
    if log_bins:
        values = export_df[values > 0][column]
        bins = np.logspace(np.log10(values.min()), np.log10(values.max()), bins)
    plt.hist(values, bins=bins, color=color, alpha=0.7, edgecolor="black")
    if log_bins:
        plt.xscale("log")
    plt.xlabel(xlabel, fontsize=12)
    plt.ylabel("Frequency", fontsize=12)
    plt.title(title, fontsize=14)
    plt.grid(True, linestyle="--", alpha=0.5)
    if xlim is not None:
        plt.xlim(*xlim(values))
    plt.show()
    return values, bins


# 1. Source Radius
_histogram_figure(
    "source_radius", 50, "blue",
    "Source Radius (m)", "Histogram of Source Radius (From Corner Frequency)",
    xlim=lambda s: (0, np.percentile(s, 100)),
)

# 2. Moment Magnitude
_histogram_figure(
    "moment_magnitude", 50, "green",
    "Moment Magnitude (Mw)", "Histogram of Moment Magnitude",
    xlim=lambda s: (s.min(), s.max()),
)

# 3. Seismic Moment (M0) -- positive values only, log-spaced bins
M0_data, bins_M0 = _histogram_figure(
    "mean_M0", 50, "red",
    "Seismic Moment (M0) [Nm]", "Histogram of Seismic Moment (M0)",
    log_bins=True,
)

# 4. Stress Drop -- x-axis clipped at the 99th percentile
_histogram_figure(
    "stress_drop", 200, "purple",
    "Stress Drop (Pa)", "Histogram of Stress Drop",
    xlim=lambda s: (0, np.percentile(s, 99)),
)

# 5. Radiated Energy -- x-axis clipped at the 99th percentile
_histogram_figure(
    "radiated_energy", 200, "orange",
    "Radiated Energy (J)", "Histogram of Radiated Energy",
    xlim=lambda s: (0, np.percentile(s, 99)),
)

# 6. Source Radius (Alt)
_histogram_figure(
    "source_radius_alt", 100, "orange",
    "Source Radius (m)", "Histogram of Source Radius (From Moment Magnitude)",
)

# 7. Seismic Moment (Alt) -- positive values only, log-spaced bins
M0_alt_data, bins_alt = _histogram_figure(
    "seismic_moment_alt", 50, "red",
    "Seismic Moment Alt (M0)", "Histogram of Seismic Moment Alt (M0)",
    log_bins=True,
)


In [None]:
# --- AE Data Check and Visualization ---
#
# Three stacked panels sharing the time axis:
#   1. upstream pressure and flow rate,
#   2. shear displacement, flow rate and AE moment magnitude,
#   3. number of AE events per 60 s bin (log count).

import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# === File names and paths ===

waveform_params_file = os.path.join(folder_processed, 'waveform_parameters_export.csv')
flow_rate_file = os.path.join(folder_mecha, 'PL02_mechanical_filtered.csv')

# ============================


def _style_yaxis(axis, label, color, ylim):
    """Label a y-axis, tint its label and tick labels, and fix its limits."""
    axis.set_ylabel(label, color=color)
    axis.tick_params(axis='y', labelcolor=color)
    axis.set_ylim(*ylim)


def _shared_legend(host, *axes):
    """Collect the legend entries of `axes` (in order) into one legend on `host`."""
    handles, labels = [], []
    for axis in axes:
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        handles = handles + axis_handles
        labels = labels + axis_labels
    host.legend(handles, labels, loc='upper left')


# Load datasets
df_flow = pd.read_csv(flow_rate_file)
df_waveform = pd.read_csv(waveform_params_file)

# Keep AE events inside the analysed time range (both ends inclusive)
df_waveform = df_waveform[df_waveform['OccTime'].between(5833, 7600)]

elapsed_s = df_flow['t_relative_s']
flow_rate = df_flow['q_b']

# Set up figure and subplots
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(7, 8), sharex=True)

# ---- Panel 1: Pressure and Flow Rate ----
# p_up is divided by 1e3 before being plotted on the MPa axis
p_line = ax1.plot(elapsed_s, df_flow['p_up'] / 1e3, label='Upstream Pressure', color='blue')[0]
_style_yaxis(ax1, 'Pressure (MPa)', 'blue', (3.5, 10.5))

ax1_2 = ax1.twinx()
f_line = ax1_2.plot(elapsed_s, flow_rate, label='Flow Rate', color='green')[0]
_style_yaxis(ax1_2, 'Flow Rate (m³/min)', 'green', (-0.05, 0.75))

ax1.set_title('Pressure and Flow Rate over Time')
ax1.set_xlim(5800, 7600)
ax1.grid(True)

# One legend for both y-axes
_shared_legend(ax1, ax1, ax1_2)

# ---- Panel 2: Displacement, Flow Rate, and Moment Magnitude ----
# d_shear is multiplied by 1000 before being plotted on the mm axis
d_line = ax2.plot(elapsed_s, df_flow['d_shear'] * 1000, label='Displacement', color='blue')[0]
_style_yaxis(ax2, 'Cum. Displacement (mm)', 'blue', (7.8, 8.5))

ax2_2 = ax2.twinx()
f_line2 = ax2_2.plot(elapsed_s, flow_rate, label='Flow Rate', linestyle='--', color='green')[0]
_style_yaxis(ax2_2, 'Flow Rate (m³/min)', 'green', (-0.05, 0.75))

# Third y-axis, pushed outward so it does not overlap the flow-rate axis
ax2_3 = ax2.twinx()
ax2_3.spines["right"].set_position(("outward", 60))
m_scatter = ax2_3.scatter(
    df_waveform['OccTime'], df_waveform['moment_magnitude'],
    label='AE Events', color='red', s=4, alpha=0.3, zorder=5,
)
_style_yaxis(ax2_3, 'Moment Magnitude', 'red', (-7, -5))

ax2.set_title('Displacement, Flow Rate, and Moment Magnitude')
ax2.set_xlabel('Time (s)')
ax2.grid(True)

# One legend for all three y-axes
_shared_legend(ax2, ax2, ax2_2, ax2_3)

# ---- Panel 3: Histogram of AE Events ----
bins = np.arange(5833, 7660, 60)  # 60 s wide bins starting at 5833 s
ax3.hist(df_waveform['OccTime'], bins=bins, color='gray', edgecolor='black')
ax3.set_xlabel('Time (s)')
ax3.set_ylabel('Number of AE Events (log)')
ax3.set_yscale('log')
ax3.set_title('Histogram of AE Events over Time')
ax3.grid(True)

# Final layout
plt.tight_layout()
plt.show()


In [None]:
# --- Mechanical Data Check and Visualization ---
#
# Two panels: (1) pressure, flow rate and permeability; (2) normalized shear
# displacement and slip velocity. Flow-rate peaks above 0.1 are then listed.

import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import find_peaks

# === Load Input Files ===

waveform_params_file = os.path.join(folder_processed, 'waveform_parameters_export.csv')
flow_rate_file = os.path.join(folder_mecha, 'PL02_mechanical_filtered.csv')
perm_file = os.path.join(folder_mecha, 'pl02_perm.csv')

# ==============================


def _style_large_yaxis(axis, label, color, ylim=None):
    """Label a y-axis in 14 pt, tint label and tick labels, optionally fix limits."""
    axis.set_ylabel(label, color=color, fontsize=14)
    axis.tick_params(axis='y', labelcolor=color, labelsize=14)
    if ylim is not None:
        axis.set_ylim(*ylim)


df1 = pd.read_csv(flow_rate_file)
df2 = pd.read_csv(waveform_params_file)
perm_df = pd.read_csv(perm_file)

# Filter and Preprocess Data
df2 = df2[df2['OccTime'].between(5833, 7600)]
# Duplicate time stamps are dropped before differentiating with respect to time
df1 = df1.drop_duplicates(subset='t_relative_s')
velocity = np.gradient(df1['d_shear'], df1['t_relative_s'])  # Slip velocity in m/s

# Normalize displacement: subtract the first d_shear sample at or after t = 5833
after_start = df1["t_relative_s"] >= 5833
disp_at_5833 = df1.loc[after_start, "d_shear"].iloc[0]
df1["d_shear_normalized"] = df1["d_shear"] - disp_at_5833

mech_time = df1['t_relative_s']

# Figure with two stacked panels
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(7, 8))
plt.subplots_adjust(hspace=0.4)

# ---- Panel 1: Pressure, Flow Rate, Permeability ----

# Pressure (p_up divided by 1e3 for the MPa axis)
ax1.plot(mech_time, df1['p_up'] / 1e3, color='black', label='Pressure')
_style_large_yaxis(ax1, 'Pressure (MPa)', 'black', (3.5, 10.5))

# Flow Rate
ax1_2 = ax1.twinx()
ax1_2.plot(mech_time, df1['q_b'], linestyle='--', color='#1565C0', label='Flow Rate')
_style_large_yaxis(ax1_2, r'Flow Rate (m$^3$/min)', '#1565C0', (-0.05, 0.9))

# Permeability on a third axis, offset outward from the flow-rate axis
ax1_3 = ax1.twinx()
ax1_3.spines["right"].set_position(("outward", 60))
ax1_3.plot(perm_df["time"], perm_df["perm"], linestyle='-', color='#d95f02', label='Permeability')
_style_large_yaxis(ax1_3, 'Permeability (m²)', '#d95f02')

ax1.set_xlim(5800, 7600)
ax1.grid(True)

# ---- Panel 2: Displacement and Slip Velocity ----

# Displacement (multiplied by 1000 for the mm axis)
ax2.plot(mech_time, df1['d_shear_normalized'] * 1000, color='black', label='Displacement')
_style_large_yaxis(ax2, 'Displacement (mm)', 'black', (-0.02, 0.4))

# Slip Velocity (multiplied by 1000 for the mm/s axis)
ax2_2 = ax2.twinx()
ax2_2.plot(mech_time, velocity * 1000, linestyle='--', color='#1565C0', label='Slip Velocity')
_style_large_yaxis(ax2_2, 'Slip Velocity (mm/s)', '#1565C0')

ax2.set_xlim(5800, 7600)
ax2.set_xlabel('Time (s)', fontsize=14)
ax2.grid(True)

# ---- Flow-rate peaks inside the plotted window ----
df1_filtered = df1[df1['t_relative_s'].between(5800, 7600)]
peaks_indices = find_peaks(df1_filtered['q_b'], height=0.1)[0]

# find_peaks returns positions, hence the positional .iloc lookup
peak_times = df1_filtered['t_relative_s'].iloc[peaks_indices].values
peak_values = df1_filtered['q_b'].iloc[peaks_indices].values

# Print peak summary
print("Detected Flow Rate Peaks (within 5800–7600 s):")
for i, (t, q) in enumerate(zip(peak_times, peak_values), start=1):
    print(f"Peak {i}: Time = {t:.2f} s, Flow Rate = {q:.3f} m³/min")


In [None]:
# --- Source Radius Comparison: f₀ vs. M₀ approach ---
#
# Scatter of the two source-radius estimates stored in the export file,
# coloured by spectral SNR, with a 1:1 reference line.

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os

# === Load waveform parameter data ===

waveform_file = os.path.join(folder_processed, "waveform_parameters_export.csv")
waveform_df = pd.read_csv(waveform_file)

# ====================================

# Rows are kept only when both radii and the SNR are strictly positive
is_valid = (
    (waveform_df["source_radius"] > 0)
    & (waveform_df["source_radius_alt"] > 0)
    & (waveform_df["snr_spec"] > 0)
)
plot_df = waveform_df[is_valid]

# Radii are scaled by 1000 for the mm axes
x_vals_mm = plot_df["source_radius"] * 1000
y_vals_mm = plot_df["source_radius_alt"] * 1000

# Comparison scatter plot
marker_style = dict(cmap="rainbow", alpha=0.5, edgecolor="k", s=40)
plt.figure(figsize=(5, 4))
scatter = plt.scatter(x_vals_mm, y_vals_mm, c=plot_df["snr_spec"], **marker_style)

# 1:1 reference line from the origin to the largest plotted radius
max_val_mm = max(x_vals_mm.max(), y_vals_mm.max())
diagonal = [0, max_val_mm]
plt.plot(diagonal, diagonal, "r--", linewidth=1.5, label="1:1 Line")

# Axis labels and grid
plt.xlabel(r"Source Radius (from f$_0$) [mm]", fontsize=12)
plt.ylabel(r"Source Radius (from M$_0$) [mm]", fontsize=12)
plt.grid(True, linestyle="--", alpha=0.4)

# Colour scale for the SNR values
cbar = plt.colorbar(scatter)
cbar.set_label("Signal-Noise Ratio", fontsize=12)

plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# --- Seismic Moment vs. Corner Frequency & Comparison to other studies ---

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from mpl_toolkits.axes_grid1 import make_axes_locatable


# === Load Primary Dataset ===

required_columns = ["mean_M0", "fitted_f0"]

compiled_df = pd.read_csv(os.path.join(folder_processed, "waveform_parameters_export.csv"))
# Report exactly which columns are absent, not the full required list
missing_columns = [col for col in required_columns if col not in compiled_df.columns]
if missing_columns:
    raise ValueError(f"Missing required columns in compiled_df: {missing_columns}")

# Load Reference Datasets 
data2 = pd.read_csv(os.path.join(folder_mecha, "goodfellow young 2014.csv"))
data3 = pd.read_csv(os.path.join(folder_mecha, "mclaskey 2014.csv"))
data4 = pd.read_csv(os.path.join(folder_mecha, "gibowicz 1991.csv"))
data5 = pd.read_csv(os.path.join(folder_mecha, "collins 2000.csv"))
data6 = pd.read_csv(os.path.join(folder_mecha, "imanishi 2006.csv"))

# ============================

# Check every reference dataset for the required columns.
# The loop variable is deliberately NOT called `d`: that name holds the sample
# diameter set in the experiment-properties cell, and rebinding it here would
# leave a DataFrame in `d` for any cell executed afterwards.
for ref_df, name in zip([data2, data3, data4, data5, data6],
                        ["goodfellow young 2014", "mclaskey 2014", "gibowicz 1991", "collins 2000", "imanishi 2006"]):
    missing_columns = [col for col in required_columns if col not in ref_df.columns]
    if missing_columns:
        raise ValueError(f"Missing required columns in {name}: {missing_columns}")

# Define Constants and Conversions 
def frequency_to_radius(f0, kappa, v_s):
    """Convert a corner frequency to a source radius.

    Returns kappa * v_s / (2 * pi * f0). Only arithmetic is used, so f0 may be
    a scalar or a NumPy array.
    """
    numerator = kappa * v_s
    denominator = 2 * np.pi * f0
    return numerator / denominator

def radius_to_frequency(r, kappa, v_s):
    """Convert a source radius to a corner frequency.

    Returns kappa * v_s / (2 * pi * r), the inverse of the frequency-to-radius
    relation. Only arithmetic is used, so r may be a scalar or a NumPy array.
    """
    numerator = kappa * v_s
    denominator = 2 * np.pi * r
    return numerator / denominator

# Plot Setup 
fig, ax = plt.subplots(figsize=(7, 6))

# This study: smaller, more transparent markers than the reference datasets
ax.scatter(
    compiled_df["mean_M0"], compiled_df["fitted_f0"],
    label='This study', marker='o', color='blue', alpha=0.5, edgecolor='k', s=25,
)

# Reference datasets as (frame, legend label, marker, colour), drawn in this order
reference_series = [
    (data2, 'Goodfellow & Young, 2014', 's', 'red'),
    (data3, 'McLaskey et al., 2014', '^', 'green'),
    (data4, 'Gibowicz et al., 1991', 'D', 'purple'),
    (data5, 'Collins & Young, 2000', 'v', 'orange'),
    (data6, 'Imanishi & Ellsworth, 2006', 'p', 'brown'),
]
for ref_frame, ref_label, ref_marker, ref_color in reference_series:
    ax.scatter(
        ref_frame["mean_M0"], ref_frame["fitted_f0"],
        label=ref_label, marker=ref_marker, color=ref_color,
        alpha=0.8, edgecolor='k', s=50,
    )


# Secondary Y-axis: Source Radius.
# The axis needs a forward and an inverse mapping of one argument each, so
# kappa and v_s are bound here.
def _f0_to_radius(f0):
    return frequency_to_radius(f0, kappa, v_s)


def _radius_to_f0(r):
    return radius_to_frequency(r, kappa, v_s)


secax_y = ax.secondary_yaxis("right", functions=(_f0_to_radius, _radius_to_f0))
secax_y.set_ylabel("Source Radius (m)", fontsize=12)
# Constant Stress Drop Lines 
def M0_from_stress_drop(stress_drop, f_c, kappa, v_s):
    """Seismic moment corresponding to a stress drop at corner frequency f_c.

    Returns (16/7) * stress_drop * (kappa * v_s / (2 * pi))**3 / f_c**3.
    Only arithmetic is used, so f_c may be a scalar or a NumPy array.
    """
    radius_coeff = kappa * v_s / (2.0 * np.pi)
    scaled_drop = (16.0 / 7.0) * stress_drop * radius_coeff ** 3
    return scaled_drop / (f_c ** 3)

stress_drop_values = [1e5, 1e6, 1e7, 1e8]  # in Pa
f_c_range = np.logspace(1, 6, 100)  # 100 log-spaced frequencies from 1e1 to 1e6

# One dashed grey line per stress drop, excluded from the legend
for sd in stress_drop_values:
    M0_vals = M0_from_stress_drop(sd, f_c_range, kappa, v_s)
    ax.plot(M0_vals, f_c_range, label='_nolegend_', color='grey', linestyle='--')

# Final Plot Formatting: log-log axes, labels, title, grid and legend
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Seismic Moment (Nm)", fontsize=12)
ax.set_ylabel("Corner Frequency (Hz)", fontsize=12)
ax.set_title("Comparison to Other Studies", fontsize=14)
ax.grid(True, linestyle="--", alpha=0.5)
ax.legend(fontsize=10)

plt.tight_layout()
plt.show()


In [None]:
# --- Source Radius and Pressure Over Time (Colored by SNR) ---
#
# Scatter of source radius (mm) against occurrence time, coloured by spectral
# SNR, with the upstream pressure on a secondary y-axis.

import pandas as pd
import matplotlib.pyplot as plt
import os

# === Define Input Files ===

waveform_params_file_name = 'waveform_parameters_export.csv' 
mecha_file_name = 'PL02_mechanical_filtered.csv'

# =========================

# Construct File Paths 
waveform_params_file = os.path.join(folder_processed, waveform_params_file_name)
mecha_file = os.path.join(folder_mecha, mecha_file_name)

# Load Data 
waveform_df = pd.read_csv(waveform_params_file)
mecha_df = pd.read_csv(mecha_file)

# Check Required Columns (the message lists only the columns that are absent)
required_columns = ["OccTime", "waveform_num", "source_radius", "snr_spec"]
missing_columns = [col for col in required_columns if col not in waveform_df.columns]
if missing_columns:
    raise ValueError(f"Missing required columns in waveform data: {missing_columns}")

#  Sort by Occurrence Time 
waveform_df = waveform_df.sort_values(by="OccTime")

#  Time Filter 
start_time = 5825  
end_time = 7600    

filtered_waveform_df = waveform_df[(waveform_df["OccTime"] >= start_time) & (waveform_df["OccTime"] <= end_time)]
filtered_mecha_df = mecha_df[(mecha_df["t_relative_s"] >= start_time) & (mecha_df["t_relative_s"] <= end_time)]

if filtered_waveform_df.empty or filtered_mecha_df.empty:
    raise ValueError("No data available in the selected time range.")

#  Plot Setup 
fig, ax1 = plt.subplots(figsize=(6, 4))

#  Plot Source Radius (colored by SNR) 
sc = ax1.scatter(
    filtered_waveform_df["OccTime"],
    filtered_waveform_df["source_radius"] * 1000,  # Convert to mm
    c=filtered_waveform_df["snr_spec"],
    cmap='rainbow',
    s=12,
    alpha=0.5,
    edgecolors='black',
    linewidths=0.3,
    label="Source Radius"
)

ax1.set_xlabel("Time (s)", fontsize=12)
ax1.set_ylabel("Source Radius (mm)", fontsize=12, color="black")
ax1.tick_params(axis="y", labelcolor="black")
ax1.grid(True)

#  Plot Pressure (secondary y-axis) 
# p_up is divided by 1e3 so the values match the "Pressure (MPa)" axis label,
# the same conversion the other pressure plots in this notebook apply to this
# column of the same file.
ax2 = ax1.twinx()
ax2.plot(filtered_mecha_df["t_relative_s"], filtered_mecha_df["p_up"] / 1e3, 
         label="Pressure (p_up)", color="grey", linestyle="--")
ax2.set_ylabel("Pressure (MPa)", fontsize=12, color="grey")
ax2.tick_params(axis="y", labelcolor="grey")

#  Add Colorbar 
# The colorbar axes are placed in figure coordinates, to the right of the
# plot area (left edge at x = 1.05).
cbar_ax = fig.add_axes([1.05, 0.15, 0.02, 0.7])
cbar = plt.colorbar(sc, cax=cbar_ax)
cbar.set_label("Signal-Noise Ratio")

ax1.set_title("Source Radius and Pressure Over Time", fontsize=14)
ax2.legend(loc="upper left")

plt.subplots_adjust(right=0.9)
plt.show()


In [None]:
# --- Binned Source Area Statistics (3-second intervals) ---
#
# Groups the AE events into fixed-width time bins and summarises the source
# area within each bin.

import pandas as pd
import matplotlib.pyplot as plt
import os

# === File Path === 

waveform_params_file_name = 'waveform_parameters_export.csv' 
waveform_params_file = os.path.join(folder_processed, waveform_params_file_name)

# =================

# Load Data 
waveform_df = pd.read_csv(waveform_params_file)

# Check Required Columns 
required_columns = ["OccTime", "waveform_num", "source_area"]
if any(col not in waveform_df.columns for col in required_columns):
    raise ValueError(f"Missing required columns in waveform data: {required_columns}")

# Time settings: start_time/end_time are the plotted x-range,
# start_bin_time is the earliest event time that is binned
start_time = 5825  
end_time = 7600  
start_bin_time = 5833

# Sort, then keep events between start_bin_time and end_time (inclusive)
waveform_df = waveform_df.sort_values(by="OccTime")
in_window = waveform_df["OccTime"].between(start_bin_time, end_time)
filtered_waveform_df = waveform_df[in_window].copy()

# Bin Data: each event is labelled with the lower edge of its bin,
# i.e. OccTime floored to a multiple of bin_size
bin_size = 3  # seconds
filtered_waveform_df["time_bin"] = (filtered_waveform_df["OccTime"] // bin_size) * bin_size

summary_stats = ["min", "max", "mean", "sum", "std", "count"]
area_by_bin = filtered_waveform_df.groupby("time_bin")["source_area"]
binned_data = area_by_bin.agg(summary_stats).reset_index()

# Convert from m² to mm² 
for stat in ("min", "max", "mean", "sum"):
    binned_data[f"{stat}_mm2"] = binned_data[stat] * 1e6

# Plotting 
fig, ax = plt.subplots(figsize=(10, 4))

# Every series is drawn at the centre of its time bin
bin_centers = binned_data["time_bin"] + (bin_size / 2)

# Plot summed source area (bar chart)
ax.bar(
    bin_centers,
    binned_data["sum_mm2"], 
    width=bin_size * 0.8,
    color="gray",
    alpha=0.6,
    label="Summed Source Area"
)

# Plot min-max range as shaded area
ax.fill_between(
    bin_centers,
    binned_data["min_mm2"],
    binned_data["max_mm2"],
    color="blue",
    alpha=0.4,
    label="Min–Max Range"
)

# Plot mean source area
ax.plot(
    bin_centers,
    binned_data["mean_mm2"],
    color="orange",
    linestyle="-",
    markersize=3,
    label="Mean Source Area"
)

# Final Formatting 
ax.set_ylabel("Source Area (mm²)", fontsize=12)
ax.set_xlabel("Time [s]", fontsize=12)
ax.set_yscale("log")
ax.set_xlim(start_time, end_time)
ax.grid(True, linestyle="--", alpha=0.5)

# Horizontal threshold line at the total fault area (mm²).
# fault_area_mm comes from the experiment-properties cell; using it instead of
# a typed-in 981.75 keeps the line correct when the sample diameter or the
# cut angle is changed there.
ax.axhline(y=fault_area_mm, color='red', linestyle='--', linewidth=1)

# Move x-axis to top
ax.xaxis.set_ticks_position('top')
ax.xaxis.set_label_position('top')

# Legend
ax.legend(loc="upper left")

plt.tight_layout()
plt.show()


In [None]:
# --- Cumulative Source Radius, Moment, and Permeability Plot ---
#
# Plots, against time, the running sum of source radius (left axis), the
# permeability record (right axis) and the running sum of seismic moment
# (second right axis).

import pandas as pd
import matplotlib.pyplot as plt
import os
import warnings
import matplotlib.ticker as mticker

# === File Paths ===

occtime_file_name = 'OccTime_SNR_PAmp_data.csv'
waveform_params_file_name = 'waveform_parameters_export.csv'
flow_rate_file_name = 'PL02_mechanical_filtered.csv'
perm_file_name = 'pl02_perm.csv'

# ==================

occtime_file = os.path.join(folder_mecha, occtime_file_name)
waveform_params_file = os.path.join(folder_processed, waveform_params_file_name)
flow_rate_file = os.path.join(folder_mecha, flow_rate_file_name)
perm_file = os.path.join(folder_mecha, perm_file_name)

# Load Data 
waveform_df = pd.read_csv(waveform_params_file)
flow_rate_df = pd.read_csv(flow_rate_file)
perm_df = pd.read_csv(perm_file)

# Verify Columns (the message lists only the columns that are absent)
required_columns = ["OccTime", "waveform_num", "source_radius", "source_area_ratio"]
missing_columns = [col for col in required_columns if col not in waveform_df.columns]
if missing_columns:
    raise ValueError(f"Missing required columns in waveform data: {missing_columns}")

# Cumulative sums below rely on chronological order
waveform_df = waveform_df.sort_values(by="OccTime")

# Filter by Time 
start_time = 5825  
end_time = 7600
filtered_waveform_df = waveform_df[
    (waveform_df["OccTime"] >= start_time) & 
    (waveform_df["OccTime"] <= end_time)
].copy()

# Compute Cumulative Values 
filtered_waveform_df["cum_source_radius"] = filtered_waveform_df["source_radius"].cumsum()

if "mean_M0" not in filtered_waveform_df.columns:
    # The substitute below is not a seismic moment, yet it is plotted on the
    # "Cumulative Moment (Nm)" axis. Make that visible instead of silent.
    warnings.warn(
        "'mean_M0' column not found; substituting source_area_ratio * 0.5. "
        "The 'Cumulative Moment (Nm)' curve will not be a physical seismic moment."
    )
    filtered_waveform_df["mean_M0"] = filtered_waveform_df["source_area_ratio"] * 0.5  # fallback
filtered_waveform_df["cum_mean_M0"] = filtered_waveform_df["mean_M0"].cumsum()

# Create Plot
fig, ax = plt.subplots(figsize=(7, 4))

# Plot 1: Cumulative Source Radius (left y-axis) 
line_source, = ax.plot(
    filtered_waveform_df["OccTime"], 
    filtered_waveform_df["cum_source_radius"],
    label="Cumulative Slip Patch Radius (m)",
    linestyle="-", color="black"
)
ax.set_ylabel("Cumulative Slip Patch Radius (m)", fontsize=14, color="black")
ax.tick_params(axis="y", labelcolor="black", labelsize=14)

# Plot 2: Permeability (right y-axis) 
ax_perm = ax.twinx()
line_perm, = ax_perm.plot(
    perm_df["time"], perm_df["perm"],
    label="Permeability (m$^2$)",
    linestyle="-", color='#d95f02'
)
ax_perm.set_ylabel("Permeability (m$^2$)", fontsize=14, color='#d95f02')
ax_perm.tick_params(axis="y", labelcolor='#d95f02', labelsize=14)
# Fix the scientific-notation exponent of the permeability axis at 1e-18
ax_perm.ticklabel_format(axis='y', style='sci', scilimits=(-18, -18))

# Plot 3: Cumulative Moment (third y-axis), offset outward from the permeability axis
ax_mean = ax.twinx()
ax_mean.spines["right"].set_position(("outward", 60))
line_mean, = ax_mean.plot(
    filtered_waveform_df["OccTime"], 
    filtered_waveform_df["cum_mean_M0"],
    label="Cumulative Moment (Nm)",
    linestyle="-", color='#1565C0'
)
ax_mean.set_ylabel("Cumulative Moment (Nm)", fontsize=14, color='#1565C0')
ax_mean.tick_params(axis="y", labelcolor='#1565C0', labelsize=14)

# Common X-axis 
ax.set_xlabel("Time (s)", fontsize=14)
ax.tick_params(axis="x", labelcolor='black', labelsize=14)
ax.grid(True, linestyle="--", alpha=0.5)

# Combine Legends: one legend on the main axes for the lines of all three axes
lines = [line_source, line_perm, line_mean]
labels = [line.get_label() for line in lines]
ax.legend(lines, labels, loc="upper left")

plt.tight_layout()
plt.show()


In [None]:
# --- pressure step splitting ---
#
# Splits the exported waveform table into one CSV per pressure step, using the
# OccTime boundaries below.

import pandas as pd
import os

# === Define input and output path ===

source_file = os.path.join(folder_processed, 'waveform_parameters_export.csv')
df = pd.read_csv(source_file)

# ====================================

# OccTime boundaries
boundaries = [5833, 6133, 6433, 6733, 7033, 7333, 7600]

# Index of the final interval, derived from the list so that adding or
# removing a boundary does not require touching the loop
last_step_index = len(boundaries) - 2

# Loop over step intervals
for i in range(len(boundaries) - 1):
    start = boundaries[i]
    end = boundaries[i + 1]

    # Intervals are half-open [start, end) so an event on a boundary belongs to
    # exactly one step; only the final interval also includes its upper bound.
    at_or_after_start = df['OccTime'] >= start
    if i == last_step_index:
        before_end = df['OccTime'] <= end
    else:
        before_end = df['OccTime'] < end
    df_step = df[at_or_after_start & before_end]

    # Save to file
    output_file = os.path.join(folder_processed, f'waveform_parameters_export_step_{i+1}.csv')
    df_step.to_csv(output_file, index=False)
    print(f"Saved step {i+1} with OccTime from {start} to {end}")


In [None]:
# --- Inter-event Time Analysis with Gamma Fit ---
#
# Fits a gamma distribution (location fixed at 0) to the inter-event times of
# the first 3 seconds of one step file and plots the fit over the histogram.

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma, kstest

# === Load Data ===

waveform_params_file_name = 'waveform_parameters_export_step_5.csv'
waveform_params_file = os.path.join(folder_processed, waveform_params_file_name)

# =================

data = pd.read_csv(waveform_params_file)
data = data.sort_values('OccTime')
data['OccTime_s'] = data['OccTime']  # alias for clarity

# Time Filter: Analyze only early segment
start_time = data['OccTime_s'].min()
end_time = start_time + 3  # x seconds window
data = data[(data['OccTime_s'] >= start_time) & (data['OccTime_s'] <= end_time)].copy()

# Compute Inter-event Times (τ)
data['inter_event_time'] = data['OccTime_s'].diff()
tau = data['inter_event_time'].dropna()
tau = tau[tau > 0].values  # remove invalid values (zero gaps from simultaneous events)

# Summary Stats
print("Start time:", start_time)
print("End time:", end_time)
print("Number of AE events analyzed:", len(data))
print("Number of inter-event times:", len(tau))

# Without this guard an empty or single-value sample only fails later, inside
# gamma.fit or tau.min(), with a message unrelated to the real cause.
if len(tau) < 2:
    raise ValueError(
        f"Not enough positive inter-event times to fit a gamma distribution "
        f"(found {len(tau)}) in {waveform_params_file_name}."
    )

# Fit Gamma Distribution (floc=0 fixes the location parameter at zero)
shape, loc, scale = gamma.fit(tau, floc=0)
print("Gamma shape (γ):", shape)
print("Gamma scale (β):", scale)

# Plot: PDF Fit
plt.figure(figsize=(6, 4))
plt.hist(tau, bins=100, density=True, alpha=0.6, color='gray', edgecolor='k', label="Observed Data")
x = np.linspace(tau.min(), tau.max(), 1500)
pdf_fitted = gamma.pdf(x, shape, loc=loc, scale=scale)
plt.plot(x, pdf_fitted, lw=2, linestyle='--', color='red', label=f"Gamma Fit\n(γ={shape:.2f})")
plt.xlabel("Inter-event Time (s)")
plt.ylabel("Probability Density")
plt.xlim(0, 0.05)
plt.ylim(0, 400)
plt.title("Gamma Distribution Fit to AE Inter-event Times")
plt.legend()
plt.tight_layout()
plt.show()



In [None]:
# --- Multiple Gamma Plot across AE Steps ---
#
# For each of the six step files, fits a gamma distribution to the inter-event
# times of the first 3 seconds and overlays the fitted PDFs in one figure.

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma

# === Define step file names ===

step_files = ["waveform_parameters_export_step_%d.csv" % step_no for step_no in range(1, 7)]
colors = ['r', 'g', 'b', 'purple', 'orange', 'cyan']  # Colors for each step

# ==============================

# All fitted curves go into this single figure
plt.figure(figsize=(6, 4))

# Common grid on which every fitted PDF is evaluated
x = np.linspace(0, 0.1, 1000)

for i, file_name in enumerate(step_files):
    # Read the step file and order the events chronologically
    data = pd.read_csv(os.path.join(folder_processed, file_name))
    data = data.sort_values('OccTime')
    data['OccTime_s'] = data['OccTime']  # Add alias for clarity

    # Restrict to the first 3 seconds after the earliest event
    start_time = data['OccTime_s'].min()
    end_time = start_time + 3
    data = data[data['OccTime_s'].between(start_time, end_time)]

    # Inter-event times (Δt); the leading NaN and non-positive gaps are dropped
    data['inter_event_time'] = data['OccTime_s'].diff()
    tau = data['inter_event_time'].dropna().values
    tau = tau[tau > 0]

    # Nothing to fit for this step
    if tau.size == 0:
        print(f"{file_name}: No valid inter-event data.")
        continue

    # Gamma fit with the location parameter fixed at 0
    shape, loc, scale = gamma.fit(tau, floc=0)

    # Draw the fitted PDF, cycling through the colour list
    pdf_fitted = gamma.pdf(x, shape, loc=loc, scale=scale)
    step_color = colors[i % len(colors)]
    plt.plot(x, pdf_fitted, linestyle='--', lw=1, color=step_color,
             label=f"Step {i+1} (γ={shape:.3f})")

    # Per-step summary
    print(f"{file_name} → Events: {len(data)}, Mean Δt: {np.mean(tau) * 1000:.2f} ms")

# Final plot formatting
plt.xlabel("Inter-event time (seconds)")
plt.ylabel("Probability Density")
plt.title("Gamma Fits Across AE Steps")
plt.xlim(0, 0.02)
plt.ylim(0, 400)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# --- bootstaping and plotting gamma vs permeability changes per step ---

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gamma
from sklearn.model_selection import train_test_split

# === Step Setup ===

step_files = [f"waveform_parameters_export_step_{i}.csv" for i in range(1, 7)]
n_trials = 100 # update if needed
time_range = 3 #set "3" to consider only events during growth time, set "300" to consider all events 
random_seed = 0 # base seed of the random subsampling; fixed so reruns give the same result
step_results = []

# ===================

for i, file_name in enumerate(step_files):
    file_path = os.path.join(folder_processed, file_name)
    data = pd.read_csv(file_path)

    data = data.sort_values('OccTime')
    data['OccTime_s'] = data['OccTime']

    # Keep only events within `time_range` of the first event (edges inclusive).
    # .copy() detaches the filtered frame so the column assignment below does
    # not trigger pandas' SettingWithCopyWarning.
    start_time = data['OccTime_s'].min()
    end_time = start_time + time_range
    data = data[(data['OccTime_s'] >= start_time) & (data['OccTime_s'] <= end_time)].copy()

    # Positive inter-event times between consecutive events
    data['inter_event_time'] = data['OccTime_s'].diff()
    tau = data['inter_event_time'].dropna().values
    tau = tau[tau > 0]

    # Too few intervals for a meaningful fit: skip the step, but say so
    if len(tau) < 10:
        print(f"{file_name}: fewer than 10 inter-event times, step skipped.")
        continue

    # Refit the gamma distribution (loc forced to 0) on random 80 % subsamples
    # of the inter-event times; each trial gets its own reproducible seed.
    shapes = []
    for trial in range(n_trials):
        tau_train, _ = train_test_split(tau, test_size=0.2,
                                        random_state=random_seed + trial)
        shape, loc, scale = gamma.fit(tau_train, floc=0)
        shapes.append(shape)

    step_results.append({
        "step": i + 1,
        "gamma_shape_mean": np.mean(shapes),
        "gamma_shape_std": np.std(shapes),
        "gamma_shape_min": np.min(shapes),
        "gamma_shape_max": np.max(shapes)
    })

# Plot Gamma vs Permeability Change 
permeability = np.array([1.98E-19, 2.55E-19, 3.11E-19, 3.29E-19, 1.02E-18, 1.20E-18])
delta_k = np.diff(permeability)  # For steps 2–6: delta_k[j] belongs to step j + 2

# Look results up by step number rather than by list position: a step skipped
# above would otherwise shift every later γ onto the wrong Δk (or raise IndexError).
results_by_step = {res["step"]: res for res in step_results}
plot_steps = [s for s in range(2, len(permeability) + 1) if s in results_by_step]
delta_k_plot = delta_k[np.array([s - 2 for s in plot_steps], dtype=int)]

gamma_mean = np.array([results_by_step[s]["gamma_shape_mean"] for s in plot_steps])
gamma_min = np.array([results_by_step[s]["gamma_shape_min"] for s in plot_steps])
gamma_max = np.array([results_by_step[s]["gamma_shape_max"] for s in plot_steps])

# Asymmetric error bars spanning the min–max range of the fitted shapes
gamma_err_lower = gamma_mean - gamma_min
gamma_err_upper = gamma_max - gamma_mean

# Assign colors to each pressure step (indexed by step so a given step keeps
# its color even when another step is missing)
colors = ['red', 'orange', 'green', 'blue', 'purple']
step_labels = [f"Step {s}" for s in plot_steps]

plt.figure(figsize=(6, 4))

# Plot each point with its own color and label
for j, s in enumerate(plot_steps):
    plt.errorbar(delta_k_plot[j], gamma_mean[j],
                 yerr=[[gamma_err_lower[j]], [gamma_err_upper[j]]],
                 fmt='o', color=colors[(s - 2) % len(colors)], capsize=5,
                 label=step_labels[j])

plt.xscale('log')
plt.xlabel("Δk [m²] (Absolute Permeability Change, log scale)")
plt.ylabel("Gamma Shape Parameter (γ)")
plt.grid(True, linestyle='--', alpha=0.6)
plt.legend(title="Pressure Step")
plt.tight_layout()
plt.show()


In [None]:
# --- plotting seismic moment vs permeability changes ---

import matplotlib.pyplot as plt
import numpy as np

# === Original data ===

permeability = np.array([1.98E-19, 2.55E-19, 3.11E-19, 3.29E-19, 1.02E-18, 1.20E-18])
cumulative_m0_full = np.array([125.969986, 177.8429989, 142.6452355, 85.66775314, 296.7030765, 137.6861371])
cumulative_m0_growth = np.array([58.43467805, 110.495434, 104.7007122, 46.38268656, 195.5031405, 97.91110084])

# =====================

# Δk: permeability difference between consecutive steps -> 5 values (steps 2–6)
delta_k = np.diff(permeability)

# Drop step 1 so each cumulative M0 value lines up with its Δk
m0_full_delta = cumulative_m0_full[1:]
m0_growth_delta = cumulative_m0_growth[1:]

# One scatter series per time window; marker size and edge are shared
plt.figure(figsize=(5, 4))
for m0_values, marker_kwargs in (
    (m0_full_delta, {'color': 'white', 'label': 'Full time'}),
    (m0_growth_delta, {'color': 'gray', 'marker': '^', 'label': 'Growth time'}),
):
    plt.scatter(delta_k, m0_values, edgecolor='black', s=80, **marker_kwargs)

# Δk spans about an order of magnitude, hence the logarithmic x-axis
plt.xscale('log')

# Axis labels (title intentionally left disabled)
plt.xlabel('Permeability Change Δk [m²]', fontsize=12)
plt.ylabel('Cumulative $M_0$ [Nm]', fontsize=12)
#plt.title('Cumulative $M_0$ vs Δk (Permeability Change)', fontsize=14)

# Grid, legend and layout
plt.grid(True, which='both', linestyle='--', alpha=0.6)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
#---plotting source radius vs permeability changes---

import matplotlib.pyplot as plt
import numpy as np

# === Original data ===

permeability = np.array([1.98E-19, 2.55E-19, 3.11E-19, 3.29E-19, 1.02E-18, 1.20E-18])
cumulative_r0_full = np.array([1.56515991, 1.676792692, 1.14713536, 0.75052032, 3.821860258, 1.515661631])
cumulative_r0_growth = np.array([0.798898016, 0.93133956, 0.824383528, 0.388764009, 2.536861027, 1.098200382])

# =====================

# Compute Δk = change in permeability between steps
delta_k = np.diff(permeability)  # 5 values (step 2–6)

# Corresponding cumulative r0 (source radius) values for steps 2–6
r0_full_delta = cumulative_r0_full[1:]
r0_growth_delta = cumulative_r0_growth[1:]

# Scatter plot
plt.figure(figsize=(5, 4))
plt.scatter(delta_k, r0_full_delta, color='white', edgecolor='black', s=80, label='Full time')
plt.scatter(delta_k, r0_growth_delta, color='gray', marker='^', edgecolor='black', s=80, label='Growth time')

# Log scale for x-axis
plt.xscale('log')

# Labels and title
plt.xlabel('Permeability Change Δk [m²]', fontsize=12)
# Source radius is a length, so the unit is metres (not Nm, the unit of the seismic moment)
plt.ylabel('Cumulative $r_0$ [m]', fontsize=12)
#plt.title('Cumulative $r_0$ vs Δk (Permeability Change)', fontsize=14)
plt.grid(True, which='both', linestyle='--', alpha=0.6)
plt.legend()
plt.tight_layout()
plt.show()


In [None]:
# --- plotting moment, source radius, and permeability changes ---

import matplotlib.pyplot as plt
import numpy as np

# === Data input ===

# Cumulative source radius [m]
r0_full = np.array([1.56515991, 1.676792692, 1.14713536, 0.75052032, 3.821860258, 1.515661631])
r0_growth = np.array([0.798898016, 0.93133956, 0.824383528, 0.388764009, 2.536861027, 1.098200382])

# Cumulative seismic moment [Nm]
m0_full = np.array([125.969986, 177.8429989, 142.6452355, 85.66775314, 296.7030765, 137.6861371])
m0_growth = np.array([58.43467805, 110.495434, 104.7007122, 46.38268656, 195.5031405, 97.91110084])

# Permeability values [m2]
perm = np.array([1.98E-19, 2.55E-19, 3.11E-19, 3.29E-19, 1.02E-18, 1.20E-18])
delta_k = np.diff(perm)  # 5 values

# ====================================

# Data for steps 2–6 (step 1 has no Δk)
r0_full_delta = r0_full[1:]
r0_growth_delta = r0_growth[1:]
m0_full_delta = m0_full[1:]
m0_growth_delta = m0_growth[1:]

# Combined plot
plt.figure(figsize=(5, 4))

# Both series are colored by the same Δk values and share a single colorbar,
# so they must use the same colormap and color limits; otherwise the colorbar
# would only describe the series it was created from.
# Full time: circles
sc1 = plt.scatter(r0_full_delta, m0_full_delta, c=delta_k, cmap='rainbow',
                  vmin=delta_k.min(), vmax=delta_k.max(), marker='o',
                  s=80, edgecolor='black', label='Full time')

# Growth time: triangles
sc2 = plt.scatter(r0_growth_delta, m0_growth_delta, c=delta_k, cmap='rainbow',
                  vmin=delta_k.min(), vmax=delta_k.max(), marker='^',
                  s=80, edgecolor='black', label='Growth time')

# Colorbar (shared; valid for both series because their color mapping is identical)
cbar = plt.colorbar(sc1)
cbar.set_label('Δk [m²]', fontsize=12)

# Labels and formatting
plt.xlabel('Cumulative $r_0$ [m]', fontsize=12)
plt.ylabel('Cumulative $M_0$ [Nm]', fontsize=12)
#plt.title('$M_0$ vs $r_0$ with Color = Δk (Permeability Change)', fontsize=14)
plt.grid(True, linestyle='--', alpha=0.5)
plt.legend()
plt.tight_layout()
plt.show()
