# The Fundamentals of Generative AI and Machine Learning That Pertain to Geologists and Engineers

### John T. Foster  
Hildebrand Department of Petroleum Engineering  
The University of Texas at Austin  
<a href="mailto:john.foster@utexas.edu">john.foster@utexas.edu</a>

# Machine Learning 101

$$
{\vec{y}} = f(\vec{X}_1, \vec{X}_2, \ldots, \vec{X}_n)
$$

$$
\min \sum_i \left(y_i - \hat{y}_i \right)^2
$$

|||
|:-|:-|
|${\vec{y}}$| response features (outputs)|
|$\vec{X}_1, \vec{X}_2, \ldots, \vec{X}_n$| predictor features (inputs)|
|$f$| estimator (model)|
|$\hat{y}$| data |

# Example: Polynomial regression

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display, Math
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Use the widget backend for Matplotlib
%matplotlib widget

# Keep Matplotlib non-interactive so figures appear only when displayed explicitly
plt.ioff()

# Load the dataset: column 0 becomes an (n, 1) predictor array, column 1 the
# response vector (plotted later as depth and permeability respectively)
data = pd.read_csv('data.csv')
X = data.iloc[:, 0].values[:, np.newaxis]
y = data.iloc[:, 1].values

# Hand-picked partition used for split realization 1
# NOTE(review): the indices run 0-11, so this presumes at least 12 rows — confirm
FIXED_TEST_INDICES = [1, 5, 8, 10]
FIXED_TRAIN_INDICES = [0, 2, 3, 4, 6, 7, 9, 11]

# Output areas: one receives the figure, the other the rendered LaTeX equation
output = widgets.Output()
equation_output = widgets.Output()

# Dropdown for the order of the fitted polynomial
poly_order = widgets.Dropdown(
    options=[1, 3, 5, 7, 9],
    value=1,
    description='Polynomial Order:',
    style={'description_width': 'initial'},
)

# Dropdown for the train/test split realization
split_realization = widgets.Dropdown(
    options=[1, 2, 3, 4, 5],
    value=1,
    description='Test/Train Split Realization:',
    style={'description_width': 'initial'},
)

# Stack the two dropdowns vertically; the box height follows its content
# and a small padding keeps it off the neighbouring container
selectors = widgets.VBox(children=[poly_order, split_realization])
selectors.layout = widgets.Layout(height='auto', padding='10px')

# Flex container for the equation: vertically centered, pushed to the right
# edge, content-sized height, and scrollable when the equation overflows
equation_container = widgets.Box(
    children=[equation_output],
    layout=widgets.Layout(
        display='flex',
        align_items='center',
        justify_content='flex-end',
        height='auto',
        overflow='auto',
        padding='10px',
    ),
)

def get_train_test_indices(realization):
    """Return the ``(train_indices, test_indices)`` pair for a split realization.

    Realization 1 maps to the hand-picked ``FIXED_TRAIN_INDICES`` and
    ``FIXED_TEST_INDICES`` lists. Any other value splits the row indices of
    ``X`` with ``train_test_split``, holding out four samples and passing the
    realization number as ``random_state``.
    """
    if realization != 1:
        row_ids = np.arange(len(X))
        return tuple(train_test_split(row_ids, test_size=4, random_state=realization))
    return FIXED_TRAIN_INDICES, FIXED_TEST_INDICES

def fit_polynomial(X_train, y_train, X_test, degree):
    """Least-squares fit of a polynomial of order ``degree`` to the training data.

    Returns the 4-tuple ``(poly, coeffs, y_train_pred, y_test_pred)``: the
    callable ``np.poly1d`` model, its coefficients as returned by
    ``np.polyfit``, and the model evaluated on ``X_train`` and ``X_test``
    (each prediction keeps the shape of the array it was evaluated on).
    """
    # polyfit needs 1-D abscissae, so collapse the training inputs first
    coefficients = np.polyfit(X_train.reshape(-1), y_train, degree)
    model = np.poly1d(coefficients)
    predictions = tuple(model(samples) for samples in (X_train, X_test))
    return (model, coefficients, *predictions)


def plot_results(X, y, train_idx, test_idx, poly_order):
    """Draw the fitted polynomial together with train/test residual panels.

    The samples selected by ``train_idx`` are used to fit a polynomial of
    order ``poly_order``; those selected by ``test_idx`` are held out. The
    top panel shows the fitted curve over the range of ``X`` along with both
    sample sets, and the bottom row holds one stem plot of residuals
    (observed minus predicted) per set.

    Returns ``(fig, coeffs)``: the newly created Matplotlib figure and the
    coefficients produced by ``fit_polynomial``.
    """
    # Partition the samples
    X_train, y_train = X[train_idx], y[train_idx]
    X_test, y_test = X[test_idx], y[test_idx]

    # Fit on the training subset and predict on both subsets
    poly, coeffs, y_train_pred, y_test_pred = fit_polynomial(
        X_train, y_train, X_test, poly_order
    )

    depth_label = r'Depth, $D$ (m)'

    # One full-width panel on top, two side-by-side panels underneath
    fig = plt.figure(figsize=(12 * 3 / 4, 8 * 3 / 4))
    grid = plt.GridSpec(2, 2, hspace=0.4, wspace=0.3)

    # Top panel: fitted curve sampled on a dense grid, plus the raw samples
    fit_ax = fig.add_subplot(grid[0, :])
    X_smooth = np.linspace(X.min(), X.max(), 200).reshape(-1, 1)
    fit_ax.plot(X_smooth, poly(X_smooth), 'b-', label=f'Polynomial (order {poly_order})')
    fit_ax.plot(X_train, y_train, 'ko', label='Training data')
    fit_ax.plot(X_test, y_test, 'ro', label='Test data')
    fit_ax.set_xlabel(depth_label)
    fit_ax.set_ylabel(r'Permeability, $\kappa$ (mD)')
    fit_ax.legend()
    fit_ax.grid(True)

    # Bottom row: training residuals on the left, test residuals on the right
    residual_panels = (
        (0, 'Training Set', 'k-', 'ko', X_train, y_train, y_train_pred),
        (1, 'Test Set', 'r-', 'ro', X_test, y_test, y_test_pred),
    )
    for column, title, line_style, marker_style, inputs, observed, predicted in residual_panels:
        residual_ax = fig.add_subplot(grid[1, column])
        residual_ax.stem(
            inputs.flatten(),
            observed - predicted.flatten(),
            linefmt=line_style,
            markerfmt=marker_style,
            basefmt='b-',
        )
        # Same fixed limits on both panels so the two sets compare directly
        residual_ax.set_ylim([-1000, 1000])
        residual_ax.set_xlabel(depth_label)
        residual_ax.set_ylabel(r'Residual Error')
        residual_ax.set_title(title)
        residual_ax.grid(True)

    return fig, coeffs

def format_polynomial(coeffs):
    r"""Format polynomial coefficients as a LaTeX equation for ``\kappa`` in ``D``.

    Parameters
    ----------
    coeffs : sequence of float
        Coefficients ordered from the highest power down to the constant
        term (the ordering produced by ``np.polyfit``).

    Returns
    -------
    str
        A LaTeX string such as ``\kappa = 2.00D^{2} - 3.00D + 1.00``, with
        every coefficient shown to two decimals. Terms whose coefficient
        magnitude is below ``1e-6`` are omitted; if no term remains (all
        coefficients negligible, or ``coeffs`` empty) the right-hand side
        is ``0.00`` rather than being left blank.
    """
    degree = len(coeffs) - 1
    terms = []
    for i, coeff in enumerate(coeffs):
        if abs(coeff) < 1e-6:  # Skip near-zero terms
            continue
        power = degree - i
        if power == 0:
            terms.append(f"{coeff:.2f}")
        elif power == 1:
            terms.append(f"{coeff:.2f}D")
        else:
            # Brace the exponent: without braces LaTeX superscripts only the
            # first digit, so D^10 would render as "D^1" followed by "0"
            terms.append(f"{coeff:.2f}D^{{{power}}}")

    if not terms:
        # Every term was dropped; emit an explicit zero instead of "\kappa = "
        return "\\kappa = 0.00"

    equation = " + ".join(terms).replace(" + -", " - ")  # Clean up the signs
    return f"\\kappa = {equation}"

def update_plot(change=None):
    """Redraw the figure and the LaTeX equation for the current selections.

    Serves both as the observer callback for the two dropdowns and as the
    initial draw.

    Parameters
    ----------
    change : optional
        Change notification supplied by the widget observer. It is not
        inspected; the current values are read from the widgets directly.
    """
    # Remains None if the fit/plot step below does not complete, so the
    # equation step can be skipped instead of failing on an unbound name.
    coeffs = None

    with output:
        # Clear any previous output in the widget
        output.clear_output(wait=True)

        # Close all previous matplotlib figures to prevent duplication
        plt.close("all")

        # Get the current train/test indices and plot the results
        train_idx, test_idx = get_train_test_indices(split_realization.value)
        fig, coeffs = plot_results(X, y, train_idx, test_idx, poly_order.value)

        # Display the plot explicitly
        display(fig)

    with equation_output:
        # NOTE(review): under IPython the Output context manager is understood
        # to show an exception in the widget and suppress it, so execution can
        # reach this point without a fit — confirm against the ipywidgets docs.
        if coeffs is None:
            # Drop the stale equation immediately; nothing new will replace it
            equation_output.clear_output()
            return

        # Clear any previous output in the equation widget
        equation_output.clear_output(wait=True)

        # Format the polynomial equation and display it
        display(Math(format_polynomial(coeffs)))

# Horizontal layout: dropdowns on the left, right-aligned equation on the right
layout = widgets.HBox(children=[selectors, equation_container])

# Redraw whenever either dropdown's value changes
split_realization.observe(update_plot, names='value')
poly_order.observe(update_plot, names='value')

# Show the controls first, then the figure output area beneath them
display(layout, output)

# Initial draw; update_plot also enters the output widgets itself
with output:
    update_plot()

HBox(children=(VBox(children=(Dropdown(description='Polynomial Order:', options=(1, 3, 5, 7, 9), style=Descrip…

Output()

# Neural Networks

<img src="aapg_nn.png" alt="Example Image" width="500"/>

$$
\begin{align}
\vec{Y} &= \sigma(\vec{Z} \cdot \vec{w}_Z + b_Z) \\
        &= \sigma(\sigma(\vec{X} \cdot \vec{w}_X + b_X) \cdot \vec{w}_Z + b_Z)
\end{align}
$$

|||
|-|-|
|$\sigma$|activation function, e.g. $\tanh$, ReLU|
|$\vec{w}_Z, \vec{w}_X$| weights |
|$b_Z, b_X$| biases |

# Neural Networks (cont'd)

## Architectures

* Feed forward
* Recurrent NN
* LSTM
* Convolution
* Autoencoder
* Transformer

## Parameters

* Weights and Biases

## Hyperparameters

* Number of layers
* Number of neurons
* Activation functions
* Loss function

# Example: Seismic Downscaling

<img src="aapg_downscaling.png" alt="Example Image" width="600"/>

# Transformers / Large Language Models (e.g. ChatGPT)

<br>

<img src="aapg_transformers.png" alt="Example Image" width="600"/>