# **Key Inputs**

In [None]:
##NOTE - INPUT IS REQUIRED IN THIS SECTION
#=============================================================================
#The input data CSV files must be saved to your Google Drive (or, for a local
#run, to a path reachable from the working directory), and the file path and
#names below updated as needed, in order for this program to run.

##INPUT DATA
#============
#Root directory (leave blank for local PC run). It is concatenated directly
#with the file names below, so a non-empty value must end with '/':
file_path = ''#'/content/drive/My Drive/Colab Notebooks/GNN_Geomodeling/'
#Input drillhole training data (each is optional, but must at least provide an empty file with headers):
input_rock_unit_filename = 'Input_Data/MSOP_drillhole_data.csv'#'Input_Data/Folded_Rock_Unit.csv'#
input_geologic_contact_filename = 'Input_Data/Folded_Geologic_Contacts.csv'
input_orientations_filename = 'Input_Data/Folded_Orientations.csv'
#Alternative input files:
#'Input_Data/MSOP_drillhole_data.csv'
#'Input_Data/GeoLogic_int_drillhole_data.csv'

#Input data attributes (column names)
#X,Y,Z must have the same name in all 3 input files
input_name_x = "X"
input_name_y = "Y"
input_name_z = "Z"
#rock unit:
input_name_rock_unit = "RockUnit"
#geologic contacts:
input_name_geologic_contact = "FieldValue"
#orientations:
input_name_x_vec = "XVec"
input_name_y_vec = "YVec"
input_name_z_vec = "ZVec"

##MESH SETTINGS
#===============
#Mesh extents in X,Y,Z. Input rows outside [min_extents, max_extents]
#(bounds inclusive) are filtered out of the imported data.
min_extents = [6200,7000,1400]#[1000,2300,1690]##[2400,5100,2100]
max_extents = [6800,7400,1700]#[1100,2480,1770]##[3200,5600,2700]
max_volume = 1000#50# #max volume of each tetrahedron in the tetrahedral mesh

#Set x-slice for mesh/graph visualization on 3D plots: only mesh points whose
#x coordinate lies within [min_viewing_x, max_viewing_x] are drawn
min_viewing_x = 6725#1040#2825#
max_viewing_x = 6745#1050#2845#

#Run on the first CUDA GPU when one is available; set to False to force CPU
mUseGPUIfAvailable = True


# **Configure & install dependencies**

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
from AML.TrainModel import GraphSAGE
import torch
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from mpl_toolkits.mplot3d.art3d import Line3DCollection
from meshpy.tet import MeshInfo, build
from meshpy.geometry import GeometryBuilder, Marker, make_box
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import math
import sys
import os
from sklearn.metrics import confusion_matrix
import seaborn as sns
from collections import Counter
import torch.nn.functional as F
from torch_geometric.data import Data
from torch.nn import Linear, Dropout
from torch_geometric.nn import SAGEConv
from torch_geometric.loader import NeighborLoader
from scipy.spatial import cKDTree



In [None]:
# Report whether CUDA is usable, then choose the compute device: the first GPU
# when CUDA is available and the user allowed it, otherwise the CPU.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available and mUseGPUIfAvailable:
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

In [None]:
#Load data from file

# NOTE: torch.load unpickles arbitrary Python objects. Only load files that were
# produced by this project's own preparation/training steps.
# NOTE(review): recent PyTorch releases default torch.load to weights_only=True,
# which rejects fully pickled objects such as the ones loaded here; if loading
# fails on such a release, pass weights_only=False explicitly.

# Load the saved mesh point array (same root directory as every other file)
mesh_points = np.load(file_path + 'AML/Intermediate_Data/mesh_points.npy')

#import input data
df_rock_unit = pd.read_csv(file_path + input_rock_unit_filename)
df_geologic_contacts = pd.read_csv(file_path + input_geologic_contact_filename)
df_orientations = pd.read_csv(file_path + input_orientations_filename)

# Load prepared input Data from file.
# map_location remaps the stored tensors onto the selected device while
# loading, so files saved on a GPU machine still load on a CPU-only one.
data_rock_unit = torch.load(file_path + 'AML/Intermediate_Data/data_rock_unit.dat', map_location=device).to(device)
data_scalar_field = torch.load(file_path + 'AML/Intermediate_Data/data_scalar_field.dat', map_location=device).to(device)
data_orientations = torch.load(file_path + 'AML/Intermediate_Data/data_orientations.dat', map_location=device).to(device)

# Loss histories are only consumed by NumPy/matplotlib, so keep them on the CPU
train_losses = torch.load(file_path + 'AML/Output_Data/train_losses.vec', map_location='cpu')
val_losses = torch.load(file_path + 'AML/Output_Data/val_losses.vec', map_location='cpu')

# Load the trained model. The file holds the whole pickled model object (not
# just a state dict), so no GraphSAGE instance has to be constructed first.
# State-dict alternative, kept for reference:
#graphsage.load_state_dict(torch.load(file_path + 'Output_Data/trained_model.pt'))
graphsage = torch.load(file_path + 'AML/Output_Data/trained_model.pt', map_location=device).to(device)
graphsage.eval()  # Put the model in evaluation mode if needed





In [None]:
#Accepts a Pandas dataframe and filters it based on the max and min x,y,z extents
def FilterInputData(df):
  """Return the rows of ``df`` whose X, Y and Z values lie inside the mesh extents.

  The coordinate columns are named by ``input_name_x``, ``input_name_y`` and
  ``input_name_z``; the bounds come from ``min_extents`` / ``max_extents`` and
  are inclusive at both ends.
  """
  axis_columns = (input_name_x, input_name_y, input_name_z)
  inside = None
  for axis, column in enumerate(axis_columns):
    within = df[column].between(min_extents[axis], max_extents[axis])
    # AND the per-axis tests together, starting from the first axis
    inside = within if inside is None else inside & within
  return df[inside]

In [None]:
# Clip each imported table to the specified extents and report how many rows remain
df_rock_unit = FilterInputData(df_rock_unit)
print("%d input rock unit data points within specified limits" % df_rock_unit.shape[0])

df_geologic_contacts = FilterInputData(df_geologic_contacts)
print("%d input geologic contact measurements within specified limits" % df_geologic_contacts.shape[0])

df_orientations = FilterInputData(df_orientations)
print("%d input orientation measurements within specified limits" % df_orientations.shape[0])

In [None]:
def evaluate(model, data):
    """Run one forward pass in evaluation mode and return both model outputs.

    Args:
        model: Module called as ``model(data.x, data.edge_index)`` that returns
            a pair of outputs. It is switched to evaluation mode as a side effect.
        data: Object exposing ``x`` and ``edge_index`` attributes.

    Returns:
        The pair ``(out1, out2)`` exactly as returned by the model. Because the
        pass runs without gradient tracking, the outputs do not require grad.
    """
    model.eval()
    # Inference only: skip autograd bookkeeping so no graph is kept alive for
    # the whole mesh and the outputs can be converted to NumPy directly.
    with torch.no_grad():
        out1, out2 = model(data.x, data.edge_index)
    return out1, out2

# **Validate & Visualize Results**

In [None]:
#plot of training and cross validation loss

# Smoothing window in epochs; clamped per series so short runs still plot
moving_avg_window = 10

def _moving_average(losses, window):
    """Return the moving average of ``losses`` over at most ``window`` entries.

    The window is clamped to the series length: with ``mode='valid'`` a window
    longer than the series would otherwise produce an array that is not a
    moving average and does not match the epoch axis. An empty series yields
    an empty array instead of raising.
    """
    count = len(losses)
    if count == 0:
        return np.array([])
    window = min(window, count)
    return np.convolve(losses, np.ones(window)/window, mode='valid')

# Calculate moving average of train_losses and val_losses
moving_avg_train = _moving_average(train_losses, moving_avg_window)
moving_avg_val = _moving_average(val_losses, moving_avg_window)

epochs = range(1, len(train_losses) + 1)

plt.figure(figsize=(8, 6))
# Each averaged point is drawn at the first epoch of its window. Every curve
# gets an x axis of its own length, so the two histories may differ in length.
#plt.plot(epochs, train_losses, 'b', label='Training Loss')
plt.plot(range(1, len(moving_avg_train) + 1), moving_avg_train, 'b--', label='Training Moving Avg')
#plt.plot(epochs, val_losses, 'r', label='Validation Loss')
plt.plot(range(1, len(moving_avg_val) + 1), moving_avg_val, 'r--', label='Validation Moving Avg')
plt.title('Training and Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
#Create confusion matrix

# Predict on the full graph: out1 holds per-class scores, out2 the scalar field
out1,out2 = evaluate(graphsage,data_scalar_field)
predicted_rock_unit = out1.argmax(dim=1)
predicted_scalar_field = out2

# Nodes in the training or validation split. torch.logical_or keeps the mask a
# torch.bool tensor on the data's device; np.logical_or would round-trip it
# through NumPy and may hand back a uint8 mask, which PyTorch deprecates for
# indexing.
combined_mask = torch.logical_or(data_rock_unit.train_mask, data_rock_unit.val_mask)

filtered_predictions = predicted_rock_unit[combined_mask]
filtered_labels = data_rock_unit.y[combined_mask]

# Ground truth labels and predictions as plain lists for scikit-learn
list_predictions = list(filtered_predictions.cpu().numpy())
list_labels = list(filtered_labels.cpu().numpy())

# Create a confusion matrix
conf_matrix = confusion_matrix(list_labels, list_predictions)

# Visualize the confusion matrix using seaborn
class_labels = ['1', '2', '3']
if len(class_labels) != conf_matrix.shape[0]:
    # The default tick labels only fit a 3-class problem. Otherwise label the
    # axes with the class ids actually present, in the sorted order
    # confusion_matrix uses for its rows and columns.
    class_labels = [str(c) for c in sorted(set(list_labels) | set(list_predictions))]
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=class_labels, yticklabels=class_labels)
plt.xlabel('Predicted labels')
plt.ylabel('True labels')
plt.title('Confusion Matrix')
plt.show()

# Print a summary of predicted values by class (all nodes, ordered by class id)
predicted_counts = dict(sorted(Counter(predicted_rock_unit.cpu().numpy()).items()))
print("Predicted values summary by Min Code:")
for class_label, count in predicted_counts.items():
    print(f"Min Code {class_label}: {count} nodes")

In [None]:
#plot interactively with an optional mesh slice
def interactive_visualize(rock_unit_data, geologic_contact_data=None, orientation_data=None, mesh_points=None, mesh_edges=None, predicted_labels=None):
  """Show an interactive 3D Plotly figure of the input data and an optional mesh slice.

  Column names come from the module-level ``input_name_*`` settings and the
  slice bounds from ``min_viewing_x`` / ``max_viewing_x``.

  Args:
    rock_unit_data: Table with the X/Y/Z and rock unit columns; always plotted,
      coloured by rock unit.
    geologic_contact_data: Optional table with X/Y/Z and the geologic contact
      column; drawn with 'x' markers.
    orientation_data: Optional table with X/Y/Z columns; drawn with open
      diamond markers.
    mesh_points: Optional array indexed as ``mesh_points[:, 0..2]`` for x, y, z.
      Only points whose x lies inside the viewing slice are drawn.
    mesh_edges: Optional iterable of point-index pairs into ``mesh_points``.
      An edge is drawn only when all of its points lie inside the slice.
      Ignored when ``mesh_points`` is None.
    predicted_labels: Optional per-mesh-point values used to colour the mesh
      points; defaults to zeros.

  Returns:
    None. The figure is displayed with ``fig.show()``.
  """

  #Plot rock unit data
  fig = px.scatter_3d(rock_unit_data, x=input_name_x, y=input_name_y, z=input_name_z, color=input_name_rock_unit)

  #Plot geologic contact data
  if geologic_contact_data is not None:
    geologic_contact_trace = px.scatter_3d(geologic_contact_data, x=input_name_x, y=input_name_y, z=input_name_z, color=input_name_geologic_contact).data[0]
    geologic_contact_trace.marker.symbol = 'x'  # Change marker symbol to 'x'
    geologic_contact_trace.marker.size = 4
    fig.add_trace(geologic_contact_trace)

  #Plot orientation measurement data
  if orientation_data is not None:
    orientation_trace = px.scatter_3d(orientation_data, x=input_name_x, y=input_name_y, z=input_name_z).data[0]
    orientation_trace.marker.symbol = 'diamond-open'
    orientation_trace.marker.size = 4
    fig.add_trace(orientation_trace)

  #Plot mesh points in viewing slice
  if mesh_points is not None:
    # Identity test: `== None` on an array or tensor is an elementwise or
    # ambiguous comparison rather than a None check.
    if predicted_labels is None:
      predicted_labels = np.zeros(len(mesh_points))
    # Creating a DataFrame from the mesh points
    df_mesh = pd.DataFrame({
        'x': mesh_points[:, 0],
        'y': mesh_points[:, 1],
        'z': mesh_points[:, 2],
        input_name_rock_unit: predicted_labels
    })

    # Filter points to a slice in y-z plane
    mesh_slice = df_mesh[(df_mesh['x'] >= min_viewing_x) & (df_mesh['x'] <= max_viewing_x)]

    # Adding new points to the existing figure
    fig.add_trace(
        px.scatter_3d(mesh_slice, x='x', y='y', z='z', color=input_name_rock_unit).data[0]
    )
    # Applies to every trace added so far, including the data traces above
    fig.update_traces(marker=dict(size=5))  # Change the marker size here
    fig.update_layout(title='Interactive 3D Plot')

  #plot mesh edges in viewing slice
  if mesh_points is not None and mesh_edges is not None:
    # All edges go into ONE line trace, with None separating consecutive
    # segments; a separate trace per edge makes large meshes very slow to draw.
    edge_x, edge_y, edge_z = [], [], []
    for edge in mesh_edges:
      endpoints = [mesh_points[point] for point in edge]
      # Skip the edge as soon as one of its points falls outside the x-slice
      if any(p[0] < min_viewing_x or p[0] > max_viewing_x for p in endpoints):
        continue
      point1, point2 = endpoints
      edge_x += [point1[0], point2[0], None]
      edge_y += [point1[1], point2[1], None]
      edge_z += [point1[2], point2[2], None]

    if edge_x:
      fig.add_trace(go.Scatter3d(
          x=edge_x, y=edge_y, z=edge_z,
          mode='lines',
          line=dict(color='grey', width=2),
          name=''
      ))

  fig.show()

In [None]:
#Interactive plot of rock unit predictions in a mesh slice
#(mesh_edges is deliberately not passed: mesh edges are off for performance)
interactive_visualize(
    rock_unit_data=df_rock_unit,
    geologic_contact_data=df_geologic_contacts,
    orientation_data=df_orientations,
    mesh_points=mesh_points,
    predicted_labels=predicted_rock_unit.cpu(),
    #mesh_edges=mesh_edges,
)

In [None]:
#Interactive plot of scalar field predictions in a mesh slice
#(mesh_edges is deliberately not passed: mesh edges are off for performance)
interactive_visualize(
    rock_unit_data=df_rock_unit,
    geologic_contact_data=df_geologic_contacts,
    orientation_data=df_orientations,
    mesh_points=mesh_points,
    predicted_labels=predicted_scalar_field.detach().cpu(),
    #mesh_edges=mesh_edges,
)

In [None]:
#Plot the train/validation nodes as larger circles coloured by their label, and
#overlay each mislabelled node with a smaller circle coloured by its prediction

filtered_predictions_cpu = filtered_predictions.detach().cpu().numpy()
filtered_labels_cpu = filtered_labels.detach().cpu().numpy()
# True where the prediction disagrees with the label
mislabel_mask = filtered_predictions_cpu != filtered_labels_cpu

# Node features of the selected nodes; the first three columns are used as x, y, z
filtered_locations = data_rock_unit.x[combined_mask].detach().cpu().numpy()
mislabelled_locations = filtered_locations[mislabel_mask]

# Large markers: every selected node, fill and outline coloured by true label
label_marker = dict(
    color=filtered_labels_cpu,
    size=7,
    line=dict(width=2, color=filtered_labels_cpu),
)
fig = px.scatter_3d(
    x=filtered_locations[:, 0],
    y=filtered_locations[:, 1],
    z=filtered_locations[:, 2],
)
fig.update_traces(marker=label_marker, selector=dict(mode='markers'))

# Small markers: only the mislabelled nodes, coloured by predicted class
smaller_size_trace = px.scatter_3d(
    x=mislabelled_locations[:, 0],
    y=mislabelled_locations[:, 1],
    z=mislabelled_locations[:, 2],
    color=filtered_predictions_cpu[mislabel_mask],
)
smaller_size_trace.update_traces(
    marker=dict(size=4),  # Adjust the size as needed
    selector=dict(mode='markers'),
)
fig.add_trace(smaller_size_trace.data[0])

fig.update_layout(title='Interactive 3D Plot')

fig.show()