In [None]:
import pandas as pd

In [None]:
# RENAME map: raw CFD column headers (units included) -> snake_case names used in the rest of the notebook
new_col_names = dict([
    ("X", "x"),
    ("Y", "y"),
    ("Z", "z"),
    ("pw (Pa)", "pressure"),
    ("qw (W/m^2)", "heat_flux"),
    ("Me", "edge_mach_number"),
    ("delta (m)", "boundary_layer_thickness"),
    ("theta (m)", "momentum_thickness"),
    ("Re-theta", "momentum_thickness_reynolds_number"),
    ("tauw (Pa)", "shear_stress"),
    ("mach (-)", "mach"),
    ("velocity (m/s)", "velocity"),
    ("density (kg/m^3)", "density"),
    ("aoa (degrees)", "angle_of_attack"),
    ("dynamic_pressure (Pa)", "dynamic_pressure"),
])

# LOAD dataset and apply the rename in a single chained expression
cfd_df = pd.read_csv("apollo_cfd_database.csv").rename(columns=new_col_names)
cfd_df.head()

In [None]:
# DEFINE the quantity to predict (dependent) and the flow conditions it is predicted from (independent)
dep_var = "pressure"
ind_var = [
    "mach",
    "dynamic_pressure",
    "angle_of_attack",
]

In [None]:
# FILTER the dataframe down to the model columns: independent variables first, dependent variable last
selected_columns = [*ind_var, dep_var]
filtered_df = cfd_df[selected_columns]

In [None]:
# GROUP by independent variables IMPORTANT!!!: There should be 185 rows
# Every unique combination of the independent variables becomes one row; its dep_var
# cell holds the array of all dependent-variable values recorded for that combination.
values_by_case = filtered_df.groupby(ind_var)[dep_var]
grouped = values_by_case.apply(lambda case_values: case_values.values).reset_index()

In [None]:
# INSPECT grouped data: one row per combination of independent variables,
# with the dependent-variable column holding an array of values per row
grouped

In [None]:
# DEFINE Pytorch dataset
import numpy as np
import torch

class CFDSolutions(torch.utils.data.Dataset):
    """Map-style dataset pairing each row's feature values with its array of target values.

    Parameters
    ----------
    grouped_df : pandas.DataFrame
        One row per sample. Feature columns hold scalars; the target column holds one
        array per row, and all arrays must have the same length so they can be stacked.
    feature_cols : list of str, optional
        Columns used as model inputs. Defaults to the notebook-level ``ind_var``.
    target_col : str, optional
        Column holding the per-row target arrays. Defaults to the notebook-level ``dep_var``.

    Attributes
    ----------
    X : numpy.ndarray, float32, shape (n_samples, n_features)
    Y : numpy.ndarray, float32, shape (n_samples, n_targets)

    Raises
    ------
    ValueError
        If ``grouped_df`` has no rows, or (from ``np.stack``) if the target arrays
        differ in length.
    """

    def __init__(self, grouped_df, feature_cols=None, target_col=None):
        # Explicit columns make the class usable outside this notebook; the globals are
        # only a fallback so existing one-argument calls keep working.
        if feature_cols is None:
            feature_cols = ind_var
        if target_col is None:
            target_col = dep_var
        if len(grouped_df) == 0:
            # np.stack would fail with a less helpful message on an empty sequence.
            raise ValueError("CFDSolutions needs a dataframe with at least one row")

        self.feature_cols = list(feature_cols)
        self.target_col = target_col
        self.X = grouped_df[self.feature_cols].values.astype('float32')
        # The target column is an object column of arrays; stacking turns it into a 2-D array.
        self.Y = np.stack(grouped_df[self.target_col].values).astype('float32')

    def __len__(self):
        """Return the number of samples (rows of the grouped dataframe)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, targets)`` for sample ``idx`` as float32 tensors."""
        return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

In [None]:
# INITIALIZE dataset with the full grouped data (every row of `grouped`, before any train/validation split)
dataset = CFDSolutions(grouped)

In [None]:
# TEST dataset: tensor X holds the independent variables (one value per entry of ind_var),
# tensor Y holds the dependent variable (expected 50176 values per sample — TODO confirm against the data)
dataset[0]

In [None]:
# DEFINE model architecture
class ShieldModel(torch.nn.Module):
    """Fully connected regression network: in_features -> 64 -> 128 -> 256 -> out_features.

    ReLU follows each of the three hidden layers; the output layer is linear.

    Parameters
    ----------
    in_features : int, optional
        Width of the input vector. Defaults to ``len(ind_var)`` (the notebook-level
        list of independent variables).
    out_features : int, optional
        Width of the output vector. Defaults to 50176, the value originally hard-coded here.
    """

    def __init__(self, in_features=None, out_features=50176):
        super().__init__()
        if in_features is None:
            # Fallback keeps the original zero-argument constructor working.
            in_features = len(ind_var)
        # Attribute names are kept as linear1..linear4 so state_dict keys stay unchanged.
        self.linear1 = torch.nn.Linear(in_features=in_features, out_features=64)
        self.linear2 = torch.nn.Linear(in_features=64, out_features=128)
        self.linear3 = torch.nn.Linear(in_features=128, out_features=256)
        self.linear4 = torch.nn.Linear(in_features=256, out_features=out_features)

    def forward(self, x):
        """Map a ``(batch, in_features)`` tensor to a ``(batch, out_features)`` tensor."""
        relu = torch.nn.functional.relu
        x = relu(self.linear1(x))
        x = relu(self.linear2(x))
        x = relu(self.linear3(x))
        # No activation on the last layer: the network predicts unbounded real values.
        return self.linear4(x)

In [None]:
# TEST model: push an all-zeros batch of 10 samples through a freshly built network
# and print the input shape next to the output shape
my_model = ShieldModel()
dummy_batch = torch.zeros((10, len(ind_var)))
predictions = my_model(dummy_batch)
print(dummy_batch.shape, predictions.shape)

In [None]:
# DEFINE training loop
from torch.utils.data import DataLoader
import torch.optim as optim
from livelossplot import PlotLosses

def _count_within_threshold(pred, y, threshold):
    """Count samples whose mean absolute percent error over all outputs is below ``threshold``.

    The percent error divides by ``y.abs()``, so a target equal to zero yields inf/nan
    for that sample, which never compares below the threshold and is counted as wrong.
    """
    percent_diff_each = (pred.detach() - y).abs() / y.abs() * 100
    mean_percent_diff = percent_diff_each.mean(dim=1)
    return int((mean_percent_diff < threshold).sum().item())


def _run_epoch(model, loss_fn, loader, device, threshold, optimizer=None):
    """Run one pass over ``loader``; update weights only when ``optimizer`` is given.

    Returns ``(mean_batch_loss, accuracy)``, or ``None`` when the loader yields no batches.
    """
    training = optimizer is not None
    # train(True) == train(), train(False) == eval()
    model.train(training)
    cumulative_loss = 0
    correct = 0
    n_samples = 0
    n_batches = 0
    # Gradients are tracked only for the training pass.
    with torch.set_grad_enabled(training):
        for X, y in loader:
            if training:
                model.zero_grad()
            # Move (X, y) to the target device.
            X = X.to(device)
            y = y.to(device)
            pred = model(X)
            loss = loss_fn(pred, y)
            cumulative_loss += loss.item()
            correct += _count_within_threshold(pred, y, threshold)
            n_samples += y.size(0)
            n_batches += 1
            if training:
                # Backpropagate, then let the optimizer apply its update rule.
                loss.backward()
                optimizer.step()
    if n_batches == 0:
        return None
    return cumulative_loss / n_batches, correct / n_samples


def train_model(n_epochs, threshold, model, loss_fn, optimizer, scheduler, train_loader, val_loader, device):
    """Train ``model`` for ``n_epochs`` epochs, plotting train/validation metrics live.

    Parameters
    ----------
    n_epochs : int
        Number of full passes over ``train_loader``.
    threshold : float
        A sample counts as "correct" when its mean absolute percent error across all
        outputs is below this value (in percent).
    model, loss_fn : torch.nn.Module
        Both are moved to ``device`` before training.
    optimizer : torch.optim.Optimizer
        Stepped once per training batch.
    scheduler : object with ``step()``, ``None`` or ``-1``
        Stepped once per epoch. ``None`` and the legacy ``-1`` sentinel both disable it.
    train_loader, val_loader : iterable of ``(X, y)`` batches
    device : torch.device

    Returns
    -------
    None
        The model is updated in place. Per-epoch 'loss', 'accuracy', 'val_loss' and
        'val_accuracy' are sent to the livelossplot ``PlotLosses`` plot.
    """
    # for gpu training
    model = model.to(device)
    loss_fn = loss_fn.to(device)
    # for plotting
    liveloss = PlotLosses()
    # Accept None as well as the historical -1 sentinel for "no scheduler".
    use_scheduler = scheduler is not None and scheduler != -1

    for _ in range(n_epochs):
        logs = {}

        train_metrics = _run_epoch(model, loss_fn, train_loader, device, threshold, optimizer)
        if train_metrics is not None:
            logs['loss'], logs['accuracy'] = train_metrics

        val_metrics = _run_epoch(model, loss_fn, val_loader, device, threshold)
        if val_metrics is not None:
            logs['val_loss'], logs['val_accuracy'] = val_metrics

        # Update the plot with new logging information.
        liveloss.update(logs)
        liveloss.send()

        if use_scheduler:
            scheduler.step()

In [None]:
# DEFINE training parameters
from sklearn.model_selection import train_test_split

# Seed PyTorch so weight initialisation and batch shuffling are repeatable across runs
torch.manual_seed(42)

train_df, val_df = train_test_split(grouped, test_size=0.30, random_state=42)

n_epochs = 100
batch_size = 16
lr = 0.0001
threshold = 10  # a sample is "correct" when its mean percent error is below this value

train_set = CFDSolutions(train_df)
val_set = CFDSolutions(val_df)

train_loader = DataLoader(train_set, batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size, shuffle=False)

model = ShieldModel()
loss_fn = torch.nn.SmoothL1Loss()
optimizer = optim.Adam(model.parameters(), lr)

# Pick the best available backend instead of assuming Apple-silicon "mps",
# which raises on machines without it. MPS stays the first choice.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# -1 is the sentinel train_model uses for "no learning-rate scheduler"
train_model(n_epochs, threshold, model, loss_fn, optimizer, -1, train_loader, val_loader, device)

In [None]:
# SAMPLE 1 of the 185 grouped points for testing
# NOTE(review): `grouped` contains both training and validation rows, so this sample may
# have been seen during training — confirm it is in val_df before reading the result
# as a held-out score.
idx_sample = 180
x_sample = grouped.loc[idx_sample, ind_var].to_numpy(dtype='float32')
y_sample = grouped.loc[idx_sample, dep_var]
# Convert the array directly and add the batch dimension with unsqueeze: wrapping an
# ndarray in a Python list before torch.tensor() is slow and emits a UserWarning.
x_sample_tensor = torch.tensor(x_sample, dtype=torch.float32).unsqueeze(0).to(device)

In [None]:
# TEST 1 sample with model: forward pass in eval mode without gradient tracking,
# then drop the batch dimension and bring the result back to NumPy on the CPU
model.eval()
with torch.no_grad():
    sample_output = model(x_sample_tensor)
prediction = sample_output.squeeze(0).cpu().numpy()
print(prediction)
print(y_sample)

In [None]:
# CALCULATE the absolute percent difference at every point of the sample, then average it
abs_error = np.abs(prediction - y_sample)
percent_diff_each = abs_error / np.abs(y_sample) * 100
mean_percent_diff = percent_diff_each.mean()
mean_percent_diff

In [None]:
# COLLECT coordinates for plotting
# .copy() makes xyz_df an independent frame, so adding columns to it later does not
# trigger SettingWithCopyWarning on the .iloc slice.
# NOTE(review): assumes the first 50176 rows list the points in the same order as the
# arrays stored in `grouped` — confirm against the CSV layout.
xyz_df = cfd_df[["x", "y", "z"]].iloc[0:50176].copy()

In [None]:
# ATTACH correct and prediction
# assign() builds a new frame instead of writing columns into the result of an .iloc
# slice, which avoids SettingWithCopyWarning and keeps this cell safe to re-run.
xyz_df = xyz_df.assign(
    correct=y_sample,
    prediction=prediction,
    percent_difference=np.abs(prediction - y_sample) / np.abs(y_sample) * 100,
)

In [None]:
# INSPECT the coordinates together with the correct, prediction and percent_difference columns
xyz_df

In [None]:
# PLOT correct points
import plotly.express as px

# 3-D scatter of the points, coloured by the reference ("correct") values
correct_plot_options = {
    "x": "x",
    "y": "y",
    "z": "z",
    "color": "correct",
    "color_continuous_scale": "Viridis",
    "hover_data": xyz_df.columns,
}
fig = px.scatter_3d(xyz_df, **correct_plot_options)
fig.show()

In [None]:
# PLOT predicted points: same 3-D scatter, coloured by the model output
prediction_plot_options = {
    "x": "x",
    "y": "y",
    "z": "z",
    "color": "prediction",
    "color_continuous_scale": "Viridis",
    "hover_data": xyz_df.columns,
}
fig = px.scatter_3d(xyz_df, **prediction_plot_options)
fig.show()

In [None]:
# PLOT difference points: same 3-D scatter, coloured by the percent difference
# between prediction and correct value
difference_plot_options = {
    "x": "x",
    "y": "y",
    "z": "z",
    "color": "percent_difference",
    "color_continuous_scale": "Viridis",
    "hover_data": xyz_df.columns,
}
fig = px.scatter_3d(xyz_df, **difference_plot_options)
fig.show()