In [1]:
import json
import glob
import os
import sys
from time import time
import numpy as np
import pandas as pd
import torch
import torch_geometric as tg
import torch.nn.functional as F
import pyvista as pv
import altair as alt
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pickle

sys.path.append('C:/Users/ewhalen/OneDrive - Massachusetts Institute of Technology/research/toolbox/caeSurrogateUtility/')
import caeUtility as cu

sys.path.append('../readers')
from loadGhGraphs import loadGhGraphs

sys.path.append('../visualization')
from trussViz2D import plotTruss

In [2]:
# toss out the worst designs (e.g. pctCutoff=0.9 drops the worst 10%)
def filterbyDisp(graphList, pctCutoff):
    """Keep the graphs whose peak absolute response is within a percentile cutoff.

    Each graph is scored by the largest absolute value found in ``graph.y``.
    The scores are converted to percentile ranks and only graphs with a rank
    less than or equal to ``pctCutoff`` are returned, in their original order.

    Args:
        graphList: sequence of graphs exposing a ``y`` tensor with ``.numpy()``.
        pctCutoff: fraction in [0, 1]; graphs ranked above it are discarded.

    Returns:
        A new list with the retained graphs (the input list is not modified).
    """
    # Score the graphs that were actually passed in; the previous version read
    # the notebook-level variable `allGraphsUnfiltered`, so it ignored (or
    # crashed on) any other argument.
    maxes = [max(np.abs(graph.y.numpy().flatten())) for graph in graphList]
    df = pd.DataFrame(maxes, columns=['maxes'])
    mask = df['maxes'].rank(pct=True) <= pctCutoff
    return [g for g, b in zip(graphList, mask) if b]

# partition into train, validate and test
def partitionGraphList(allGraphs):
    """Split a list of graphs into train, validation and test lists.

    The split is reproducible (fixed ``random_state``) and shuffled. 20% of the
    graphs go to the test set; 15% of the remaining 80% (12% of the total) go
    to the validation set; the rest is used for training.

    Returns:
        (trainGraphs, valGraphs, testGraphs)
    """
    indices = list(range(len(allGraphs)))

    # hold out 20% of all graphs for testing
    remaining, testIds = train_test_split(indices, test_size=0.2, shuffle=True, random_state=1234)
    # 15% of what is left becomes the validation set
    trainIds, valIds = train_test_split(remaining, test_size=0.15, shuffle=True, random_state=1234)

    def pick(ids):
        return [allGraphs[i] for i in ids]

    return pick(trainIds), pick(valIds), pick(testIds)

def logTrans(x):
    """Signed log transform: sign(x) * log(10*|x| + 1)."""
    magnitude = np.log(10.0*np.abs(x)+1.0)
    return np.sign(x)*magnitude


def invLogTrans(y):
    """Inverse of ``logTrans``: sign(y) * (exp(|y|) - 1) / 10."""
    magnitude = (np.exp(np.abs(y))-1.0)/10.0
    return np.sign(y)*magnitude

def fitSS(graphList, logTrans=True, ssTrans=True, flatten=False):
    """Fit a StandardScaler on the responses (``graph.y``) of a list of graphs.

    Args:
        graphList: graphs whose ``y`` attribute holds the response values.
        logTrans: stored on the scaler as ``ss.logTrans``; read later by
            ``applySS`` / ``applyInvSS`` to decide whether the log transform
            is applied. It is not used during fitting.
        ssTrans: stored on the scaler as ``ss.ssTrans``; read later to decide
            whether standard scaling is applied.
        flatten: if True, every response value of every graph is one sample of
            a single feature (one mean/variance overall). If False, each graph
            is one sample whose features are its flattened responses, so all
            graphs must have the same number of response values.

    Returns:
        The fitted StandardScaler carrying the extra attributes ``logTrans``,
        ``ssTrans`` and ``flatten``.
    """
    ss = StandardScaler()
    if flatten:
        seed = np.empty((0,1))
        rows = [graph.y.reshape(-1,1) for graph in graphList]
    else:
        seed = np.empty((0,graphList[0].y.numpy().size))
        rows = [graph.y.reshape(1,-1) for graph in graphList]
    # Stack once instead of re-copying the growing array on every iteration.
    # The empty float64 seed is kept so the result dtype/shape match what the
    # incremental stacking produced.
    allResponses = np.vstack([seed] + rows)
    ss.fit(allResponses)
    ss.logTrans = logTrans
    ss.ssTrans = ssTrans
    ss.flatten = flatten
    return ss

def applySS(ss, graphList):
    """Return copies of the graphs with their responses ``y`` transformed.

    Depending on the flags stored on ``ss``, the responses are first passed
    through ``ss.transform`` (reshaped to a single column when ``ss.flatten``
    is set, otherwise to a single row) and reshaped back to two columns, and
    then passed through ``logTrans``. The input graphs are left untouched.
    """
    scaledGraphs = []
    for source in graphList:
        graph = source.clone() # deep copy
        if ss.ssTrans:
            if ss.flatten:
                samples = graph.y.reshape(-1,1).cpu()
            else:
                samples = graph.y.reshape(1,-1).cpu()
            standardized = ss.transform(samples).reshape(-1,2)
            graph.y = torch.as_tensor(standardized, dtype=torch.float)
        if ss.logTrans:
            graph.y = logTrans(graph.y)
        scaledGraphs.append(graph)
    return scaledGraphs

def applyInvSS(ss, out):
    """Undo the transforms of ``applySS`` on an array of model outputs.

    The steps are reversed relative to ``applySS``: the log transform is
    inverted first (if ``ss.logTrans``), then the standard scaling (if
    ``ss.ssTrans``), after which the result is reshaped to two columns.
    """
    restored = invLogTrans(out) if ss.logTrans else out
    if not ss.ssTrans:
        return restored
    # match the sample layout the scaler was fitted with
    samples = restored.reshape(-1,1) if ss.flatten else restored.reshape(1,-1)
    return ss.inverse_transform(samples).reshape(-1,2)

## define network architecture
#############################################################
class FeaStNet(torch.nn.Module):
    """Graph network: a linear embedding, six FeaStConv layers and a linear head.

    Node positions (``data.pos``) are concatenated with the node features
    (``data.x``) to form the 4-column input expected by ``norm0`` / ``lin0``.
    Every convolution is followed by batch normalisation and ReLU. The head
    maps the 128 hidden channels to 2 output values per node.

    Attribute names are part of the checkpoint format (state-dict keys) and
    must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.norm0 = tg.nn.BatchNorm(4, momentum=0.3, affine=True, track_running_stats=True)
        self.lin0 = torch.nn.Linear(4, 16)
        self.conv0 = tg.nn.FeaStConv(16, 32, heads=8) # The (translation-invariant) FeaStNet convolution
        self.normc0 = tg.nn.BatchNorm(32, momentum=0.3, affine=True, track_running_stats=True)
        self.conv1 = tg.nn.FeaStConv(32, 64, heads=8)
        self.normc1 = tg.nn.BatchNorm(64, momentum=0.3, affine=True, track_running_stats=True)
        self.conv2 = tg.nn.FeaStConv(64, 128, heads=8)
        self.normc2 = tg.nn.BatchNorm(128, momentum=0.3, affine=True, track_running_stats=True)
        self.conv3 = tg.nn.FeaStConv(128, 256, heads=8)
        self.normc3 = tg.nn.BatchNorm(256, momentum=0.3, affine=True, track_running_stats=True)
        self.conv4 = tg.nn.FeaStConv(256, 128, heads=8)
        self.normc4 = tg.nn.BatchNorm(128, momentum=0.3, affine=True, track_running_stats=True)
        self.conv5 = tg.nn.FeaStConv(128, 128, heads=8)
        self.normc5 = tg.nn.BatchNorm(128, momentum=0.3, affine=True, track_running_stats=True)
        self.lin1 = torch.nn.Linear(128, 64)
        self.lin2 = torch.nn.Linear(64, 2)

    def forward(self, data):
        """Compute the per-node prediction for a graph (or batch of graphs).

        Args:
            data: object providing ``pos``, ``x`` and ``edge_index``.

        Returns:
            Tensor with one row per node and 2 columns.
        """
        # Work on a local tensor. Writing intermediate results back to
        # `data.x` would overwrite the caller's node features, so a second
        # forward pass on the same object would consume the previous output
        # instead of the real inputs.
        x = torch.cat([data.pos, data.x.float()], 1)
        x = F.relu(self.lin0(self.norm0(x)))

        convStages = (
            (self.conv0, self.normc0),
            (self.conv1, self.normc1),
            (self.conv2, self.normc2),
            (self.conv3, self.normc3),
            (self.conv4, self.normc4),
            (self.conv5, self.normc5),
        )
        for conv, norm in convStages:
            x = F.relu(norm(conv(x, data.edge_index)))

        x = F.relu(self.lin1(x))
        return self.lin2(x)
    
# configure training
def train(model, trainGraphs, valGraphs, device, epochs=10, saveDir=None, batch_size=256, flatten=False):
    """Train ``model`` with Adam on MSE loss and track train/validation loss.

    A scaler is fitted on ``trainGraphs`` (see ``fitSS``) and attached to the
    model as ``model.ss``; both training and validation responses are
    transformed with it before the loss is computed.

    Args:
        model: network to train; it is moved to ``device``.
        trainGraphs: graphs used for optimisation.
        valGraphs: graphs used for per-epoch validation.
        device: torch device to run on.
        epochs: number of passes over the training data.
        saveDir: optional output directory. When given, the scaler is pickled
            to ``ss.pkl``, one line per epoch is appended to ``trainlog.csv``
            and a ``checkpoint_<epoch>`` file is written whenever the
            validation loss reaches a new minimum.
        batch_size: training batch size (validation uses batches of 1).
        flatten: forwarded to ``fitSS``.

    Returns:
        (trainHist, valHist, modelFile, ssFile) where the histories are lists
        of mean per-epoch losses, ``modelFile`` is the path of the most recent
        best checkpoint and ``ssFile`` the path of the pickled scaler (both
        None when ``saveDir`` is not given).
    """
    # prep train data
    model.ss = fitSS(trainGraphs, flatten=flatten)
    ssFile = None
    modelFile = None
    if saveDir:
        # makedirs also handles nested paths and an already existing directory
        os.makedirs(saveDir, exist_ok=True)
        ssFile = os.path.join(saveDir, 'ss.pkl')
        with open(ssFile, 'wb') as fp:
            pickle.dump(model.ss, fp)
    trainGraphsScaled = applySS(model.ss, trainGraphs)
    loader = tg.data.DataLoader(trainGraphsScaled, batch_size=batch_size, shuffle=True)

    # prep validation data
    valGraphsScaled = applySS(model.ss, valGraphs)
    valLoader = tg.data.DataLoader(valGraphsScaled, batch_size=1, shuffle=False)

    # prep model (note: 10e-4 == 1e-3)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=10e-4)
    trainHist, valHist = [], []
    model.to(device)

    for epoch in range(epochs):

        ### train ###
        model.train()
        t = time()
        batchHist = []
        for batch in loader:
            batch = batch.to(device)
            optimizer.zero_grad()
            out = model(batch)
            loss = F.mse_loss(out, batch.y)
            loss.backward()
            optimizer.step()
            batchHist.append(loss.item())

        trainHist.append(np.mean(batchHist))

        ### validate ###
        batchHist = []
        model.eval()
        with torch.no_grad():
            for batch in valLoader:
                batch = batch.to(device)
                out = model(batch)
                loss = F.mse_loss(out, batch.y)
                batchHist.append(loss.item())
        valHist.append(np.mean(batchHist))

        print(f'epoch: {epoch}   trainLoss: {trainHist[-1]:.4e}   time: {(time()-t):.2e}')

        if saveDir:
            # columns: epoch, train loss, validation loss, elapsed seconds
            with open(os.path.join(saveDir, 'trainlog.csv'), 'a') as fp:
                # terminate each record with a real newline (was the literal
                # text '/n', which put the whole log on a single line)
                fp.write(f'{epoch},{trainHist[-1]},{valHist[-1]},{(time()-t)}\n')

            # the latest epoch is the best so far -> checkpoint it
            if (np.argmin(valHist) == len(valHist)-1):
                modelFile = os.path.join(saveDir, f'checkpoint_{epoch}')
                torch.save(model.state_dict(), modelFile) # save best model

    return trainHist, valHist, modelFile, ssFile

def predict(model, inputs, device):
    """Run the model on each input graph and return un-scaled predictions.

    The inputs are passed through ``applySS`` with the model's scaler, fed to
    the model one graph at a time in evaluation mode, and every output is
    mapped back with ``applyInvSS``.

    Returns:
        A list with one prediction array per input graph, in input order.
    """
    # prep data
    scaledInputs = applySS(model.ss, inputs)
    graphLoader = tg.data.DataLoader(scaledInputs, batch_size=1, shuffle=False)

    model.to(device)
    predictions = []
    model.eval()
    with torch.no_grad():
        for graph in graphLoader:
            graph.to(device)
            rawOut = model(graph).cpu().numpy()
            predictions.append(applyInvSS(model.ss, rawOut))
    return predictions

def test(model, inputs, outputs, baselineRef, device, level='set'):
    """Predict on ``inputs`` and score the predictions against ``outputs``.

    The ground truth is taken from ``graph.y`` of each graph in ``outputs``.
    When ``baselineRef`` is a non-empty list of graphs, their ``y`` arrays are
    forwarded as the baseline reference. Scoring is delegated to
    ``cu.computeFieldLossMetrics``, whose result is returned unchanged.
    """
    predictions = predict(model, inputs, device)
    if baselineRef:
        baselineRef = [ref.y.numpy() for ref in baselineRef]
    groundTruth = [graph.y.numpy() for graph in outputs]
    return cu.computeFieldLossMetrics(groundTruth,
                                      predictions,
                                      baselineRef=baselineRef, level=level)
        
def loadModelFromFile(modelFile, ssFile):
    """Rebuild a FeaStNet from a saved state dict and its pickled scaler.

    Args:
        modelFile: path to a state dict written with ``torch.save``.
        ssFile: path to the pickled scaler written during training.

    Returns:
        A FeaStNet with the loaded weights and the scaler attached as
        ``model.ss``.
    """
    # The model must exist before weights are loaded into it (the previous
    # version first called load_state_dict on an undefined `bestModel`).
    model = FeaStNet()
    # map to CPU so checkpoints saved on a GPU load on machines without one
    model.load_state_dict(torch.load(modelFile, map_location=torch.device('cpu')))
    # NOTE: unpickling can execute arbitrary code -- only load trusted files
    with open(ssFile, 'rb') as fp:
        model.ss = pickle.load(fp)
    return model

def plotHistory(trainHist, valHist):
    """Plot training and validation loss per epoch as an Altair line chart.

    Args:
        trainHist: per-epoch training losses.
        valHist: per-epoch validation losses (same length as ``trainHist``).

    Returns:
        An Altair chart with one line per metric ('train' and 'val').
    """
    histDf = pd.DataFrame({'train': trainHist, 'val': valHist})
    # reset_index() exposes the epoch number as the column 'index'
    return alt.Chart(histDf.reset_index()).transform_fold(
            ['train', 'val'],
            as_=['metric', 'value']
        ).mark_line().encode(
            alt.X('index:Q'),
            alt.Y('value:Q', axis=alt.Axis(title='loss')),
            color=alt.Color('metric:N'),
            # the data has no 'epoch' field; show the 'index' column under that label
            tooltip=[alt.Tooltip('index:Q', title='epoch'), 'value:Q']
        ).properties(width=400, height=200)

## Load data

In [3]:
# load group 5: read the graphs, keep the 90% with the smallest peak response,
# then split them into train / validation / test
doeFile = 'C:/Users/ewhalen/OneDrive - Massachusetts Institute of Technology/research/data/trusses/EW/v1.3/design_5_N_1000.csv'
allGraphsUnfiltered = loadGhGraphs(doeFile, NUM_DV=5)
print(f'loaded {len(allGraphsUnfiltered)} graphs')
all5Graphs = filterbyDisp(allGraphsUnfiltered, 0.9)
trainGraphs5, valGraphs5, testGraphs5 = partitionGraphList(all5Graphs)

# load group 9: same pipeline; doeFile and allGraphsUnfiltered are rebound here
# NOTE(review): NUM_DV=5 is also used for the design_9 file -- confirm this is
# intended and not a copy-paste leftover from the group 5 block
doeFile = 'C:/Users/ewhalen/OneDrive - Massachusetts Institute of Technology/research/data/trusses/EW/v1.3/design_9_N_1000.csv'
allGraphsUnfiltered = loadGhGraphs(doeFile, NUM_DV=5)
print(f'loaded {len(allGraphsUnfiltered)} graphs')
all9Graphs = filterbyDisp(allGraphsUnfiltered, 0.9)
trainGraphs9, valGraphs9, testGraphs9 = partitionGraphList(all9Graphs)

# combine data sets: each dict maps a set name to its list of graphs;
# 'comb' is the concatenation of the group 5 and group 9 lists
trainSets = {'group 5':trainGraphs5, 'group 9':trainGraphs9, 'comb':trainGraphs5+trainGraphs9}
valSets = {'group 5':valGraphs5, 'group 9':valGraphs9, 'comb':valGraphs5+valGraphs9}
testSets = {'group 5':testGraphs5, 'group 9':testGraphs9, 'comb':testGraphs5+testGraphs9}

loaded 1000 graphs
loaded 1000 graphs


## Load pretrained model

In [4]:
# checkpoint and scaler saved by an earlier training run on group 5
modelFile = "C:/Users/ewhalen/OneDrive - Massachusetts Institute of Technology/research/graphConvolutions/code/pretrained/topoTest04/group 5/checkpoint_95"
ssFile = "C:/Users/ewhalen/OneDrive - Massachusetts Institute of Technology/research/graphConvolutions/code/pretrained/topoTest04/group 5/ss.pkl"
device = torch.device('cpu')

model = FeaStNet()
# NOTE(review): strict=False silently skips missing/unexpected keys, so a
# checkpoint that does not match this architecture loads without any error --
# confirm that all weights are actually restored
model.load_state_dict(torch.load(modelFile, map_location=torch.device('cpu')), strict=False)
# close the scaler file deterministically instead of leaking the handle
# NOTE: unpickling can execute arbitrary code -- only load trusted files
with open(ssFile, 'rb') as fp:
    ss = pickle.load(fp)
model.ss = ss

## Test

In [5]:
# evaluate the loaded model on the group 5 test graphs (no baseline reference);
# the same graphs supply both the inputs and the ground-truth responses
test(model, testGraphs5, testGraphs5, None, device, level='set')

{'mse': 0.00053397915,
 'mae': 0.01603628,
 'mre': 0.44665208,
 'peakR2': -1.3475659159016105,
 'maxAggR2': 0.0,
 'meanAggR2': -0.9085957988128718,
 'minAggR2': -5.4398566550491045}

In [6]:
# evaluate the same model (loaded from the 'group 5' checkpoint) on the
# group 9 test graphs, again without a baseline reference
test(model, testGraphs9, testGraphs9, None, device, level='set')

{'mse': 0.0012662994,
 'mae': 0.02514669,
 'mre': 0.61803246,
 'peakR2': -1.0462381996868366,
 'maxAggR2': -0.05875430087881606,
 'meanAggR2': -1.8033689501358083,
 'minAggR2': -19.850220066726028}