# Training on multiple topologies
Eamon Whalen

In [1]:
import sys
import glob
import numpy as np
import pandas as pd
import altair as alt

sys.path.append('./models')
from feastnetSurrogateModel import FeaStNet

sys.path.append('./readers')
from loadGhGraphs import loadGhGraphs

sys.path.append('./visualization')
from altTrussViz import plotTruss, interactiveErrorPlot

sys.path.append('./util')
from gcnSurrogateUtil import *

## 1. Load simulation data

In [2]:
# Per-design partitions, keyed by the design name parsed from each file name.
trainSets, valSets, testSets = dict(), dict(), dict()

# glob returns files in arbitrary order; sorting fixes which three are loaded.
doeFiles = np.sort(glob.glob("/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/*.csv"))
for doeFile in doeFiles[:3]:
    # design name = file name (last path component) up to the first '_N'
    designName = doeFile.rsplit('/', 1)[-1].partition('_N')[0]
    print(f'loading {designName}')

    # load -> filter by displacement -> split into train / val / test
    allGraphsUnfiltered = loadGhGraphs(doeFile, NUM_DV=5)
    allGraphs = filterbyDisp(allGraphsUnfiltered, 0.9)
    trainData, valData, testData = partitionGraphList(allGraphs)

    trainSets[designName], valSets[designName], testSets[designName] = (
        trainData,
        valData,
        testData,
    )

loading design_5
loading design_6
loading design_7


## 2. Train on each group separately

In [3]:
# Shared settings for every training run in this notebook.
epochs = 2                            # epochs passed to each trainModel call
saveDir = './results/topoTest01/'     # prefix for each run's saveDir argument
resultsList = []                      # test-result records accumulated across sections

In [4]:
# Baseline: a fresh FeaStNet per group, trained and tested on that group only.
for trainName, trainSet in trainSets.items():
    print(f'training on {trainName}')

    # fit on this group's train partition, validating on its own val partition
    gcn = FeaStNet()
    history = gcn.trainModel(
        trainSet,
        valSets[trainName],
        epochs=epochs,
        batch_size=256,
        flatten=True,
        logTrans=False,
        ssTrans=True,
        saveDir=f'{saveDir}{trainName}',
    )
    display(plotHistory(history))

    # evaluate on the same group's test partition and tag each record
    print(f'testing on {trainName}\n\n')
    resultsDict = gcn.testModel(testSets[trainName], level='field')
    numResults = len(resultsDict['mse'])
    resultsDict['Trained on'] = ['test group'] * numResults
    resultsDict['Tested on'] = [trainName] * numResults
    results = pivotDict(resultsDict)
    resultsList.extend(results)

pd.DataFrame(resultsList)

training on design_5
epoch: 0   trainLoss: 1.0241e+00   valLoss:1.0310e+00  time: 1.25e+00
epoch: 1   trainLoss: 8.8183e-01   valLoss:1.0317e+00  time: 9.87e-01
loading checkpoint 0




testing on design_5
training on design_6
epoch: 0   trainLoss: 1.0214e+00   valLoss:1.1879e+00  time: 1.02e+00
epoch: 1   trainLoss: 9.3002e-01   valLoss:1.1868e+00  time: 9.87e-01
loading checkpoint 1




testing on design_6
training on design_7
epoch: 0   trainLoss: 9.9097e-01   valLoss:9.4755e-01  time: 1.05e+00
epoch: 1   trainLoss: 9.4306e-01   valLoss:9.5552e-01  time: 1.01e+00
loading checkpoint 0




testing on design_7


Unnamed: 0,mse,mae,mre,maxAE,mae/peak,maxAE/peak,relEAtPeak,Trained on,Tested on
0,0.000102,0.009497,0.413324,0.016740,0.579711,1.021790,0.202034,test group,design_5
1,0.000149,0.011003,0.440630,0.020789,0.329851,0.623254,0.608070,test group,design_5
2,0.000312,0.014027,0.397681,0.043827,0.246585,0.770431,0.770159,test group,design_5
3,0.001408,0.032848,0.653785,0.084175,0.338876,0.868404,0.865103,test group,design_5
4,0.000109,0.009751,0.426114,0.016016,0.687116,1.128606,0.079063,test group,design_5
...,...,...,...,...,...,...,...,...,...
535,0.000321,0.015078,0.597525,0.038660,0.474929,1.217699,0.773204,test group,design_7
536,0.000027,0.004269,0.230275,0.010475,0.241644,0.592957,0.592256,test group,design_7
537,0.000042,0.005001,0.238586,0.013884,0.249227,0.691966,0.641112,test group,design_7
538,0.000024,0.004029,0.199995,0.011034,0.221053,0.605424,0.604840,test group,design_7


## 3. Train on all groups at once

In [5]:
# Pool the train and val partitions of every group into two flat lists.
allTrainData, allValData = [], []
print('training on all groups')
for name, data in trainSets.items():
    allTrainData.extend(data)
    allValData.extend(valSets[name])

# one model fitted on the pooled data
gcn = FeaStNet()
history = gcn.trainModel(
    allTrainData,
    allValData,
    epochs=epochs,
    batch_size=256,
    flatten=True,
    logTrans=False,
    ssTrans=True,
    saveDir=f'{saveDir}allGroups',
)
display(plotHistory(history))

# evaluate the pooled model on each group's test partition in turn
for testName, testSet in testSets.items():
    print(f'testing on {testName}\n\n')
    resultsDict = gcn.testModel(testSet, level='field')
    numResults = len(resultsDict['mse'])
    resultsDict['Trained on'] = ['all groups'] * numResults
    resultsDict['Tested on'] = [testName] * numResults
    results = pivotDict(resultsDict)
    resultsList.extend(results)

pd.DataFrame(resultsList)

training on all groups
epoch: 0   trainLoss: 9.7300e-01   valLoss:1.0826e+00  time: 2.85e+00
epoch: 1   trainLoss: 8.5515e-01   valLoss:1.0942e+00  time: 2.87e+00
loading checkpoint 0


/n/ntesting on design_5
/n/ntesting on design_6
/n/ntesting on design_7


Unnamed: 0,mse,mae,mre,maxAE,mae/peak,maxAE/peak,relEAtPeak,Trained on,Tested on
0,0.000102,0.009497,0.413324,0.016740,0.579711,1.021790,0.202034,test group,design_5
1,0.000149,0.011003,0.440630,0.020789,0.329851,0.623254,0.608070,test group,design_5
2,0.000312,0.014027,0.397681,0.043827,0.246585,0.770431,0.770159,test group,design_5
3,0.001408,0.032848,0.653785,0.084175,0.338876,0.868404,0.865103,test group,design_5
4,0.000109,0.009751,0.426114,0.016016,0.687116,1.128606,0.079063,test group,design_5
...,...,...,...,...,...,...,...,...,...
1075,0.000384,0.016773,0.617295,0.040020,0.528309,1.260535,0.367941,all groups,design_7
1076,0.000076,0.007430,0.323261,0.020325,0.420582,1.150489,0.170252,all groups,design_7
1077,0.000072,0.006248,0.261484,0.018434,0.311413,0.918733,0.048575,all groups,design_7
1078,0.000050,0.005475,0.233024,0.020196,0.300378,1.108068,0.117784,all groups,design_7


## 4. Leave one out

In [6]:
# Leave-one-out: for each group, train on every OTHER group and test on the
# held-out one. Note that `trainName` here names the held-out (tested) group.
for trainName, trainSet in trainSets.items():
    allTrainData, allValData = [], []
    print('training on all but '+ trainName)
    # pool train/val partitions of all groups except the held-out one
    for name, data in trainSets.items():
        if name != trainName:
            allTrainData = allTrainData + data
            allValData = allValData + valSets[name]

    gcn = FeaStNet()
    # Each fold gets its own save directory. Reusing saveDir+'allGroups' here
    # would write into the directory used by the all-groups run and by every
    # other fold, so runs would clobber one another's saved output.
    history = gcn.trainModel(allTrainData, allValData, epochs=epochs, batch_size=256, flatten=True, logTrans=False, 
                             ssTrans=True, saveDir=saveDir+'allBut_'+trainName)

    display(plotHistory(history))

    # test on the held-out group only and tag each record
    print('testing on '+trainName+'\n\n')
    resultsDict = gcn.testModel(testSets[trainName], level='field')
    resultsDict['Trained on'] = ['all groups but test group']*len(resultsDict['mse'])
    resultsDict['Tested on'] = [trainName]*len(resultsDict['mse'])
    results = pivotDict(resultsDict)
    resultsList.extend(results)
        
pd.DataFrame(resultsList)

training on all but design_5
epoch: 0   trainLoss: 9.4538e-01   valLoss:1.0780e+00  time: 2.05e+00
epoch: 1   trainLoss: 8.7500e-01   valLoss:1.0885e+00  time: 1.97e+00
loading checkpoint 0




testing on design_5
training on all but design_6
epoch: 0   trainLoss: 9.6460e-01   valLoss:1.0611e+00  time: 2.03e+00
epoch: 1   trainLoss: 8.5588e-01   valLoss:1.1058e+00  time: 1.97e+00
loading checkpoint 0




testing on design_6
training on all but design_7
epoch: 0   trainLoss: 1.0000e+00   valLoss:1.1138e+00  time: 1.94e+00
epoch: 1   trainLoss: 9.2037e-01   valLoss:1.1172e+00  time: 1.92e+00
loading checkpoint 0




testing on design_7


Unnamed: 0,mse,mae,mre,maxAE,mae/peak,maxAE/peak,relEAtPeak,Trained on,Tested on
0,0.000102,0.009497,0.413324,0.016740,0.579711,1.021790,0.202034,test group,design_5
1,0.000149,0.011003,0.440630,0.020789,0.329851,0.623254,0.608070,test group,design_5
2,0.000312,0.014027,0.397681,0.043827,0.246585,0.770431,0.770159,test group,design_5
3,0.001408,0.032848,0.653785,0.084175,0.338876,0.868404,0.865103,test group,design_5
4,0.000109,0.009751,0.426114,0.016016,0.687116,1.128606,0.079063,test group,design_5
...,...,...,...,...,...,...,...,...,...
1615,0.000497,0.018511,0.701854,0.045137,0.583046,1.421729,0.577405,all groups but test group,design_7
1616,0.000081,0.008095,0.355224,0.016775,0.458218,0.949523,0.241007,all groups but test group,design_7
1617,0.000068,0.007294,0.315784,0.020253,0.363501,1.009398,0.331035,all groups but test group,design_7
1618,0.000051,0.006177,0.268852,0.013292,0.338937,0.729298,0.264892,all groups but test group,design_7


## 5. Plot results

In [13]:
# Compare MSE across training strategies, one facet column per tested group.
df = pd.DataFrame(resultsList)

# Explicit display order for every 'Trained on' label written into resultsList;
# the leave-one-out label is listed too so its position does not depend on
# how unlisted categories are placed.
order = ['test group', 'all groups', 'all groups but test group']

# bars: mean MSE per training strategy (log scale)
barChart = alt.Chart(df).mark_bar().encode(
    x=alt.X('Trained on:N', sort=order, title='', axis=alt.Axis(ticks=False, labels=False)),
    y=alt.Y('mean(mse):Q', scale=alt.Scale(type='log'), axis=alt.Axis(tickCount=8, format=".0e"), title='MSE'),
    color=alt.Color('Trained on:N', sort=order),
    opacity = alt.OpacityValue(0.8),
    tooltip='mean(mse):Q'
).properties(width=75, height=200)

# points: per-sample MSE overlaid on the bars
scatter = alt.Chart(df).mark_circle(size=20).encode(
    x=alt.X('Trained on:N', title='', sort=order),
    y=alt.Y('mse:Q', scale=alt.Scale(type='log')),
    color=alt.Color('Trained on:N', sort=order),
    opacity = alt.OpacityValue(0.3),
    tooltip='mse:Q'
)

alt.layer(barChart, scatter, data=df).facet(
    column=alt.Column('Tested on:N'))