# Transfer learning tests
Eamon Whalen

In [1]:
import sys
import os
import glob
import numpy as np
import pandas as pd
import altair as alt

sys.path.append('./models')
from feastnetSurrogateModel import FeaStNet
from pointRegressorSurrogateModel import PointRegressor

sys.path.append('./readers')
from loadGhGraphs import loadGhGraphs

sys.path.append('./visualization')
from altTrussViz import plotTruss, interactiveErrorPlot

sys.path.append('./util')
from gcnSurrogateUtil import *

## 1. Load test data

In [2]:
dataDir = "/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/"
testFile = os.path.join(dataDir, 'design_7_N_1000.csv')
allGraphsUnfiltered = loadGhGraphs(testFile, NUM_DV=5)

maxes = [max(np.abs(graph.y.numpy().flatten())) for graph in allGraphsUnfiltered]
source = pd.DataFrame(maxes, columns=['maxes'])
source.describe()

Unnamed: 0,maxes
count,1000.0
mean,0.199516
std,2.524026
min,0.00657
25%,0.015425
50%,0.024334
75%,0.046981
max,67.325867


In [3]:
testData = filterbyDisp(allGraphsUnfiltered, 0.9)
maxes = [max(np.abs(graph.y.numpy().flatten())) for graph in testData]
source = pd.DataFrame(maxes, columns=['maxes'])
maxDispCutoff = source.max()
source.describe()

Unnamed: 0,maxes
count,900.0
mean,0.028952
std,0.019071
min,0.00657
25%,0.014951
50%,0.021719
75%,0.038367
max,0.097861


## 2. Load pre-train data

In [4]:
pretrainFiles = glob.glob(os.path.join(dataDir, '*1000.csv'))
pretrainFiles.remove(testFile)

allPretrainGraphs = []
for pretrainFile in pretrainFiles:
    designName = pretrainFile.split('/')[-1].split('_N')[0]
    print(f'loading {designName}')
    graphsUnfiltered = loadGhGraphs(pretrainFile, NUM_DV=5)
    graphs = filterbyDisp(graphsUnfiltered, 0.9)
    allPretrainGraphs.extend(graphs)

print(f'loaded {len(allPretrainGraphs)} pretraining graphs')
pretrainData, pretrainValData, _ = partitionGraphList(allPretrainGraphs, testSize=0.0, valSize=0.15)

loading design_9
loading design_6
loading design_8
loading design_5
loaded 3600 pretraining graphs


## 3. Initial training

In [5]:
saveDir = './results/transferLrn_des7_01/'
ptrGcn = FeaStNet()
history = ptrGcn.trainModel(pretrainData, pretrainValData, 
                         epochs=100, 
                         batch_size=256, 
                         flatten=True, 
                         logTrans=False, 
                         ssTrans=True, 
                         saveDir=saveDir+f'preTrain/gcn/')

ptrGcnCheckptFile = ptrGcn.checkptFile
plotHistory(history)

epoch: 0   trainLoss: 9.0857e-01   valLoss:9.2913e-01  time: 5.29e+00
epoch: 1   trainLoss: 7.1825e-01   valLoss:8.8634e-01  time: 4.94e+00
epoch: 2   trainLoss: 5.9192e-01   valLoss:5.0052e-01  time: 4.95e+00
epoch: 3   trainLoss: 4.8664e-01   valLoss:4.2644e-01  time: 4.95e+00
epoch: 4   trainLoss: 4.1146e-01   valLoss:3.7613e-01  time: 4.99e+00
epoch: 5   trainLoss: 3.5204e-01   valLoss:3.3834e-01  time: 5.01e+00
epoch: 6   trainLoss: 3.1701e-01   valLoss:3.0320e-01  time: 5.02e+00
epoch: 7   trainLoss: 2.8547e-01   valLoss:2.7650e-01  time: 4.99e+00
epoch: 8   trainLoss: 2.5552e-01   valLoss:2.4898e-01  time: 5.09e+00
epoch: 9   trainLoss: 2.3239e-01   valLoss:2.3264e-01  time: 4.99e+00
epoch: 10   trainLoss: 2.1400e-01   valLoss:2.1519e-01  time: 4.98e+00
epoch: 11   trainLoss: 1.9565e-01   valLoss:2.0955e-01  time: 4.98e+00
epoch: 12   trainLoss: 1.7706e-01   valLoss:1.9891e-01  time: 5.01e+00
epoch: 13   trainLoss: 1.6325e-01   valLoss:1.8650e-01  time: 4.99e+00
epoch: 14   trai

In [6]:
trainRes = ptrGcn.testModel(pretrainData)
testRes = ptrGcn.testModel(testData) # unseen topology
pd.DataFrame([trainRes, testRes], index=['train', 'test'])

Unnamed: 0,mse,mae,mre,peakR2,maxAggR2,meanAggR2,minAggR2
train,2.6e-05,0.003139,0.113388,0.93765,,,
test,4.3e-05,0.004044,0.171898,0.66472,0.879246,-0.739851,-21.937541


## 4. Transfer learning study

In [7]:
dataDir = "/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/"
trainDataFiles = glob.glob(os.path.join(dataDir, 'design_7*.csv'))
trainDataFiles.remove(testFile)

allResults = []
for trainDataFile in trainDataFiles:
    trainData = loadGhGraphs(trainDataFile, NUM_DV=5)
    trainSize = len(trainData)
    print(f'loaded train set of size {trainSize}')
    
    
    ### fresh neural network ###
    gcn = FeaStNet()
    history = gcn.trainModel(trainData, trainData, 
                         epochs=3, 
                         batch_size=256, 
                         flatten=True, 
                         logTrans=False, 
                         ssTrans=True, 
                         saveDir=saveDir+f'{trainSize}/gcn/')
    
    trainRes = gcn.testModel(trainData)
    trainRes['model'] = 'Fresh'
    trainRes['set'] = 'Train'
    trainRes['train size'] = trainSize
    allResults.append(trainRes)
    
    testRes = gcn.testModel(testData)
    testRes['model'] = 'Fresh'
    testRes['set'] = 'Test'
    testRes['train size'] = trainSize
    allResults.append(testRes)
    pd.DataFrame(allResults).to_csv(saveDir+'testResults.csv', index=False)

    
    ### transfer learning ###
    ptrGcn = FeaStNet()
    history = gcn.trainModel(trainData, trainData, 
                             restartFile=ptrGcnCheckptFile
                             epochs=3, 
                             batch_size=256, 
                             flatten=True, 
                             logTrans=False, 
                             ssTrans=True, 
                             saveDir=saveDir+f'{trainSize}/ptrGcn/')
    
    trainRes = gcn.testModel(trainData)
    trainRes['model'] = 'Transfer learning'
    trainRes['set'] = 'Train'
    trainRes['train size'] = trainSize
    allResults.append(trainRes)
    
    testRes = gcn.testModel(testData)
    testRes['model'] = 'Transfer learning'
    testRes['set'] = 'Test'
    testRes['train size'] = trainSize
    allResults.append(testRes)
    pd.DataFrame(allResults).to_csv(saveDir+'testResults.csv', index=False)
    
    
    ### random forest ###
    rf = PointRegressor('Random Forest')
    rf.trainModel(trainData, trainData, 
                     flatten=False, 
                     logTrans=False, 
                     ssTrans=True, 
                     saveDir=saveDir+f'{trainSize}/rf/')

    trainRes = rf.testModel(trainData)
    trainRes['model'] = 'Random Forest'
    trainRes['set'] = 'Train'
    trainRes['train size'] = trainSize
    allResults.append(trainRes)
    
    testRes = rf.testModel(testData)
    testRes['model'] = 'Random Forest'
    testRes['set'] = 'Test'
    testRes['train size'] = trainSize
    allResults.append(testRes)
    pd.DataFrame(allResults).to_csv(saveDir+'testResults.csv', index=False)
    
    
pd.DataFrame(allResults)

SyntaxError: invalid syntax (<ipython-input-7-36b31bd7d079>, line 40)

In [None]:
df = pd.DataFrame(allResults)
df[df.set=='Test']