# Transfer learning tests
Eamon Whalen

In [1]:
import sys
import os
import glob
import numpy as np
import pandas as pd
import altair as alt

sys.path.append('./models')
from feastnetSurrogateModel import FeaStNet
from pointRegressorSurrogateModel import PointRegressor

sys.path.append('./readers')
from loadGhGraphs import loadGhGraphs

sys.path.append('./visualization')
from altTrussViz import plotTruss, interactiveErrorPlot

sys.path.append('./util')
from gcnSurrogateUtil import *

## 1. Load test data

In [2]:
# Dataset location; the design_7 / N=1000 file is the one used as test data.
dataDir = "/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/"
testFile = os.path.join(dataDir, 'design_7_N_1000.csv')
allGraphsUnfiltered = loadGhGraphs(testFile, NUM_DV=5)

# Per-graph maximum of |graph.y|, summarized before any filtering is applied.
maxes = [max(np.abs(g.y.numpy()).ravel()) for g in allGraphsUnfiltered]
source = pd.DataFrame(maxes, columns=['maxes'])
source.describe()

Unnamed: 0,maxes
count,1000.0
mean,0.199516
std,2.524026
min,0.00657
25%,0.015425
50%,0.024334
75%,0.046981
max,67.325867


In [3]:
# Filter the test graphs with filterbyDisp and summarize the same statistic again.
# NOTE(review): 0.9 presumably keeps the 90% of graphs with the smallest
# peak |y| (1000 -> 900 graphs in the recorded output) -- confirm in gcnSurrogateUtil.
testGraphs = filterbyDisp(allGraphsUnfiltered, 0.9)
maxes = [max(np.abs(g.y.numpy()).ravel()) for g in testGraphs]
source = pd.DataFrame(maxes, columns=['maxes'])

# Column-wise maximum of the filtered set (a pandas Series, not a scalar).
maxDispCutoff = source.max()
source.describe()

Unnamed: 0,maxes
count,900.0
mean,0.028952
std,0.019071
min,0.00657
25%,0.014951
50%,0.021719
75%,0.038367
max,0.097861


## 2. Load pre-train data

In [4]:
# Every "*1000.csv" file in dataDir except the one already used as test data.
pretrainFiles = glob.glob(os.path.join(dataDir, '*1000.csv'))
pretrainFiles.remove(testFile)

# Load and filter each remaining design, pooling the graphs into one list.
allPretrainGraphs = []
for pretrainFile in pretrainFiles:
    # File name without directories, cut at the first "_N" (e.g. "design_9").
    designName = pretrainFile.rsplit('/', 1)[-1].partition('_N')[0]
    print(f'loading {designName}')
    graphsUnfiltered = loadGhGraphs(pretrainFile, NUM_DV=5)
    graphs = filterbyDisp(graphsUnfiltered, 0.9)
    allPretrainGraphs += graphs

print(f'loaded {len(allPretrainGraphs)} pretraining graphs')

# Split into train / validation; testSize=0.0, so the third return value is not used here.
pretrainData, pretrainValData, test = partitionGraphList(
    allPretrainGraphs,
    testSize=0.0,
    valSize=0.15,
)

loading design_9
loading design_6
loading design_8
loading design_5
loaded 3600 pretraining graphs


## 3. Initial training

In [10]:
# Root folder for this experiment's results; sub-folders are appended per model.
saveDir = './results/transferLrn01/'

# Pre-train a FeaStNet model on the pooled pre-training graphs.
ptrGcn = FeaStNet()
history = ptrGcn.trainModel(
    pretrainData,
    pretrainValData,
    epochs=100,
    batch_size=256,
    flatten=True,
    logTrans=False,
    ssTrans=True,
    saveDir=saveDir + 'preTrain/gcn/',
)

plotHistory(history)

epoch: 0   trainLoss: 8.3074e-01   valLoss:8.8119e-01  time: 9.98e+00
epoch: 1   trainLoss: 6.3161e-01   valLoss:8.0567e-01  time: 5.30e+00
epoch: 2   trainLoss: 5.0887e-01   valLoss:4.6461e-01  time: 5.19e+00
epoch: 3   trainLoss: 4.2253e-01   valLoss:3.9987e-01  time: 5.07e+00
epoch: 4   trainLoss: 3.6052e-01   valLoss:3.5567e-01  time: 5.09e+00
epoch: 5   trainLoss: 3.1862e-01   valLoss:3.2774e-01  time: 5.07e+00
epoch: 6   trainLoss: 2.8169e-01   valLoss:2.9000e-01  time: 5.14e+00
epoch: 7   trainLoss: 2.6012e-01   valLoss:2.8451e-01  time: 5.21e+00
epoch: 8   trainLoss: 2.3791e-01   valLoss:2.4848e-01  time: 5.27e+00
epoch: 9   trainLoss: 2.1157e-01   valLoss:2.2721e-01  time: 5.31e+00
epoch: 10   trainLoss: 1.9208e-01   valLoss:2.2545e-01  time: 5.29e+00
epoch: 11   trainLoss: 1.7342e-01   valLoss:2.1352e-01  time: 5.28e+00
epoch: 12   trainLoss: 1.6153e-01   valLoss:1.9441e-01  time: 5.22e+00
epoch: 13   trainLoss: 1.4882e-01   valLoss:1.6108e-01  time: 5.29e+00
epoch: 14   trai

In [18]:
# Tabulate the losses returned by trainModel, one row per epoch.
histDf = pd.DataFrame(history)
histDf.index = histDf.index.rename('epoch')

# Because the index is named 'epoch', reset_index() exposes it as a column
# called 'epoch' (not 'index'), so the chart must encode 'epoch:Q'.
# The chart is bound to a name so it can be displayed explicitly; as a bare
# expression in the middle of the cell it would be silently discarded.
lossChart = alt.Chart(histDf.reset_index()).transform_fold(
        ['train', 'val'],
        as_=['metric', 'value']
    ).mark_line().encode(
        alt.X('epoch:Q'),
        alt.Y('value:Q', axis=alt.Axis(title='loss')),
        color=alt.Color('metric:N'),
        tooltip=['epoch:Q', 'value:Q']
    ).properties(width=400, height=200)

# Render the chart, then leave the table as the cell's last expression.
lossChart.display()
histDf

Unnamed: 0_level_0,train,val
epoch,Unnamed: 1_level_1,Unnamed: 2_level_1
0,0.830739,0.881189
1,0.631611,0.805666
2,0.508872,0.464607
3,0.422534,0.399872
4,0.360520,0.355670
...,...,...
95,0.038734,0.045238
96,0.040373,0.045654
97,0.037690,0.043101
98,0.037591,0.044192


## 4. Transfer learning study

In [None]:
# Train a random-forest baseline on each design_7 training file and score it
# on both its own training set and the held-out test graphs.
# dataDir is reused from the test-data cell: testFile was built from it, so
# the path passed to remove() matches the glob result exactly.
trainDataFiles = glob.glob(os.path.join(dataDir, 'design_7*.csv'))
# Exclude the held-out test file (the original referenced an undefined `doeFile`).
trainDataFiles.remove(testFile)

allResults = []
for trainDataFile in trainDataFiles:
    trainData = loadGhGraphs(trainDataFile, NUM_DV=5)
    trainSize = len(trainData)
    print(f'loaded train set of size {trainSize}')

    rf = PointRegressor('Random Forest')
    # NOTE(review): the training set is also passed as the validation set -- confirm
    # this is intended for the random-forest baseline.
    rf.trainModel(trainData, trainData,
                     flatten=False,
                     logTrans=False,
                     ssTrans=True,
                     saveDir=saveDir+f'{trainSize}/rf/')

    trainRes = rf.testModel(trainData)
    trainRes['set'] = 'Train'
    trainRes['train size'] = trainSize
    allResults.append(trainRes)

    # Evaluate on the filtered test graphs (the original referenced an undefined `testData`).
    testRes = rf.testModel(testGraphs)
    testRes['set'] = 'Test'
    testRes['train size'] = trainSize
    allResults.append(testRes)

pd.DataFrame(allResults)