# Transfer learning tests
Eamon Whalen

In [1]:
import sys
import os
import glob
import numpy as np
import pandas as pd
import altair as alt

sys.path.append('./models')
from feastnetSurrogateModel import FeaStNet
from pointRegressorSurrogateModel import PointRegressor

sys.path.append('./readers')
from loadGhGraphs import loadGhGraphs

sys.path.append('./visualization')
from altTrussViz import plotTruss, interactiveErrorPlot

sys.path.append('./util')
from gcnSurrogateUtil import *

## 1. Load test data

In [None]:
dataDir = "/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/"
testFile = os.path.join(dataDir, 'design_7_N_1000.csv')
allGraphsUnfiltered = loadGhGraphs(doeFile, NUM_DV=5)

maxes = [max(np.abs(graph.y.numpy().flatten())) for graph in allGraphsUnfiltered]
source = pd.DataFrame(maxes, columns=['maxes'])
source.describe()

In [None]:
testGraphs = filterbyDisp(allGraphsUnfiltered, 0.9)
maxes = [max(np.abs(graph.y.numpy().flatten())) for graph in testGraphs]
source = pd.DataFrame(maxes, columns=['maxes'])
maxDispCutoff = source.max()
source.describe()

## 2. Load pre-train data

In [11]:
pretrainFiles = glob.glob(os.path.join(dataDir, '*1000.csv'))
pretrainFiles.remove(testFile)

allPretrainGraphs = []
for pretrainFile in pretrainFiles:
    designName = pretrainFile.split('/')[-1].split('_N')[0]
    print(f'loading {designName}')
    graphsUnfiltered = loadGhGraphs(pretrainFile, NUM_DV=5)
    graphs = filterbyDisp(graphsUnfiltered, 0.9)
    allPretrainGraphs.extend(graphs)

print(f'loaded {len(allPretrainGraphs)} pretraining graphs')
trainData, valData, testData = partitionGraphList(allGraphs)

loading design_9
loading design_6
loading design_8
loading design_5
loaded 3600 pretraining graphs


## 3. Initial training

In [None]:
saveDir = './results/transferLrn01/'
ptrGcn = FeaStNet()
history = ptrGcn.trainModel(allPretrainGraphs, valData, 
                         epochs=100, 
                         batch_size=256, 
                         flatten=True, 
                         logTrans=False, 
                         ssTrans=True, 
                         saveDir=saveDir+f'preTrain/gcn/)

plotHistory(history)

## 4. Transfer learning study

In [20]:
dataDir = "/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/"
trainDataFiles = glob.glob(os.path.join(dataDir, 'design_7*.csv'))
trainDataFiles.remove(doeFile)

allResults = []
for trainDataFile in trainDataFiles:
    trainData = loadGhGraphs(trainDataFile, NUM_DV=5)
    trainSize = len(trainData)
    print(f'loaded train set of size {trainSize}')
    
    rf = PointRegressor('Random Forest')
    rf.trainModel(trainData, trainData, 
                     flatten=False, 
                     logTrans=False, 
                     ssTrans=True, 
                     saveDir=saveDir+f'{trainSize}/rf/')

    trainRes = rf.testModel(trainData)
    trainRes['set'] = 'Train'
    trainRes['train size'] = trainSize
    allResults.append(trainRes)
    
    testRes = rf.testModel(testData)
    testRes['set'] = 'Test'
    testRes['train size'] = trainSize
    allResults.append(testRes)
    
pd.DataFrame(allResults)

30
trained 30 random forest models in 3.63 seconds
10
trained 30 random forest models in 3.40 seconds


Unnamed: 0,mse,mae,mre,peakR2,maxAggR2,meanAggR2,minAggR2,set,train size
0,0.000171,0.004807,0.110205,0.945597,0.953342,0.907048,0.788241,Train,30
1,0.000301,0.007306,0.239722,-6.456106,0.566105,-1.629988,-15.108212,Test,30
2,0.00059,0.009096,0.163803,0.866579,0.922484,0.845893,0.788937,Train,10
3,0.000219,0.009061,0.306289,-2.643653,0.394297,-1.249129,-13.83298,Test,10
