In [1]:
import sys
import numpy as np
import pandas as pd
import altair as alt

sys.path.append('./models')
from feastnetSurrogateModel import FeaStNet

sys.path.append('./readers')
from loadGhGraphs import loadGhGraphs

sys.path.append('./visualization')
from altTrussViz import plotTruss, interactiveErrorPlot

sys.path.append('./util')
from gcnSurrogateUtil import *

In [2]:
# Read every graph listed in the design-of-experiments CSV.
doeFile = "/home/ewhalen/projects/data/trusses/2D_Truss_v1.3/design_7_N_1000.csv"
allGraphsUnfiltered = loadGhGraphs(doeFile, NUM_DV=5)

# One number per graph: the largest absolute entry of its response tensor `y`.
maxes = [
    max(np.abs(graph.y.numpy()).ravel())
    for graph in allGraphsUnfiltered
]

# Summary statistics of those per-graph maxima (rendered as the cell output).
source = pd.DataFrame(data=maxes, columns=['maxes'])
source.describe()

Unnamed: 0,maxes
count,1000.0
mean,0.199516
std,2.524026
min,0.00657
25%,0.015425
50%,0.024334
75%,0.046981
max,67.325867


In [3]:
# Draw the first unfiltered graph together with its deformed shape.
# NOTE(review): defScale presumably exaggerates the deformation for
# visibility - confirm against altTrussViz.plotTruss.
plotTruss(
    allGraphsUnfiltered[0],
    showDeformed=True,
    defScale=100,
)

In [4]:
# Filter the raw graphs with filterbyDisp (the summary below reports 900 of
# the 1000 graphs remaining), then split them into train/validation/test sets.
allGraphs = filterbyDisp(allGraphsUnfiltered, 0.9)
trainData, valData, testData = partitionGraphList(allGraphs)

# Largest absolute entry of the response tensor `y`, one value per kept graph.
maxes = [
    max(np.abs(graph.y.numpy()).ravel())
    for graph in allGraphs
]

# Summary statistics after filtering (rendered as the cell output).
source = pd.DataFrame(data=maxes, columns=['maxes'])
source.describe()

Unnamed: 0,maxes
count,900.0
mean,0.028952
std,0.019071
min,0.00657
25%,0.014951
50%,0.021719
75%,0.038367
max,0.097861


In [10]:
## random forest class
import sys
import os
from time import time
import numpy as np
import torch
import torch_geometric as tg
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import pickle

sys.path.append('./util')
from gcnSurrogateUtil import *

class RandomForest():
    """Random-forest surrogate: flattened ``graph.pos`` -> flattened ``graph.y``.

    One ``RandomForestRegressor`` is fitted per response column. Before
    fitting, the responses can optionally be standardised (``ssTrans``) and/or
    passed through a signed log transform (``logTrans`` argument of
    ``trainModel``); ``predict`` undoes both so predictions are returned in
    the original response units.

    All graphs must produce ``pos`` and ``y`` arrays of the same size, because
    they are stacked into plain 2-D tables.
    """

    # Responses are reshaped to this many columns after (inverse) scaling.
    # NOTE(review): presumably the two displacement components of each
    # joint - confirm against the data reader.
    _RESPONSE_COLS = 2

###############################################################################
    def __init__(self):
        super(RandomForest, self).__init__()
        # Identity transforms / empty model until trainModel() is called.
        self.flatten = False
        # The flag is deliberately NOT stored as ``self.logTrans``: that name
        # is the transform method and must stay callable.
        self.useLogTrans = False
        self.ssTrans = False
        self.ss = None
        self.allModels = []
        self.checkptFile = None

###############################################################################
    @staticmethod
    def _toNumpy(values):
        """Return ``values`` (torch tensor or array-like) as a numpy array."""
        if hasattr(values, 'detach'):
            values = values.detach().cpu().numpy()
        return np.asarray(values)

###############################################################################
    def _featureMatrix(self, graphList):
        """Stack the flattened ``pos`` of every graph into one row per graph."""
        if len(graphList) == 0:
            raise ValueError('at least one graph is required')
        return np.vstack([self._toNumpy(graph.pos).flatten() for graph in graphList])

###############################################################################
    def logTrans(self, x):
        """Signed log transform: ``sign(x) * log(10*|x| + 1)``."""
        return np.sign(x)*np.log(10.0*np.abs(x)+1.0)

###############################################################################
    def invLogTrans(self, y):
        """Inverse of ``logTrans``: ``sign(y) * (exp(|y|) - 1) / 10``."""
        return np.sign(y)*(np.exp(np.abs(y))-1.0)/10.0

###############################################################################
    def fitSS(self, graphList):
        """Fit ``self.ss`` (a ``StandardScaler``) on the responses of ``graphList``.

        With ``self.flatten`` every response value is one sample of a single
        feature; otherwise each graph is one sample with one feature per
        response value.

        Raises:
            ValueError: if ``graphList`` is empty.
        """
        if len(graphList) == 0:
            raise ValueError('fitSS needs at least one graph')
        rowShape = (-1, 1) if self.flatten else (1, -1)
        # Build the table in one vstack instead of growing it graph by graph.
        allResponses = np.vstack([self._toNumpy(graph.y).reshape(rowShape)
                                  for graph in graphList])
        self.ss = StandardScaler()
        self.ss.fit(allResponses)
        return

###############################################################################
    def applySS(self, graphList):
        """Return cloned graphs whose ``y`` has the configured transforms applied.

        The input graphs are left untouched. Standard scaling (if enabled) is
        applied first, then the signed log transform (if enabled).
        """
        transformedGraphList = [g.clone() for g in graphList] # deep copy
        for graph in transformedGraphList:
            if not (self.ssTrans or self.useLogTrans):
                continue  # nothing to do, keep the cloned tensor as is
            y = self._toNumpy(graph.y)
            if self.ssTrans:
                rowShape = (-1, 1) if self.flatten else (1, -1)
                y = self.ss.transform(y.reshape(rowShape)).reshape(-1, self._RESPONSE_COLS)
            if self.useLogTrans:
                y = self.logTrans(y)
            graph.y = torch.as_tensor(y, dtype=torch.float)
        return transformedGraphList

###############################################################################
    def applyInvSS(self, out):
        """Undo ``applySS`` on a numpy array of transformed responses.

        The inverse log transform is applied first, then the inverse standard
        scaling (mirror order of ``applySS``). When scaling is enabled the
        result is reshaped to ``(-1, 2)``.
        """
        out = np.asarray(out)
        if self.useLogTrans:
            out = self.invLogTrans(out)
        if self.ssTrans:
            rowShape = (-1, 1) if self.flatten else (1, -1)
            out = self.ss.inverse_transform(out.reshape(rowShape)).reshape(-1, self._RESPONSE_COLS)
        return out

###############################################################################
    def trainModel(self, trainGraphs, valGraphs, saveDir=None, flatten=False, logTrans=True, ssTrans=True):
        """Fit one random forest per response column.

        Args:
            trainGraphs: graphs used for fitting (``pos`` -> features,
                transformed ``y`` -> targets).
            valGraphs: accepted for interface compatibility; not used, since
                the forests are fitted without a validation loop.
            saveDir: if given, the trained object is pickled to
                ``<saveDir>/trainedRfModel`` (directory created if needed).
            flatten: scale all response values with one shared mean/std
                instead of one per response column.
            logTrans: apply the signed log transform to the targets.
            ssTrans: apply standard scaling to the targets.
        """
        t = time()
        # data transformation
        self.flatten = flatten
        self.useLogTrans = logTrans
        self.ssTrans = ssTrans
        self.fitSS(trainGraphs)
        trainGraphsScaled = self.applySS(trainGraphs)

        # put data in tabular form; targets come from the *transformed* graphs
        # so that predict() can consistently apply the inverse transform
        Xtrain = self._featureMatrix(trainGraphs)
        Ytrain = np.vstack([self._toNumpy(graph.y).flatten() for graph in trainGraphsScaled])

        # build one regressor per response column (not per feature column)
        self.allModels = []
        for i in range(Ytrain.shape[1]):
            rf = RandomForestRegressor()
            rf.fit(Xtrain, Ytrain[:,i])
            self.allModels.append(rf)

        # save model
        if saveDir:
            os.makedirs(saveDir, exist_ok=True)
            self.checkptFile = os.path.join(saveDir, 'trainedRfModel')
            # pickle.dump needs a binary file object, not a path
            with open(self.checkptFile, 'wb') as fp:
                pickle.dump(self, fp)

        print(f'trained {len(self.allModels)} random forest models in {time()-t:.2f} seconds')

###############################################################################
    def predict(self, inputs):
        """Predict the response of every graph in ``inputs``.

        Returns:
            A list with one ``(-1, 2)`` numpy array per input graph, in the
            original (un-transformed) response units.

        Raises:
            RuntimeError: if called before ``trainModel``.
        """
        models = getattr(self, 'allModels', None)
        if not models:
            raise RuntimeError('predict called before trainModel')
        if len(inputs) == 0:
            return []

        # one column of predictions per fitted regressor, one row per graph
        X = self._featureMatrix(inputs)
        predScaled = np.column_stack([model.predict(X) for model in models])

        preds = []
        for row in predScaled:
            p = np.asarray(self.applyInvSS(row)).reshape(-1, self._RESPONSE_COLS)
            preds.append(p)
        return preds

###############################################################################
    def testModel(self, inputs, baselineRef=None, level='set'):
        """Predict ``inputs`` and score the predictions against their ``y``.

        ``baselineRef`` (optional list of graphs) and ``level`` are forwarded
        to ``computeFieldLossMetrics``; its return value is returned unchanged.
        """
        preds = self.predict(inputs)
        if baselineRef: baselineRef = [b.y.cpu().numpy() for b in baselineRef]
        return computeFieldLossMetrics([g.y.cpu().numpy() for g in inputs], 
                                          preds, 
                                          baselineRef=baselineRef, level=level)

###############################################################################
    def loadModel(self, modelFile):
        """Load and return a pickled model.

        Args:
            modelFile: path to the pickle file, or an already opened binary
                file object.
        """
        # SECURITY: unpickling can execute arbitrary code - only load files
        # written by a trusted trainModel() run.
        if hasattr(modelFile, 'read'):
            return pickle.load(modelFile)
        with open(modelFile, 'rb') as fp:
            return pickle.load(fp)
    
    
# Fit the random-forest surrogate with standard scaling only (no log
# transform) and store the trained object under ./results/rf01/.
rf = RandomForest()
rf.trainModel(
    trainData,
    valData,
    saveDir='./results/rf01/',
    flatten=True,
    ssTrans=True,
    logTrans=False,
)

# Score the train split first, then the test split, and show both side by side.
trainRes, testRes = (rf.testModel(split) for split in (trainData, testData))
pd.DataFrame([trainRes, testRes], index=['train', 'test'])

trained 30 random forest models in 12.41 seconds


In [None]:
    # Label for this study iteration: running counter `i` + training-set size.
    testDir = os.path.join(studyName, f'{i}_trainsize_{len(trainIds)}') 
    # Look up (and copy) the graphs that belong to the train / test run ids.
    trainGraphs = [allGraphs[runIds.index(runId)].clone() for runId in trainIds]
    testGraphs = [allGraphs[runIds.index(runId)].clone() for runId in testIds]
    
    # Tabular form: one row per graph, flattened `pos` as features and
    # flattened `y` as targets.
    Xtrain = np.vstack([graph.pos.numpy().flatten() for graph in trainGraphs])
    Xtest = np.vstack([graph.pos.numpy().flatten() for graph in testGraphs])
    Ytrain = np.vstack([graph.y.numpy().flatten() for graph in trainGraphs])
    Ytest = np.vstack([graph.y.numpy().flatten() for graph in testGraphs])
    PredTrain = np.zeros_like(Ytrain)
    PredTest = np.zeros_like(Ytest)
    
    # build one regressor per response column
    # The loop variable is `col`, not `i`: reusing `i` here overwrote the
    # iteration counter that `testDir` and `i += 1` rely on. The range is the
    # number of response columns (Ytrain), not the number of features (Xtrain).
    for col in range(Ytrain.shape[1]):
        RF = RandomForestRegressor()
        RF.fit(Xtrain, Ytrain[:,col])
        PredTrain[:,col] = RF.predict(Xtrain)
        PredTest[:,col] = RF.predict(Xtest)
        
    # Both metric sets use the training responses as the baseline reference.
    trainMetrics = cu.computeFieldLossMetrics([Ytrain[k,:] for k in range(Ytrain.shape[0])], 
                                              [PredTrain[k,:] for k in range(Ytrain.shape[0])], 
                                              baselineRef=[Ytrain[k,:] for k in range(Ytrain.shape[0])],
                                              level='set')
    
    testMetrics = cu.computeFieldLossMetrics([Ytest[k,:] for k in range(Ytest.shape[0])], 
                                             [PredTest[k,:] for k in range(Ytest.shape[0])], 
                                             baselineRef=[Ytrain[k,:] for k in range(Ytrain.shape[0])],
                                             level='set')
    
    i += 1
    # Append one CSV row per iteration; the header is written only once,
    # while `newResFile` is still true.
    with open(resultFileName, 'a') as fp:
        if newResFile:
            colNames = ['train_size']+['train_'+key for key in trainMetrics.keys()] + ['test_'+key for key in testMetrics.keys()]
            fp.write(','.join(colNames)+'\n')
            newResFile = False
            
        stats = ','.join([str(len(trainIds))]+[str(v) for v in trainMetrics.values()]+[str(v) for v in testMetrics.values()])
        fp.write(f'{stats}\n')