# Notebook to fit XGB and ANN models for Gij

In [None]:
import numpy as np
%matplotlib notebook
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error
from mpl_toolkits.mplot3d import axes3d, Axes3D 
import os
import time
import operator
import random
import matplotlib.ticker as ticker
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PowerTransformer,QuantileTransformer, RobustScaler
import pickle
from keras.models import Sequential, load_model, Model
from keras.layers import Dense, Activation, Dropout, GaussianNoise, Conv2D, MaxPooling2D, Flatten, Conv1D,MaxPooling1D
from keras.losses import logcosh
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import regularizers
import glob
from keras import optimizers
from keras import Input, optimizers, layers
import math
import xgboost as xgb
import tensorflow as tf


In [None]:
# Function to read the relevant data
def read_data(loc,d,delU):
    """Load every field stored for one snapshot and derive a few helper arrays.

    Parameters
    ----------
    loc : str
        Directory holding the ``*.dat`` files. File names are appended as
        ``loc + '/<stem>_' + d + '.dat'``.
    d : str
        Snapshot suffix used in the file names; ``int(d)`` is also used to
        build the time array.
    delU : float
        Velocity difference entering the ``delta`` length scale.

    Returns
    -------
    tuple
        ``(Gij, Tij, meanVelGrad, eps, K, Lij, Reyn, delta, x, t_time, mvgg)``
        where ``eps`` and ``K`` are columns 1 and 0 of the ``k-eps`` file and
        ``mvgg`` has columns 3, 6, 7, 12, 15, 16, 21, 24, 25 removed.
    """
    def _load(stem):
        # Same path layout as every file of a snapshot: <loc>/<stem>_<d>.dat
        return np.loadtxt(loc + '/' + stem + '_' + d + '.dat')

    Gij = _load('Gij')
    Tij = _load('Tij')
    k_eps = _load('k-eps')
    meanVelGrad = _load('mean_vel_grad')
    Lij = _load('lambda')
    Reyn = _load('Reyn')
    rho = _load('rho')
    mv = _load('mean_vel')
    mvgg = _load('mvgg')

    # Drop the listed columns of mvgg.
    idel = [3, 6, 7, 12, 15, 16, 21, 24, 25]
    mvgg = np.delete(mvgg, idel, axis=1)

    # One constant length scale per point, built from the density-weighted
    # velocity-deficit sum.
    # NOTE(review): looks like a momentum thickness with grid spacing 7.34e-6
    # and reference density 0.68 - confirm.
    delta = np.ones(Reyn.shape[0])*np.sum(rho*(delU/2-mv[:,0])*(delU/2+mv[:,0]))*7.34e-6/(0.68*delU**2)

    # Centred coordinate on a fixed 576-point grid.
    # NOTE(review): the plotting cells use 0.423e-2 for the same extent - confirm
    # which value is intended (x is not used by get_basis_vect).
    x = np.linspace(0,0.0423,576)
    x = x-0.0423/2.0

    eps = np.copy(k_eps[:,1])
    K = k_eps[:,0]

    # Snapshot time, repeated on the 576-point grid.
    t_time = np.zeros(576)
    t_time[:] = int(d)*3.38e-7

    # The former `index`/`loend`/`hiend` locals were never used and raised
    # IndexError when no K value exceeded 5% of its maximum; they are removed.
    return Gij, Tij, meanVelGrad, eps, K, Lij, Reyn, delta,x,t_time,mvgg

    

    

In [None]:
# Forms the input features as described in the paper
def get_basis_vect(b,Sij,omega,meanVelGrad,Reyn,Tij,eps,K,Lij,x,t_time):
    """Assemble the 24 input features for every point.

    Only ``b``, ``meanVelGrad``, ``Lij``, ``eps`` and ``K`` are read; the
    remaining parameters are accepted but not used.

    Parameters
    ----------
    b, meanVelGrad, Lij : ndarray, shape (N, 3, 3)
        Per-point tensors (the transposes and matrix products below act on the
        last two axes).
    eps, K : ndarray, shape (N,)
        Per-point scalars, broadcast over the 3x3 block.

    Returns
    -------
    ndarray, shape (N, 3, 3, 24)
        Slots 0-13 are tensors, slots 14-21 are traces broadcast over the 3x3
        block, slots 22 and 23 are ``eps`` and ``K``.
    """
    grad = meanVelGrad
    grad_T = np.transpose(grad, (0, 2, 1))

    def _trace(mat):
        # Per-point trace, shaped so it broadcasts over the 3x3 block.
        return np.trace(mat, axis1=1, axis2=2)[:, None, None]

    # Products that are needed both as a tensor feature and inside a trace.
    lam_lam = np.matmul(Lij, Lij)
    grad_lam = np.matmul(grad, Lij)
    b_lam = np.matmul(b, Lij)
    grad_b = np.matmul(grad, b)

    features = (
        b,                                  # 0
        grad,                               # 1
        grad_T,                             # 2
        Lij,                                # 3
        grad_lam,                           # 4
        lam_lam,                            # 5
        np.matmul(Lij, grad),               # 6
        np.matmul(grad_T, Lij),             # 7
        np.matmul(Lij, grad_T),             # 8
        np.matmul(grad_T, b),               # 9
        np.matmul(b, grad_T),               # 10
        b_lam,                              # 11
        grad_b,                             # 12
        np.matmul(b, grad),                 # 13
        _trace(np.matmul(b, b)),            # 14
        _trace(grad_T),                     # 15
        _trace(np.matmul(grad_T, grad_T)),  # 16
        _trace(Lij),                        # 17
        _trace(lam_lam),                    # 18
        _trace(grad_b),                     # 19
        _trace(b_lam),                      # 20
        _trace(grad_lam),                   # 21
        eps[:, None, None],                 # 22
        K[:, None, None],                   # 23
    )

    basis_vect = np.zeros((b.shape[0], 3, 3, 24))
    for slot, tensor in enumerate(features):
        basis_vect[:, :, :, slot] = tensor
    return basis_vect


In [None]:
# Returns data for a single time instant
def get_training_data_time(dir_loc,time,normalise=False,normalise_const=True,delU=100):
    """Read one snapshot and return its tensors with the point axis moved last.

    Parameters
    ----------
    dir_loc : str
        Data directory; ``dir_loc + 'accu*.dat'`` is globbed to list snapshots.
    time : int
        Position in the sorted ``accu*.dat`` list. (The name shadows the
        ``time`` module inside this function.)
    normalise : bool
        Scale the fields with ``K`` and ``eps``.
    normalise_const : bool
        Scale the fields with ``delta`` and ``delU``.
    delU : float
        Velocity difference forwarded to ``read_data``.

    Returns
    -------
    tuple
        ``(Gij, Tij, meanVelGrad, b, Sij, omega, Lij, eps, K, basis_vect,
        loindices, delta, x)``. The 3x3 tensors have shape (3, 3, N) and
        ``basis_vect`` has shape (3, 3, N, 24).
    """
    accu_files = sorted(glob.glob(dir_loc+'accu*.dat'))
    # Characters -7:-4 of each file name are the snapshot suffix.
    suffixes = [name[-7:-4] for name in accu_files]
    loindices = np.zeros(len(suffixes)+1)

    (Gij, Tij, meanVelGrad, eps, K, Lij,
     Reyn, delta, x, t_time, mvgg) = read_data(dir_loc, suffixes[time], delU)
    loindices[1] = K.shape[0]

    # Remove 2K/3 from the diagonal entries (flat columns 0, 4, 8) of Tij.
    b = np.copy(Tij)
    for diag_col in (0, 4, 8):
        b[:, diag_col] = b[:, diag_col]-2.0*K/3.0

    def _as_tensor(flat):
        # (N, 9) rows -> (N, 3, 3) tensors
        return np.reshape(flat, (flat.shape[0], 3, 3))

    Gij = _as_tensor(Gij)
    Tij = _as_tensor(Tij)
    meanVelGrad = _as_tensor(meanVelGrad)
    b = _as_tensor(b)
    Lij = _as_tensor(Lij)

    # Symmetric and antisymmetric parts of the mean velocity gradient.
    grad_T = np.transpose(meanVelGrad, (0, 2, 1))
    Sij = 0.5*(meanVelGrad+grad_T)
    omega = 0.5*(meanVelGrad-grad_T)

    b = b/(2.0*K[:,None,None])

    if normalise:
        k_col = K[:,None,None]
        eps_col = eps[:,None,None]
        Gij = Gij*k_col/eps_col
        Tij = Tij/k_col
        meanVelGrad = meanVelGrad*k_col/eps_col
        Sij = Sij*k_col/eps_col
        omega = omega*k_col/eps_col
        Lij = Lij*k_col

    if normalise_const:
        delta_col = delta[:,None,None]
        Gij = Gij*delta_col/delU
        Tij = Tij/delU**2
        meanVelGrad = meanVelGrad*delta_col/delU
        Sij = Sij*delta_col/delU
        omega = omega*delta_col/delU
        eps = eps*delta/delU**3
        K = K/delU**2
        Lij = Lij*delU**2
        t_time = t_time*delU/delta

    basis_vect = get_basis_vect(b,Sij,omega,meanVelGrad,Reyn,Tij,eps,K,Lij,x,t_time)

    # Put the point axis after the two tensor axes.
    basis_vect = np.moveaxis(basis_vect,0,-2)
    Gij, Tij, Lij, b, meanVelGrad, Sij, omega = (
        np.moveaxis(field, 0, -1)
        for field in (Gij, Tij, Lij, b, meanVelGrad, Sij, omega)
    )

    return Gij, Tij, meanVelGrad, b, Sij, omega, Lij, eps, K, basis_vect, loindices.astype(int), delta, x


In [None]:
# Returns data for a range of time instants determined by the input parameters
# Consider data directory consists of files numbered from 1 to 100
# ist = index of first file to read
# nt = how many files to read
# stp = step size, i.e. next file will be ist+stp
def get_data(direc,ist,nt,stp, delU,nfs,i1,j1):
    """Stack features and targets of one Gij component over several snapshots.

    Parameters
    ----------
    direc : str
        Data directory passed to ``get_training_data_time``.
    ist, nt, stp : int
        Snapshots ``ist, ist+stp, ...`` below ``ist+nt`` are read.
    delU : float
        Forwarded to ``get_training_data_time``.
    nfs : int
        Number of leading feature slots kept from the basis tensor.
    i1, j1 : int
        Tensor component to extract.

    Returns
    -------
    X : ndarray, shape (total_points, nfs)
    Y : ndarray, shape (total_points,)
    idxs : ndarray of int
        Row offsets at which each snapshot starts, closed by the total row count.
    """
    def _load(step):
        # Unscaled snapshot; only Gij (slot 0) and basis_vect (slot 9) are used.
        fields = get_training_data_time(direc,step,False,False,delU)
        return fields[0], fields[9]

    Gij, basis_vect = _load(ist)
    idxs = np.zeros(math.ceil((nt)/stp)+1)
    idxs[0] = 0

    # Collect per-snapshot pieces and concatenate once at the end instead of
    # re-copying the growing arrays with np.append on every iteration.
    x_parts = [np.copy(basis_vect[i1,j1,:,0:nfs])]
    y_parts = [np.copy(Gij[i1,j1,:])]
    j = 1
    for i in range(ist+stp,ist+nt,stp):
        # Offset of the new snapshot = previous offset + size of previous snapshot.
        idxs[j] = idxs[j-1]+Gij.shape[2]
        Gij, basis_vect = _load(i)
        # Copy the slices so the full per-snapshot tensors can be released.
        x_parts.append(np.copy(basis_vect[i1,j1,:,0:nfs]))
        y_parts.append(np.copy(Gij[i1,j1,:]))
        j = j+1

    X = np.concatenate(x_parts, axis=0)
    Y = np.concatenate(y_parts, axis=0)
    idxs[j] = X.shape[0]

    return X,Y,idxs.astype(int)

In [None]:
# Returns train, test and validation data for case S1, for a component i1, j1
def get_train_test_val_S1_data(i1,j1,scale='std'):
    """Build the scaled S1 train/test/validation sets for component (i1, j1).

    Forty snapshots of ``./data/CaseF_scaled/`` (indices 0, 2, ..., 78) are
    shuffled with a fixed seed; 30 go to training and 10 to validation. The
    test set is read from snapshot indices 80, 82, ..., 98 of the same directory.

    Returns
    -------
    tuple
        ``(X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un)`` where
        ``Y_un`` is the unscaled training target.
    """
    nfs = 24
    npts = 576

    # Fixed seed so the train/validation assignment is reproducible.
    np.random.seed(10)
    nT = list(range(0, 40))
    np.random.shuffle(nT)

    X, Y, _ = get_data('./data/CaseF_scaled/',0,80,2,100,nfs,i1,j1)

    def _gather(order):
        # Copy the 576-row block of every listed snapshot, in the given order.
        feats = np.zeros((npts*len(order), nfs))
        targs = np.zeros((npts*len(order), 1))
        for dst, src in enumerate(order):
            rows_out = slice(dst*npts, (dst+1)*npts)
            rows_in = slice(src*npts, (src+1)*npts)
            feats[rows_out, :] = X[rows_in, :]
            targs[rows_out, :] = Y[rows_in, None]
        return feats, targs

    X_train, Y_train = _gather(nT[0:30])
    X_val, Y_val = _gather(nT[30:40])

    # Keep the unscaled training target for later inverse scaling.
    Y_un = np.copy(Y_train)

    X_test, Y_test, _ = get_data('./data/CaseF_scaled/',80,20,2,100,nfs,i1,j1)

    X_train, X_test, X_val, Y_train, Y_test, Y_val = scale_data(X_train, X_test,  X_val, Y_train, Y_test,  Y_val,scale)
    return X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un


In [None]:
# Returns train, test and validation data for case S2, for a component i1, j1
def get_train_test_val_S2_data(i1,j1,scale='std'):
    """Build the scaled S2 train/test/validation sets for component (i1, j1).

    Snapshots from ``./data/CaseF_scaled/`` and ``./data/CaseC_scaled/`` are
    stacked, and 100 snapshot blocks of 576 rows are shuffled with a fixed
    seed: 80 go to training and 20 to validation. The test set comes from
    ``./data/50_new_scaled/``.

    Returns
    -------
    tuple
        ``(X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un)`` where
        ``Y_un`` is the unscaled training target.
    """
    nfs = 24
    npts = 576

    # Fixed seed so the train/validation assignment is reproducible.
    np.random.seed(10)
    nT = list(range(0, 100))
    np.random.shuffle(nT)

    X, Y, _ = get_data('./data/CaseF_scaled/',0,100,2,100,nfs,i1,j1)
    X1, Y1, _ = get_data('./data/CaseC_scaled/',50,150,3,100,nfs,i1,j1)

    X = np.append(X,X1,axis=0)
    Y = np.append(Y,Y1,axis=0)

    def _gather(order):
        # Copy the 576-row block of every listed snapshot, in the given order.
        feats = np.zeros((npts*len(order), nfs))
        targs = np.zeros((npts*len(order), 1))
        for dst, src in enumerate(order):
            rows_out = slice(dst*npts, (dst+1)*npts)
            rows_in = slice(src*npts, (src+1)*npts)
            feats[rows_out, :] = X[rows_in, :]
            targs[rows_out, :] = Y[rows_in, None]
        return feats, targs

    X_train, Y_train = _gather(nT[0:80])
    X_val, Y_val = _gather(nT[80:100])

    # Keep the unscaled training target for later inverse scaling.
    # (The previously computed min/max/std/mean of Y_train were never used.)
    Y_un = np.copy(Y_train)

    X_test, Y_test, _ = get_data('./data/50_new_scaled/',15,80,5,150,nfs,i1,j1)

    X_train, X_test, X_val, Y_train, Y_test, Y_val = scale_data(X_train, X_test,  X_val, Y_train, Y_test,  Y_val,scale)
    return X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un


In [None]:
# Build the S2 split for component (0, 0) with the default 'std' scaling.
# NOTE: get_train_test_val_S2_data calls scale_data, which is defined in the
# next cell - that cell has to be executed before this one.
X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un = get_train_test_val_S2_data(0,0)

In [None]:
# Various scaling strategies, usually std and minmax give the best results
def scale_data(X_train,X_test, X_val, Y_train, Y_test, Y_val, which):
    """Scale features and targets with statistics fitted on the training set.

    The inputs are never modified in place, and the scaled targets are always
    returned as column vectors of shape (n, 1), whichever scheme is selected.

    Parameters
    ----------
    X_train, X_test, X_val : ndarray, shape (n, n_features)
    Y_train, Y_test, Y_val : ndarray, shape (n,) or (n, 1)
    which : str
        One of ``'std'``, ``'mean'``, ``'minmax'``, ``'max'``, ``'yeo'``,
        ``'quant'``, ``'log'``, ``'exp'``, ``'median'``.

    Returns
    -------
    tuple
        ``(X_train, X_test, X_val, Y_train, Y_test, Y_val)`` after scaling.

    Raises
    ------
    ValueError
        If ``which`` is not one of the supported schemes (this used to return
        ``None`` silently).
    """
    def _column(y):
        # Targets are handled as (n, 1) columns throughout.
        return np.asarray(y).reshape(-1, 1)

    def _fit_apply(est):
        # Fit on the training features, then refit the same estimator on the
        # training target.
        feats = (est.fit_transform(X_train), est.transform(X_test), est.transform(X_val))
        targs = (est.fit_transform(_column(Y_train)), est.transform(_column(Y_test)),
                 est.transform(_column(Y_val)))
        return feats + targs

    if which == 'std':
        return _fit_apply(StandardScaler())

    if which == 'median':
        return _fit_apply(RobustScaler())

    if which == 'quant':
        return _fit_apply(QuantileTransformer(output_distribution='normal'))

    if which == 'yeo':
        pt = PowerTransformer(standardize=True)
        scaled = _fit_apply(pt)
        # Printed after the refit, so these are the lambdas of the target.
        print(pt.lambdas_)
        return scaled

    if which == 'mean':
        meanX = np.mean(X_train, axis=0)
        meanY = np.mean(Y_train)
        return (X_train/meanX, X_test/meanX, X_val/meanX,
                _column(Y_train)/meanY, _column(Y_test)/meanY, _column(Y_val)/meanY)

    if which == 'minmax':
        minX = np.min(X_train, axis=0)
        spanX = np.max(X_train, axis=0)-minX
        minY = np.min(Y_train)
        spanY = np.max(Y_train)-minY
        return ((X_train-minX)/spanX, (X_test-minX)/spanX, (X_val-minX)/spanX,
                (_column(Y_train)-minY)/spanY, (_column(Y_test)-minY)/spanY,
                (_column(Y_val)-minY)/spanY)

    if which == 'max':
        maxX = np.max(np.abs(X_train), axis=0)
        maxY = np.max(np.abs(Y_train))
        return (X_train/maxX, X_test/maxX, X_val/maxX,
                _column(Y_train)/maxY, _column(Y_test)/maxY, _column(Y_val)/maxY)

    if which == 'log':
        # do_log / do_log2 are not defined in the visible part of the notebook;
        # they must be provided elsewhere before this branch is used.
        return scale_data(do_log(X_train), do_log(X_test), do_log(X_val),
                          do_log2(Y_train), do_log2(Y_test), do_log2(Y_val), 'std')

    if which == 'exp':
        # NOTE: a fractional power of negative values yields NaN.
        return scale_data(X_train**0.3, X_test**0.3, X_val**0.3,
                          Y_train**0.3, Y_test**0.3, Y_val**0.3, 'std')

    raise ValueError("unknown scaling scheme: {!r}".format(which))


        

    

In [None]:
# Inverse scaling
def do_inverse_scaling(Y_sc, Y_un, which):
    """Map scaled target values back to physical units.

    The scaling statistics are recomputed from the unscaled training target.

    Parameters
    ----------
    Y_sc : ndarray
        Scaled values (targets or predictions).
    Y_un : ndarray
        Unscaled training target the scaling was fitted on.
    which : str
        ``'std'``, ``'minmax'`` or ``'median'``.

    Returns
    -------
    ndarray
        Same shape as ``Y_sc`` for ``'std'`` and ``'minmax'``; shape (n, 1)
        for ``'median'``.

    Raises
    ------
    ValueError
        If ``which`` is not supported (this used to return ``None`` silently).
    """
    if which == 'std':
        meanY = np.mean(Y_un)
        stdY = np.std(Y_un)
        return Y_sc*stdY + meanY
    if which == 'minmax':
        minY = np.min(Y_un)
        maxY = np.max(Y_un)
        return Y_sc*(maxY-minY)+minY
    if which == 'median':
        # Refit the robust scaler on the unscaled target to recover its statistics.
        RS = RobustScaler()
        RS.fit(Y_un.reshape(-1,1))
        return RS.inverse_transform(Y_sc.reshape(-1,1))
    raise ValueError("inverse scaling not implemented for scheme: {!r}".format(which))

In [None]:
# Template XGB model, input takes the number of estimators
# See XGB reference for details
def get_model_XGB(nest):
    """Return an untrained XGBoost regressor with the notebook's fixed settings.

    Parameters
    ----------
    nest : int
        Number of boosting rounds (``n_estimators``).
    """
    # `verbosity` was misspelled as `versbosity`, so the intended silent mode
    # was never passed under the right name.
    model = xgb.XGBRegressor(verbosity=0,booster='gbtree',objective='reg:squarederror',max_depth=3,
                             learning_rate=0.1,n_estimators=nest,n_jobs=8,importance_type='gain')
    return model
        
               
   


In [None]:
# Returns, for each 576-point profile, the RMS prediction error normalised by the RMS of the true values (both in unscaled units)
def get_score(model, X, Y, Y_un, which):
    """Relative RMS error of ``model`` for each 576-point profile.

    Predictions and targets are first mapped back to unscaled values with
    ``do_inverse_scaling``. For every consecutive block of 576 rows the score
    is ``sqrt(MSE / mean(Y**2))``.

    Returns
    -------
    ndarray, shape (Y.shape[0] // 576,)
    """
    npts = 576
    pred = do_inverse_scaling(model.predict(X), Y_un, which)
    Y = do_inverse_scaling(Y, Y_un, which)

    n_profiles = int(Y.shape[0]/npts)
    sc = np.zeros(n_profiles)
    for p in range(n_profiles):
        rows = slice(p*npts, (p+1)*npts)
        truth = Y[rows]
        mse = mean_squared_error(truth, pred[rows])
        sc[p] = np.sqrt(mse/np.mean(truth**2))
    return sc

In [None]:
# This cell will fit an XGB model for each component of Gij and save the trained model
# Number of boosting rounds handed to get_model_XGB (used there as n_estimators).
reg = 500

for i in range(3):
    for j in range(3):
        print(i,j)
        # Standard-scaled S2 split for component (i, j)
        X_train, X_test, X_val,Y_train,Y_test, Y_val, Y_un = get_train_test_val_S2_data(i,j,'std')

        model = get_model_XGB(reg)
        # The validation set is the evaluation set used for early stopping.
        model.fit(X_train,Y_train,eval_set=[(X_val,Y_val)],early_stopping_rounds=40,verbose=False)
    
        # One model file per component, written under models/.
        model.save_model('models/Gij_{}_{}_S2_XGB.model'.format(i,j))
    

In [None]:
# Template of ANN, takes as input 
# 1) reg : L2 regularisation weight
# 2) dimi: input dimensions
def get_model(reg,dimi):
    """Build and compile the fully connected network used for each component.

    Three hidden Dense layers (8, 6 and 4 units), each L2-regularised with
    weight ``reg`` and followed by LeakyReLU(alpha=0.01), then a single linear
    output unit. Compiled with Adam (learning rate 0.001) and MSE loss.

    Parameters
    ----------
    reg : float
        L2 regularisation weight of the hidden layers.
    dimi : int
        Number of input features.
    """
    model = Sequential()
    for depth, width in enumerate((8, 6, 4)):
        dense_kwargs = dict(use_bias=True, kernel_regularizer=regularizers.l2(reg))
        if depth == 0:
            # Only the first layer declares the input size.
            dense_kwargs['input_dim'] = dimi
        model.add(Dense(width, **dense_kwargs))
        model.add(layers.LeakyReLU(alpha=0.01))

    model.add(Dense(1, activation='linear'))

    opt = optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt, loss='mean_squared_error')
    return model

In [None]:
# This cell will train ANN for various values of regularisation weight for each component,
# the model which exhibits smallest error in the validation data will be saved and used later for predictions

# Candidate L2 regularisation weights tried for every component.
reg = [0,1e-5, 5e-5, 1e-4, 5e-4,1e-3,5e-3]
# Shared early-stopping callback: monitors val_loss with patience 20 and
# restores the best weights.
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=20,restore_best_weights=True)

for i in range(0,3):
    for j in range(0,3):
        models=[]
        X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un = get_train_test_val_S2_data(i,j,'std')
        
        # Score tables: one row per 576-point profile, one column per weight in reg.
        scs = np.zeros((int(X_val.shape[0]/576),len(reg)))
        sctr = np.zeros((int(X_train.shape[0]/576),len(reg)))
        sct = np.zeros((int(X_test.shape[0]/576),len(reg)))
        for ik, k  in enumerate(reg):
            print(i,j)
            
            model = get_model(k,24)
            models.append(model)
            h=model.fit(X_train,Y_train,epochs=200,batch_size=512,validation_data=(X_val,Y_val),callbacks=[es],shuffle=True)
    
            scs[:,ik] = get_score(model, X_val, Y_val, Y_un, 'std')
            sct[:,ik] = get_score(model, X_test, Y_test, Y_un, 'std')
            sctr[:,ik] = get_score(model, X_train, Y_train, Y_un, 'std')
        # Select on the mean validation score only; sct and sctr are filled
        # but do not take part in the selection.
        idx = np.argmin(np.mean(scs,axis=0))
        print(idx)
        models[idx].save('./models/Gij_{}_{}_S2_ANN.h5'.format(i,j))
    #scv2[:,i] = get_score(model, X_val2, Y_val2, Y_un, 'std')
    
    

In [None]:
# Returns, in this order, the XGB predictions, the ANN predictions and the DNS values of Gij on the test set (all unscaled); pass case='S1' or case='S2' to select the data split and saved models
def get_Gij_predictions(case='S1', scale='std'):
    """Evaluate the saved XGB and ANN models on the test set of one case.

    Parameters
    ----------
    case : str
        ``'S1'`` uses ``get_train_test_val_S1_data``; any other value uses
        ``get_train_test_val_S2_data``. It is also inserted into the model
        file names.
    scale : str
        Scaling scheme forwarded to the data loader and used for the inverse
        scaling.

    Returns
    -------
    PRED, PRED_ANN, DNS : ndarray, shape (n_test_rows, 3, 3)
        XGB predictions, ANN predictions and reference values, all unscaled.
    """
    # Allocated once the test-set size is known: the row count differs between
    # the cases, so a hard-coded 576*16 fails for the S1 test set.
    DNS = PRED = PRED_ANN = None

    for i1 in range(0,3):
        for j1 in range(0,3):
            print(i1,j1)

            if(case=='S1'):
                X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un = get_train_test_val_S1_data(i1,j1,scale)
            else:
                X_train, X_test, X_val, Y_train, Y_test, Y_val, Y_un = get_train_test_val_S2_data(i1,j1,scale)

            if DNS is None:
                n_rows = Y_test.shape[0]
                DNS = np.zeros((n_rows,3,3))
                PRED = np.zeros((n_rows,3,3))
                PRED_ANN = np.zeros((n_rows,3,3))

            model = get_model_XGB(500)
            model.load_model('models/Gij_{}_{}_{}_XGB.model'.format(i1,j1,case))
            model_ANN  = tf.keras.models.load_model('models/Gij_{}_{}_{}_ANN.h5'.format(i1,j1,case))

            # Store the scaled values first, then undo the scaling. np.ravel
            # accepts both (n,) and (n, 1) results from do_inverse_scaling.
            PRED[:,i1,j1] = np.ravel(model.predict(X_test))
            PRED[:,i1,j1] = np.ravel(do_inverse_scaling(PRED[:,i1,j1],Y_un,scale))

            PRED_ANN[:,i1,j1] = model_ANN.predict(X_test)[:,0]
            PRED_ANN[:,i1,j1] = np.ravel(do_inverse_scaling(PRED_ANN[:,i1,j1],Y_un,scale))

            DNS[:,i1,j1] = np.ravel(Y_test)
            DNS[:,i1,j1] = np.ravel(do_inverse_scaling(DNS[:,i1,j1],Y_un,scale))

    return PRED, PRED_ANN, DNS

In [None]:
# get_Gij_predictions returns (XGB, ANN, DNS): from here on PRED_XGB holds the
# XGB predictions and PRED holds the ANN predictions.
PRED_XGB, PRED, DNS = get_Gij_predictions('S2')

In [None]:
    #Figure 3 and 4
    # 3x3 grid, one panel per Gij component. Every curve is shifted and divided
    # by the min and range of the DNS profile of that panel.
    
    plt.figure(figsize=(14,10))
    # k selects which 576-point test profile is plotted
    k=8
    # Centred coordinate, multiplied by 100 (the axis label below is in cm)
    x = np.linspace(0,0.423e-2,576)
    x=x-0.423e-2/2
    x=x*100
    for i in range(0,3):
      for j in range(0,3):
        plt.subplot(3,3,i*3+j+1) 
        
        # Plot order DNS, PRED (ANN), PRED_XGB (XGB) matches the legend below
        plt.plot(x,(DNS[k*576:(k+1)*576,i,j]-min(DNS[k*576:(k+1)*576,i,j]))/(max((DNS[k*576:(k+1)*576,i,j]))-min(DNS[k*576:(k+1)*576,i,j])),'--x',linewidth=2,markevery=12,markeredgewidth=1,markersize=8)
        plt.plot(x,(PRED[k*576:(k+1)*576,i,j]-min(DNS[k*576:(k+1)*576,i,j]))/(max((DNS[k*576:(k+1)*576,i,j]))-min(DNS[k*576:(k+1)*576,i,j])),linewidth=2)
        plt.plot(x,(PRED_XGB[k*576:(k+1)*576,i,j]-min(DNS[k*576:(k+1)*576,i,j]))/(max((DNS[k*576:(k+1)*576,i,j]))-min(DNS[k*576:(k+1)*576,i,j])),'.',linewidth=2)

        
        plt.locator_params(axis='y', nbins=3)
        plt.locator_params(axis='x', nbins=3)
        if(i==0 and j==0):
            plt.legend(('DNS','ANN','XGB'))
        #if(i==1 and j==1):
            #plt.ylim((\))
            #plt.yticks([-0.5, 0, 0.5])
        #plt.ylim((-0.1,1.1))
        plt.subplots_adjust(wspace=0.05)
        plt.subplots_adjust(hspace=0.05)
        plt.xlabel('$Y (cm)$')

        # Only the bottom row keeps x tick labels and the x-axis label
        if(i<2):
        #plt.gca().axes.get_yaxis().set_visible(False)
            #plt.gca().axes.get_xaxis().set_visible(False)
            plt.xticks([-0.2,0.0,0.2],"")
            plt.xlabel("")
        # Only the first column keeps y tick labels
        if(j>0):
            #plt.gca().axes.get_yaxis().set_visible(False)
            plt.yticks([0,0.5,1],"")
        plt.grid(alpha=0.5)
        #x = np.linspace(min(Gij[i,j,:]*eps/K),max(Gij[i,j,:]*eps/K),10)        
        ax=plt.gca()
        # Panel tag with the 1-based component indices
        plt.text(0.1,0.9,str(i+1)+','+str(j+1),transform=ax.transAxes,bbox=dict(facecolor='white',alpha=0.5),size=15)
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + 
            ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(15)
        #plt.plot(x,x,'k--')
    #plt.savefig('Gij-ANN-XGB-S2.png',dpi=300,bbox_inches = "tight")     
    #plt.close()
            
            

In [None]:
# Error values for figure 5 and 8, give predictions for a case, will return error
def get_error(pred, DNS):
    """Relative error of ``pred`` against ``DNS`` for every 576-row sample.

    For each consecutive block of 576 rows the error is the root of the summed
    squared difference over all rows and tensor components, divided by the
    root of the summed squared reference values.

    Returns
    -------
    ndarray, shape (pred.shape[0] // 576,)
    """
    npts = 576
    n_samples = int(pred.shape[0]/npts)
    errs = np.zeros(n_samples)
    for s in range(n_samples):
        rows = slice(s*npts, (s+1)*npts)
        reference = DNS[rows,:,:]
        numerator = np.sqrt(np.sum((reference-pred[rows,:,:])**2))
        denominator = np.sqrt(np.sum((reference)**2))
        errs[s] = numerator/denominator
    return errs

In [None]:
# Per-sample relative errors of the ANN (PRED) and XGB (PRED_XGB) predictions.
# NOTE(review): the names end in S1, but PRED/PRED_XGB/DNS come from the
# get_Gij_predictions('S2') call above unless that cell is re-run with 'S1'.
EANNS1 = get_error(PRED, DNS)
EXGBS1 = get_error(PRED_XGB, DNS)


In [None]:
# For Figure 5 and 8

# Error per test sample. Only the first curve is active; the commented plot
# lines add the other curves named in the legend.
plt.figure(figsize=(10,4))
plt.plot(EANNS1[0:10],'-x',linewidth=4,markersize=10,markeredgewidth=2)
#plt.plot(EXGBS1[0:10],'-x',linewidth=4,markersize=10,markeredgewidth=2)
#plt.plot(EANNS2[0:10],'-o',linewidth=4,markersize=10,markeredgewidth=2)
#plt.plot(EXGBS2[0:10],'-o',linewidth=4,markersize=10,markeredgewidth=2)

#plt.ylim(0,0.4)
# Raw strings: '\e' and '\m' are invalid escape sequences in normal string
# literals (the rendered text is unchanged).
plt.ylabel(r'$\epsilon_T$')
plt.xlabel('Sample')
plt.grid(alpha=0.2)
ax=plt.gca()
plt.yticks([0, 0.10,0.20])
#plt.legend(('$\epsilon_{3,1}$','$\epsilon_{3,2}$','$\epsilon_{3,3}$'),prop={'size': 18})
plt.legend((r'$\epsilon$-ANN-$\mathcal{S}$1',r'$\epsilon$-XGB-$\mathcal{S}$1',r'$\epsilon$-ANN-$\mathcal{S}$2',r'$\epsilon$-XGB-$\mathcal{S}$2'),prop={'size': 16})

for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
            ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(20)
#plt.savefig('eps-S1.png',dpi=300,bbox_inches = "tight")     
    #plt.close()
#plt.plot(MERRS[:,2,2]*100,'-x')
#plt.plot(m*100,'-x')




In [None]:
# Returns data needed to obtain Reynolds stresses and Gij for various models below
def get_data_RSS(direc,ist,nt,stp, delU):
    """Stack the fields needed by the closure models over several snapshots.

    Snapshots ``ist, ist+stp, ...`` below ``ist+nt`` are read unscaled with
    ``get_training_data_time`` and joined along the point axis.

    Returns
    -------
    tuple
        ``(Tij, b, meanVelGrad, Sij, Lij, eps, K)``; the tensors have shape
        (3, 3, total_points) and ``eps``/``K`` have shape (total_points,).
    """
    # Slots of the get_training_data_time result, listed in return order:
    # Tij, b, meanVelGrad, Sij, Lij (tensors) and eps, K (scalars).
    tensor_slots = (1, 3, 2, 4, 6)
    scalar_slots = (7, 8)

    tensor_parts = [[] for _ in tensor_slots]
    scalar_parts = [[] for _ in scalar_slots]

    # Gather the pieces and concatenate once, rather than re-copying the
    # growing arrays with np.append for every snapshot.
    for step in [ist] + list(range(ist+stp,ist+nt,stp)):
        fields = get_training_data_time(direc,step,False,False,delU)
        for bucket, slot in zip(tensor_parts, tensor_slots):
            bucket.append(fields[slot])
        for bucket, slot in zip(scalar_parts, scalar_slots):
            bucket.append(fields[slot])

    Tij, b, meanVelGrad, Sij, Lij = (np.concatenate(bucket, axis=2) for bucket in tensor_parts)
    eps, K = (np.concatenate(bucket, axis=0) for bucket in scalar_parts)

    return Tij, b, meanVelGrad, Sij, Lij, eps, K

In [None]:
# Obtains Reynolds stress closure implied by the Gij values
# Reynolds stress closure means the values of unclosed terms in the Reynolds stress transport equation
def get_RSM(Gij,Tij):
    """Return ``Gij.Tij + (Gij.Tij)^T`` for every point.

    Component-wise: ``RSM[:, i, j] = sum_k Gij[:, i, k]*Tij[:, k, j]
    + Gij[:, j, k]*Tij[:, k, i]``.

    Parameters
    ----------
    Gij, Tij : ndarray, shape (N, 3, 3)

    Returns
    -------
    ndarray, shape (N, 3, 3)
    """
    RSM = np.zeros((Gij.shape[0],3,3))
    for k in range(0,3):
        # term[:, i, j] = Gij[:, i, k] * Tij[:, k, j]; its transpose over the
        # last two axes is Gij[:, j, k] * Tij[:, k, i].
        term = Gij[:, :3, k, None]*Tij[:, k, None, :3]
        RSM = RSM+term+np.transpose(term, (0, 2, 1))
    return RSM

In [None]:
# Gij from LIPM model
def LIPM_model(C0,CIPM,alpha2,alpha3,beta1,beta2,beta3,gama5,gama6,b,Tij,meanVelGrad,eps,K,lambdaij,Sij):
    """Evaluate the LIPM expression for Gij at every point.

    ``alpha1`` is computed per point from ``C0``, ``alpha2``, ``CIPM``,
    ``trace(b^3)`` and the production term ``P = -2*K*sum(b*Sij)``; all other
    coefficients are taken as given. ``Tij`` is accepted but not used.

    Parameters
    ----------
    b, meanVelGrad, lambdaij, Sij : ndarray, shape (3, 3, N)
    eps, K : ndarray, shape (N,)

    Returns
    -------
    ndarray, shape (3, 3, N)
    """
    n_points = K.shape[0]
    identity = np.eye(3,3)
    pred = np.zeros((3,3,n_points))

    for p in range(0,n_points):
        b_p = b[:,:,p]
        grad = meanVelGrad[:,:,p]
        grad_T = np.transpose(grad)
        b_sq = np.matmul(b_p,b_p)
        b_cube = np.matmul(b_sq,b_p)

        # Production from the contraction of b with Sij
        production = -2.0*(b_p*Sij[:,:,p]).sum()*K[p]

        alpha1 = -(0.5+3.0/4*C0)+3.0*alpha2*np.trace(b_cube)+0.5*CIPM*production/eps[p]

        # Terms that depend only on b, scaled by eps/K
        b_part = (alpha1*identity+alpha2*b_p + alpha3*b_sq)*eps[p]/K[p]
        # Terms involving the mean velocity gradient
        closure = b_part + beta1*identity*np.trace(grad) + \
            beta2*grad + beta3*grad_T + gama5*np.matmul(b_p,grad) + \
            gama6*np.matmul(b_p,grad_T)

        pred[:,:,p] = closure+0.5*C0*eps[p]*lambdaij[:,:,p]

    return pred

    

In [None]:
# LSSG Model
def LSSG_model(C0,C2SSG,C3SSG,C3SSGp,C3starSSG,C5SSG,b,Tij,meanVelGrad,eps,K,lambdaij,Sij):
    """Evaluate the LSSG expression for Gij at every point.

    All coefficients are derived per point from the SSG constants, the
    invariants of ``b`` and the production term
    ``P = -0.5*trace(M + M^T)`` with ``M = Tij . meanVelGrad^T``.
    ``Sij`` is accepted but not used.

    Parameters
    ----------
    b, Tij, meanVelGrad, lambdaij : ndarray, shape (3, 3, N)
    eps, K : ndarray, shape (N,)

    Returns
    -------
    ndarray, shape (3, 3, N)
    """
    n_points = K.shape[0]
    identity = np.eye(3,3)
    pred = np.zeros((3,3,n_points))

    # These coefficients do not depend on the point.
    beta1 = -0.2
    gama5 = 3.0/2-3.0/4*C5SSG
    gama6 = -3.0/2+3.0/4*C5SSG

    for p in range(0,n_points):
        b_p = b[:,:,p]
        grad = meanVelGrad[:,:,p]
        grad_T = np.transpose(grad)

        stress_grad = np.matmul(Tij[:,:,p],grad_T)
        production = - 0.5*np.trace(stress_grad + np.transpose(stress_grad))

        b_sq = np.matmul(b_p,b_p)
        tr_b_sq = np.trace(b_sq)
        tr_b_cube = np.trace(np.matmul(b_sq,b_p))
        root_tr_b_sq = np.sqrt(tr_b_sq)

        alpha2 = 4.0-1.7*production/eps[p]
        alpha1 = -(0.5+3.0/4*C0)-1.0/4*C2SSG*tr_b_sq + \
            (3.0*alpha2-3.0/4*C2SSG)*tr_b_cube + 3.0/8*(C3SSG-C3starSSG*root_tr_b_sq)*production/eps[p]
        alpha3 = 3.0/4*C2SSG-3.0*alpha2
        beta2 = 3.0/8*(C3SSG-C3SSGp*root_tr_b_sq)+0.5
        beta3 = 3.0/8*(C3SSG-C3SSGp*root_tr_b_sq)-0.5

        # Terms that depend only on b, scaled by eps/K
        b_part = (alpha1*identity+alpha2*b_p + alpha3*b_sq)*eps[p]/K[p]
        # Terms involving the mean velocity gradient
        closure = b_part + beta1*identity*np.trace(grad) + \
            beta2*grad + beta3*grad_T + gama5*np.matmul(b_p,grad) + \
            gama6*np.matmul(b_p,grad_T)

        pred[:,:,p] = closure+0.5*C0*eps[p]*lambdaij[:,:,p]
    return pred

    

In [None]:
# SLM model
def get_SLM(C0,eps,K,Lij):
    """Evaluate the SLM expression for Gij at every point.

    ``pred[:, :, i] = -(0.5 + 3/4*C0) * I * eps[i]/K[i] + 0.5*C0*eps[i]*Lij[:, :, i]``

    Parameters
    ----------
    C0 : float
    eps, K : ndarray, shape (N,)
    Lij : ndarray, shape (3, 3, N)

    Returns
    -------
    ndarray, shape (3, 3, N)
    """
    n_points = K.shape[0]
    eps_n = np.asarray(eps)[:n_points]
    k_n = np.asarray(K)[:n_points]

    # Isotropic block, broadcast along the point axis
    isotropic = (-(0.5+3.0/4*C0)*np.eye(3,3))[:, :, None]*eps_n/k_n
    lambda_part = 0.5*C0*eps_n*np.asarray(Lij)[:, :, :n_points]

    pred = np.zeros((3,3,n_points))
    pred[:, :, :] = isotropic + lambda_part
    return pred

In [None]:
# Get Reynolds stress closure for various models

# Unscaled fields for the snapshots 15, 20, ..., 90 of ./data/50_new_scaled/
Tij, b, meanVelGrad, Sij, Lij, eps, K = get_data_RSS('./data/50_new_scaled/',15,80,5,100)
# Gij from the classical models, each returned with shape (3, 3, N)
LIPM  = LIPM_model(2.1,0.6,3.5,-3*3.5,-0.2,0.8,-0.2,0.6,-0.6,b,Tij,meanVelGrad,eps,K,Lij,Sij)
LSSG = LSSG_model(2.1,4.2,0.8,1.0,1.0,0.4,b,Tij,meanVelGrad,eps,K,Lij,Sij)

SLM = get_SLM(2.1,eps,K,Lij)

# Move the point axis first so the arrays are (N, 3, 3), as get_RSM indexes them
LIPM = np.moveaxis(LIPM,2,0)
LSSG = np.moveaxis(LSSG,2,0)
SLM  = np.moveaxis(SLM,2,0)
Tij  = np.moveaxis(Tij,2,0)

# Closure term implied by each Gij source. DNS, PRED_XGB and PRED come from the
# get_Gij_predictions cell and must have the same number of rows as Tij.
RSS_LIPM = get_RSM(LIPM,Tij)
RSS_LSSG = get_RSM(LSSG,Tij)
RSS_DNS = get_RSM(DNS,Tij)
RSS_XGB = get_RSM(PRED_XGB,Tij)
RSS_ANN = get_RSM(PRED,Tij)

RSS_SLM = get_RSM(SLM,Tij)


In [None]:
    # Figure 6 and 7
    # 2x3 grid with the six components i <= j of the closure term. Every curve
    # is divided by the maximum absolute DNS value of its panel.
    
    plt.figure(figsize=(16,8))
    # k selects which 576-point profile is plotted
    k=4
    x = np.linspace(0,0.423e-2,576)
    x=x-0.423e-2/2
    # NOTE(review): 0.0109e-3 is presumably the momentum thickness used in the axis label - confirm
    x=x/0.0109e-3
    #iindex = [(0,,1,2]
    # n is the 1-based panel counter
    n=1
    for i in range(0,3):
      for j in range(0,3):
        # Skip the components below the diagonal
        if(i>j):
            continue
            
        plt.subplot(2,3,n) 
        n=n+1
        plt.plot(x,RSS_DNS[k*576:(k+1)*576,i,j]/max(abs(RSS_DNS[k*576:(k+1)*576,i,j])),'--x',linewidth=2,markevery=14,markeredgewidth=1.2,markersize=10)
        plt.plot(x,RSS_ANN[k*576:(k+1)*576,i,j]/max(abs(RSS_DNS[k*576:(k+1)*576,i,j])),'--',linewidth=2)
        plt.plot(x,RSS_XGB[k*576:(k+1)*576,i,j]/max(abs(RSS_DNS[k*576:(k+1)*576,i,j])),':',linewidth=2)
        plt.plot(x,RSS_SLM[k*576:(k+1)*576,i,j]/max(abs(RSS_DNS[k*576:(k+1)*576,i,j])),linewidth=2,alpha=0.7)
        plt.plot(x,RSS_LSSG[k*576:(k+1)*576,i,j]/max(abs(RSS_DNS[k*576:(k+1)*576,i,j])),linewidth=2,alpha=0.7)
        plt.plot(x,RSS_LIPM[k*576:(k+1)*576,i,j]/max(abs(RSS_DNS[k*576:(k+1)*576,i,j])),linewidth=2,alpha=0.7)
        
        
        plt.locator_params(axis='y', nbins=3)
        if(i==0 and j==0):
            plt.legend(('DNS','ANN','XGB','SLM','LSSG','LIPM'))
        #if(i==1 and j==1):
            #plt.ylim((\))
            #plt.yticks([-0.5, 0, 0.5])
        #plt.ylim((-0.1,1.2))
        plt.subplots_adjust(wspace=0.15)
        plt.subplots_adjust(hspace=0.05)
        plt.xlabel(r'$Y/\delta_{\theta}$')
        # n was already advanced above, so n==5 is the 4th panel and n==6 the 5th
        if(n==5):
            plt.ylim((-1,1.5))
        if(n==6):
            plt.ylim((-1.1,1.0))
        plt.xticks([-200,-100,0.0,100,200])

        # Panels 1-3 (top row) get blank x tick labels
        if(n<5):
        #plt.gca().axes.get_yaxis().set_visible(False)
            #plt.gca().axes.get_xaxis().set_visible(False)
            plt.xticks([-200,-100,0.0,100,200],"")
            #plt.xlabel("")
        #if(j>0):
            #plt.gca().axes.get_yaxis().set_visible(False)
        #    plt.yticks([0,0.5,1],"")
        
        plt.grid(alpha=0.3)
        #x = np.linspace(min(Gij[i,j,:]*eps/K),max(Gij[i,j,:]*eps/K),10)        
        ax=plt.gca()
        # Panel tag with the 1-based component indices
        plt.text(0.1,0.9,str(i+1)+','+str(j+1),transform=ax.transAxes,bbox=dict(facecolor='white',alpha=0.5),size=15)
        
        # Normalisation constant of the panel (max |DNS|)
        plt.text(0.68,0.8,"{:.2e}".format(max(abs(RSS_DNS[k*576:(k+1)*576,i,j]))),transform=ax.transAxes,bbox=dict(facecolor='white',alpha=0.5),size=12)
        
        for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] + 
            ax.get_xticklabels() + ax.get_yticklabels()):
            item.set_fontsize(15)
        #plt.plot(x,x,'k--')
    #plt.savefig('RSS-ANN-XGB-S1.png',dpi=300,bbox_inches = "tight")     
    #plt.close()
            
            