In [1]:
import numpy as np
import pandas as pd
import matplotlib
%matplotlib notebook
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA 
from keras.models import Sequential, load_model
from keras.layers import Dense, Activation, Dropout, GaussianNoise
from keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from keras import regularizers
from mpl_toolkits.mplot3d import axes3d, Axes3D 
import keras.backend as K
from sklearn.cluster import KMeans, AgglomerativeClustering, FeatureAgglomeration
from sklearn.metrics import r2_score
from multiprocessing import Pool
import pickle 
import cantera as ct
from cantera import ck2cti
from scipy.interpolate import griddata, RegularGridInterpolator, LinearNDInterpolator
import h5py
from scipy.stats import binned_statistic, skew, binned_statistic_2d,binned_statistic_dd
import os
import time
from sklearn.utils.class_weight import compute_sample_weight
from scipy.signal import savgol_filter


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  return f(*args, **kwds)


In [None]:
# Elemental composition of every species as atom counts in the order [C, H, O, N].
composition={'AC3H5':[3,5,0,0],'C10H20':[10,20,0,0],'C12H24':[12,24,0,0],'C12H25O2':[12,25,2,0],'C12OOH':[12,25,2,0],'C2H2':[2,2,0,0],
 'C2H3':[2,3,0,0],'C2H3CHO':[3,4,1,0],'C2H4':[2,4,0,0],'C2H5':[2,5,0,0],'C2H6':[2,6,0,0],'C3H6':[3,6,0,0],'C4H7':[4,7,0,0],
'C4H81':[4,8,0,0],'C5H10':[5,10,0,0],'C5H9':[5,9,0,0],'C6H12':[6,12,0,0],'C7H14':[7,14,0,0],'C8H16':[8,16,0,0],'C9H18':[9,18,0,0],
 'CH2':[1,2,0,0],'CH2*':[1,2,0,0],'CH2CHO':[2,3,1,0],'CH2O':[1,2,1,0],'CH3':[1,3,0,0],'CH3O':[1,3,1,0],'CH4':[1,4,0,0],
 'CO':[1,0,1,0],'CO2':[1,0,2,0],'H':[0,1,0,0],'H2':[0,2,0,0],'H2O':[0,2,1,0],'H2O2':[0,2,2,0],'HCO':[1,1,1,0],
 'HO2':[0,1,2,0],'N2':[0,0,0,2],'NC12H26':[12,26,0,0],'C3H7':[3,7,0,0],'O':[0,0,1,0],'O2':[0,0,2,0],'O2C12H24OOH':[12,25,4,0],
 'OC12H23OOH':[12,24,3,0],'OH':[0,1,1,0],'C4H9':[4,9,0,0],'PXC10H21':[10,21,0,0],'PXC12H25':[12,25,0,0],'PXC5H11':[5,11,0,0],
 'C6H13':[6,13,0,0],'PXC7H15':[7,15,0,0],'C8H17':[8,17,0,0],'PXC9H19':[9,19,0,0],'S3XC12H25':[12,25,0,0],'SXC12H25':[12,25,0,0]}

# Species ordering used for the rows of `aij` and the entries of `molar_mass`.
specs=['AC3H5','C10H20','C12H24','C12H25O2','C12OOH','C2H2','C2H3','C2H3CHO','C2H4','C2H5','C2H6','C3H6','C4H7',
'C4H81','C5H10','C5H9','C6H12','C7H14','C8H16','C9H18','CH2','CH2*','CH2CHO','CH2O','CH3','CH3O','CH4',
 'CO','CO2','H','H2','H2O','H2O2','HCO','HO2','N2','NC12H26','C3H7','O','O2','O2C12H24OOH','OC12H23OOH','OH','C4H9','PXC10H21','PXC12H25','PXC5H11',
 'C6H13','PXC7H15','C8H17','PXC9H19','S3XC12H25','SXC12H25']

# Integer-rounded atomic masses for [C, H, O, N]; defined once so the molar
# masses and the atom-conservation helper use the same numbers.
atom_mass = np.array([12.0, 1.0, 16.0, 14.0])

# aij[j, k] = number of atoms of element k in species specs[j].
# Sized from `specs` instead of a hard-coded 53 so the table follows the list.
aij = np.array([composition[spec] for spec in specs], dtype=float)

# Molar mass of each species = sum over elements of (atom count * atomic mass).
molar_mass = np.matmul(aij, atom_mass)




In [2]:
 # Redefines `specs` with mixed-case names ('aC3H5', 'nC3H7', 'pC4H9', 'PXC6H13', 'PXC8H17').
 # NOTE(review): these five names are not keys of the `composition` dict defined in the
 # earlier cell, so re-running that cell's composition lookup after this one would raise KeyError.
 # NOTE(review): the statement starts with a leading space; IPython tolerates this in a cell,
 # plain Python would report an unexpected indent.
 specs=['aC3H5','C10H20','C12H24','C12H25O2','C12OOH','C2H2','C2H3','C2H3CHO','C2H4','C2H5','C2H6','C3H6','C4H7',
'C4H81','C5H10','C5H9','C6H12','C7H14','C8H16','C9H18','CH2','CH2*','CH2CHO','CH2O','CH3','CH3O','CH4',
 'CO','CO2','H','H2','H2O','H2O2','HCO','HO2','N2','NC12H26','nC3H7','O','O2','O2C12H24OOH','OC12H23OOH','OH','pC4H9','PXC10H21','PXC12H25','PXC5H11',
 'PXC6H13','PXC7H15','PXC8H17','PXC9H19','S3XC12H25','SXC12H25']

In [None]:
# Second ordering of the same 53 species names; it is matched against `specs`
# to build the column permutation `map_spec`.
# NOTE(review): presumably the species order of the HDF5 'DATA' files — confirm.
specs_NP=['N2','H','O2','OH','O','H2','H2O','HO2','H2O2','CO2','CO','HCO','CH2O','CH2','CH3','C2H2','CH2*','CH3O','CH4','C2H4',
          'C2H6','C2H5','C2H3','aC3H5','CH2CHO','C2H3CHO','C3H6','nC3H7','C4H7','C4H81','pC4H9','C5H9','C5H10','PXC5H11','C6H12','PXC6H13',
          'C7H14','PXC7H15','C8H16','PXC8H17','C9H18','PXC9H19','C10H20','PXC10H21','C12H24','PXC12H25','S3XC12H25','SXC12H25',
          'NC12H26','C12H25O2','C12OOH','O2C12H24OOH','OC12H23OOH']


In [None]:
def get_mean_molar_mass(molar_mass,data):
    """Return the mixture mean molar mass of every row of `data`.

    The first ``molar_mass.shape[0]`` columns of `data` are divided by the
    per-species molar masses, summed along each row, and the reciprocal of
    that sum is returned (one value per row).
    """
    n_species = molar_mass.shape[0]
    moles_per_mass = data[:, :n_species] / molar_mass[np.newaxis, :]
    return 1.0 / moles_per_mass.sum(axis=1)

In [None]:
# Build map_spec so that map_spec[k] is the position in specs_NP of the species
# named specs[k]; indexing columns with map_spec reorders specs_NP-ordered data
# into `specs` order. Each match is printed for inspection.
# Species of `specs` with no equal name in specs_NP are silently skipped.
map_spec=[]
for i in range(0,53):
   for j in range(0,53):
       if(specs[i]==specs_NP[j]):
           print(specs[i],specs_NP[j],j,i)
           map_spec.append(j)

In [None]:
def get_data_2d(file):
    """Load a state table from the 'DATA' dataset of an HDF5 file.

    Reads components 3..56 (54 columns) and component 2 of the third axis of
    'DATA', drops column 1 of the 54, flattens the first two axes into rows,
    reorders the 53 remaining columns with the module-level `map_spec`,
    appends component 2 as the last column and keeps every second row.

    Parameters
    ----------
    file : str
        Path of the HDF5 file.

    Returns
    -------
    numpy.ndarray
        Array with 54 columns (53 reordered columns + the appended column).

    Notes
    -----
    The file is opened in a ``with`` block so the handle is closed even if
    reading fails (the previous version never closed it).
    NOTE(review): the dropped column is presumably a species absent from
    `specs` (e.g. AR) and component 2 presumably temperature — confirm.
    """
    with h5py.File(file,'r') as f:
        raw = f['DATA']
        dset = raw[:,:,3:54+3]
        T = raw[:,:,2]
    dset = np.delete(dset,1,axis=2)
    dset = np.reshape(dset,(dset.shape[0]*dset.shape[1],53))
    T = np.reshape(T,(T.shape[0]*T.shape[1],1))
    data = dset[:,map_spec]
    data = np.append(data,T,axis=1)
    # 0:-1:2 keeps rows 0, 2, 4, ... and never the final row (kept as in the
    # original so existing results do not change).
    data = data[0:-1:2,:]
    return data
    

    
    

In [None]:
def get_reac_2d(file):
    """Load a source-term table from the 'DATA' dataset of an HDF5 file.

    Reads components 65..118 (54 columns) and the negated component 60 of the
    third axis of 'DATA', drops column 1 of the 54, flattens the first two
    axes into rows, reorders the 53 remaining columns with the module-level
    `map_spec`, appends the negated component 60 as the last column and keeps
    every second row.

    Parameters
    ----------
    file : str
        Path of the HDF5 file.

    Returns
    -------
    numpy.ndarray
        Array with 54 columns (53 reordered columns + the appended column).

    Notes
    -----
    The file is opened in a ``with`` block so the handle is closed even if
    reading fails (the previous version never closed it).
    NOTE(review): component 60 is presumably the heat-release rate, judging
    by the original variable name HRR — confirm sign convention.
    """
    with h5py.File(file,'r') as f:
        raw = f['DATA']
        dset = raw[:,:,65:119]
        HRR = -raw[:,:,60]
    dset = np.delete(dset,1,axis=2)
    dset = np.reshape(dset,(dset.shape[0]*dset.shape[1],53))
    HRR = np.reshape(HRR,(HRR.shape[0]*HRR.shape[1],1))
    data = dset[:,map_spec]
    data = np.append(data,HRR,axis=1)
    # 0:-1:2 keeps rows 0, 2, 4, ... and never the final row (kept as in the
    # original so existing results do not change).
    data = data[0:-1:2,:]
    return data
    

    
    

In [None]:
# Mean molar mass of every row of `data`.
# NOTE(review): `data` is only assigned in cells further down, so this cell
# fails on a fresh kernel when the notebook is run top to bottom.
mean_mass = get_mean_molar_mass(molar_mass,data)

In [None]:
def get_atoms_conservation(data):
    """Return, for every row of `data`, one value per element (4 columns).

    The first 53 columns of `data` are divided by the module-level
    `molar_mass` and multiplied (matrix product) by ``aij * atom_mass``,
    i.e. the per-species atom counts weighted by the atomic masses.
    """
    moles = data[:, :53] / molar_mass
    element_weights = aij * atom_mass
    return np.matmul(moles, element_weights)
   

In [None]:
# Cantera gas object shared (and mutated) by all get_* property helpers below.
# The mechanism file is looked up relative to the working directory / Cantera data path.
gas = ct.Solution('nDodecane_sk54.xml')

In [None]:
def get_cp(data):
    """Return a list with the mass-based heat capacity of every row of `data`.

    For each row the module-level Cantera object `gas` is set with
    temperature = column 53, pressure = 6079500 and mass fractions taken from
    the first 53 columns (named by `specs`); ``cp_mass`` is then read from a
    ``ct.Quantity`` built on that state.
    """
    cp_values = []
    n_rows = data.shape[0]
    for row in range(n_rows):
        mass_fractions = dict(zip(specs, data[row, 0:53]))
        gas.TPY = data[row, 53], 6079500, mass_fractions
        state = ct.Quantity(gas)
        cp_values.append(state.cp_mass)
    return cp_values

In [None]:
def get_enthalpy(data):
    """Return a list with the mass-based enthalpy of every row of `data`.

    For each row the module-level Cantera object `gas` is set with
    temperature = column 53, pressure = 6079500 and mass fractions taken from
    the first 53 columns (named by `specs`); ``enthalpy_mass`` is then read
    from a ``ct.Quantity`` built on that state.
    """
    enthalpies = []
    n_rows = data.shape[0]
    for row in range(n_rows):
        mass_fractions = dict(zip(specs, data[row, 0:53]))
        gas.TPY = data[row, 53], 6079500, mass_fractions
        state = ct.Quantity(gas)
        enthalpies.append(state.enthalpy_mass)
    return enthalpies

In [None]:
def get_viscosity(data):
    """Return a list with the viscosity of every row of `data`.

    For each row the module-level Cantera object `gas` is set with
    temperature = column 53, pressure = 6079500 and mass fractions taken from
    the first 53 columns (named by `specs`); ``viscosity`` is then read from
    a ``ct.Quantity`` built on that state.
    """
    viscosities = []
    n_rows = data.shape[0]
    for row in range(n_rows):
        mass_fractions = dict(zip(specs, data[row, 0:53]))
        gas.TPY = data[row, 53], 6079500, mass_fractions
        state = ct.Quantity(gas)
        viscosities.append(state.viscosity)
    return viscosities

In [None]:
def get_conductivity(data):
    """Return a list with the thermal conductivity of every row of `data`.

    For each row the module-level Cantera object `gas` is set with
    temperature = column 53, pressure = 6079500 and mass fractions taken from
    the first 53 columns (named by `specs`); ``thermal_conductivity`` is then
    read from a ``ct.Quantity`` built on that state.
    """
    conductivities = []
    n_rows = data.shape[0]
    for row in range(n_rows):
        mass_fractions = dict(zip(specs, data[row, 0:53]))
        gas.TPY = data[row, 53], 6079500, mass_fractions
        state = ct.Quantity(gas)
        conductivities.append(state.thermal_conductivity)
    return conductivities

In [None]:
def get_reaction(data):
    """Return Cantera net production rates for every row, in `specs` order.

    For each row the module-level `gas` is set with temperature = column 53,
    pressure = 6079500 and mass fractions from the first 53 columns; the 54
    ``net_production_rates`` are stored. Column 1 is then removed and the
    remaining 53 columns are permuted so that column k corresponds to
    ``specs[k]`` (matched by name against the Cantera species list with 'AR'
    removed).

    NOTE(review): removing column 1 assumes 'AR' is species index 1 in the
    mechanism — confirm.
    """
    n_rows = data.shape[0]
    rates = np.zeros((n_rows, 54))
    for row in range(n_rows):
        mass_fractions = dict(zip(specs, data[row, 0:53]))
        gas.TPY = data[row, 53], 6079500, mass_fractions
        state = ct.Quantity(gas)
        rates[row, :] = state.net_production_rates
    cantera_names = gas.species_names
    cantera_names.remove('AR')
    rates = np.delete(rates, 1, axis=1)
    # order[k] = position of specs[k] among the Cantera species (without AR)
    order = [j
             for i in range(0, 53)
             for j in range(0, 53)
             if specs[i] == cantera_names[j]]
    return rates[:, order]

In [None]:
def get_diffusion(data):
    """Return Cantera mixture diffusion coefficients per row, in `specs` order.

    For each row the module-level `gas` is set with temperature = column 53,
    pressure = 6079500 and mass fractions from the first 53 columns; the 54
    ``mix_diff_coeffs_mass`` values are stored. Column 1 is then removed and
    the remaining 53 columns are permuted so that column k corresponds to
    ``specs[k]`` (matched by name against the Cantera species list with 'AR'
    removed).

    NOTE(review): removing column 1 assumes 'AR' is species index 1 in the
    mechanism — confirm.
    """
    n_rows = data.shape[0]
    coeffs = np.zeros((n_rows, 54))
    for row in range(n_rows):
        mass_fractions = dict(zip(specs, data[row, 0:53]))
        gas.TPY = data[row, 53], 6079500, mass_fractions
        state = ct.Quantity(gas)
        coeffs[row, :] = state.mix_diff_coeffs_mass
    cantera_names = gas.species_names
    cantera_names.remove('AR')
    coeffs = np.delete(coeffs, 1, axis=1)
    # order[k] = position of specs[k] among the Cantera species (without AR)
    order = [j
             for i in range(0, 53)
             for j in range(0, 53)
             if specs[i] == cantera_names[j]]
    return coeffs[:, order]

In [None]:
# Rebuild the module-level map_spec, this time against the Cantera species
# order (with 'AR' removed) instead of specs_NP.
# NOTE(review): get_data_2d / get_reac_2d read the module-level map_spec, so
# running this cell changes the column permutation they apply — confirm intended.
specs_cantera=gas.species_names
specs_cantera.remove('AR')  
map_spec=[]
for i in range(0,53):
   for j in range(0,53):
       if(specs[i]==specs_cantera[j]):
           print(specs[i],specs_cantera[j],j)
           map_spec.append(j)

In [None]:
def opt_est(Xt,data,nbins):
    """Binned conditional-mean estimate of `data` given the coordinates `Xt`.

    `Xt` is binned on a regular grid with `nbins` bins per dimension, the mean
    of `data` is computed in every bin (empty bins are set to 0) and every
    sample is assigned the mean of the bin it falls into.

    Parameters
    ----------
    Xt : array of shape (N, D)
        Sample coordinates. Any number of dimensions D is supported (the
        previous version indexed exactly five).
    data : array of shape (N,)
        Values to average.
    nbins : int
        Number of bins along each dimension.

    Returns
    -------
    numpy.ndarray of shape (N,)
        Bin mean evaluated at every sample.
    """
    cond_mean, _ , bins = binned_statistic_dd(Xt,data,bins=nbins,expand_binnumbers=True)
    cond_mean[np.isnan(cond_mean)]=0
    # binned_statistic_dd numbers in-range bins from 1; shift to 0-based.
    bins = bins-1
    bins[bins==nbins]=nbins-1
    # bins has one row of indices per dimension, so the tuple of its rows is a
    # valid fancy index for a cond_mean of any dimensionality.
    return cond_mean[tuple(bins)]

In [None]:
def get_table_noholes(Xt,data,Xt3d,nbins):
    """Tabulate `data` on a 2-D grid over `Xt` and look the table up at `Xt3d`.

    A grid with `nbins` nodes per direction spans the min/max of the first two
    columns of `Xt`. `data` is linearly interpolated onto the nodes; nodes
    where that yields NaN are filled by nearest-neighbour interpolation.
    Each row of `Xt3d` is converted to integer indices (clamped to
    ``[0, nbins-1]``) and the corresponding table entry is returned.
    """
    lower = np.min(Xt,0)
    upper = np.max(Xt,0)
    grid = np.linspace(lower,upper,nbins)
    xi,yi = np.meshgrid(grid[:,0],grid[:,1])
    points = (Xt[:,0],Xt[:,1])

    table = griddata(points,data,(xi,yi),method='linear')
    # fill the nodes the linear interpolant could not reach
    holes = np.isnan(table)
    table[holes] = griddata(points,data,(xi[holes],yi[holes]),method='nearest')

    idx = (((Xt3d-lower)/(upper-lower))*nbins).astype(int)
    idx = np.clip(idx,0,nbins-1)
    # meshgrid's default layout puts the second coordinate on axis 0
    return table[idx[:,1],idx[:,0]]

In [None]:
def get_table_noholes3d(Xt,data,Xt3d,nbins):
    """Tabulate `data` on a 3-D grid over `Xt` and look the table up at `Xt3d`.

    A grid with `nbins` nodes per direction spans the min/max of the first
    three columns of `Xt`. `data` is linearly interpolated onto the nodes;
    nodes where that yields NaN are filled by nearest-neighbour interpolation
    and the number of NaNs still left is printed. Each row of `Xt3d` is
    converted to integer indices (clamped to ``[0, nbins-1]``); the table
    entry is returned, with any remaining NaN replaced by 0.
    """
    lower = np.min(Xt,0)
    upper = np.max(Xt,0)
    grid = np.linspace(lower,upper,nbins)
    xi,yi,zi = np.meshgrid(grid[:,0],grid[:,1],grid[:,2])
    points = (Xt[:,0],Xt[:,1],Xt[:,2])

    table = griddata(points,data,(xi,yi,zi),method='linear')
    # fill the nodes the linear interpolant could not reach
    holes = np.isnan(table)
    table[holes] = griddata(points,data,(xi[holes],yi[holes],zi[holes]),method='nearest')

    print(np.sum(np.isnan(table)))

    idx = (((Xt3d-lower)/(upper-lower))*nbins).astype(int)
    idx = np.clip(idx,0,nbins-1)
    # meshgrid's default layout swaps the first two coordinates
    pred = table[idx[:,1],idx[:,0],idx[:,2]]
    pred[np.isnan(pred)] = 0
    return pred

In [None]:
def get_table(Xt,data,Xt3d,nbins):
    """Look up the binned mean of `data` (binned over `Xt`) at the points `Xt3d`.

    The mean of `data` is computed on an `nbins`-per-dimension grid over `Xt`
    (empty bins become 0). Each row of `Xt3d` is mapped to integer bin
    indices using the min/max of `Xt`, clamped to ``[0, nbins-1]``, and the
    table is indexed with the first two index columns.
    """
    table = binned_statistic_dd(Xt,data,bins=nbins,expand_binnumbers=True)[0]
    table[np.isnan(table)] = 0

    lower = np.min(Xt,0)
    upper = np.max(Xt,0)
    idx = (((Xt3d-lower)/(upper-lower))*nbins).astype(int)
    idx = np.clip(idx,0,nbins-1)
    return table[idx[:,0],idx[:,1]]

In [None]:
def cond_mean(Xt,data,nbins):
    """Return the transposed table of bin means of `data` over `Xt`.

    The mean of `data` is computed on an `nbins`-per-dimension grid over
    `Xt`; empty bins (NaN) are set to 0 and the table is returned with its
    axes reversed.
    """
    table = binned_statistic_dd(Xt,data,bins=nbins,expand_binnumbers=True)[0]
    empty = np.isnan(table)
    table[empty] = 0
    return table.T

In [None]:
# Sweep the table resolution (10, 20, ..., 190 nodes per direction) and record,
# for each resolution, the normalised RMS error of the tabulated column 53 of
# `reac` evaluated on the 3-D data: once with the table built on Yc2d, once on
# the first two columns of Xt.
# NOTE(review): Yc2d, Yc3d, reac, reac3d, Xt and Xt3d are only assigned in
# cells further down, so this cell depends on execution order.
bin_test = np.arange(10,200,10)
res_Yc = np.zeros(bin_test.shape[0])
res_Xt = np.zeros(bin_test.shape[0])
for i,bins in enumerate(bin_test):
    pred = get_table_noholes(Yc2d[:,0:2],reac[:,53],Yc3d[:,0:2],bins)
    res_Yc[i] = np.sqrt(mean_squared_error(reac3d[:,53],pred)/np.mean(reac3d[:,53]**2))    
    pred = get_table_noholes(Xt[:,0:2],reac[:,53],Xt3d[:,0:2],bins)
    res_Xt[i] = np.sqrt(mean_squared_error(reac3d[:,53],pred)/np.mean(reac3d[:,53]**2))    
    print(i,bins)
    


In [None]:
def opt_est3(Xt,data,nbins):
    """Binned conditional mean of `data` over the first three columns of `Xt`.

    Every sample is placed in an ``nbins x nbins x nbins`` grid spanning the
    min/max of each of the three coordinates, the mean of `data` is computed
    per bin, and each sample receives the mean of its own bin.

    Parameters
    ----------
    Xt : array of shape (N, >=3)
        Sample coordinates; only columns 0..2 are used.
    data : array of shape (N,)
        Values to average.
    nbins : int
        Number of bins per direction.

    Returns
    -------
    numpy.ndarray of shape (N,)

    Notes
    -----
    The previous version initialised the per-bin counter with ones, which
    turned every bin mean into sum/(n+1); the true mean is computed here, in
    line with `opt_est2`. The per-sample Python loops are replaced by
    `np.bincount`, and a coordinate with zero range is mapped to bin 0
    instead of failing on a NaN index.
    """
    coords = np.asarray(Xt)[:, 0:3]
    values = np.asarray(data, dtype=float)

    lower = coords.min(axis=0)
    span = coords.max(axis=0) - lower
    span = np.where(span == 0, 1, span)   # constant coordinate -> single bin

    idx = (((coords - lower) / span) * nbins).astype(int)
    idx = np.minimum(idx, nbins - 1)      # the maximum falls into the last bin

    flat = np.ravel_multi_index((idx[:, 0], idx[:, 1], idx[:, 2]), (nbins, nbins, nbins))
    n_cells = nbins ** 3
    totals = np.bincount(flat, weights=values, minlength=n_cells)
    counts = np.bincount(flat, minlength=n_cells)

    means = np.zeros(n_cells)
    filled = counts > 0
    means[filled] = totals[filled] / counts[filled]
    return means[flat]

In [None]:
def opt_est2(Xt,data,nbins):
    """Binned conditional mean of `data` over the first two columns of `Xt`.

    Every sample is placed in an ``nbins x nbins`` grid spanning the min/max
    of each of the two coordinates, the mean of `data` is computed per bin
    (empty bins are 0), and each sample receives the mean of its own bin.

    Parameters
    ----------
    Xt : array of shape (N, >=2)
        Sample coordinates; only columns 0..1 are used.
    data : array of shape (N,)
        Values to average.
    nbins : int
        Number of bins per direction.

    Returns
    -------
    numpy.ndarray of shape (N,)

    Notes
    -----
    Results match the previous loop-based version; the differences are that
    empty bins no longer trigger a 0/0 RuntimeWarning, a coordinate with zero
    range is mapped to bin 0 instead of failing on a NaN index, and the two
    per-sample Python loops are replaced by `np.bincount`.
    """
    coords = np.asarray(Xt)[:, 0:2]
    values = np.asarray(data, dtype=float)

    lower = coords.min(axis=0)
    span = coords.max(axis=0) - lower
    span = np.where(span == 0, 1, span)   # constant coordinate -> single bin

    idx = (((coords - lower) / span) * nbins).astype(int)
    idx = np.minimum(idx, nbins - 1)      # the maximum falls into the last bin

    flat = idx[:, 0] * nbins + idx[:, 1]
    n_cells = nbins * nbins
    totals = np.bincount(flat, weights=values, minlength=n_cells)
    counts = np.bincount(flat, minlength=n_cells)

    means = np.zeros(n_cells)
    filled = counts > 0
    means[filled] = totals[filled] / counts[filled]
    return means[flat]

In [3]:
def read_data(fname,nc):
    """Read a flat float32 binary file as rows of `nc` values, minus column 0.

    The file is read with ``np.fromfile`` as single precision, reshaped to
    ``(size/nc, nc)`` and returned without its first column.
    """
    raw = np.fromfile(fname,dtype=np.single)
    n_rows = int(raw.size/nc)
    table = raw.reshape((n_rows,nc))
    return np.delete(table,0,axis=1)

In [4]:
def read_data_mem(fname,nc):
    """Same as `read_data`, but the file is opened as a read-only memmap.

    The float32 memmap is reshaped to ``(size/nc, nc)``; ``np.delete`` then
    returns a new array without the first column.
    """
    mapped = np.memmap(fname, dtype=np.single, mode='r')
    n_rows = int(mapped.size/nc)
    table = np.reshape(mapped,(n_rows,nc))
    return np.delete(table,0,axis=1)

In [5]:
def read_reaction(fname):
    """Read a float32 binary file of 56-value records as a 54-column table.

    Each record is rearranged so that the result holds record entries 1..53
    followed by record entry 0 as the last column; record entries 54 and 55
    are discarded.
    NOTE(review): entry 0 is presumably the heat-release rate (the original
    local was named HRR) — confirm.
    """
    raw = np.fromfile(fname,dtype=np.single)
    table = raw.reshape((int(raw.size/56),56))
    # entries 1..53 first, entry 0 moved to the end
    return np.hstack((table[:,1:54],table[:,0:1]))

In [6]:
def read_reaction_mem(fname):
    """Same as `read_reaction`, but the file is opened as a read-only memmap.

    The float32 memmap is viewed as 56-value records; the first column is
    removed, the former first column is written into column 53 of the
    remaining table and column 54 is dropped, leaving 54 columns.
    """
    mapped = np.memmap(fname, dtype=np.single, mode='r')
    records = np.reshape(mapped,(int(mapped.size/56),56))
    first_column = records[:,0]
    table = np.delete(records,0,axis=1)
    table[:,53] = first_column
    return np.delete(table,54,axis=1)

In [7]:
def do_normalization(data,data2,which):
    """Centre/scale `data` column-wise using statistics of the reference `data2`.

    Parameters
    ----------
    data : numpy.ndarray
        Array to normalise.
    data2 : numpy.ndarray
        Reference array; all statistics are taken along its axis 0.
    which : str
        'range'  : (data - mean) / (max - min)
        'std'    : (data - mean) / std
        'level'  : (data - mean) / mean
        'vast'   : (data - mean) / std * mean
        'pareto' : (data - mean) / sqrt(std)
        'minmax' : (data - min) / (max - min)
        'none'   : unmodified copy of `data`

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    ValueError
        If `which` is not one of the names above (the previous version
        silently returned None in that case).
    """
    if which == 'none':
        return np.copy(data)
    if which == 'minmax':
        lowest = np.min(data2,0)
        return (data-lowest)/(np.max(data2,0)-lowest)

    # every remaining scheme is centred on the reference mean
    centre = np.mean(data2,0)
    if which == 'range':
        return (data-centre)/(np.max(data2,0)-np.min(data2,0))
    if which == 'std':
        return (data-centre)/(np.std(data2,0))
    if which == 'level':
        return (data-centre)/centre
    if which == 'vast':
        return (data-centre)/(np.std(data2,0))*centre
    if which == 'pareto':
        return (data-centre)/np.sqrt(np.std(data2,0))
    raise ValueError("unknown normalization %r" % (which,))
    
def do_inverse_norm(data,datanorm,which):
    """Undo a column-wise normalisation, using statistics of the reference `data`.

    Parameters
    ----------
    data : numpy.ndarray
        Reference array whose axis-0 statistics defined the normalisation.
    datanorm : numpy.ndarray
        Normalised values to map back.
    which : str
        'range'  : datanorm * (max - min) + mean
        'std'    : datanorm * std + mean
        'level'  : datanorm * mean + mean
        'vast'   : datanorm * std / mean + mean
        'pareto' : datanorm * sqrt(std) + mean
        'minmax' : datanorm * (max - min) + min
        'none'   : unmodified copy of `datanorm`

    Returns
    -------
    numpy.ndarray

    Raises
    ------
    ValueError
        If `which` is not one of the names above (the previous version
        silently returned None, including for 'none').
    """
    if which == 'none':
        return np.copy(datanorm)
    if which == 'minmax':
        lowest = np.min(data,0)
        return datanorm*(np.max(data,0)-lowest)+lowest

    # every remaining scheme is centred on the reference mean
    centre = np.mean(data,0)
    if which == 'range':
        return datanorm*(np.max(data,0)-np.min(data,0))+centre
    if which == 'std':
        return datanorm*(np.std(data,0))+centre
    if which == 'level':
        return datanorm*centre+centre
    if which == 'vast':
        return datanorm*(np.std(data,0))/centre+centre
    if which == 'pareto':
        return datanorm*np.sqrt(np.std(data,0))+centre
    raise ValueError("unknown normalization %r" % (which,))
    



In [None]:
def generate_table(Xt,Xt3d,data):
    """Return a 5-D meshgrid with 10 nodes per direction over the range of `Xt`.

    One 10-point linspace from min to max is built for every column of `Xt`;
    the first five of them are passed to ``np.meshgrid``. `Xt3d` and `data`
    are accepted but not used.
    """
    ng = 10
    axes = [np.linspace(np.min(Xt[:,col]),np.max(Xt[:,col]),ng)
            for col in range(Xt.shape[1])]
    return np.meshgrid(axes[0],axes[1],axes[2],axes[3],axes[4])
    

In [None]:
# Load the 0-D state and source-term tables. Unlike read_data/read_reaction,
# these files are reshaped to (56, N) and transposed, i.e. they are read as
# 56 blocks of N values rather than N records of 56 values.
data0D = np.fromfile('../PCA/data_0D.bin',dtype=np.single)
data0D = np.reshape(data0D,(56,int(data0D.size/56)))   
data0D = data0D.T
# drop the first column
data0D = np.delete(data0D,0,1)

reac0D = np.fromfile('../PCA/reac_0D.bin',dtype=np.single)
reac0D = np.reshape(reac0D,(56,int(reac0D.size/56)))   
reac0D = reac0D.T
# same rearrangement as read_reaction: former column 0 ends up as column 53,
# leaving 54 columns
HRR = reac0D[:,0]
reac0D = np.delete(reac0D,0,1)
reac0D[:,53]=HRR
reac0D = np.delete(reac0D,54,1)



In [78]:
# Load two 2-D state tables (56 values per record; read_data drops the first).
# The commented-out lines are alternative input files.
#data = read_data('../PCA/data2d_lower2_chi.bin',57)
data = read_data('../PCA/data2d_lower2.bin',56)
#data2 = read_data('../PCA/data2d_lower.bin')
data3 = read_data('../PCA/data2d_base.bin',56)

#data4 = read_data('../PCA/data2d_high.bin')
#dat3d = read_data('../PCA/forPCA_coarse.bin')

In [None]:
# Alternative loader: take `data` from an HDF5 file instead of the .bin files.
# NOTE(review): this assigns the same name `data` as the .bin loading cell;
# whichever cell ran last determines the contents.
#data = get_data_2d('../PCA/data_10.h5')
#data2 = get_data_2d('../PCA/data_5.h5')
#data3 = get_data_2d('../PCA/data_15.h5')
data = get_data_2d('../PCA/data_20.h5')
#data5 = get_data_2d('../PCA/data_25.h5')
#data6 = get_data_2d('../PCA/data_30.h5')


In [None]:
# Alternative loader: take `reac` from an HDF5 file instead of the .bin files.
# NOTE(review): this assigns the same name `reac` as the read_reaction cell;
# whichever cell ran last determines the contents.
#reac = get_reac_2d('../PCA/data_10.h5')
#reac2 = get_reac_2d('../PCA/data_5.h5')
#reac3 = get_reac_2d('../PCA/data_15.h5')
reac = get_reac_2d('../PCA/data_20.h5')
#reac5 = get_reac_2d('../PCA/data_25.h5')
#reac6 = get_reac_2d('../PCA/data_30.h5')


#reac2 = get_reac_2d('../PCA/data_10.h5')
#reac3 = get_reac_2d('../PCA/data_15.h5')
#reac4 = get_reac_2d('../PCA/data_20.h5')

In [79]:
# Load the 2-D source-term tables matching `data` and `data3`
# (54 columns each, see read_reaction). Commented lines are alternative inputs.
reac = read_reaction('../PCA/reac2d_lower2.bin')
#reac2 = read_reaction('../PCA/reac2d_lower.bin')
reac3 = read_reaction('../PCA/reac2d_base.bin')
#reac4 = read_reaction('../PCA/reac2d_high.bin')
#reac3, HRR32d = read_data('../PCA/reac2d_high.bin')
#reac2 = read_data('../PCA/reac2d_base.bin')
#reac3 = read_data('../PCA/reac2d_high.bin')
#reac3d = read_data('../PCA/reac3d.bin')

In [36]:
def do_CMA(data):
    """Assign every row of `data` one of the labels 0, 1, 2, 3 or 5.

    With T = column 53, a = column 3, b = column 42, z = column 54 and the
    thresholds a* = 0.05*max(a), b* = 0.05*max(b):

    * 0 : T < 1120 or a > a*
    * 1 : T >= 1120, a < a*, b < b*, z > 0.046
    * 2 : T >= 1120, a < a*, b < b*, z < 0.046
    * 3 : T >= 1120, a < a*, b > b*
    * 5 : everything else (rows sitting exactly on a threshold)

    NOTE(review): by the `specs` ordering columns 3 and 42 would be C12H25O2
    and OH, and column 53 temperature; the meaning of column 54 is not
    visible here — confirm.
    """
    temperature = data[:,53]
    marker_a = data[:,3]
    marker_b = data[:,42]
    scalar_z = data[:,54]

    limit_a = 0.05*max(marker_a)
    limit_b = 0.05*max(marker_b)

    cmas = np.ones(data.shape[0])*5

    cmas[(temperature < 1120) | (marker_a > limit_a)] = 0

    hot_low_a = (temperature >= 1120) & (marker_a < limit_a)
    low_b = marker_b < limit_b
    cmas[hot_low_a & low_b & (scalar_z > 0.046)] = 1
    cmas[hot_low_a & low_b & (scalar_z < 0.046)] = 2
    cmas[hot_low_a & (marker_b > limit_b)] = 3

    return cmas

In [None]:
# Two-column coordinate sets for the 2-D and 3-D data:
#   column 0 = data column 54, column 1 = sum of data columns 27, 28, 30, 31
#   (CO, CO2, H2, H2O in the `specs` ordering).
# ReacC / ReacC3d are the same four-column sums taken from the source-term tables.
# NOTE(review): dat3d and reac3d are only loaded in a later cell, so this cell
# depends on execution order.
Yc2d = np.zeros((data.shape[0],2))
Yc2d[:,1] = data[:,27]+data[:,28]+data[:,30]+data[:,31]
Yc2d[:,0] =data[:,54]
ReacC = reac[:,27]+reac[:,28]+reac[:,30]+reac[:,31]

Yc3d = np.zeros((dat3d.shape[0],2))
Yc3d[:,1] = dat3d[:,27]+dat3d[:,28]+dat3d[:,30]+dat3d[:,31]
Yc3d[:,0] = dat3d[:,54]
ReacC3d = reac3d[:,27]+reac3d[:,28]+reac3d[:,30]+reac3d[:,31]

In [24]:
# Load the 3-D state table and its source-term table (used as the
# validation/test set by the cells that fit on the 2-D data).
dat3d = read_data('../PCA/dat3d.bin',56)
reac3d = read_reaction('../PCA/reac3d.bin')

#reac3d = read_reaction_mem('../PCA/reac3d_big.bin')

In [None]:
# Column 53 of the source-term tables, min-max scaled with the 2-D statistics.
# HRR3d must be scaled first: the next line overwrites HRR2d with its
# normalised version, which would no longer provide the original min/max.
HRR2d = np.copy(reac[:,53])
HRR3d = np.copy(reac3d[:,53])
HRR3d = do_normalization(HRR3d,HRR2d,'minmax')
HRR2d = do_normalization(HRR2d,HRR2d,'minmax')

In [58]:
# Append the second 2-D data set to the first (rows stacked).
# NOTE(review): not idempotent — re-running this cell appends data3/reac3 again.
data = np.concatenate((data,data3))
reac = np.concatenate((reac,reac3))

In [81]:
# Replace every entry of the state tables by its absolute value (in-place
# rebinding; the signed originals are no longer available afterwards).
data = abs(data)
dat3d = abs(dat3d)

In [82]:
# 'range'-normalise the first nc columns; the 3-D data is scaled with the
# statistics of the 2-D data so both live in the same normalised space.
nc=54
datanorm = do_normalization(data[:,0:nc],data[:,0:nc],'range')
dat3dnorm = do_normalization(dat3d[:,0:nc],data[:,0:nc],'range')

In [83]:
# DO PCA Here
# Fit a 5-component PCA on the normalised 2-D data and project the 3-D data
# onto the same components (centred with the 2-D column means, which is what
# pca.fit_transform subtracts internally).
nc=54
pca = PCA(n_components=5)
Xt= pca.fit_transform(datanorm[:,0:nc])
components = pca.components_
Xt3d = np.matmul((dat3dnorm[:,0:nc]-np.mean(datanorm[:,0:nc],0)),components.T)
#XPCA = pca.inverse_transform(Xt)


In [None]:
# Variant 1 of the projected source terms: scale by the column standard
# deviation of `data` before projecting on the PCA components.
# NOTE(review): three cells assign RPCA2D/RPCA3D; the one run last wins.
RPCA2D = np.matmul(reac[:,0:nc]/(np.std(data[:,0:nc],0)),components.T)
RPCA3D = np.matmul(reac3d[:,0:nc]/(np.std(data[:,0:nc],0)),components.T)

In [84]:
# Variant 2 of the projected source terms: scale by the column range
# (max - min) of `data`, matching the 'range' normalisation used for datanorm.
RPCA2D = np.matmul(reac[:,0:nc]/(np.max(data[:,0 :nc],0)-np.min(data[:,0:nc],0)),components.T)
RPCA3D = np.matmul(reac3d[:,0:nc]/(np.max(data[:,0:nc],0)-np.min(data[:,0:nc],0)),components.T)

In [None]:
# Variant 3 of the projected source terms: project the unscaled source terms.
RPCA2D = np.matmul(reac[:,0:nc],components.T)
RPCA3D = np.matmul(reac3d[:,0:nc],components.T)

In [None]:
# Scatter of the projected source terms, 3-D data first, 2-D data on top.
# NOTE(review): both axes use column 1, so every point lies on the diagonal;
# a different column was probably intended for one axis — confirm.
plt.figure()
plt.scatter(RPCA3D[:,1],RPCA3D[:,1],marker='.')
plt.scatter(RPCA2D[:,1],RPCA2D[:,1],marker='.')


In [None]:
# Min-max scale the first five projected source terms; the 3-D set uses the
# min/max of the 2-D set.
RPCA2DN = (RPCA2D[:,0:5]-np.min(RPCA2D[:,0:5],0))/(np.max(RPCA2D[:,0:5],0)-np.min(RPCA2D[:,0:5],0))
RPCA3DN = (RPCA3D[:,0:5]-np.min(RPCA2D[:,0:5],0))/(np.max(RPCA2D[:,0:5],0)-np.min(RPCA2D[:,0:5],0))

In [None]:
def custom_loss(y_true, y_pred):
    """Keras loss: weighted sum of a sum-constraint penalty and the MSE.

    The penalty is ``|1 - sum(y_pred*(maxs-mins)+means)/500|`` (weight 0.2);
    the mean squared error gets weight 0.8. `maxs`, `mins` and `means` are
    read from module scope.
    NOTE(review): no module-level definition of maxs/mins/means is visible
    in this notebook — confirm where they come from.
    """
    alpha=0.2
    rescaled_sum = K.sum(y_pred*(maxs-mins)+means)
    sum_penalty = abs(1-rescaled_sum/500)
    mse = K.mean((y_true-y_pred)**2)
    return  sum_penalty*alpha + mse*(1.0-alpha)

In [None]:
# ANN
def get_model(dimi):
    """Build and compile the species-regression network.

    Architecture: three Dense(80, relu) layers, each followed by batch
    normalisation, and a linear output layer with `nc` units (`nc` is read
    from module scope at call time). Compiled with the 'nadam' optimiser and
    mean-squared-error loss.

    Parameters
    ----------
    dimi : int
        Number of input features.

    Returns
    -------
    keras.models.Sequential
    """
    # BatchNormalization is not part of the notebook's import cell; importing
    # it here keeps the function from failing with NameError.
    from keras.layers import BatchNormalization

    model = Sequential()
    model.add(Dense(80,activation='relu',input_dim=dimi ))
    model.add(BatchNormalization())

    model.add(Dense(80,activation='relu'))
    model.add(BatchNormalization())

    model.add(Dense(80,activation='relu'))
    model.add(BatchNormalization())

    model.add(Dense(nc,activation='linear'))
    model.compile(optimizer='nadam',loss='mean_squared_error')
    #model.compile(optimizer='nadam',loss=custom_loss)
    return model
    


    

In [None]:
# Standardised (zero mean, unit std) copies of the first 54 columns; the 3-D
# data is scaled with the statistics of the 2-D data.
datanormF = do_normalization(data[:,0:54],data[:,0:54],'std')
dat3dnormF = do_normalization(dat3d[:,0:54],data[:,0:54],'std')


In [None]:
# 'range'-normalise the five PCA scores; the 3-D scores use the 2-D statistics.
XtN = do_normalization(Xt[:,0:5],Xt[:,0:5],'range')
Xt3dN= do_normalization(Xt3d[:,0:5],Xt[:,0:5],'range')

In [None]:
# Train the species network on the normalised PCA scores, validating on the 3-D data.
# nc is set to 53 because get_model sizes its output layer from the global nc.
# NOTE(review): the get_model call is commented out, so `model` must already
# exist in the kernel (with 53 outputs) for this cell to run.
nc=53
#model = get_model(5)
h=model.fit(XtN[:,0:5],datanormF[:,0:53],epochs=20,validation_data=(Xt3dN[:,0:5],dat3dnormF[:,0:53]),batch_size=500)
#h=model.fit(Xt[0:-1:1000,0:2],datanorm[0:-1:1000,0:53],epochs=20,batch_size=500)

In [None]:
# Element totals of the predicted composition vs. the reference 3-D data.
# NOTE(review): `out3d` is not assigned at module level anywhere in view
# (it is a local/return value of check_accuracy) — confirm its origin.
pred = get_atoms_conservation(out3d)
atoms_dns = get_atoms_conservation(dat3d)

In [85]:
# Evaluate the five saved networks 'RR-PCA-1.h5' ... 'RR-PCA-5.h5' on the 3-D
# PCA scores; column k of RRpred holds the first output of network k+1.
# RRpred is allocated here so the cell no longer relies on an array left over
# from an earlier kernel state, and the five copy-pasted load/predict pairs
# are folded into one loop. As before, `model` ends up bound to the last
# loaded network.
RRpred = np.zeros((Xt3d.shape[0],5))
for k in range(5):
    model = load_model('RR-PCA-%d.h5' % (k+1))
    RRpred[:,k]=model.predict(Xt3d)[:,0]



In [None]:
# Replace column 4 of RRpred by the binned conditional mean (75 bins per
# dimension) of the fifth projected source term, conditioned on the 3-D scores.
# The commented lines apply the same estimator to the other columns.
RRpred[:,4] = opt_est(Xt3d,RPCA3D[:,4],75)
#RRpred[:,1] = opt_est(Xt3d,RPCA3D[:,1],60)
#RRpred[:,2] = opt_est(Xt3d,RPCA3D[:,2],60)
#RRpred[:,3] = opt_est(Xt3d,RPCA3D[:,3],60)
#RRpred[:,4] = opt_est(Xt3d,RPCA3D[:,4],60)


In [None]:
# Binned conditional mean of column 53 of reac3d given the 3-D scores.
# NOTE(review): this rebinds RRpred to a 1-D array, so the column-wise
# RRpred[:,k] expressions in the neighbouring cells no longer work afterwards.
RRpred = opt_est(Xt3d,reac3d[:,53],60)


In [None]:
# Squared prediction error summed over the five projected source terms,
# one value per sample. The zeros array on the first line is immediately
# replaced by the expression below.
error = np.zeros(RPCA3D.shape[0])
error = (RPCA3D[:,0]-RRpred[:,0])**2 + (RPCA3D[:,1]-RRpred[:,1])**2 + (RPCA3D[:,2]-RRpred[:,2])**2 + \
        (RPCA3D[:,3]-RRpred[:,3])**2 + (RPCA3D[:,4]-RRpred[:,4])**2

In [67]:
# Squared prediction error summed over all columns, one value per sample.
error = np.sum((RPCA3D-RRpred)**2,axis=1)

In [None]:
def get_model_prop(dimi):
    """Build and compile a small scalar-regression network.

    Two Dense(10, relu) layers with 'normal' kernel initialisation followed by
    a single linear output unit; compiled with 'nadam' and mean squared error.
    `dimi` is the number of input features.
    """
    network = Sequential([
        Dense(10,input_dim=dimi, activation='relu',kernel_initializer='normal'),
        Dense(10, activation='relu',kernel_initializer='normal'),
        Dense(1,activation='linear'),
    ])
    network.compile(optimizer='nadam',loss='mean_squared_error')
    return network
    


In [None]:
# Fit the scalar network on the five PCA scores against the min-max scaled
# column-53 source term, validating on the 3-D data.
model = get_model_prop(5)
model.fit(Xt[:,0:5],HRR2d,validation_data=(Xt3d[:,0:5],HRR3d),epochs=10,batch_size=500)
#model.fit(Yc2d[:,0:2],HRR2d,epochs=20,batch_size=500)

In [None]:
# Parity plot of column 42 (OH in the `specs` ordering): reference 3-D data on
# the x-axis, prediction on the y-axis, both divided by the reference maximum;
# the dashed red line is y = x. The figure is written to Final/OH2.png.
# NOTE(review): `pred` must hold a per-species prediction with at least 43
# columns here; several other cells rebind `pred` to different things.
plt.figure(figsize=(3,3))
plt.scatter(dat3d[:,42]/np.max(dat3d[:,42]), pred[:,42]/np.max(dat3d[:,42]),marker='.',c='k',s=10)
plt.locator_params(axis='y', nbins=3)
plt.locator_params(axis='x', nbins=3)
x = np.linspace(0.0,1.0,10)
plt.plot(x,x,'r--')
plt.xlabel('$DNS$')
plt.ylabel('$ANN$')
plt.title('$Y(OH)$')
plt.xlim((0.0,1.0))
plt.ylim((0.0,1.0))
plt.grid(alpha=0.2)
ax=plt.gca()
# enlarge title, axis labels and tick labels in one pass
for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
             ax.get_xticklabels() + ax.get_yticklabels()):
    item.set_fontsize(25)

plt.savefig('Final/OH2.png',dpi=300,bbox_inches = "tight")

In [None]:
def get_model_diff(dimi):
    """Build and compile a small network with five sigmoid outputs.

    Two Dense(10, relu) layers with 'normal' kernel initialisation followed by
    a Dense(5, sigmoid) output layer; compiled with 'nadam' and mean squared
    error. `dimi` is the number of input features.
    """
    network = Sequential([
        Dense(10,input_dim=dimi, activation='relu',kernel_initializer='normal'),
        Dense(10, activation='relu',kernel_initializer='normal'),
        Dense(5,activation='sigmoid'),
    ])
    network.compile(optimizer='nadam',loss='mean_squared_error')
    return network



In [None]:
def check_accuracy(epochs,nComp,nClust,labels2d,labels3d):
    """Train one species network per cluster and score it on 2-D and 3-D data.

    For every cluster label ``i`` in ``range(nClust)`` a fresh `get_model`
    network is fitted on the 2-D samples of that cluster (inputs: first
    `nComp` columns of the global `Xt`; targets: first `nc` columns of the
    global `datanorm`) and then evaluated on the 2-D and 3-D samples of the
    same cluster. Predictions are mapped back with
    ``x * (max - min) + min`` of the global `data` and compared with `data` /
    `dat3d`.

    Parameters
    ----------
    epochs : int
        Training epochs per cluster.
    nComp : int
        Number of PCA components used as network input.
    nClust : int
        Number of clusters.
    labels2d, labels3d : numpy.ndarray
        Cluster label of every 2-D / 3-D sample.

    Returns
    -------
    tuple
        ``(mean_accuracy2d, mean_accuracy, R2Score2D, R2Score3D, eps2D, eps3D,
        out2d, out3d)``. In the two 6-element accuracy arrays entry 0 is the
        mean R2, entries 1 and 2 the mean and max of ``|1 - sum of the first
        53 outputs|``; entries 3..5 stay 0.

    Notes
    -----
    Changes relative to the previous version: the progress message is now
    actually formatted; the outputs start as zeros instead of uninitialised
    memory so samples of skipped clusters have a defined value; clusters with
    no 3-D samples are skipped before predicting (the old ``pred==[]`` test
    compared an array with a list); the unused batch-size computation was
    removed (training always used batch_size=500).
    NOTE(review): the inverse transform is the 'minmax' one, whereas the
    module-level `datanorm` visible in this notebook is built with 'range' —
    confirm that the targets and the inverse transform match.
    """
    mean_accuracy = np.zeros(6)
    mean_accuracy2d = np.zeros(6)
    out2d = np.zeros([data.shape[0],nc])
    out3d = np.zeros([dat3d.shape[0],nc])

    for i in range(0,nClust):
        print("Processing cluster %d" % i)
        in2d = labels2d==i
        in3d = labels3d==i

        model = get_model(nComp)
        model.fit(Xt[in2d,0:nComp],datanorm[in2d,0:nc],epochs=epochs,batch_size=500)
        out2d[in2d,:] = model.predict(Xt[in2d,0:nComp])

        if not np.any(in3d):
            # nothing to predict for this cluster in the 3-D set
            continue
        pred = model.predict(Xt3d[in3d,0:nComp])
        if not pred.any():
            # all-zero prediction: leave the rows at their initial value
            continue
        out3d[in3d,:] = pred

    span = np.max(data[:,0:nc],0)-np.min(data[:,0:nc],0)
    offset = np.min(data[:,0:nc],0)
    out2d = out2d*span+offset
    out3d = out3d*span+offset

    R2Score2D = np.zeros(nc)
    R2Score3D = np.zeros(nc)
    eps2D = np.zeros(nc)
    eps3D = np.zeros(nc)
    for i in range(0,nc):
        R2Score2D[i]= r2_score(data[:,i],out2d[:,i])
        R2Score3D[i]= r2_score(dat3d[:,i],out3d[:,i])
        # mean squared error normalised by the mean square of the reference
        eps2D[i] = mean_squared_error(data[:,i],out2d[:,i])/np.mean(data[:,i]**2)
        eps3D[i] = mean_squared_error(dat3d[:,i],out3d[:,i])/np.mean(dat3d[:,i]**2)

    mean_accuracy2d[0] = np.mean(R2Score2D)
    mean_accuracy2d[1] = np.mean(abs(1-np.sum(out2d[:,0:53],1)))
    mean_accuracy2d[2] = np.max(abs(1-np.sum(out2d[:,0:53],1)))

    mean_accuracy[0] = np.mean(R2Score3D)
    mean_accuracy[1] = np.mean(abs(1-np.sum(out3d[:,0:53],1)))
    mean_accuracy[2] = np.max(abs(1-np.sum(out3d[:,0:53],1)))

    return mean_accuracy2d,mean_accuracy, R2Score2D, R2Score3D,eps2D,eps3D, out2d, out3d
    

In [None]:
def check_accuracy_reac(epochs,nComp,nClust,labels2d,labels3d,rindx):
    """Train one source-term network per cluster and score it on 2-D and 3-D data.

    For every cluster label ``i`` in ``range(nClust)`` a fresh
    `get_model_reac` network is fitted on the 2-D samples of that cluster
    (inputs: first `nComp` columns of the global `Xt`; target: column `rindx`
    of the global `RPCA2DN`) and evaluated on the 2-D and 3-D samples of the
    same cluster. Predictions are mapped back with the min/max of column
    `rindx` of `RPCA2D` and compared with `RPCA2D` / `RPCA3D`.

    Parameters
    ----------
    epochs : int
        Training epochs per cluster.
    nComp : int
        Number of PCA components used as network input.
    nClust : int
        Number of clusters.
    labels2d, labels3d : numpy.ndarray
        Cluster label of every 2-D / 3-D sample.
    rindx : int
        Column of the projected source terms to model.

    Returns
    -------
    tuple
        ``(R2Score2D, R2Score3D, out2d, out3d, h)`` where `h` is the Keras
        history of the last trained cluster, or None when `nClust` is 0.

    Notes
    -----
    Changes relative to the previous version: the progress message is now
    actually formatted; `h` is initialised so ``nClust == 0`` no longer raises
    UnboundLocalError; clusters with no 3-D samples are skipped before
    predicting (the old ``pred==[]`` test compared an array with a list); the
    unused batch-size computation was removed (training always used
    batch_size=500).
    """
    out2d = np.zeros((data.shape[0],1))
    out3d = np.zeros((dat3d.shape[0],1))
    h = None

    for i in range(0,nClust):
        print("Processing cluster %d" % i)
        in2d = labels2d==i
        in3d = labels3d==i

        model = get_model_reac(nComp)
        h=model.fit(Xt[in2d,0:nComp],RPCA2DN[in2d,rindx],epochs=epochs,batch_size=500)
        out2d[in2d,:] = model.predict(Xt[in2d,0:nComp])

        if not np.any(in3d):
            # nothing to predict for this cluster in the 3-D set
            continue
        pred = model.predict(Xt3d[in3d,0:nComp])
        if not pred.any():
            # all-zero prediction: leave the rows at their initial value
            continue
        out3d[in3d,:] = pred

    # undo the min-max scaling of the target column
    lowest = np.min(RPCA2D[:,rindx])
    span = np.max(RPCA2D[:,rindx])-lowest
    out2d = out2d*span+lowest
    out3d = out3d*span+lowest

    R2Score2D= r2_score(RPCA2D[:,rindx],out2d)
    R2Score3D= r2_score(RPCA3D[:,rindx],out3d)

    return R2Score2D, R2Score3D, out2d, out3d,h
    

In [None]:
def get_cluster_LTC(data,dat3d):
    """Split both data sets into two clusters by thresholding column 3.

    Rows whose column 3 exceeds 0.0001 get label 1.0, all others 0.0.
    Returns ``(labels2d, labels3d)`` as float arrays.
    """
    threshold = 0.0001
    labels2d = np.where(data[:,3] > threshold, 1.0, 0.0)
    labels3d = np.where(dat3d[:,3] > threshold, 1.0, 0.0)
    return labels2d,labels3d
    

In [None]:
#X_train, X_valid, Y_train, Y_valid = train_test_split(Xt,data[:,42])
def get_model_reac(dimi):
    """Build and compile the source-term regression network.

    Three hidden blocks of Dense(24) + ReLU with 'normal' kernel
    initialisation; the first two Dense layers carry an L2 kernel penalty of
    0.001. A single linear unit forms the output. Compiled with 'nadam' and
    mean squared error. `dimi` is the number of input features.
    """
    network = Sequential([
        Dense(24,input_dim=dimi,kernel_initializer='normal',kernel_regularizer=regularizers.l2(0.001)),
        Activation('relu'),
        Dense(24, kernel_initializer='normal',kernel_regularizer=regularizers.l2(0.001)),
        Activation('relu'),
        Dense(24, kernel_initializer='normal'),
        Activation('relu'),
        Dense(1,activation='linear'),
    ])
    network.compile(optimizer='nadam',loss='mean_squared_error')
    return network



    

In [None]:
# Min-max scale all projected source terms; the 3-D set uses the min/max of
# the 2-D set.
RPCA2DN = do_normalization(RPCA2D,RPCA2D,'minmax')
RPCA3DN = do_normalization(RPCA3D,RPCA2D,'minmax')

In [None]:
# Train the source-term network on the first projected source term with an
# 80/20 split of the 2-D data; training stops after 20 epochs without
# improvement of the validation loss and the best weights are restored.
model = get_model_reac(5)
es = EarlyStopping(monitor='val_loss',mode='min',verbose=1,patience=20,restore_best_weights=True)
X_train, X_test, Y_train, Y_test = train_test_split(Xt[:,0:5],RPCA2DN[:,0],test_size=0.2,random_state=2)
#mc = ModelCheckpoint('best_model.h5',monitor='val_loss',mode='min',save_best_only=True)
h=model.fit(X_train,Y_train, validation_data=(X_test,Y_test),epochs=100,batch_size=128,callbacks=[es])

In [None]:
def do_conditional_average(bins,data,Xt):
    """Accumulate `data` on a ``bins x bins x bins`` grid over `Xt[:, 0:3]`.

    Each of the first ``data.shape[0]`` samples is assigned to a cell from its
    position between the column minimum and maximum of `Xt`; the function
    returns ``sum / (count + 1)`` per cell.

    Parameters
    ----------
    bins : int
        Number of cells per direction.
    data : numpy.ndarray of shape (N,)
        Values to accumulate.
    Xt : numpy.ndarray of shape (>=N, >=3)
        Sample coordinates; columns 0..2 are used.

    Returns
    -------
    numpy.ndarray of shape (bins, bins, bins)

    Notes
    -----
    Fixes relative to the previous version, which could not run:
    ``np.zeros()`` was called without a shape, ``min(Xt[i,0])`` took the
    minimum of a scalar instead of the column, and the sample at the column
    maximum produced the out-of-range index `bins`. The ``count + 1``
    denominator is kept as written; it avoids division by zero but is not the
    plain cell mean.
    NOTE(review): a later cell defines a 2-D function with the same name,
    which replaces this one once it has been run.
    """
    n = data.shape[0]
    coords = np.asarray(Xt)[:, 0:3]
    lower = coords.min(axis=0)
    span = coords.max(axis=0) - lower
    span = np.where(span == 0, 1, span)   # constant coordinate -> single cell

    idx = (((coords[:n] - lower) / span) * bins).astype(int)
    idx = np.minimum(idx, bins - 1)       # the maximum falls into the last cell
    cells = (idx[:, 0], idx[:, 1], idx[:, 2])

    out = np.zeros((bins,bins,bins))
    count = np.zeros((bins,bins,bins))
    # np.add.at accumulates correctly when several samples share a cell
    np.add.at(out, cells, data)
    np.add.at(count, cells, 1)
    return out/(count+1)
        
    

In [None]:
def do_conditional_average(bins,data,Xt):
    """Accumulate `data` on a ``bins x bins`` grid over `Xt[:, 0:2]`.

    Each of the first ``data.shape[0]`` samples is assigned to a cell from its
    position between the column minimum and maximum of `Xt`; the function
    returns ``sum / (count + 1)`` per cell.

    Parameters
    ----------
    bins : int
        Number of cells per direction.
    data : numpy.ndarray of shape (N,)
        Values to accumulate.
    Xt : numpy.ndarray of shape (>=N, >=2)
        Sample coordinates; columns 0..1 are used.

    Returns
    -------
    numpy.ndarray of shape (bins, bins)

    Notes
    -----
    Fixes relative to the previous version: the sample at the column maximum
    produced the out-of-range index `bins` (IndexError) and is now clamped
    into the last cell; the column min/max are computed once instead of with
    the builtin min/max on every iteration. The ``count + 1`` denominator is
    kept as written; it avoids division by zero but is not the plain cell
    mean.
    NOTE(review): an earlier cell defines a 3-D function with the same name;
    this definition replaces it.
    """
    n = data.shape[0]
    coords = np.asarray(Xt)[:, 0:2]
    lower = coords.min(axis=0)
    span = coords.max(axis=0) - lower
    span = np.where(span == 0, 1, span)   # constant coordinate -> single cell

    idx = (((coords[:n] - lower) / span) * bins).astype(int)
    idx = np.minimum(idx, bins - 1)       # the maximum falls into the last cell
    cells = (idx[:, 0], idx[:, 1])

    out = np.zeros((bins,bins))
    count = np.zeros((bins,bins))
    # np.add.at accumulates correctly when several samples share a cell
    np.add.at(out, cells, data)
    np.add.at(count, cells, 1)
    return out/(count+1)
        
    

In [None]:
# Look up a tabulated value for every sample from a flattened bin number b[i]
# and accumulate the squared error against RPCA3DN[:,0] (err) and the squared
# reference (summ).
# NOTE(review): b, out and pred are not assigned in any visible cell; the
# divisor 102 presumably corresponds to a 100-bin table with two outlier bins
# per direction — confirm.
err=0
summ=0
for i in range(len(b)):
    #print(i)
    indexx = b[i]//102-1
    indexy = b[i]%102-1
    err=err+(out[indexx,indexy]-RPCA3DN[i,0])**2
    summ = summ+(RPCA3DN[i,0])**2
    pred[i] = out[indexx,indexy]

    
    

In [None]:
def process_species(nClust,ep,nComp):
    """Cluster the PCA scores and train one network per cluster and species.

    The 2-D and 3-D scores (first `nComp` columns of the global `Xt` /
    `Xt3d`) are clustered with `do_clusters`. For every cluster and each of
    the 53 species the global `model` is reset from 'model.h5', fitted on the
    2-D samples of the cluster and evaluated on the 2-D and 3-D samples.
    Predictions are mapped back with ``x * (max - min) + min`` of the global
    `data` and scored against `data` / `dat3d`.

    Parameters
    ----------
    nClust : int
        Number of clusters.
    ep : int
        Training epochs per (cluster, species) pair.
    nComp : int
        Number of PCA components used as input.

    Returns
    -------
    tuple
        ``(mean_accuracy2d, mean_accuracy, R2Score2D, R2Score3D)``; in the
        two 6-element accuracy arrays entry 0 is the mean R2 and entry 1 the
        mean of ``|1 - row sum|``, the remaining entries stay 0.

    Notes
    -----
    Changes relative to the previous version: the progress messages are now
    actually formatted; the outputs start as zeros instead of uninitialised
    memory so samples of skipped clusters have a defined value; clusters with
    no 3-D samples are skipped before predicting (the old ``pred==[]`` test
    compared an array with a list).
    NOTE(review): `do_clusters` and a module-level `model` compatible with
    'model.h5' are not defined in any visible cell (the get_model_species
    call is commented out) — confirm they exist before running.
    """
    mean_accuracy = np.zeros(6)
    mean_accuracy2d = np.zeros(6)
    out2d = np.zeros([data.shape[0],53])
    out3d = np.zeros([dat3d.shape[0],53])

    print("Making %d Clusters" % nClust)
    labels2d,labels3d=do_clusters(nClust,Xt[:,0:nComp],Xt3d[:,0:nComp])

    for i in range(0,nClust):
        print("Processing cluster %d" % i)
        in2d = labels2d==i
        in3d = labels3d==i
        index2d = np.where(in2d)
        index3d = np.where(in3d)

        # batch size: 1/300 of the cluster size, limited to [100, 800]
        bsize = int(datanorm[in2d,0].shape[0]/300)
        bsize = min(max(bsize,100),800)

        for spec in range(0,53):
            print("Processing Species ", spec)

#            model = get_model_species(nComp)
            # restart every fit from the stored initial weights
            model.load_weights('model.h5')
            model.fit(Xt[in2d,0:nComp],datanorm[in2d,spec],epochs=ep,batch_size=bsize)
            pred = model.predict(Xt[in2d,0:nComp])
            out2d[index2d,spec] = pred.T

            if not np.any(in3d):
                # nothing to predict for this cluster in the 3-D set
                continue
            pred = model.predict(Xt3d[in3d,0:nComp])
            if not pred.any():
                # all-zero prediction: leave the rows at their initial value
                continue
            out3d[index3d,[spec]] = pred.T

    span = np.max(data[:,0:53],0)-np.min(data[:,0:53],0)
    offset = np.min(data[:,0:53],0)
    out2d = out2d*span+offset
    out3d = out3d*span+offset

    R2Score2D = np.zeros(53)
    R2Score3D = np.zeros(53)
    for i in range(0,53):
        R2Score2D[i]= r2_score(data[:,i],out2d[:,i])
        R2Score3D[i]= r2_score(dat3d[:,i],out3d[:,i])
    mean_accuracy2d[0] = np.mean(R2Score2D)
    mean_accuracy2d[1] = np.mean(abs(1-np.sum(out2d,1)))

    mean_accuracy[0] = np.mean(R2Score3D)
    mean_accuracy[1] = np.mean(abs(1-np.sum(out3d,1)))

    return mean_accuracy2d,mean_accuracy, R2Score2D, R2Score3D
     
        
