In [17]:
'''
Correlation based selection using Tensorflow library
(Training ANN by adding input parameters in order of descending correlation coefficient)
Ver 1.2
(Add input parameters manually)
2 options for calculation of correlation
(1.Pearson product-moment correlation/2.Volume-weighted correlation)
'''
'''
Written in Aug.2017 by Serin Yoon
Fluid & Thermal Multiphysics Lab.
Department of Mechanical Engineering
Sogang University
'''

#import libraries
import tensorflow as tf
import numpy as np
import csv
import math
from collections import defaultdict
from scipy.stats import spearmanr,pearsonr
from datetime import datetime

#Set the number of factors
numb_data=44208 #the number of data points
numb_input=13 #the number of input parameters to read
numb_output=1 #the number of the total output parameters
numb_neuron=20#the number of neurons for each hidden layer
numb_select=10 #the number of parameters to select

#Set the learning rate of an optimizer
learning_rate=0.001

#Set the training steps
train_step=100000

#Set the criterion option (0: Pearson-correlation coefficient / 1: Spearman-correaltion coefficient)
criterion_opt=1

#Array configuration to store data
input=[[]for j in range(numb_input+2)]
output=[[]for j in range(numb_output)]

#Read csv file
#How to configure the csv file : row = data points, column = parameters
#Add input & output parameters to use for training manually
#=> input[x].append(i[y]) : Read (x+1)th input parameter from (y+1)th column in the csv file 
#=> output[x].append(i[y]) : Read (x+1)th output parameter from (y+1)th column in the csv file
f=open("./new_parameters(18.1.16)/new(18.1.23)/case1+2+3+4_modeling_without_nualpha_scaled_174by87.csv",'r')
csvReader=csv.reader(f)
headers=next(csvReader)
for i in csvReader:
    for j in range(numb_input+2):
        input[j].append(i[j])
    for k in range(numb_output):
        output[k].append(i[numb_input+2])

#Convert the data to float type        
for i in range(numb_data):
    for j in range(numb_input+2):
        input[j][i]=float(input[j][i])
    for k in range(numb_output):
        output[k][i]=float(output[k][i])   
         
#Calculate correlation coefficient between input and output
Corr=[[1. for i in range(numb_output)]for j in range(numb_input)]

#Calculate using Pearson method
if criterion_opt==0:
    print("Calculating correlation using Pearson method")
    for i in range(numb_input):
        for k in range(numb_output):
            #Corr[i][k]=abs(np.corrcoef(input[i],output[k])[0,1])
            Corr[i][k],p=pearsonr(input[i],output[k])
            Corr[i][k]=abs(Corr[i][k])
            
else :
    print("Calculating correlation using Spearman method")
    for i in range(numb_input):
        for k in range(numb_output):
            Corr[i][k],p=spearmanr(input[i],output[k])
            Corr[i][k]=abs(Corr[i][k])
            
#Set the new input array in descending order of the correlation (Set the 'reverse' option as 'True')
sorted_Corr_index=sorted(range(len(Corr)), key=Corr.__getitem__, reverse=True)
input_training=[[1. for i in range(numb_data)]for j in range(numb_input+2)]
for i in range(numb_data):
    input_training[0][i]=input[0][i]
    input_training[1][i]=input[1][i]
    for j in range(numb_input):
        input_training[j+2][i]=input[sorted_Corr_index[j]+2][i]

#Store the correlation coefficient values to a text file       
Head=["" for x in range(numb_input)]
for i in range(numb_input):
    Head[i]=headers[sorted_Corr_index[i]+2]

fp = open('./new_parameters(18.1.16)/new(18.1.23)/Correlation_btw_input&output.txt','w')
for i in range(numb_input):
    fp.write(Head[i]+' %.6f\n' % Corr[i][0])
fp.close()

#Set arrays for calculation of a correlation between the target output and an actual output
Corr_out=[[1. for i in range(numb_output)]for j in range(numb_output)]

#Set arrays to store the relative errors and correlation of selected parameters
Max_err=np.zeros(numb_select)
Mean_err=np.zeros(numb_select)
Pearson=np.zeros(numb_select) 
Spearman=np.zeros(numb_select) 

startTime = datetime.now()
#The number of selected input parameters(t) increases one by one
for t in range(numb_select):
    numb_training=numb_input-t
    input_training2=list(input_training[0:t+3])   

    #Neural network structure    
    with tf.name_scope("1st_hidden_layer"):
        w1 =tf.Variable(tf.random_uniform([numb_neuron,t+3],-1.0,1.0),name='Weight1')
        b1 =tf.Variable(tf.random_uniform([numb_neuron,1],-1.0,1.0),name='Bias1')
        layer1out=tf.tanh(tf.matmul(w1,input_training2)+b1,name="1st_layer_output")

#    with tf.name_scope("2nd_hidden_layer"):
#        w2 =tf.Variable(tf.random_uniform([numb_neuron,numb_neuron],-1.0,1.0),name='Weight2')
#        b2 =tf.Variable(tf.random_uniform([numb_neuron,1],-1.0,1.0),name='Bias2')
#        layer2out=tf.tanh(tf.matmul(w2,layer1out)+b2,name="2nd_layer_output")

    with tf.name_scope("Output_layer"):
        w3 =tf.Variable(tf.random_uniform([numb_output,numb_neuron],-1.0,1.0),name='Weight3')
        finalout=tf.matmul(w3,layer1out,name="Final_output")

    #Configure the session to run the graph    
    sess=tf.Session()

    #Set the values to calculate
    deltas=tf.abs(output-finalout)
    squared_deltas = tf.square(deltas)
    loss = tf.reduce_mean(squared_deltas)
    max_value=tf.reduce_max(output)
    max_error=tf.reduce_max(deltas)
    mean_error=tf.reduce_mean(deltas)
    relative_max_error=max_error*100
    relative_mean_error=mean_error*100

    #Create an optimizer
    train = tf.train.AdamOptimizer(learning_rate,use_locking=False,name='AdamOptimizer').minimize(loss)

    #Initialize the session
    sess.run(tf.global_variables_initializer())

    #Training
    #input_=tf.placeholder("float",shape=(t+1,None))
    #feed_dict={input_:input_training2}
    
    for step in range(train_step):
        sess.run(train)
        if (step+1) % 1000 == 0:
            print('%i steps -> loss: %f'%(step+1,sess.run(loss)))
    corr1 ,p1 = pearsonr(output[0],sess.run(finalout)[0])
    corr2 ,p2 = spearmanr(output[0],sess.run(finalout)[0])
    Max_err[t]=sess.run(relative_max_error)
    Mean_err[t]=sess.run(relative_mean_error)
    Pearson[t]=abs(corr1)
    Spearman[t]=abs(corr2)
    
    print('Selected %d parameters' %(t+1))
print("Time taken:", datetime.now()-startTime)    
fmax = open('./new_parameters(18.1.16)/new(18.1.23)/Corr_Selection_Max_error(s).txt','w')
fmean = open('./new_parameters(18.1.16)/new(18.1.23)/Corr_Selection_Mean_error(s).txt','w')
fpearson = open('./new_parameters(18.1.16)/new(18.1.23)/Corr_Selection_Pearson(s).txt','w')
fspearman = open('./new_parameters(18.1.16)/new(18.1.23)/Corr_Selection_Spearman(s).txt','w')

for i in range(numb_select):
    fmax.write('%.6f\n' %Max_err[i])
    fmean.write('%.6f\n' %Mean_err[i])
    fpearson.write('%.6f\n' %Pearson[i])
    fspearman.write('%.6f\n' % Spearman[i])
fmax.close()
fmean.close()
fpearson.close()    
fspearman.close()
print("DONE")

Calculating correlation using Pearson method
1000 steps -> loss: 0.004834
2000 steps -> loss: 0.004801
3000 steps -> loss: 0.004785
4000 steps -> loss: 0.004769
5000 steps -> loss: 0.004750
6000 steps -> loss: 0.004728
7000 steps -> loss: 0.004706
8000 steps -> loss: 0.004687
9000 steps -> loss: 0.004675
10000 steps -> loss: 0.004668
11000 steps -> loss: 0.004661
12000 steps -> loss: 0.004651
13000 steps -> loss: 0.004639
14000 steps -> loss: 0.004623
15000 steps -> loss: 0.004602
16000 steps -> loss: 0.004573
17000 steps -> loss: 0.004536
18000 steps -> loss: 0.004489
19000 steps -> loss: 0.004435
20000 steps -> loss: 0.004379
21000 steps -> loss: 0.004329
22000 steps -> loss: 0.004268
23000 steps -> loss: 0.004216
24000 steps -> loss: 0.004164
25000 steps -> loss: 0.004131
26000 steps -> loss: 0.004063
27000 steps -> loss: 0.004014
28000 steps -> loss: 0.003967
29000 steps -> loss: 0.003924
30000 steps -> loss: 0.003885
31000 steps -> loss: 0.003849
32000 steps -> loss: 0.003819
3300

72000 steps -> loss: 0.001506
73000 steps -> loss: 0.001507
74000 steps -> loss: 0.001505
75000 steps -> loss: 0.001505
76000 steps -> loss: 0.001505
77000 steps -> loss: 0.001504
78000 steps -> loss: 0.001511
79000 steps -> loss: 0.001503
80000 steps -> loss: 0.001503
81000 steps -> loss: 0.001504
82000 steps -> loss: 0.001502
83000 steps -> loss: 0.001502
84000 steps -> loss: 0.001502
85000 steps -> loss: 0.001503
86000 steps -> loss: 0.001501
87000 steps -> loss: 0.001500
88000 steps -> loss: 0.001500
89000 steps -> loss: 0.001504
90000 steps -> loss: 0.001499
91000 steps -> loss: 0.001499
92000 steps -> loss: 0.001499
93000 steps -> loss: 0.001498
94000 steps -> loss: 0.001498
95000 steps -> loss: 0.001498
96000 steps -> loss: 0.001497
97000 steps -> loss: 0.001503
98000 steps -> loss: 0.001497
99000 steps -> loss: 0.001496
100000 steps -> loss: 0.001496
Selected 3 parameters
1000 steps -> loss: 0.004706
2000 steps -> loss: 0.003029
3000 steps -> loss: 0.002592
4000 steps -> loss: 

44000 steps -> loss: 0.000302
45000 steps -> loss: 0.000301
46000 steps -> loss: 0.000320
47000 steps -> loss: 0.000299
48000 steps -> loss: 0.000298
49000 steps -> loss: 0.000296
50000 steps -> loss: 0.000295
51000 steps -> loss: 0.000294
52000 steps -> loss: 0.000293
53000 steps -> loss: 0.000292
54000 steps -> loss: 0.000293
55000 steps -> loss: 0.000291
56000 steps -> loss: 0.000290
57000 steps -> loss: 0.000289
58000 steps -> loss: 0.000288
59000 steps -> loss: 0.000287
60000 steps -> loss: 0.000286
61000 steps -> loss: 0.000285
62000 steps -> loss: 0.000285
63000 steps -> loss: 0.000284
64000 steps -> loss: 0.000283
65000 steps -> loss: 0.000282
66000 steps -> loss: 0.000282
67000 steps -> loss: 0.000281
68000 steps -> loss: 0.000280
69000 steps -> loss: 0.000280
70000 steps -> loss: 0.000279
71000 steps -> loss: 0.000278
72000 steps -> loss: 0.000277
73000 steps -> loss: 0.000277
74000 steps -> loss: 0.000276
75000 steps -> loss: 0.000276
76000 steps -> loss: 0.000275
77000 step

16000 steps -> loss: 0.000314
17000 steps -> loss: 0.000304
18000 steps -> loss: 0.000296
19000 steps -> loss: 0.000288
20000 steps -> loss: 0.000281
21000 steps -> loss: 0.000274
22000 steps -> loss: 0.000268
23000 steps -> loss: 0.000263
24000 steps -> loss: 0.000258
25000 steps -> loss: 0.000253
26000 steps -> loss: 0.000251
27000 steps -> loss: 0.000245
28000 steps -> loss: 0.000241
29000 steps -> loss: 0.000238
30000 steps -> loss: 0.000235
31000 steps -> loss: 0.000232
32000 steps -> loss: 0.000229
33000 steps -> loss: 0.000227
34000 steps -> loss: 0.000224
35000 steps -> loss: 0.000222
36000 steps -> loss: 0.000220
37000 steps -> loss: 0.000218
38000 steps -> loss: 0.000216
39000 steps -> loss: 0.000214
40000 steps -> loss: 0.000213
41000 steps -> loss: 0.000226
42000 steps -> loss: 0.000210
43000 steps -> loss: 0.000208
44000 steps -> loss: 0.000207
45000 steps -> loss: 0.000209
46000 steps -> loss: 0.000204
47000 steps -> loss: 0.000203
48000 steps -> loss: 0.000202
49000 step