CAUTION: this code has serious memory leaks. Training requires at least 4 GB of RAM (with at least 1 GB free). To obtain adequate model performance you should re-run the notebook (training) several times, i.e. use "Kernel - Restart & Run All" repeatedly, or use the pretrained "pinjointed2.ckpt" file.

In [1]:
import numpy as np
import math
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
import time
import subprocess
import random

  from ._conv import register_converters as _register_converters


In [2]:
# FE model input file: parsed by read_dat, rewritten by alter_dat and passed to FEA.
PATH_DAT="C:\\Temp\\brdg.dat"
# FE result file parsed by read_res (presumably written by the solver next to the .dat file -- confirm).
PATH_RES="C:\\Temp\\brdg.res"

## Pre- and post-processing of FE model

In [3]:
def read_dat(path):
    """Parse the FE model input (.dat) file at *path*.

    The file is consumed strictly line by line in this order:

    1. ``nels nn ndim np_types`` -- number of elements, total number of
       nodes, number of dimensions, number of property types.
    2. ``prop`` -- a single float (element property).
    3. ``ceil(nn / 3)`` lines of nodal coordinates, concatenated into the
       flat array ``g_coord``.
    4. ``ceil(nels / 5)`` lines of element connectivity, concatenated into
       the flat array ``g_num``.
    5. ``nr`` -- number of restraints.
    6. ``nf`` -- nodal freedom array (one line of ints).
    7. ``loaded_nodes`` -- number of nodes with forces applied.
    8. ``loads`` -- one line parsed as floats (the first entry is a node
       number, so it is an int stored as a float).
    9. ``fixed_freedoms`` -- number of fixed freedoms.

    Returns:
        Tuple ``(nels, nn, ndim, np_types, prop, g_coord, g_num, nr, nf,
        loaded_nodes, loads, fixed_freedoms)``.

    Raises:
        ValueError: if a line cannot be converted to the expected numbers.
    """
    # The context manager closes the file even if a conversion below raises.
    with open(path, "r") as f:
        nels, nn, ndim, np_types = np.array(f.readline().split()).astype(int)
        prop = float(f.readline())

        g_coord = []
        for _ in range(math.ceil(nn / 3)):
            g_coord += f.readline().split()
        g_coord = np.array(g_coord).astype(float)

        g_num = []
        for _ in range(math.ceil(nels / 5)):
            g_num += f.readline().split()
        g_num = np.array(g_num).astype(int)

        nr = int(f.readline())
        nf = np.array(f.readline().split()).astype(int)
        loaded_nodes = int(f.readline())
        loads = np.array(f.readline().split()).astype(float)
        fixed_freedoms = int(f.readline())

    return nels, nn, ndim, np_types, prop, g_coord, g_num, nr, nf, loaded_nodes, loads, fixed_freedoms

In [4]:
# nels,nn,ndim,np_types,prop,g_coord,g_num,nr,nf,loaded_nodes,loads,fixed_freedoms =read_dat(PATH_DAT)

In [5]:
# g_coord

In [6]:
def _format_coord_line(values):
    """Format up to three (x, y) pairs from *values* as one .dat coordinate line."""
    n_pairs = len(values) // 2
    if n_pairs == 0:
        # Nothing left to write for this line: the coordinate array is shorter
        # than the node count in the file header requires.
        raise IndexError("coordinate array too short for the node count in the .dat file")
    text = "  ".join("{} {}".format(values[2 * j], values[2 * j + 1]) for j in range(n_pairs))
    if n_pairs < 3:
        # A partial last line keeps the trailing separator.
        text += "  "
    return text + "\n"


def alter_dat(path, position, action, opt, coord, dx=0.1, change_nodes=[3,4,6,8]):
    """Rewrite the nodal-coordinate lines of the .dat file at *path*.

    Args:
        path: .dat file, read with ``read_dat`` and then rewritten in place.
        position: index into ``change_nodes`` selecting the node to move
            (only used when ``opt == 0``).
        action: with ``opt == 0``: ``0`` adds ``dx`` to the selected
            coordinate, ``1`` subtracts ``dx``, any other value leaves the
            coordinates as read from the file.
        opt: ``0`` modifies the coordinates currently in the file; ``1``
            replaces them with ``coord``. Any other value writes the
            coordinates back unchanged.
        coord: flat coordinate array used when ``opt == 1``.
        dx: step applied to the selected coordinate.
        change_nodes: 1-based node numbers that may be moved; the entry at
            flat index ``2 * node - 2`` of ``g_coord`` is the one modified.
            The default list is never mutated here.

    Returns:
        ``(position, g_coord)`` -- the unchanged ``position`` and the
        coordinate array that was written to the file.

    Raises:
        IndexError: if a coordinate line has no complete (x, y) pair left.
    """
    nels, nn, ndim, np_types, prop, g_coord, g_num, nr, nf, loaded_nodes, loads, fixed_freedoms = read_dat(path)

    if opt == 0:
        if action == 0 or action == 1:
            target = int(2 * change_nodes[position] - 2)
            if action == 0:
                g_coord[target] += dx
            else:
                g_coord[target] -= dx
    elif opt == 1:
        g_coord = coord

    # Coordinates occupy the lines right after the two header lines.
    first_coord_line = 2
    n_coord_lines = math.ceil(nn / 3)

    with open(path, "r+") as f:
        all_lines = [line.rstrip("\n") for line in f.readlines()]
        f.seek(0)
        for i, line in enumerate(all_lines):
            k = i - first_coord_line
            if 0 <= k < n_coord_lines:
                f.write(_format_coord_line(g_coord[6 * k:6 * k + 6]))
            else:
                f.write(line + "\n")
        # The new text can be shorter than the old one (e.g. "3.9" replacing
        # "3.9000000000000004"); drop the stale bytes left past the new end.
        f.truncate()

    return position, g_coord

In [7]:
# position, g_coord= alter_dat(PATH_DAT, 3, 2, 0, np.array([0.0,0.0,2.0,3.0,2.0,0.0,5.0,3.0,7.0,0.0,9.0,3.0,12.0,3.0,12.0,0.0,14.0,0.0]), dx=0.1, change_nodes=[3,4,6,8])

In [8]:
# position, g_coord

In [9]:
def read_res(path,nn):
    """Read two result columns for ``nn`` nodes from the result file at *path*.

    The first three lines of the file are skipped. From each of the next
    ``nn`` lines the second and third whitespace-separated fields are parsed
    as floats (the first field is ignored).

    Returns:
        ``numpy`` array of shape ``(nn, 2)``.

    Raises:
        IndexError: if the file has fewer than ``nn + 3`` lines or a line has
            fewer than three fields.
        ValueError: if a field is not a valid float.
    """
    with open(path, "r") as f:
        all_lines = f.readlines()

    A = np.zeros(shape=(nn, 2))
    for i in range(nn):
        fields = all_lines[i + 3].split()  # split once per line
        A[i, 0] = float(fields[1])
        A[i, 1] = float(fields[2])
    return A

In [10]:
# A=read_res(PATH_RES,9)

In [11]:
# A

In [12]:
# this function must be tailored to different FE models
def observe(position, coord,l1,l2,res):
    """Build the 6-value observation tuple.

    Returns ``(position, coord[4]/l2, coord[6]/l1, coord[10]/l1,
    coord[14]/l2, r)`` where ``r`` is element 9 of ``res`` flattened in row
    order (for an ``(n, 2)`` array that is row 4, column 1).
    The hard-coded indices are specific to the current FE model.
    """
    n_rows = res.shape[0]
    n_cols = res.shape[1]
    flat_res = res.reshape(n_rows * n_cols,)
    scaled_a = coord[4] / l2
    scaled_b = coord[6] / l1
    scaled_c = coord[10] / l1
    scaled_d = coord[14] / l2
    return position, scaled_a, scaled_b, scaled_c, scaled_d, flat_res[9]

## Finite Element Model of pin-jointed frame structure

In [13]:
# FE model is taken from "Programming the finite element method, I. M. Smith et al, 5th edition"

In [14]:
def FEA(path, exe="C:\\Temp\\p42.exe"):
    """Run the external FE solver on the job identified by *path*.

    The solver is invoked as ``exe <path without its extension>`` and this
    function blocks until it exits. The solver's console output is discarded.

    Args:
        path: path of the .dat input file; only its extension is stripped, so
            directory names containing dots are preserved.
        exe: solver executable (defaults to the original hard-coded location).

    Returns:
        None. The solver's exit status is not checked.
    """
    import os

    target = os.path.splitext(path)[0]
    # Output goes to DEVNULL rather than an unread PIPE: waiting on a child
    # whose pipe nobody drains can deadlock once the OS pipe buffer is full.
    subprocess.run([exe, target], stdout=subprocess.DEVNULL,
                   stderr=subprocess.STDOUT, shell=False)

## Neural Network Policy - Policy Gradients

In [15]:
# Details of model can be found in the book:
# Hands-On Machine Learning with Scikit-Learn & TensorFlow. Aurélien Géron

In [16]:
# the NN architecture must be tailored to different FE models

In [17]:
# Policy network (TensorFlow 1.x graph mode), built once at module level.
n_inputs = 6  # length of the tuple returned by observe()
n_hidden = 50  # units in each of the two hidden layers
n_outputs = 3  # one logit per action handled by alter_dat (0: +dx, 1: -dx, 2: no change)
initializer = tf.contrib.layers.variance_scaling_initializer()

learning_rate = 0.001

# Build the neural network
# Two ELU hidden layers followed by a linear layer producing the logits.
X_ = tf.placeholder(tf.float64, shape=[None, n_inputs], name="X_")
hidden = fully_connected(X_, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)
hidden1 = fully_connected(hidden, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)
logits = fully_connected(hidden1, n_outputs, activation_fn=None, weights_initializer=initializer)
# Named so that predict() can look the tensor up as "Y_proba:0".
outputs = tf.nn.softmax(logits, name="Y_proba")

# Select a random action based on the estimated probabilities
action = tf.multinomial(tf.log(outputs), num_samples=1,output_dtype=tf.int32)

# One-hot encoding of the sampled action, used as the training target.
# NOTE(review): depth=3 and the [3,1] reshape duplicate n_outputs and assume a
# batch of exactly one observation -- confirm before changing either.
y=tf.reshape(tf.one_hot(action,depth=3,dtype=tf.float64),[3,1])
# NOTE(review): a per-class sigmoid cross-entropy is applied to logits that
# feed a softmax above; confirm this is intended rather than a softmax loss.
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=tf.transpose(logits))

optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(xentropy)
# Gradient tensors only; evaluated at every step of the training loop.
gradients = [grad for grad, variable in grads_and_vars]
# One placeholder per variable so that externally computed (reward-weighted,
# averaged) gradients can be fed back in and applied by training_op.
gradient_placeholders = []
grads_and_vars_feed = []
for grad, variable in grads_and_vars:
    gradient_placeholder = tf.placeholder(tf.float64, shape=grad.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))

training_op = optimizer.apply_gradients(grads_and_vars_feed)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [18]:
def discount_rewards(rewards, discount_rate=0.97):
    """Return the discounted cumulative reward for every step of one episode.

    Entry ``t`` of the result equals
    ``rewards[t] + discount_rate * result[t + 1]`` (with the value past the
    last step taken as 0). The result is a float ``numpy`` array with the same
    length as ``rewards``.
    """
    n_steps = len(rewards)
    discounted = np.empty(n_steps)
    running_return = 0
    # Walk backwards so each step can reuse the return of the step after it.
    for idx in range(n_steps - 1, -1, -1):
        running_return = rewards[idx] + running_return * discount_rate
        discounted[idx] = running_return
    return discounted

In [19]:
def discount_and_normalize_rewards(all_rewards, discount_rate=0.97):
    """Discount every episode's rewards, then standardise them jointly.

    Args:
        all_rewards: list of per-episode reward sequences.
        discount_rate: discount factor forwarded to ``discount_rewards``.

    Returns:
        List with one array per episode: the discounted rewards minus the
        mean of all discounted rewards, divided by their standard deviation.
        When the standard deviation is zero (all discounted rewards equal)
        the centred values are returned undivided, i.e. all zeros, instead of
        NaN/inf.

    Raises:
        ValueError: if ``all_rewards`` is empty (raised by ``np.concatenate``).
    """
    # Forward discount_rate: it was previously accepted but never used.
    all_discounted_rewards = [discount_rewards(rewards, discount_rate)
                              for rewards in all_rewards]
    flat_rewards = np.concatenate(all_discounted_rewards)
    reward_mean = flat_rewards.mean()
    reward_std = flat_rewards.std()
    if reward_std == 0:
        # Avoid 0/0: NaN scores would propagate into the fed gradients.
        reward_std = 1.0
    return [(discounted_rewards - reward_mean) / reward_std
            for discounted_rewards in all_discounted_rewards]

In [20]:
# this function must be tailored to different FE models

def reward_(obs_,obs):
    """Return 1 when entry 5 of ``obs`` is strictly smaller in magnitude than entry 5 of ``obs_``, else 0.

    ``obs_`` is the previous observation and ``obs`` the new one, as passed
    by the training loop.
    """
    previous_magnitude = abs(obs_[5])
    current_magnitude = abs(obs[5])
    return 1 if previous_magnitude > current_magnitude else 0


In [21]:
# the training code must be tailored to different FE models

In [22]:
# Policy-gradient training: play n_games_per_update episodes, collect the
# per-step gradients and rewards, then apply the reward-weighted mean gradient.
n_iterations =21 #250 # number of training iterations
n_max_steps = 300 #1000 # max steps per episode
n_games_per_update = 10 # train the policy every 10 episodes
save_iterations = 5 # save the model every 5 training iterations


with tf.Session() as sess:
    start=time.time()
    init.run() 
    
    # Overwrites the freshly initialised variables with the latest checkpoint.
    # NOTE(review): this appears to require an existing checkpoint in ./policy2/ -- confirm.
    saver.restore(sess, tf.train.latest_checkpoint("./policy2/"))    
    tf.get_default_graph()  # return value is discarded
    
    for iteration in range(n_iterations):
               
        all_rewards = [] # all sequences of raw rewards for each episode
        all_gradients = [] # gradients saved at each step of each episode
             
        for game in range(n_games_per_update):
            current_rewards = [] # all raw rewards from the current episode
            current_gradients = [] # all gradients from the current episode
            
            # Reset the .dat file to the initial design (opt=1 writes the given
            # coordinates; the action argument 4 is ignored in that mode).
            pst=random.randint(0,3)
            position, g_coord= alter_dat(PATH_DAT, pst, 4, 1, np.array([0.0,0.0,2.0,3.0,2.0,0.0,5.0,3.0,7.0,0.0,9.0,3.0,12.0,3.0,12.0,0.0,14.0,0.0]),
                                          dx=0.1, change_nodes=[3,4,6,8])
            FEA(PATH_DAT)
            res=read_res(PATH_RES,len(g_coord)//2)
            obs=observe(position,g_coord,10,14,res)
            
            for step in range(n_max_steps):
                # Sample an action and get the gradients for that action.
                action_val, gradients_val = sess.run([action, gradients],
                                                     feed_dict={X_: np.array(obs).reshape(1,n_inputs)}) 
                obs_=obs
                # Apply the sampled action to the node selected by `position`.
                position, g_coord= alter_dat(PATH_DAT, position, action_val[0][0], 0, np.array([]),
                                          dx=0.1, change_nodes=[3,4,6,8])
                
                # Pick the node to be moved at the next step.
                position=random.randint(0,3)
                
                # End the episode as soon as a moved coordinate comes within
                # 0.2 of (or passes) one of its two reference coordinates.
                # The step that triggers the break records no reward/gradient.
                if g_coord[4]<g_coord[0]+0.2 or g_coord[4]>g_coord[8]-0.2:
                    break
                
                if g_coord[6]<g_coord[2]+0.2 or g_coord[6]>g_coord[12]-0.2:
                    break
                    
                if g_coord[10]<g_coord[2]+0.2 or g_coord[10]>g_coord[12]-0.2:
                    break
                    
                if g_coord[14]<g_coord[8]+0.2 or g_coord[14]>g_coord[16]-0.2:
                    break    
                    
                FEA(PATH_DAT)
                res=read_res(PATH_RES,len(g_coord)//2)
                obs=observe(position,g_coord,10,14,res)               
                reward=reward_(obs_,obs)
                
                current_rewards.append(reward)
                current_gradients.append(gradients_val)

            all_rewards.append(current_rewards)
            all_gradients.append(current_gradients)

    
            
        # At this point we have run the policy for 10 episodes, and we are
        # ready for a policy update using the algorithm described earlier.
        all_rewards = discount_and_normalize_rewards(all_rewards)
        
        
        
        feed_dict = {}
        for var_index, grad_placeholder in enumerate(gradient_placeholders):
            # multiply the gradients by the action scores, and compute the mean
            mean_gradients = np.mean([reward * all_gradients[game_index][step][var_index] 
                                      for game_index, rewards in enumerate(all_rewards)
                                      for step, reward in enumerate(rewards)],axis=0)
            feed_dict[grad_placeholder] = mean_gradients
        
        
        sess.run(training_op, feed_dict=feed_dict)
        
        if iteration % save_iterations == 0:
#             print("Saving {} iteration".format(iteration))
            print('Time taken for {} epoch {} sec\n'.format(iteration, time.time() - start))
            saver.save(sess, "./policy2/pinjointed2.ckpt")

# end=time.time()            

INFO:tensorflow:Restoring parameters from ./policy2/pinjointed2.ckpt
Time taken for 0 epoch 974.8577587604523 sec

Time taken for 5 epoch 5860.085177659988 sec

Time taken for 10 epoch 11748.592981100082 sec

Time taken for 15 epoch 18522.683436870575 sec

Time taken for 20 epoch 26488.97808289528 sec



In [23]:
# print(end-start)

## AI designing the bridge

In [24]:
def _coords_within_bounds(g_coord, margin=0.2):
    """Return False when a movable coordinate is within *margin* of (or past) one of its bounds."""
    # (moved index, lower-bound index, upper-bound index) into the flat coordinate array.
    limits = ((4, 0, 8), (6, 2, 12), (10, 2, 12), (14, 8, 16))
    for moved, lower, upper in limits:
        if g_coord[moved] < g_coord[lower] + margin or g_coord[moved] > g_coord[upper] - margin:
            return False
    return True


def predict(path,path1,l1,l2,coord=np.array([0.0,0.0,2.0,3.0,2.0,0.0,5.0,3.0,7.0,0.0,9.0,3.0,12.0,3.0,12.0,0.0,14.0,0.0])):
    """Let the trained policy modify the design stored in the .dat file.

    The file at ``path`` is first reset to ``coord``. Then, for up to 50
    steps, the most probable action of the restored policy is applied to a
    randomly chosen movable node and the FE model is re-solved. The loop stops
    early when a coordinate leaves its allowed range.

    Args:
        path: .dat input file that is rewritten and solved.
        path1: result file read after each solve.
        l1, l2: scaling lengths forwarded to ``observe``.
        coord: initial flat coordinate array (not modified in place here).

    Returns:
        The last observation tuple produced by ``observe``. When the loop
        stops on a bound violation, this is the observation from before the
        violating move, although the file already contains that move.
    """
    with tf.Session() as sess:
        # NOTE(review): this imports the meta graph into the default graph on
        # every call, which presumably makes the graph grow across calls -- confirm.
        saver = tf.train.import_meta_graph('./policy2/pinjointed2.ckpt.meta')
        saver.restore(sess, "./policy2/pinjointed2.ckpt")

        graph = tf.get_default_graph()
        outputs = graph.get_tensor_by_name("Y_proba:0")
        X_ = graph.get_tensor_by_name("X_:0")

        pst = random.randint(0, 3)
        position, g_coord = alter_dat(path, pst, 4, 1, coord,
                                      dx=0.1, change_nodes=[3,4,6,8])
        # Solve and observe the file that was just written, using the caller's
        # scaling lengths (previously the global PATH_DAT and 10, 14 were used).
        FEA(path)
        res = read_res(path1, len(g_coord)//2)
        obs = observe(position, g_coord, l1, l2, res)

        for step in range(50):
            action_val = sess.run([outputs], feed_dict={X_: np.array(obs).reshape(1, -1)})
            # Greedy choice: the action with the highest probability.
            position, g_coord = alter_dat(path, position, np.argmax(action_val), 0, np.array([]),
                                          dx=0.1, change_nodes=[3,4,6,8])

            # Node to be moved at the next step.
            position = random.randint(0, 3)

            if not _coords_within_bounds(g_coord):
                break

            FEA(path)
            res = read_res(path1, len(g_coord)//2)
            obs = observe(position, g_coord, l1, l2, res)

        return obs

In [25]:
# Run the policy on the model files; the cell output is the returned observation tuple.
predict(PATH_DAT,PATH_RES,10,14)

INFO:tensorflow:Restoring parameters from ./policy2/pinjointed2.ckpt


(1,
 0.2785714285714287,
 0.5599999999999998,
 0.7700000000000005,
 0.7714285714285717,
 -0.0008572)

In [29]:
# Benchmark from initial design:
-0.8888E-03

-0.0008888

In [27]:
# Re-read the .dat file as it currently stands on disk.
nels,nn,ndim,np_types,prop,g_coord,g_num,nr,nf,loaded_nodes,loads,fixed_freedoms =read_dat(PATH_DAT)

In [28]:
# Display the flat coordinate array as one (x, y) row per node.
g_coord.reshape(nn,2)

array([[ 0. ,  0. ],
       [ 2. ,  3. ],
       [ 3.9,  0. ],
       [ 5.6,  3. ],
       [ 7. ,  0. ],
       [ 7.7,  3. ],
       [12. ,  3. ],
       [10.8,  0. ],
       [14. ,  0. ]])

![title](./Bridge.png "ShowMyImage")