CAUTION: this code has serious memory leaks. Training requires at least 4 GB of RAM, of which at least 1 GB should be free. To obtain adequate model performance, re-run the notebook (training) several times, i.e. use "Kernel → Restart & Run All" several times, or use the pretrained "pinjointed1.ckpt" file.

This model tries to optimize the position of the middle element by allowing it to move along the horizontal axis, so that the displacements in the structure stay within acceptable limits.

In [1]:
# For more details on the model check "Engineering design of 
# 1D rod and 2D pin-jointed frame structure driven by reinforcement 
# learning and finite element analysis"

## Pre- and post-processing of FE model

In [2]:
import numpy as np
import math
from shutil import copyfile
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
import time
import os
import subprocess
import gc

  from ._conv import register_converters as _register_converters


In [3]:
# File locations used by the notebook (raw strings, so the backslashes
# are written once).
# PATH0 = r"C:\Temp\test0.dat"

# Input (.dat) / result (.res) pair used for training and for the first
# prediction run.
PATH = r"C:\Temp\test.dat"
PATH1 = r"C:\Temp\test.res"

# Input / result pair used for the second prediction run.
PATH2 = r"C:\Temp\test1.dat"
PATH3 = r"C:\Temp\test1.res"

In [4]:
def read_dat(path):
    """Parse the FE model input (.dat) file at `path`.

    The file is read strictly line by line in the order below.  Nodal
    coordinates are taken from ceil(nn/3) consecutive lines and element
    connectivity from ceil(nels/5) consecutive lines, i.e. the reader
    assumes at most three nodes / five elements per line.

    Returns a 12-tuple:
        nels           - the number of elements
        nn             - total number of nodes in problem
        ndim           - number of dimensions
        np_types       - number of different property types
        prop           - element property (a single float is read)
        g_coord        - nodal coordinates, flat float array
        g_num          - element connectivity, flat int array
        nr             - number of restraints
        nf             - nodal freedom array (int)
        loaded_nodes   - the number of nodes with forces applied
        loads          - load line as a float array
        fixed_freedoms - number of fixed freedoms

    Raises ValueError if a line cannot be converted to the expected type.
    """
    # The context manager closes the file even when a conversion below raises.
    with open(path, "r") as f:
        nels, nn, ndim, np_types = np.array(f.readline().split()).astype(int)
        prop = float(f.readline())

        g_coord = []
        for _ in range(math.ceil(nn / 3)):
            g_coord += f.readline().split()
        g_coord = np.array(g_coord).astype(float)

        g_num = []
        for _ in range(math.ceil(nels / 5)):
            g_num += f.readline().split()
        g_num = np.array(g_num).astype(int)

        nr = int(f.readline())
        nf = np.array(f.readline().split()).astype(int)
        loaded_nodes = int(f.readline())
        # loads[0] should be type int (it is stored as float with the rest)
        loads = np.array(f.readline().split()).astype(float)
        fixed_freedoms = int(f.readline())

    return (nels, nn, ndim, np_types, prop, g_coord, g_num, nr, nf,
            loaded_nodes, loads, fixed_freedoms)

In [5]:
# nels,nn,ndim,np_types,prop,g_coord,g_num,nr,nf,loaded_nodes,loads,fixed_freedoms=read_dat(PATH)

In [6]:
# def insert_element_coord(g_coord,nn):
#     g_coord_=g_coord.reshape(nn,2)
#     min_x=g_coord_.min(axis=0)[0]
#     max_x=g_coord_.max(axis=0)[0]
#     min_y=g_coord_.min(axis=0)[1]
#     max_y=g_coord_.max(axis=0)[1]
#     new_el_x=(max_x-min_x)/2
#     new_el_y=max_y-min_y
#     for i in range(0,len(g_coord),2):
#         if g_coord[i]<new_el_x:
#             ins=i      
#     a=g_coord.tolist()
#     a.insert(ins+2,new_el_x) 
#     a.insert(ins+3,0) 
#     a.insert(ins+4,new_el_x) 
#     a.insert(ins+5,new_el_y) 
#     return np.array(a)       
                   
    

In [7]:
# insert_element_coord(g_coord,nn)

In [8]:
def alter_dat(action,path,fl):
    """Update the nodal coordinates stored in the input file at `path`.

    Parameters
    ----------
    action : int
        Used only when ``fl == 0``.  0 / 1 shift ``g_coord[4]`` by -dx / +dx,
        2 / 3 shift ``g_coord[8]`` by -dx / +dx; any other value (e.g. 4)
        leaves the coordinates unchanged.
    path : str
        Input (.dat) file; it is read with ``read_dat`` and rewritten in place.
    fl : int
        0 - apply `action` to the coordinates currently in the file;
        1 / 2 - ignore `action` and reset to one of two hard-coded
        geometries; any other value keeps the coordinates from the file.

    Returns
    -------
    (flag, g_coord)
        ``flag`` is 1 when the new coordinates violate the bounds checked
        below, in which case the file is left untouched; otherwise 0 and the
        file has been rewritten.  ``g_coord`` is the (possibly modified)
        flat coordinate array.
    """
    dx = 0.1
    # Only the coordinates (6th item of the tuple) are needed here.
    g_coord = read_dat(path)[5]

    if fl == 0:
        if action == 0:
            g_coord[4] -= dx
        elif action == 1:
            g_coord[4] += dx
        elif action == 2:
            g_coord[8] -= dx
        elif action == 3:
            g_coord[8] += dx
        # action 4 (or anything else): keep the geometry as it is
    elif fl == 1:
        g_coord = np.array([0., 3., 4., 0., 5., 3., 8., 3., 5., 0., 12., 0.])
    elif fl == 2:
        g_coord = np.array([0., 3., 4., 0., 5., 3., 10., 3., 5., 0., 15., 0.])

    # g_coord[8] must stay at least 2*dx away from both of its limits.
    # NOTE(review): the lower limit uses g_coord[3]; the parallel check below
    # compares against g_coord[0]/g_coord[6], which suggests g_coord[2] may
    # have been intended here - confirm before changing.
    lower_moved_too_far = (g_coord[8] > g_coord[10] - 2 * dx
                           or g_coord[8] < g_coord[3] + 2 * dx)
    # Same margin for g_coord[4] between g_coord[0] and g_coord[6].
    upper_moved_too_far = (g_coord[4] > g_coord[6] - 2 * dx
                           or g_coord[4] < g_coord[0] + 2 * dx)
    if lower_moved_too_far or upper_moved_too_far:
        return 1, g_coord

    with open(path, "r") as f:
        all_lines = [line.rstrip("\n") for line in f]

    # Format the two coordinate rows before the file is truncated for
    # writing, so a formatting error cannot leave an empty input file behind.
    coord_rows = [
        "{} {}  {} {}  {} {}\n".format(*g_coord[0:6]),
        "{} {}  {} {}  {} {}\n".format(*g_coord[6:12]),
    ]

    with open(path, "w") as f:
        for i, line in enumerate(all_lines):
            if i == 2:
                # lines 2 and 3 of the file hold the coordinates
                f.writelines(coord_rows)
            elif i != 3:
                f.write(line + "\n")

    return 0, g_coord

In [9]:
# alter_dat(3,PATH,1)

In [10]:
def read_res(path):
    """Read six rows of two values from the solver result file at `path`.

    Rows are taken from file lines 3..8 (0-based).  On each line the first
    whitespace-separated field is skipped (presumably the node number -
    confirm against the solver's output format) and the next two fields are
    stored as floats.

    Returns a (6, 2) float array.  Raises IndexError if the file has fewer
    than nine lines or a line has fewer than three fields, and ValueError if
    a field is not a number.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as f:
        all_lines = f.readlines()

    A = np.zeros(shape=(6, 2))
    for row in range(6):
        fields = all_lines[3 + row].split()
        A[row, 0] = float(fields[1])
        A[row, 1] = float(fields[2])
    return A

In [11]:
#abs(read_res(PATH1)).max()

In [12]:
# sum(abs(read_res(PATH1).reshape(12,)))

In [13]:
# def copy_file(path0,path):
#     copyfile(path0,path)

In [14]:
def observe(coord,l1,l2,res):
    """Build the 3-value observation fed to the policy network.

    Returns ``(coord[4] / l1, coord[8] / l2, last entry of res)`` where
    `res` is flattened to 12 values first (so it must hold exactly 12).
    """
    first_scaled = coord[4] / l1
    second_scaled = coord[8] / l2
    flat_res = res.reshape(12,)
    return first_scaled, second_scaled, flat_res[-1]

## Finite Element Model of pin-jointed frame structure

In [15]:
# FE model is taken from "Programming the finite element method, I. M. Smith et al, 5th edition"

In [16]:
def FEA(path):
    """Run the external FE solver ``C:\\Temp\\p42.exe`` and wait for it to end.

    The solver receives `path` with its extension removed as its single
    argument.  Its console output is discarded and its exit code is ignored;
    the function returns None.
    """
    # splitext removes only the final extension; splitting on the first "."
    # would truncate paths such as "./test.dat" or "C:\\my.dir\\test.dat".
    job_name = os.path.splitext(path)[0]
    # Output goes to DEVNULL instead of an unread PIPE: waiting on a child
    # whose pipe is never drained can block forever once the pipe buffer is
    # full.  The argument list avoids hand-building a command-line string.
    subprocess.run(
        ["C:\\Temp\\p42.exe", job_name],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
        shell=False,
    )

In [17]:
FEA(PATH)

## Neural Network Policy - Policy Gradients

In [18]:
# Details of model can be found in the book:
# Hands-On Machine Learning with Scikit-Learn & TensorFlow. Aurélien Géron

In [19]:
n_inputs = 3    # length of the observation tuple returned by observe()
n_hidden = 50   # units in each of the two hidden layers
n_outputs = 5   # one logit per action id handled by alter_dat() (0..4)
initializer = tf.contrib.layers.variance_scaling_initializer()

learning_rate = 0.001

# Build the neural network: two ELU hidden layers followed by a linear layer
# that produces the action logits; "Y_proba" holds the softmax probabilities
# and is looked up by name when the checkpoint is reloaded.
X_ = tf.placeholder(tf.float64, shape=[None, n_inputs], name="X_")
hidden = fully_connected(X_, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)
hidden1 = fully_connected(hidden, n_hidden, activation_fn=tf.nn.elu, weights_initializer=initializer)
logits = fully_connected(hidden1, n_outputs, activation_fn=None, weights_initializer=initializer)
outputs = tf.nn.softmax(logits, name="Y_proba")

# Select a random action based on the estimated probabilities
action = tf.multinomial(tf.log(outputs), num_samples=1,output_dtype=tf.int32)

# One-hot encode the sampled action as an (n_outputs, 1) column; the fixed
# reshape means the graph only works with a batch of exactly one observation.
y=tf.reshape(tf.one_hot(action,depth=n_outputs,dtype=tf.float64),[n_outputs,1])
# NOTE(review): the action is sampled from a softmax over the logits, but the
# loss below is a per-logit sigmoid cross-entropy, so its gradient is not the
# gradient of the sampled action's log-probability.  Left unchanged so that
# training behaves as before - confirm whether softmax cross-entropy was
# intended.
xentropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=tf.transpose(logits))

optimizer = tf.train.AdamOptimizer(learning_rate)
grads_and_vars = optimizer.compute_gradients(xentropy)
# Gradient tensors evaluated at every step of an episode.
gradients = [grad for grad, variable in grads_and_vars]
# One placeholder per variable: the reward-weighted mean gradients computed
# outside the graph are fed back through these when training_op is run.
gradient_placeholders = []
grads_and_vars_feed = []
for grad, variable in grads_and_vars:
    gradient_placeholder = tf.placeholder(tf.float64, shape=grad.get_shape())
    gradient_placeholders.append(gradient_placeholder)
    grads_and_vars_feed.append((gradient_placeholder, variable))

training_op = optimizer.apply_gradients(grads_and_vars_feed)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

In [20]:
def discount_rewards(rewards, discount_rate=0.97):
    """Return the discounted cumulative reward for every step.

    Entry ``i`` of the result equals
    ``rewards[i] + discount_rate * result[i + 1]`` (with 0 after the last
    step).  The result is a float array with one entry per reward.
    """
    n_steps = len(rewards)
    discounted = np.empty(n_steps)
    running_total = 0
    # Walk backwards so each step can reuse the total of the steps after it.
    for idx in range(n_steps - 1, -1, -1):
        running_total = rewards[idx] + running_total * discount_rate
        discounted[idx] = running_total
    return discounted

In [21]:
def discount_and_normalize_rewards(all_rewards, discount_rate=0.97):
    """Discount each episode's rewards and normalize them across episodes.

    Parameters
    ----------
    all_rewards : list of sequences
        Raw rewards, one sequence per episode.
    discount_rate : float
        Discount factor forwarded to ``discount_rewards``.

    Returns
    -------
    list of arrays
        One array per episode holding ``(discounted - mean) / std``, where
        mean and std are taken over all steps of all episodes.
    """
    # Forward discount_rate; previously the argument was accepted but the
    # helper's own default was always used.
    all_discounted_rewards = [discount_rewards(rewards, discount_rate)
                              for rewards in all_rewards]
    flat_rewards = np.concatenate(all_discounted_rewards)
    reward_mean = flat_rewards.mean()
    reward_std = flat_rewards.std()
    if reward_std == 0:
        # Every discounted reward equals the mean; dividing by 1 yields
        # zeros instead of the NaNs that 0/0 would produce.
        reward_std = 1.0
    return [(discounted_rewards - reward_mean) / reward_std
            for discounted_rewards in all_discounted_rewards]

In [22]:
def reward_(obs_,obs):
    """Return 1 when the new observation improves on the previous one.

    `obs_` is the previous observation and `obs` the current one; the
    reward is 1 if the magnitude of their third entry decreased, else 0.
    """
    previous_magnitude = abs(obs_[2])
    current_magnitude = abs(obs[2])
    return 1 if previous_magnitude > current_magnitude else 0


In [23]:
# import tracemalloc
# import linecache

# def display_top(snapshot, key_type='lineno', limit=3):
#     snapshot = snapshot.filter_traces((
#         tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
#         tracemalloc.Filter(False, "<unknown>"),
#     ))
#     top_stats = snapshot.statistics(key_type)

#     print("Top %s lines" % limit)
#     for index, stat in enumerate(top_stats[:limit], 1):
#         frame = stat.traceback[0]
#         # replace "/path/to/module/file.py" with "module/file.py"
#         filename = os.sep.join(frame.filename.split(os.sep)[-2:])
#         print("#%s: %s:%s: %.1f KiB"
#               % (index, filename, frame.lineno, stat.size / 1024))
#         line = linecache.getline(frame.filename, frame.lineno).strip()
#         if line:
#             print('    %s' % line)

#     other = top_stats[limit:]
#     if other:
#         size = sum(stat.size for stat in other)
#         print("%s other: %.1f KiB" % (len(other), size / 1024))
#     total = sum(stat.size for stat in top_stats)
#     print("Total allocated size: %.1f KiB" % (total / 1024))


In [24]:
# Policy-gradient training: play n_games_per_update episodes with the
# current policy, then apply one update built from the reward-weighted mean
# of the gradients recorded at every step.
n_iterations =21 #250 # number of training iterations
n_max_steps = 300 #1000 # max steps per episode
n_games_per_update = 10 # train the policy every 10 episodes
save_iterations = 5 # save the model every 5 training iterations

start=time.time()


with tf.Session() as sess:

    init.run()
    # Continue from the saved checkpoint; the restored values replace the
    # ones just produced by init.
    saver.restore(sess, "./policy1/pinjointed1.ckpt")
    for iteration in range(n_iterations):

        all_rewards = [] # all sequences of raw rewards for each episode
        all_gradients = [] # gradients saved at each step of each episode

        for game in range(n_games_per_update):
            current_rewards = [] # all raw rewards from the current episode
            current_gradients = [] # all gradients from the current episode

            # Reset the input file to the first hard-coded geometry (fl=1),
            # solve it and take the initial observation.
            flag, g_coord=alter_dat(4,PATH,1)
            FEA(PATH)
            res=read_res(PATH1)
            obs=observe(g_coord,8,8,res)

#             tracemalloc.start()################################################
            for step in range(n_max_steps):
                # Sample an action for the current observation (a batch of
                # one) and record the gradients of the loss for that action.
                action_val, gradients_val = sess.run([action, gradients],
                                                     feed_dict={X_: np.array(obs).reshape(1,n_inputs)})
                obs_=obs
                flag, g_coord=alter_dat(action_val[0][0],PATH,0)
                # flag==1: the move left the allowed range; the episode ends
                # and this step's reward/gradients are not recorded.
                if flag==1:
                    break

                FEA(PATH)
                res=read_res(PATH1)
                obs=observe(g_coord,8,8,res)
                reward=reward_(obs_,obs)

                current_rewards.append(reward)
                current_gradients.append(gradients_val)
#             snapshot = tracemalloc.take_snapshot()#################################
#             display_top(snapshot)#####################################

            all_rewards.append(current_rewards)
            all_gradients.append(current_gradients)



        # At this point we have run the policy for 10 episodes, and we are
        # ready for a policy update using the algorithm described earlier.
        # NOTE(review): if every episode ended on its first step there are no
        # rewards or gradients at all and the means below are presumably NaN
        # - confirm whether that case can occur.
        all_rewards = discount_and_normalize_rewards(all_rewards)

        feed_dict = {}
        for var_index, grad_placeholder in enumerate(gradient_placeholders):
            # multiply the gradients by the action scores, and compute the mean
            mean_gradients = np.mean([reward * all_gradients[game_index][step][var_index]
                                      for game_index, rewards in enumerate(all_rewards)
                                      for step, reward in enumerate(rewards)],axis=0)
            feed_dict[grad_placeholder] = mean_gradients


        sess.run(training_op, feed_dict=feed_dict)

        # Overwrite the checkpoint the session was restored from.
        if iteration % save_iterations == 0:
            print("Saving {} iteration".format(iteration))
            saver.save(sess, "./policy1/pinjointed1.ckpt")

end=time.time()

INFO:tensorflow:Restoring parameters from ./policy1/pinjointed1.ckpt
Saving 0 iteration
Saving 5 iteration
Saving 10 iteration
Saving 15 iteration
Saving 20 iteration


In [25]:
print(end-start)

3398.6333904266357


In [44]:
def predict(path,path1,op,l1,l2):
    """Apply the saved policy greedily to the structure stored at `path`.

    The input file is first reset via ``alter_dat(4, path, op)``; then, for
    at most 50 steps, the most probable action is applied and the structure
    is re-solved.  The loop stops early when ``alter_dat`` reports a bounds
    violation.

    `path` / `path1` are the solver input and result files, `l1` / `l2` the
    divisors passed to ``observe``.  Returns the last observation obtained.
    """
    def _solve_and_observe(coords):
        # Re-run the solver on the current input file and build the
        # observation from its result file.
        FEA(path)
        return observe(coords, l1, l2, read_res(path1))

    with tf.Session() as sess:
        restorer = tf.train.import_meta_graph('./policy1/pinjointed1.ckpt.meta')
        restorer.restore(sess, "./policy1/pinjointed1.ckpt")

        # Look the network's input and probability tensors up by name.
        default_graph = tf.get_default_graph()
        proba_tensor = default_graph.get_tensor_by_name("Y_proba:0")
        input_tensor = default_graph.get_tensor_by_name("X_:0")

        _, coords = alter_dat(4, path, op)
        state = _solve_and_observe(coords)

        for _ in range(50):
            proba_val = sess.run(
                [proba_tensor],
                feed_dict={input_tensor: np.array(state).reshape(1, n_inputs)})
            # Greedy choice: the action with the highest probability.
            best_action = np.argmax(proba_val)
            out_of_range, coords = alter_dat(best_action, path, 0)
            if out_of_range == 1:
                break
            state = _solve_and_observe(coords)

        return state

In [45]:
predict(PATH,PATH1,1,8,8)

INFO:tensorflow:Restoring parameters from ./policy1/pinjointed1.ckpt


(0.625, 0.4500000000000004, -0.006606)

In [39]:
# check "test.dat" to see how the geometry of the structure has changed

In [40]:
predict(PATH2,PATH3,2,10,11)

INFO:tensorflow:Restoring parameters from ./policy1/pinjointed1.ckpt


(0.5, 0.363636363636364, -0.01368)

In [41]:
# check "test1.dat" to see how the geometry of the new structure has changed