In [1]:
%matplotlib inline
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import r2_score 
from sklearn.model_selection import train_test_split

# Reading Data

The following method takes the dataset name as input and does the following two things:

## Discretising Data
To reduce the computational load of the problem, we have only taken data at every 1% strain step instead of all timesteps for which the FE Simulation runs.

- `OUT_FILE_PATH`: Location of the `out.csv` for a dataset, which is output at the end of FE simulations
- `DATA_FILE_PATH`: Data file generated using the seacas-exodus python script
- `out_file["eff_strain"]` - contains all values of applied strain at every timestep, we look for timesteps at which there is 1\% increment in strain and record them. 
- `df` - contains all the data as read from the FE output file.
- `df_steps` - contains the data only at 1\% strain steps 
- `df_steps_norm` - contains the normalised data only at 1\% strain steps

## Grouping Data
The method returns a pandas grouped object with data grouped with respect to x and y coordinates. So all the variable values with the same x and y are present together.

- `df_group`: Pandas grouped object returned by the function

In [2]:
def read_data(dataset, n_elements=160000):
    """Load one dataset, keep the rows at the selected strain steps, normalise and group them.

    Parameters
    ----------
    dataset : str
        Dataset name; substituted into the two path templates below.
    n_elements : int, optional
        Number of rows the data file is assumed to hold per stored ``time``
        value (presumably the number of mesh elements - confirm for new
        meshes). Defaults to 160000, the value that used to be hard-coded.

    Returns
    -------
    pandas DataFrameGroupBy
        Min-max normalised rows at the selected steps (``time`` column
        removed), grouped by ``["elem_x", "elem_y"]``.
    """
    # NOTE(review): absolute, machine-specific paths; kept unchanged so existing runs still resolve.
    OUT_FILE_PATH = "/home/sarthak/projects/seacas-exodus/lib/soudip_dataset/%s/out.csv" % dataset
    DATA_FILE_PATH = "/home/sarthak/projects/model_training/new_data/%s.csv" % dataset
    print(OUT_FILE_PATH)
    print(DATA_FILE_PATH)

    out_file = pd.read_csv(OUT_FILE_PATH)

    # Record the (1-based) row number of every row whose applied strain reaches the
    # current threshold; the threshold then moves up by 0.01 (1 % strain).
    # The threshold is accumulated exactly as before so the selected steps do not change.
    lim = 0.00
    timesteps = []
    for i, strain in enumerate(out_file["eff_strain"].values):
        if strain >= lim:
            timesteps.append(i + 1)
            lim = lim + 0.01

    print("Reading %s \n" % dataset)
    # Reading data
    df = pd.read_csv(DATA_FILE_PATH)
    df_dropped = df.drop(columns=['strain_yy', 'phases', 'pressure', 'sdv22', 'sdv23',
                                  'total_strain_xy', 'elem_id', 'blk_id', 'total_stress_xx', 'total_stress_yy',
                                  'total_strain_xx', 'total_strain_yy' ])

    # The data file may hold fewer time values than out.csv has rows; scale the
    # recorded row numbers down to the data file's `time` values.
    scale_steps = int(np.ceil(len(out_file)/(len(df_dropped)/n_elements)))
    steps = np.ceil((np.array(timesteps)/scale_steps))
    df_steps = df_dropped[df_dropped["time"].isin(steps)]

    # Min-max normalise every remaining column using the selected rows only.
    df_steps_norm = (df_steps-df_steps.min())/(df_steps.max()-df_steps.min())
    df_steps_norm = df_steps_norm.drop(columns=['time'])
    df_group = df_steps_norm.groupby(["elem_x", "elem_y"])
    return df_group

# Structuring Data

`series_to_supervised`: takes a sequence, the number of steps to be used as input and the number of steps in the output. It restructures the series into the format of a supervised learning problem and returns two arrays: the inputs (x) and the output (y) associated with each input.

`structure_data`: takes the pandas grouped object returned by the `read_data` function and structures it for training. Our model is designed such that it takes two steps and predicts the next 13 steps using the data

| Input | Output |
| :---: | :-----: |
| X<sub>1</sub>, X<sub>2</sub> | X<sub>3</sub> ..... X<sub>15</sub> |
| X<sub>2</sub>, X<sub>3</sub> | X<sub>4</sub> ..... X<sub>16</sub> |
| . | . |
| . | . |
| . | . |

Each input array and each output array has the shape (samples, timesteps, variables), i.e. it is a 3-dimensional tensor.

In [3]:
def series_to_supervised(sequences, n_steps_in, n_steps_out, dropnan=True):
    """Cut a multivariate series into sliding (input, output) windows.

    Parameters
    ----------
    sequences : array-like, shape (n_timesteps, n_variables)
        The series, one row per time step. Converted with ``np.asarray``.
    n_steps_in : int
        Number of consecutive rows in each input window.
    n_steps_out : int
        Number of rows, directly following the input window, in each output window.
    dropnan : bool, optional
        Accepted for backward compatibility; it is not used.

    Returns
    -------
    (X, y) : tuple of numpy.ndarray
        ``X`` has shape (n_windows, n_steps_in, n_variables) and ``y`` has
        shape (n_windows, n_steps_out, n_variables). When the series is too
        short for a single window, both arrays keep these trailing dimensions
        with ``n_windows == 0`` so they can still be concatenated with the
        windows of other series.
    """
    sequences = np.asarray(sequences)
    X, y = [], []
    for start in range(len(sequences)):
        in_end = start + n_steps_in
        out_end = in_end + n_steps_out
        # Stop as soon as the output window would run past the end of the series.
        if out_end > len(sequences):
            break
        X.append(sequences[start:in_end, :])
        y.append(sequences[in_end:out_end, :])

    if not X:
        # np.array([]) would have shape (0,), which cannot be concatenated with 3-D windows.
        trailing = sequences.shape[1:]
        return (np.empty((0, n_steps_in) + trailing, dtype=sequences.dtype),
                np.empty((0, n_steps_out) + trailing, dtype=sequences.dtype))
    return np.array(X), np.array(y)

def structure_data(df_group, n_steps_in=2, n_steps_out=13,
                   feature_cols=("eff_strain", "vonmises", "triaxiality")):
    """Turn every group of a grouped frame into sliding training windows.

    Parameters
    ----------
    df_group : pandas GroupBy
        Grouped frame as returned by `read_data`; each group's rows are used
        in the order in which they appear in the group.
    n_steps_in : int, optional
        Length of each input window (default 2, the previously hard-coded value).
    n_steps_out : int, optional
        Length of each output window (default 13, the previously hard-coded value).
    feature_cols : sequence of str, optional
        Columns stacked, in this order, along the last axis of the windows.

    Returns
    -------
    x_vals, y_vals : numpy.ndarray
        Windows of all groups concatenated along the first axis.
    x_raw_list, y_raw_list : list of numpy.ndarray
        The per-group window arrays, in group iteration order.
    """
    x_raw_list = []
    y_raw_list = []
    for _, group in df_group:
        # One column per variable -> array of shape (rows in group, number of variables).
        features = np.stack([group[col].values for col in feature_cols], axis=1)
        x, y = series_to_supervised(features, n_steps_in, n_steps_out)
        x_raw_list.append(x)
        y_raw_list.append(y)
    x_vals = np.concatenate(x_raw_list)
    y_vals = np.concatenate(y_raw_list)
    return x_vals, y_vals, x_raw_list, y_raw_list

In [4]:
# Load and window the first training dataset.
df_group = read_data('AR1')
x_vals, y_vals, x_raw_list, y_raw_list = structure_data(df_group)

# Load and window the second training dataset under separate `_2` names.
df_group_2 = read_data('test61')
x_vals_2, y_vals_2, x_raw_list_2, y_raw_list_2 = structure_data(df_group_2)

/home/sarthak/projects/seacas-exodus/lib/soudip_dataset/AR1/out.csv
/home/sarthak/projects/model_training/new_data/AR1.csv
Reading AR1 

/home/sarthak/projects/seacas-exodus/lib/soudip_dataset/test61/out.csv
/home/sarthak/projects/model_training/new_data/test61.csv
Reading test61 



In [5]:
# Put together data from different datasets for training.
# The combined arrays are rebuilt from the per-group window lists instead of from
# `x_vals` / `y_vals` themselves, so re-running this cell gives the same result
# rather than appending the second dataset once more.
x_vals = np.concatenate(x_raw_list + x_raw_list_2)
y_vals = np.concatenate(y_raw_list + y_raw_list_2)
print(x_vals.shape)
print(y_vals.shape)
n = x_vals.shape[0]

(1280000, 2, 3)
(1280000, 13, 3)


In [6]:
# Positional 70 % / 20 % / 10 % split into train, test and validation data.
# NOTE(review): the slices follow the row order of `x_vals`; nothing is shuffled here.
train_end = int(0.7*n)
test_end = int(0.9*n)

train_x, test_x, val_x = x_vals[:train_end], x_vals[train_end:test_end], x_vals[test_end:]
train_y, test_y, val_y = y_vals[:train_end], y_vals[train_end:test_end], y_vals[test_end:]

# Size of the last axis (number of variables per time step).
print(train_x.shape[2])

3


# Defining the model
The model along with all the hyperparameters is defined below. In the below example the hyper parameters are as follows:

- Number of layers: 8
- Number of LSTM units in each layer: 100
- Activation function: `relu`
- Optimisation function: `adam`
- Loss function: `mse`
- Epochs: 50
- `return_sequences`: is given as `True` because we require the output of the LSTM at each timestep and not just the last one. 
- `history`: stores information about the training process of the model

In [7]:
def get_compiled_model(n_steps_in=2, n_steps_out=13, n_features=3, n_units=100, n_decoder_layers=7):
    """Build and compile the stacked-LSTM window model.

    The defaults reproduce the architecture that used to be hard-coded: one
    LSTM reading the input window, a `RepeatVector` that repeats its output
    once per predicted step, seven sequence-returning LSTMs and a
    time-distributed dense layer producing `n_features` values per step.

    Parameters
    ----------
    n_steps_in : int, optional
        Number of time steps in each input window.
    n_steps_out : int, optional
        Number of time steps the model outputs.
    n_features : int, optional
        Number of variables per time step (input and output).
    n_units : int, optional
        Units in every LSTM layer.
    n_decoder_layers : int, optional
        Number of LSTM layers placed after the `RepeatVector`.

    Returns
    -------
    tf.keras.Model
        Model compiled with the `adam` optimiser and `mse` loss.
    """
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.LSTM(n_units, activation='relu', input_shape=(n_steps_in, n_features)))
    model.add(tf.keras.layers.RepeatVector(n_steps_out))
    for _ in range(n_decoder_layers):
        model.add(tf.keras.layers.LSTM(n_units, activation='relu', return_sequences=True))
    model.add(tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(n_features)))
    model.compile(optimizer='adam', loss='mse')
    return model
# Take the window sizes from the training arrays so the model always matches the data.
model = get_compiled_model(n_steps_in=train_x.shape[1],
                           n_steps_out=train_y.shape[1],
                           n_features=train_x.shape[2])
history = model.fit(train_x, train_y, epochs=50, shuffle=True)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# Saving the model
The trained model is saved under `base_path` for future analysis and making predictions. A brief summary of the model architecture is also stored there in a text file called `model_summary.txt`. 

In [8]:
# Loss of the trained model on the held-out validation slice.
print(model.evaluate(val_x, val_y))
base_path = '/home/sarthak/projects/model_training/trained_models/window_lstm_AR1_61'
# Create the output directory explicitly so that writing the summary file does not
# depend on `model.save` having created it as a side effect.
os.makedirs(base_path, exist_ok=True)
model.save(base_path+'/the_model')
with open(base_path + '/model_summary.txt','w') as fh:
    # `model.summary` calls print_fn once per line; write each line to the file.
    model.summary(print_fn=lambda x: fh.write(x + '\n'))

0.0003447342023719102
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: /home/sarthak/projects/model_training/trained_models/window_lstm_AR1_61/the_model/assets


# Making Predictions

The first part of this function is similar to the `read_data` function. It loads the data for a new microstructure and structures it in the format used by the model. Then the model uses the first two time steps to predict 13 time steps into the future. The predictions are compared with the true values, and the Mean Absolute Error and R2 values are calculated for the predicted time step selected by `stop` (the final one when `stop = 12`).

In [9]:
def make_prediction(dataset, start, stop, model, n_elements=160000):
    """Predict one window for every group of a dataset and print R2 and mean absolute error.

    The loading and normalisation steps mirror `read_data`; they are repeated
    here because the un-normalised minima and maxima are needed to convert the
    results back to physical units.

    Parameters
    ----------
    dataset : str
        Dataset name; substituted into the two path templates below.
    start : int
        Index of the sliding window (per group) that is fed to the model and
        whose true continuation is used for comparison.
    stop : int
        Index of the predicted time step that is scored.
    model : object with a ``predict`` method
        Trained model mapping input windows to output windows.
    n_elements : int, optional
        Number of rows the data file is assumed to hold per stored ``time``
        value. Defaults to 160000, the value that used to be hard-coded.

    Returns
    -------
    None
        For each of ``eff_strain``, ``vonmises`` and ``triaxiality`` the R2
        score and then the mean absolute error are printed.
    """
    OUT_FILE_PATH = "/home/sarthak/projects/seacas-exodus/lib/soudip_dataset/%s/out.csv" % dataset
    DATA_FILE_PATH = "/home/sarthak/projects/model_training/new_data/%s.csv" % dataset
    print(OUT_FILE_PATH)
    print(DATA_FILE_PATH)

    out_file = pd.read_csv(OUT_FILE_PATH)

    # Same step selection as in `read_data`: record the 1-based row number each
    # time the applied strain reaches the threshold, then raise it by 0.01.
    lim = 0.00
    timesteps = []
    for i, strain in enumerate(out_file["eff_strain"].values):
        if strain >= lim:
            timesteps.append(i + 1)
            lim = lim + 0.01

    print("Reading %s \n" % dataset)
    # Reading data
    df = pd.read_csv(DATA_FILE_PATH)
    df_dropped = df.drop(columns=['strain_yy', 'phases', 'pressure', 'sdv22', 'sdv23',
                                  'total_strain_xy', 'elem_id', 'blk_id', 'total_stress_xx', 'total_stress_yy',
                                  'total_strain_xx', 'total_strain_yy' ])
    scale_steps = int(np.ceil(len(out_file)/(len(df_dropped)/n_elements)))
    steps = np.ceil((np.array(timesteps)/scale_steps))
    df_steps = df_dropped[df_dropped["time"].isin(steps)]

    # Keep the per-column extrema: they are reused below to undo the normalisation.
    col_min = df_steps.min()
    col_max = df_steps.max()
    df_steps_norm = (df_steps-col_min)/(col_max-col_min)
    df_steps_norm = df_steps_norm.drop(columns=['time'])
    df_group = df_steps_norm.groupby(["elem_x", "elem_y"])

    _, _, x_raw_list, y_raw_list = structure_data(df_group)

    # Stacking requires every group to have produced the same number of windows.
    input_step = np.array(x_raw_list)[:, start, :, :]
    next_step = model.predict(input_step)

    # Compare against the continuation of the SAME window that was fed to the model
    # (previously window 0 was always used, which is only right for start == 0).
    true_next = np.array(y_raw_list)[:, start, :, :]

    variables = ["eff_strain", "vonmises", "triaxiality"]
    for j, var_name in enumerate(variables):
        var_min = col_min[var_name]
        var_range = col_max[var_name] - var_min
        true_scaled = (true_next[:, stop, j]*var_range) + var_min
        pred_scaled = (next_step[:, stop, j]*var_range) + var_min
        print(r2_score(true_scaled, pred_scaled))
        # Mean absolute error over the actual number of samples.
        print(np.sum(np.abs(true_scaled - pred_scaled))/len(true_scaled))

In [34]:
# Score the trained model on 'test65': feed window 0 (start=0) and evaluate predicted step index 12 (stop=12).
make_prediction('test65', 0, 12, model)

/home/sarthak/projects/seacas-exodus/lib/soudip_dataset/test65/out.csv
/home/sarthak/projects/model_training/new_data/test65.csv
Reading test65 

0.8789629454249992
0.017838955827673208
0.9949015731208434
26.226886561781676
0.8993939228554485
0.08973756983621067
