In [30]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D
from keras.layers.pooling import GlobalMaxPooling1D
from keras.layers import Bidirectional
from keras import initializers, regularizers, optimizers, layers
from keras.models import load_model
from sklearn import metrics
from keras.utils.np_utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import keras
import numpy as np
import pandas as pd
import os
import tensorflow as tf

In [32]:
def loadData_Tokenizer(X_train, X_test,MAX_NB_chars=150,MAX_SEQUENCE_LENGTH=200):
    np.random.seed(7)
    text = np.concatenate((X_train, X_test), axis=0)
    text = np.array(text)

    alphabet="abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-,;.!?:'\"/\\|_@#$%^&*~`+-=<>()[]{}"
    char_dict = {}
    for i, char in enumerate(alphabet):
      char_dict[char] = i + 1
    
    print("characters of the dictionary are  ")
    print(char_dict)
    tokenizer = Tokenizer(num_words=MAX_NB_chars, char_level=True, oov_token='UNK')
    # Use char_dict to replace the tk.word_index
    tokenizer.word_index = char_dict 
    # Add 'UNK' to the vocabulary 
    tokenizer.word_index[tokenizer.oov_token] = max(char_dict.values()) + 1
    tokenizer.fit_on_texts(text)
    sequences = tokenizer.texts_to_sequences(text)
    word_index = tokenizer.word_index
    text = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
    vocab_size = len(tokenizer.word_index)+1

    print('Found %s unique tokens.' % len(word_index))
    indices = np.arange(text.shape[0])
    # np.random.shuffle(indices)
    text = text[indices]
    print(text.shape)
    X_train = text[0:len(X_train), ]
    X_test = text[len(X_train):, ]
    return (X_train, X_test, word_index,vocab_size)

In [33]:
def Build_Model_RCNN_Text(word_index, nclasses, VS, MAX_SEQUENCE_LENGTH=200, EMBEDDING_DIM=100):

    kernel_size = 5
    filters = 100
    pool_size = 4
    gru_node = 128

    embed_size = EMBEDDING_DIM
    embedding_layer = Embedding(VS, embed_size, input_length=MAX_SEQUENCE_LENGTH, trainable=True)

    model = Sequential()
    model.add(embedding_layer)
    model.add(Conv1D(filters=100, kernel_size=5,padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=4))
    model.add(Bidirectional(LSTM(128, return_sequences=True,name='lstm_layer',dropout=0.3,recurrent_dropout=0.5)))
    #model.add(Bidirectional(LSTM(128, return_sequences=True,name='lstm_layer',dropout=0.4,recurrent_dropout=0.5)))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(units=512, activation="relu",
    kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
    bias_regularizer=regularizers.l2(1e-4),
    activity_regularizer=regularizers.l2(1e-5)))
    model.add(Dropout(rate=0.4))
    model.add(Dense(7, activation="softmax"))
    opt = keras.optimizers.Adam(learning_rate=0.0003,beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-7,
    amsgrad=False,
    name="Adam")
    model.compile(loss='categorical_crossentropy',
                  optimizer= opt,
                  metrics=['accuracy'])

    return model

In [34]:
def get_data_train():
  df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/Data_Aug/trainset1.csv')
  df.head()
  X=df['body']
  Y = df['intent_2'].astype("category")
  cat_types= Y.cat.categories
  Y_nids = Y.cat.codes
  Y = to_categorical(Y_nids, num_classes=7)
  return X, Y

In [35]:
def get_data_test():
  df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/Data_Aug/Validation_data.csv')
  df.head()
  X_t = df['body']
  Y = df['intent_2'].astype("category")
  cat_types= Y.cat.categories
  Y_nids = Y.cat.codes
  y_t = to_categorical(Y_nids, num_classes=7)
  return X_t, y_t


In [36]:
#X_train, X_test1, y_train, y_test = train_test_split(X,Y, test_size=0.2, random_state=42)
X_train,y_train= get_data_train()
X_val,y_val= get_data_test()
X_train,X_val, word_index, vocab_size = loadData_Tokenizer(X_train,X_val)
nclasses=7

model_CharRCNN2 = Build_Model_RCNN_Text(word_index, nclasses,vocab_size)


model_CharRCNN2.summary()


characters of the dictionary are  
{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, 'A': 27, 'B': 28, 'C': 29, 'D': 30, 'E': 31, 'F': 32, 'G': 33, 'H': 34, 'I': 35, 'J': 36, 'K': 37, 'L': 38, 'M': 39, 'N': 40, 'O': 41, 'P': 42, 'Q': 43, 'R': 44, 'S': 45, 'T': 46, 'U': 47, 'V': 48, 'W': 49, 'X': 50, 'Y': 51, 'Z': 52, '0': 53, '1': 54, '2': 55, '3': 56, '4': 57, '5': 58, '6': 59, '7': 60, '8': 61, '9': 62, '-': 86, ',': 64, ';': 65, '.': 66, '!': 67, '?': 68, ':': 69, "'": 70, '"': 71, '/': 72, '\\': 73, '|': 74, '_': 75, '@': 76, '#': 77, '$': 78, '%': 79, '^': 80, '&': 81, '*': 82, '~': 83, '`': 84, '+': 85, '=': 87, '<': 88, '>': 89, '(': 90, ')': 91, '[': 92, ']': 93, '{': 94, '}': 95}
Found 57 unique tokens.
(566, 200)
Model: "sequential_5"
_________________________________________________________________
Lay

In [37]:
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath='/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CharCRNN/wgt/weights.hdf5', monitor='val_loss', verbose=2, save_best_only=True, save_weights_only=False, mode='auto', period=1)

model_CharRCNN2.fit(X_train, y_train,
                              validation_data=(X_val,y_val),
                              epochs=40,
                              batch_size=32,
                              verbose=2, callbacks=[checkpointer])

Epoch 1/40
17/17 - 10s - loss: 2.0304 - accuracy: 0.1969 - val_loss: 2.0213 - val_accuracy: 0.2083

Epoch 00001: val_loss improved from inf to 2.02131, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CharCRNN/wgt/weights.hdf5
Epoch 2/40
17/17 - 5s - loss: 2.0106 - accuracy: 0.1873 - val_loss: 1.9948 - val_accuracy: 0.2500

Epoch 00002: val_loss improved from 2.02131 to 1.99480, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CharCRNN/wgt/weights.hdf5
Epoch 3/40
17/17 - 5s - loss: 1.9349 - accuracy: 0.2664 - val_loss: 1.9325 - val_accuracy: 0.3750

Epoch 00003: val_loss improved from 1.99480 to 1.93248, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CharCRNN/wgt/weights.hdf5
Epoch 4/40
17/17 - 5s - loss: 1.8132 - accuracy: 0.2876 - val_loss: 1.8926 - val_accuracy: 0.2917

Epoch 00004: val_loss improved from 1.93248 to 1.89261, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CharCRNN/

<tensorflow.python.keras.callbacks.History at 0x7f8f82c86210>

In [41]:
#score, acc = model_CharRCNN.evaluate(X_test,y_test
#                            ,batch_size=128)
#print('Test accuracy with charcrnn:', acc)

In [40]:
model_CharRCNN2.save('/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/model_CharCRNN/saved_model/my_modelCharm2')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/model_CharCRNN/saved_model/my_modelCharm2/assets


In [42]:
predicted = model_CharRCNN2.predict(X_val)

In [43]:
predicted = np.argmax(predicted, axis=1)

In [48]:
 target_names = ['Dependencies between design parameters',
       'Effects of design parameters on objectives',
       'Exploration of design parameter values', 'Monitoring objective values',
       'NAN', 'Selected design parameter values for objective(s)',
       'Tradeoff between objectives']

In [50]:
print(metrics.classification_report(np.argmax(y_val, axis=1), predicted , target_names=target_names))

                                                   precision    recall  f1-score   support

           Dependencies between design parameters       0.36      0.80      0.50         5
       Effects of design parameters on objectives       0.27      0.30      0.29        10
           Exploration of design parameter values       0.00      0.00      0.00         8
                      Monitoring objective values       0.29      0.29      0.29         7
                                              NAN       0.90      0.90      0.90        10
Selected design parameter values for objective(s)       0.40      0.50      0.44         4
                      Tradeoff between objectives       0.33      0.25      0.29         4

                                         accuracy                           0.44        48
                                        macro avg       0.37      0.43      0.39        48
                                     weighted avg       0.38      0.44      0.40        

In [51]:
ans= np.array(['Dependencies between design parameters',
       'Effects of design parameters on objectives',
       'Exploration of design parameter values', 'Monitoring objective values',
       'NAN', 'Selected design parameter values for objective(s)',
       'Tradeoff between objectives'])[predicted ]

In [52]:
for x, y in zip(X_t, ans):
  print("Chat body: ",x)
  print("Predicted intent: ", y, "\n")

Chat body:  My design depends on the piston diameter, what happens if you decrease it? 
Predicted intent:  Dependencies between design parameters 

Chat body:  Does increasing the flywheel thickness increase or decrease "c"?
Predicted intent:  Dependencies between design parameters 

Chat body:  That is what my document specifies. I will keep c low unless the piston needs it to be higher
Predicted intent:  Dependencies between design parameters 

Chat body:  thats if D = 45 where D is the piston bore diameter
Predicted intent:  Effects of design parameters on objectives 

Chat body:  Does the flywheel shaft diameter "ds" affect any variables in your control?
Predicted intent:  Dependencies between design parameters 

Chat body:  But if i increase the thickness i could make it a lot higher
Predicted intent:  Dependencies between design parameters 

Chat body:  Increasing the offset increases the mass and decreases the factor of safety.
Predicted intent:  Tradeoff between objectives 

Ch