In [44]:
# Mount Google Drive into the Colab filesystem. Later cells read the CSVs and
# GloVe vectors from, and write checkpoints to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Conv1D, MaxPooling1D
from keras.layers.pooling import GlobalMaxPooling1D
from keras.layers import Bidirectional
from keras import initializers, regularizers, optimizers, layers
from keras.models import load_model
from sklearn import metrics
from keras.utils.np_utils import to_categorical
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import keras
import numpy as np
import pandas as pd
import os
import tensorflow as tf

In [46]:
def loadData_Tokenizer(X_train, X_test, MAX_NB_WORDS=5000, MAX_SEQUENCE_LENGTH=200,
                       glove_path='/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/Glove/glove.6B.100d.txt'):
    """Tokenize and pad the train/test texts and load GloVe word vectors.

    The tokenizer is fitted on the concatenation of ``X_train`` and
    ``X_test``, so the returned ``word_index`` covers both sets.

    Args:
        X_train: sequence of training texts.
        X_test: sequence of test/validation texts.
        MAX_NB_WORDS: ``num_words`` passed to the Keras ``Tokenizer``.
        MAX_SEQUENCE_LENGTH: ``maxlen`` passed to ``pad_sequences``.
        glove_path: path of the GloVe text file (one ``word v1 v2 ...`` entry
            per line). Defaults to the location originally hard-coded here.

    Returns:
        Tuple ``(X_train, X_test, word_index, embeddings_index)`` where the
        first two items are the padded integer sequences for each split,
        ``word_index`` is the tokenizer's word -> index dict and
        ``embeddings_index`` maps word -> float32 vector.

    Side effects:
        Seeds NumPy's global RNG with 7 and prints vocabulary/shape summaries.
    """
    np.random.seed(7)

    # Remember the split point before the names are rebound to padded arrays.
    n_train = len(X_train)
    text = np.array(np.concatenate((X_train, X_test), axis=0))

    tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
    tokenizer.fit_on_texts(text)
    sequences = tokenizer.texts_to_sequences(text)
    word_index = tokenizer.word_index
    text = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
    print('Found %s unique tokens.' % len(word_index))
    print(text.shape)

    X_train = text[:n_train]
    X_test = text[n_train:]

    embeddings_index = {}
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank line: nothing to parse.
                continue
            word = values[0]
            try:
                coefs = np.asarray(values[1:], dtype='float32')
            except ValueError:
                # Malformed entry. Skip it instead of storing the previous
                # line's vector under this word.
                continue
            embeddings_index[word] = coefs
    print('Total %s word vectors.' % len(embeddings_index))
    return (X_train, X_test, word_index, embeddings_index)

In [47]:
def Build_Model_RCNN_Text(word_index, embeddings_index, nclasses, MAX_SEQUENCE_LENGTH=200, EMBEDDING_DIM=100):
    """Build and compile the Embedding -> Conv1D -> BiLSTM -> Dense classifier.

    Args:
        word_index: dict mapping word -> integer index (as produced by the
            tokenizer); the embedding table has ``len(word_index) + 1`` rows.
        embeddings_index: dict mapping word -> pretrained vector.
        nclasses: number of output classes (width of the softmax layer).
        MAX_SEQUENCE_LENGTH: length of the padded input sequences.
        EMBEDDING_DIM: dimensionality of the pretrained vectors.

    Returns:
        A compiled Keras ``Sequential`` model (categorical cross-entropy loss,
        Adam optimizer, accuracy metric).

    Raises:
        ValueError: if a pretrained vector's length differs from
            ``EMBEDDING_DIM``.
    """
    kernel_size = 5
    filters = 100
    pool_size = 4
    lstm_units = 128

    vocab_size = len(word_index) + 1

    # Rows start as uniform random values in [0, 1); any row that is not
    # overwritten below (words absent from embeddings_index) keeps that
    # random initialisation.
    embedding_matrix = np.random.random((vocab_size, EMBEDDING_DIM))
    for word, i in word_index.items():
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is None:
            continue
        if len(embedding_vector) != EMBEDDING_DIM:
            # Raise instead of exit(1): exiting would take the kernel down.
            raise ValueError(
                "Embedding vector for %r has length %d but EMBEDDING_DIM is %d; "
                "make sure EMBEDDING_DIM matches the GloVe file."
                % (word, len(embedding_vector), EMBEDDING_DIM))
        embedding_matrix[i] = embedding_vector

    model = Sequential()
    # Pretrained embeddings are frozen (trainable=False).
    model.add(Embedding(vocab_size,
                        EMBEDDING_DIM,
                        weights=[embedding_matrix],
                        input_length=MAX_SEQUENCE_LENGTH,
                        trainable=False))
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, padding='same', activation='relu'))
    model.add(MaxPooling1D(pool_size=pool_size))
    model.add(Bidirectional(LSTM(lstm_units, return_sequences=True, name='lstm_layer',
                                 dropout=0.3, recurrent_dropout=0.5)))
    model.add(GlobalMaxPooling1D())
    model.add(Dense(units=512, activation="relu",
                    kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4),
                    bias_regularizer=regularizers.l2(1e-4),
                    activity_regularizer=regularizers.l2(1e-5)))
    model.add(Dropout(rate=0.4))
    # Output width follows nclasses rather than a hard-coded 7.
    model.add(Dense(nclasses, activation="softmax"))

    opt = keras.optimizers.Adam(learning_rate=0.0003,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-07,
                                amsgrad=False,
                                name="Adam")
    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])

    return model

In [48]:
def get_data_train(csv_path='/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/Data_Aug/trainset1.csv',
                   num_classes=7, categories=None):
    """Load the training CSV and return texts with one-hot intent labels.

    Args:
        csv_path: CSV file with a ``body`` (text) and an ``intent_2`` (label)
            column. Defaults to the training set path originally hard-coded.
        num_classes: width of the one-hot encoding.
        categories: optional ordered list of label names. When given, integer
            codes follow this order, so several files can share one encoding.
            When ``None`` the codes come from the labels present in this file
            only (pandas' default category ordering).

    Returns:
        ``(X, Y)``: the ``body`` column as a pandas Series and the one-hot
        label matrix from ``to_categorical``.

    Raises:
        ValueError: if any row has a missing label or one not in
            ``categories``.
    """
    df = pd.read_csv(csv_path)
    X = df['body']

    if categories is None:
        labels = df['intent_2'].astype("category")
    else:
        labels = df['intent_2'].astype(pd.CategoricalDtype(categories=list(categories)))

    # pandas encodes missing/unknown categories as code -1; fail loudly
    # rather than pass a negative class id to to_categorical.
    Y_nids = labels.cat.codes
    if (Y_nids < 0).any():
        raise ValueError(
            "%d row(s) in %s have a missing or unknown 'intent_2' label"
            % (int((Y_nids < 0).sum()), csv_path))

    Y = to_categorical(Y_nids, num_classes=num_classes)
    return X, Y

In [49]:
 # Keep the raw validation chat text: the cell that calls loadData_Tokenizer
 # rebinds X_val to padded integer sequences, and the prediction print-out
 # further down needs the original strings. The former `df.head()` here was
 # never displayed (it was not the cell's last expression), so it is dropped.
 df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/Data_Aug/Validation_data.csv')
 X_1 = df['body']

In [50]:
def get_data_test(csv_path='/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/Data_Aug/Validation_data.csv',
                  num_classes=7, categories=None):
    """Load the held-out CSV and return texts with one-hot intent labels.

    Despite the function name, the default file is ``Validation_data.csv``.

    Args:
        csv_path: CSV file with a ``body`` (text) and an ``intent_2`` (label)
            column. Defaults to the path originally hard-coded here.
        num_classes: width of the one-hot encoding.
        categories: optional ordered list of label names. When given, integer
            codes follow this order, so this file can share an encoding with
            the training set. When ``None`` the codes come from the labels
            present in this file only (pandas' default category ordering).

    Returns:
        ``(X_t, y_t)``: the ``body`` column as a pandas Series and the one-hot
        label matrix from ``to_categorical``.

    Raises:
        ValueError: if any row has a missing label or one not in
            ``categories``.
    """
    df = pd.read_csv(csv_path)
    X_t = df['body']

    if categories is None:
        labels = df['intent_2'].astype("category")
    else:
        labels = df['intent_2'].astype(pd.CategoricalDtype(categories=list(categories)))

    # pandas encodes missing/unknown categories as code -1; fail loudly
    # rather than pass a negative class id to to_categorical.
    Y_nids = labels.cat.codes
    if (Y_nids < 0).any():
        raise ValueError(
            "%d row(s) in %s have a missing or unknown 'intent_2' label"
            % (int((Y_nids < 0).sum()), csv_path))

    y_t = to_categorical(Y_nids, num_classes=num_classes)
    return X_t, y_t


In [51]:
# Number of intent classes handed to the model builder; the loaders one-hot
# encode with the same width (num_classes=7).
nclasses = 7

# Raw text plus one-hot labels for both splits.
X_train, y_train = get_data_train()
X_val, y_val = get_data_test()

# From here on X_train / X_val hold padded integer sequences, not raw text.
X_train, X_val, word_index, embeddings_index = loadData_Tokenizer(X_train, X_val)

model_RCNN = Build_Model_RCNN_Text(word_index, embeddings_index, nclasses)
model_RCNN.summary()


Found 777 unique tokens.
(566, 200)
Total 400000 word vectors.
Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_3 (Embedding)      (None, 200, 100)          77800     
_________________________________________________________________
conv1d_3 (Conv1D)            (None, 200, 100)          50100     
_________________________________________________________________
max_pooling1d_3 (MaxPooling1 (None, 50, 100)           0         
_________________________________________________________________
bidirectional_3 (Bidirection (None, 50, 256)           234496    
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 512)               131584    
_________________________________________________________

In [52]:
# Save the full model (not just weights) whenever val_loss improves.
# The deprecated `period=1` argument is omitted; the callback already
# checks once per epoch by default.
checkpointer = tf.keras.callbacks.ModelCheckpoint(
    filepath='/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/wgt/weights.hdf5',
    monitor='val_loss',
    verbose=2,
    save_best_only=True,
    save_weights_only=False,
    mode='auto')

# Keep the History object so the loss/accuracy curves stay available, and so
# the cell does not end by echoing its repr.
# NOTE(review): the same validation split drives checkpoint selection and the
# final report below, so the reported metrics are optimistic.
history = model_RCNN.fit(X_train, y_train,
                         validation_data=(X_val, y_val),
                         epochs=40,
                         batch_size=64,
                         verbose=2,
                         callbacks=[checkpointer])

Epoch 1/40
9/9 - 10s - loss: 2.0540 - accuracy: 0.1467 - val_loss: 1.9990 - val_accuracy: 0.1667

Epoch 00001: val_loss improved from inf to 1.99896, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/wgt/weights.hdf5
Epoch 2/40
9/9 - 4s - loss: 2.0051 - accuracy: 0.2336 - val_loss: 1.9868 - val_accuracy: 0.3333

Epoch 00002: val_loss improved from 1.99896 to 1.98685, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/wgt/weights.hdf5
Epoch 3/40
9/9 - 4s - loss: 1.9725 - accuracy: 0.2413 - val_loss: 1.9685 - val_accuracy: 0.2083

Epoch 00003: val_loss improved from 1.98685 to 1.96847, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/wgt/weights.hdf5
Epoch 4/40
9/9 - 4s - loss: 1.9323 - accuracy: 0.2529 - val_loss: 1.9225 - val_accuracy: 0.2292

Epoch 00004: val_loss improved from 1.96847 to 1.92249, saving model to /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/wgt/weig

<tensorflow.python.keras.callbacks.History at 0x7f0d521b3250>

In [53]:
#score, acc = model_RCNN.evaluate(X_test,y_test
#                            ,batch_size=128)
#print('Test accuracy with charcrnn:', acc)

In [54]:
# Export the in-memory model as left by fit(); this is separate from the
# best-val_loss file written by the checkpoint callback.
saved_model_dir = '/content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/saved_model/my_models1'
model_RCNN.save(saved_model_dir)

INFO:tensorflow:Assets written to: /content/drive/MyDrive/Colab Notebooks/Data/Engine_design/CCNNLSTM/saved_model/my_models1/assets


In [55]:
# Run the trained model on the padded validation sequences. The final layer is
# a 7-unit softmax, so each row holds one score per intent class.
predicted = model_RCNN.predict(X_val)

In [56]:
# Collapse per-class scores to integer class ids. The ndim guard makes this
# cell safe to re-run: once `predicted` is already a 1-D array of ids, a
# second argmax over axis=1 would raise an axis error.
predicted = np.asarray(predicted)
if predicted.ndim > 1:
    predicted = np.argmax(predicted, axis=1)

In [57]:
 # Intent names indexed by integer class id. The list is in alphabetical order;
 # presumably that matches the category codes assigned in the data loaders -
 # verify if the label set changes.
 target_names = [
     'Dependencies between design parameters',
     'Effects of design parameters on objectives',
     'Exploration of design parameter values',
     'Monitoring objective values',
     'NAN',
     'Selected design parameter values for objective(s)',
     'Tradeoff between objectives',
 ]

In [58]:
# Per-class precision/recall/F1 on the validation split. `labels` pins the
# report to every class id in target_names order; without it the call fails
# when some class is absent from both y_val and the predictions, because the
# number of inferred labels no longer matches len(target_names).
print(metrics.classification_report(
    np.argmax(y_val, axis=1),
    predicted,
    labels=np.arange(len(target_names)),
    target_names=target_names))

                                                   precision    recall  f1-score   support

           Dependencies between design parameters       1.00      0.60      0.75         5
       Effects of design parameters on objectives       0.50      0.60      0.55        10
           Exploration of design parameter values       0.83      0.62      0.71         8
                      Monitoring objective values       0.42      0.71      0.53         7
                                              NAN       0.89      0.80      0.84        10
Selected design parameter values for objective(s)       1.00      0.25      0.40         4
                      Tradeoff between objectives       0.60      0.75      0.67         4

                                         accuracy                           0.65        48
                                        macro avg       0.75      0.62      0.63        48
                                     weighted avg       0.73      0.65      0.65        

In [59]:
# Translate predicted class ids into intent names. Reuses the target_names
# list defined earlier in the notebook instead of a second copy of the same
# literal, so the two cannot drift apart.
ans = np.array(target_names)[predicted]

In [60]:
# Show each raw validation message next to the intent predicted for it.
for chat_body, predicted_intent in zip(X_1, ans):
    print("Chat body: ", chat_body)
    print("Predicted intent: ", predicted_intent, "\n")

Chat body:  My design depends on the piston diameter, what happens if you decrease it? 
Predicted intent:  Dependencies between design parameters 

Chat body:  Does increasing the flywheel thickness increase or decrease "c"?
Predicted intent:  Effects of design parameters on objectives 

Chat body:  That is what my document specifies. I will keep c low unless the piston needs it to be higher
Predicted intent:  Effects of design parameters on objectives 

Chat body:  thats if D = 45 where D is the piston bore diameter
Predicted intent:  Dependencies between design parameters 

Chat body:  Does the flywheel shaft diameter "ds" affect any variables in your control?
Predicted intent:  Dependencies between design parameters 

Chat body:  But if i increase the thickness i could make it a lot higher
Predicted intent:  Effects of design parameters on objectives 

Chat body:  Increasing the offset increases the mass and decreases the factor of safety.
Predicted intent:  Tradeoff between objecti