In [1]:
import numpy as np

In [2]:
# Load the pre-trained word2vec model

from gensim.models.keyedvectors import KeyedVectors

# You need to download the Google pre-trained model using the link below:
# https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit
# Change the path below to match your own directory.

# Raw string: the Windows path contains backslashes (e.g. "\G") that are not
# valid escape sequences in a normal string literal and trigger a
# DeprecationWarning / SyntaxWarning on recent Python versions. The resulting
# path value is unchanged.
model_path = r'D:\GoogleNews_vectors_negative300\GoogleNews_vectors_negative300.bin'
# binary=True: the file is loaded as a binary word2vec-format file.
w2v_model = KeyedVectors.load_word2vec_format(model_path, binary=True)



In [3]:
class DocSim(object):
    """Convert a text document into one vector by averaging its word vectors.

    Parameters
    ----------
    w2v_model
        Object indexable by word (``w2v_model[word]``) that returns the word's
        vector and raises ``KeyError`` for unknown words.
    stopwords : collection of str, optional
        Words to skip before lookup. Documents are lower-cased before the
        comparison, so entries should be lower-case. Defaults to no stopwords.
    """

    def __init__(self, w2v_model, stopwords=None):
        self.w2v_model = w2v_model
        # A fresh list per instance: a literal ``[]`` default would be shared
        # by every DocSim created without explicit stopwords.
        self.stopwords = [] if stopwords is None else stopwords

    def vectorize(self, doc):
        """Return the mean of the vectors of all known words in ``doc``.

        The document is lower-cased and split on any run of whitespace
        (spaces, tabs, newlines). Stopwords and words missing from the model
        are ignored. Punctuation is not stripped, so a token such as
        ``"movement."`` is looked up verbatim.

        Parameters
        ----------
        doc : str
            The text to vectorize.

        Returns
        -------
        The element-wise mean of the word vectors that were found. If no word
        was found, the value is NumPy's mean of an empty list (NaN, emitted
        together with a RuntimeWarning) rather than a vector.
        """
        doc = doc.lower()
        # split() without an argument also handles tabs/newlines and repeated
        # spaces, which split(" ") would turn into unknown tokens.
        words = [w for w in doc.split() if w not in self.stopwords]
        word_vecs = []
        for word in words:
            try:
                word_vecs.append(self.w2v_model[word])
            except KeyError:
                # Deliberately skip words that are not in the vocabulary.
                continue

        # The document vector is assumed to be the mean of its word vectors.
        return np.mean(word_vecs, axis=0)

In [37]:
# Vectorize the EA rulebook rule (target rule) and quantize it to integers.

ds = DocSim(w2v_model)

rules = '2.2.1.1 The safety benefits of a system for maintaining space intervals between trains (see section 2.1 of this operational concept document) are compromised if a train proceeds without an authority for its movement.'

# Normalize a single rule to a list so one rule and several rules share the
# same code path. The processing below is deliberately outside this guard:
# when it was nested inside, a list of rules would have been skipped silently
# and main_vec_EA left undefined (or stale from an earlier run).
if isinstance(rules, str):
    rules = [rules]

x = ds.vectorize(" ".join(rules))   # mean word vector of the joined rule text
print(x)                            # the document vector (300 components per the original note)
print(type(x[0]))                   # element type of the vector
y = x*100000000                     # scale by 1e8 so 8 decimal places move into the integer part
main_vec_EA = y.astype(int)         # float -> int conversion; the fractional part is truncated, not rounded
print(main_vec_EA.shape)            # shape of the integer vector
print(main_vec_EA)

[ 1.34538012e-02  4.32942696e-02  8.60951766e-02  6.51537552e-02
 -7.59773254e-02 -5.02827950e-02  2.29860935e-02 -6.14217110e-02
  4.08172607e-02  2.64666881e-02 -7.42340088e-02 -7.17213973e-02
  1.95617676e-02  2.93019619e-02 -1.26561478e-01  5.12838364e-02
  1.88662205e-02  3.19341011e-02  4.25720215e-03 -5.42093925e-02
  5.24139404e-03 -1.90378819e-02 -8.87400284e-02  5.80949783e-02
  6.79873601e-02 -5.35074854e-03 -1.29161835e-01  9.32474136e-02
 -1.11738844e-02 -2.92612705e-02 -5.84538793e-03 -9.17002335e-02
 -5.75787239e-02 -1.98728237e-02  1.79901123e-02 -3.93406563e-02
 -5.30283451e-02 -2.54567470e-02  3.47633362e-02  4.42504883e-04
 -2.16776524e-02  2.96401978e-02  6.39902726e-02  1.53198242e-02
 -5.49799614e-02 -1.14622116e-01 -3.14515419e-02  8.31247941e-02
 -2.36663818e-02  4.33193855e-02  1.38518019e-02  4.96088676e-02
 -1.50273638e-02 -4.09817696e-02  8.56145192e-03  2.13902798e-02
 -3.64888497e-02 -3.08367405e-02  4.92900200e-02 -3.36176567e-02
 -4.85261269e-02  1.23990

In [38]:
# Print one signal assignment per vector component, in the form
# "S1_vec(i) <= value;", for pasting into the Vivado testbench.

for index in range(len(main_vec_EA)):
    value = main_vec_EA[index]
    print(f"S1_vec({index}) <= {value};")

S1_vec(0) <= 1345380;
S1_vec(1) <= 4329426;
S1_vec(2) <= 8609517;
S1_vec(3) <= 6515375;
S1_vec(4) <= -7597732;
S1_vec(5) <= -5028279;
S1_vec(6) <= 2298609;
S1_vec(7) <= -6142171;
S1_vec(8) <= 4081726;
S1_vec(9) <= 2646668;
S1_vec(10) <= -7423400;
S1_vec(11) <= -7172139;
S1_vec(12) <= 1956176;
S1_vec(13) <= 2930196;
S1_vec(14) <= -12656147;
S1_vec(15) <= 5128383;
S1_vec(16) <= 1886622;
S1_vec(17) <= 3193410;
S1_vec(18) <= 425720;
S1_vec(19) <= -5420939;
S1_vec(20) <= 524139;
S1_vec(21) <= -1903788;
S1_vec(22) <= -8874002;
S1_vec(23) <= 5809497;
S1_vec(24) <= 6798736;
S1_vec(25) <= -535074;
S1_vec(26) <= -12916183;
S1_vec(27) <= 9324741;
S1_vec(28) <= -1117388;
S1_vec(29) <= -2926127;
S1_vec(30) <= -584538;
S1_vec(31) <= -9170023;
S1_vec(32) <= -5757872;
S1_vec(33) <= -1987282;
S1_vec(34) <= 1799011;
S1_vec(35) <= -3934065;
S1_vec(36) <= -5302834;
S1_vec(37) <= -2545674;
S1_vec(38) <= 3476333;
S1_vec(39) <= 44250;
S1_vec(40) <= -2167765;
S1_vec(41) <= 2964019;
S1_vec(42) <= 6399027;
S1_v

In [39]:
# Vectorize the OCD rulebook rule (source rule) and quantize it to integers.

ds = DocSim(w2v_model)

# NOTE(review): "autority" looks like a misspelling of "authority". The text is
# left untouched because it determines the resulting vector (and the values
# already exported to the testbench) - verify against the source document.
rules = '2.2.1.1 Safety is compromised if a train proceeds without a movement autority'

# Normalize a single rule to a list so one rule and several rules share the
# same code path. The processing below is deliberately outside this guard:
# when it was nested inside, a list of rules would have been skipped silently
# and main_vec_OCD left undefined (or stale from an earlier run).
if isinstance(rules, str):
    rules = [rules]

x = ds.vectorize(" ".join(rules))   # mean word vector of the joined rule text
print(x)                            # the document vector (300 components per the original note)
print(type(x[0]))                   # element type of the vector
y = x*100000000                     # scale by 1e8 so 8 decimal places move into the integer part
main_vec_OCD = y.astype(int)        # float -> int conversion; the fractional part is truncated, not rounded
print(main_vec_OCD.shape)           # shape of the integer vector
print(main_vec_OCD)

[ 0.0767746  -0.00597763  0.109375    0.07145309 -0.07830811  0.00296021
  0.01101685 -0.07305908  0.11384583 -0.00817013 -0.08544922 -0.10095215
  0.01159668  0.04484558 -0.13621521  0.08744812  0.05703354  0.04449463
  0.02444458 -0.06213379  0.08985138  0.04165649 -0.04651642  0.08811951
  0.01948333 -0.0274353  -0.10290909  0.03647137  0.01586914 -0.0453949
  0.02858353 -0.04608154 -0.09906006 -0.02633667  0.03625488 -0.04343605
 -0.03178096 -0.00166321  0.03640747 -0.05722046 -0.04071045  0.02409363
  0.12742615 -0.06819153 -0.06832886 -0.09522247 -0.04205322  0.04959106
  0.08539581  0.00810242  0.00212669  0.08679199  0.04418945 -0.10202026
  0.03501892  0.00855255 -0.03404236 -0.03414536  0.01779175 -0.02732086
 -0.04037476  0.02577496  0.02252197 -0.10272217  0.05351257 -0.02786255
 -0.06092834  0.12841797 -0.05685425  0.0662632  -0.01501083  0.04388428
  0.05714417 -0.0275116  -0.12927246 -0.14615631  0.09205627  0.01477051
  0.05392456  0.13237953  0.08784485 -0.0682621   0.

In [40]:
# Print one signal assignment per vector component, in the form
# "S2_vec(i) <= value;", for pasting into the Vivado testbench.

for index in range(len(main_vec_OCD)):
    value = main_vec_OCD[index]
    print(f"S2_vec({index}) <= {value};")

S2_vec(0) <= 7677459;
S2_vec(1) <= -597763;
S2_vec(2) <= 10937500;
S2_vec(3) <= 7145309;
S2_vec(4) <= -7830810;
S2_vec(5) <= 296020;
S2_vec(6) <= 1101684;
S2_vec(7) <= -7305908;
S2_vec(8) <= 11384582;
S2_vec(9) <= -817012;
S2_vec(10) <= -8544921;
S2_vec(11) <= -10095214;
S2_vec(12) <= 1159667;
S2_vec(13) <= 4484558;
S2_vec(14) <= -13621520;
S2_vec(15) <= 8744812;
S2_vec(16) <= 5703353;
S2_vec(17) <= 4449462;
S2_vec(18) <= 2444458;
S2_vec(19) <= -6213378;
S2_vec(20) <= 8985137;
S2_vec(21) <= 4165649;
S2_vec(22) <= -4651641;
S2_vec(23) <= 8811950;
S2_vec(24) <= 1948332;
S2_vec(25) <= -2743530;
S2_vec(26) <= -10290908;
S2_vec(27) <= 3647136;
S2_vec(28) <= 1586914;
S2_vec(29) <= -4539489;
S2_vec(30) <= 2858352;
S2_vec(31) <= -4608154;
S2_vec(32) <= -9906005;
S2_vec(33) <= -2633666;
S2_vec(34) <= 3625488;
S2_vec(35) <= -4343605;
S2_vec(36) <= -3178095;
S2_vec(37) <= -166320;
S2_vec(38) <= 3640747;
S2_vec(39) <= -5722045;
S2_vec(40) <= -4071044;
S2_vec(41) <= 2409362;
S2_vec(42) <= 12742614;