# EmbeddingLayer.py

In [2]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Mar 20 23:41:51 2017

@author: red-sky
"""


import numpy as np
import theano
from theano import tensor as T


class EmbeddingLayer(object):
    def __init__(self, num_vocab, word_dim, rng, embedding_w=None):
        '''
        word_dim :: dimension of the word embeddings
        num_vocab :: number of word embeddings in the vocabulary
        embedding_w :: pre-train word vector
        '''

        if embedding_w is None:
            word_vectors = rng.uniform(-1.0, 1.0, (num_vocab, word_dim))
            self.embedding_w = theano.shared(word_vectors,
                                             name="EmbeddingLayer_W") \
                .astype(theano.config.floatX)
        else:
            self.embedding_w = theano.shared(embedding_w,
                                             name="EmbeddingLayer_W") \
                .astype(theano.config.floatX)

        self.params = [self.embedding_w]
        self.infor = [num_vocab, word_dim]

    def words_ind_2vec(self, index):
        map_word_vectors = self.embedding_w[index]
        output = T.mean(map_word_vectors,  axis=0)
        return output, map_word_vectors


if __name__ == "__main__":
    rng = np.random.RandomState(220495)
    arrWords = T.ivector("words")
    EMBD = EmbeddingLayer(100, 150, rng=rng)
    Word2Vec = theano.function(
        inputs=[arrWords],
        outputs=EMBD.words_ind_2vec(arrWords)
    )
    Vec = Word2Vec([1, 2, 3, 4])
    Vec = Word2Vec([2, 3, 4])
    print("Dim: ", Vec.shape)
    print("Val: ", Vec)

AttributeError: 'list' object has no attribute 'shape'

# EventEmbedding.py

In [3]:
import sys
import json
import theano
import pickle
import os.path
import numpy as np
import theano.tensor as T
from SmallUtils import createShareVar, ADAM_OPTIMIZER
from EmbeddingLayer import EmbeddingLayer
from RoleDependentLayer import RoleDependentLayer

更改過num_K: 150 -> 80

word_dim?? -> 100

更改all_index

修改ob1_real fake, act, obj2的datatype

In [6]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 17:36:33 2017

@author: red-sky
"""
import sys
import json
import theano
import pickle
import os.path
import numpy as np
import theano.tensor as T
from SmallUtils import createShareVar, ADAM_OPTIMIZER
from EmbeddingLayer import EmbeddingLayer
from RoleDependentLayer import RoleDependentLayer


class Input(object):
    def __init__(self, object1, object1_fake, action, object2, rng,
                 vovab_length=4000, wordDim=100, trainedWordsVectors=None,):
        # Init Embeding layer, input vector of index and ouput average
        # of word vector as ref Ding et al 2014
        self.EMBD = EmbeddingLayer(vovab_length, wordDim, rng=rng,
                                   embedding_w=trainedWordsVectors)

        object1_vector, _ = self.EMBD.words_ind_2vec(object1)
        action_vector, _ = self.EMBD.words_ind_2vec(action)
        object2_vector, _ = self.EMBD.words_ind_2vec(object2)
        object1_vector_fake, _ = self.EMBD.words_ind_2vec(object1_fake)

        self.output = [object1_vector, object1_vector_fake,
                       action_vector, object2_vector]
        self.params = self.EMBD.params

    def get_params(self):
        trainParams = {
            "WordWvec": self.EMBD.embedding_w.get_value()
        }
        return(trainParams)


class ModelBody(object):
    def __init__(self, vectorObjects, rng, n_out, n_in,
                 trainedModelParams=None):
        if trainedModelParams is None:
            trainedModelParams = {
                "roleDependentLayer1_": {
                    "T": None, "W1": None, "W2": None, "b": None
                },
                "roleDependentLayer2_": {
                    "T": None, "W1": None, "W2": None, "b": None
                },
                "roleDependentLayer3_": {
                    "T": None, "W1": None, "W2": None, "b": None
                }
            }

        Obj1, Ob1_fake, Act, Obj2 = vectorObjects
        
        ##
#         print(trainedModelParams["roleDependentLayer1_"]["T"].shape)
#         print(n_in, n_out)
        self.RoleDepen1 = RoleDependentLayer(
            left_dependent=T.stack([Obj1, Ob1_fake], axis=0),
            right_dependent=Act,
            n_in=n_in, n_out=n_out, rng=rng,
            trainedParams=trainedModelParams,
            name="roleDependentLayer1_"
        )
        self.RoleDepen1_output = self.RoleDepen1.output

        self.RoleDepen2 = RoleDependentLayer(
            left_dependent=Obj2,
            right_dependent=Act,
            n_in=n_in, n_out=n_out, rng=rng,
            trainedParams=trainedModelParams,
            name="roleDependentLayer2_"
        )
        self.RoleDepen2_output = T.flatten(self.RoleDepen2.output, outdim=1)

        self.RoleDepen3 = RoleDependentLayer(
            left_dependent=self.RoleDepen1_output,
            right_dependent=self.RoleDepen2_output,
            n_in=n_out, n_out=n_out, rng=rng,
            trainedParams=trainedModelParams,
            name="roleDependentLayer3_"
        )

        self.params = self.RoleDepen1.params + self.RoleDepen2.params + \
            self.RoleDepen3.params

        self.L2 = (
            self.RoleDepen1.L2 +
            self.RoleDepen2.L2 +
            self.RoleDepen3.L2
        )
        self.output = self.RoleDepen3.output

    def get_params(self):
        trainedModelParams = {
            "roleDependentLayer1_": self.RoleDepen1.get_params(),
            "roleDependentLayer2_": self.RoleDepen2.get_params(),
            "roleDependentLayer3_": self.RoleDepen3.get_params()
        }
        return(trainedModelParams)


class LogisticRegression(object):

    def __init__(self, rng, layerInput, n_in, n_out,
                 paramsLayer=None,
                 name="LogisticRegression_"):

        self.layerInput = layerInput
        if paramsLayer is None:
            self.W = createShareVar(rng=rng, name=name+"W",
                                    factor_for_init=n_out + n_in,
                                    dim=(n_in, n_out))
        else:
            self.W = theano.shared(value=paramsLayer["W"],
                                   name=name+"W", borrow=True)

        if paramsLayer is None:
            b_values = np.zeros((n_out,), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values,
                                   name=name+"b", borrow=True)
        else:
            self.b = theano.shared(value=paramsLayer["b"],
                                   name=name+"b", borrow=True)

        step1 = T.dot(self.layerInput, self.W)
        self.prob_givenX = T.tanh(step1 + self.b)
        self.y_predict = T.argmax(self.prob_givenX, axis=1)

        self.params = [self.W, self.b]
        self.L2 = sum([(param**2).sum() for param in self.params])

    def get_params(self):
        trainedParams = {
            "W": self.W.get_value(), "b": self.b.get_value()
        }
        return(trainedParams)

    def neg_log_likelihood(self, y_true):
        y_true = T.cast(y_true, "int32")
        log_prob = T.log(self.prob_givenX)
        nll = -T.mean(log_prob[T.arange(y_true.shape[0]), y_true])
        return nll

    def margin_loss(self):
        loss = T.max([0, 1 - self.prob_givenX[0, 0] + self.prob_givenX[1, 0]])
        return loss

    def cal_errors(self, y_true):
        if y_true.ndim != self.y_predict.ndim:
            raise TypeError(
                "y should have the same shape as self.y_pred",
                ("y_true", y_true.ndim, "y_pred", self.y_predict.ndim)
            )
        if y_true.dtype.startswith("int"):
            return T.mean(T.neq(self.y_predict, y_true))
        else:
            raise TypeError(
                "y_true should have type int ...",
                ("y_true", y_true.type, "y_pred", self.y_predict.type)
            )


# def main(dataPath, trainedParamsPath="modelTrained.pickle",
#          outputVectorPath="resultEmbeding.pickle",
#          learning_rate=0.005, L2_reg=0.0001,
#          n_epochs=500, num_K=150, word_dim=150):
def main(dataPath, trainedParamsPath="modelTrained.pickle",
         outputVectorPath="resultEmbeding.pickle",
         learning_rate=0.005, L2_reg=0.0001,
         n_epochs=500, num_K=80, word_dim=150):
    # CONSTANT VARIABLES
    RNG = np.random.RandomState(220495 + 280295 + 1)
    LABEL_NUM = 2
    if os.path.isfile(trainedParamsPath):
        with open(trainedParamsPath, 'rb') as handle:
            trainedParams = pickle.load(handle)
    else:
        print("No Trained Model, create new")
        I2V = np.load('./input/index2vector2.npy')
        trainedParams = {
            "Input": {"WordWvec": I2V}, "Body": None, "Output": None
        }
#     print(trainedParams['Input']['WordWvec'])
    OPTIMIZER = ADAM_OPTIMIZER
    # INPUT DATA
    data_indexed_events = np.load(dataPath, allow_pickle=True)
    
    N_sample = len(data_indexed_events)
#    N_sample = 1
    all_index = []
    for i in data_indexed_events:
        for j in i:
            for k in j:
                all_index.append(k)
    all_index = list(set(all_index))
#     all_index = list(set(np.hstack(data_indexed_events.flat)))
#    all_train_index = list(set(np.hstack(data_indexed_events[0:NNN].flat)))
    # Snip tensor at begin
    object1 = T.ivector("object1")
    object1_fake = T.ivector("object1_fake")
    action = T.ivector("action")
    object2 = T.ivector("object2")

    constainY = theano.shared(
        np.asarray([1, 0], dtype=theano.config.floatX),
        borrow=True
    )

    # WORDS EMBEDING VECTOR
    wordsEmbedLayer = Input(
        object1=object1, object1_fake=object1_fake,
        action=action, object2=object2, rng=RNG,
        wordDim=word_dim, vovab_length=len(all_index),
        trainedWordsVectors=trainedParams["Input"]["WordWvec"]
    )
    
#     print(trainedParams['Input']['WordWvec'])
    Obj1, Ob1_fake, Act, Obj2 = wordsEmbedLayer.output

    # EVENTS EMBEDING LAYER - THREE ROLE DEPENTDENT LAYER
    eventsEmbedingLayer = ModelBody(
        vectorObjects=wordsEmbedLayer.output,
        n_out=num_K, n_in=word_dim, rng=RNG,
        trainedModelParams=trainedParams["Body"]
    )
    
#     print(trainedParams['Input']['WordWvec'])
    # CLASSIFY LAYER
    predict_layers = LogisticRegression(
        layerInput=eventsEmbedingLayer.output,
        rng=RNG, n_in=num_K, n_out=1,
        paramsLayer=trainedParams["Output"]
    )

    # COST FUNCTION
    COST = (
        predict_layers.margin_loss() +
        L2_reg * predict_layers.L2 +
        L2_reg * eventsEmbedingLayer.L2
    )

    # GRADIENT CALCULATION and UPDATE
    all_params = wordsEmbedLayer.params + \
        eventsEmbedingLayer.params + predict_layers.params
    print("TRAIN: ", all_params)

    UPDATE = OPTIMIZER(COST, all_params, learning_rate=learning_rate)

    # TRAIN MODEL
    GET_COST = theano.function(
        inputs=[object1, object1_fake, action, object2],
        outputs=[predict_layers.margin_loss(),
                 predict_layers.prob_givenX],
    )

#    TEST = theano.function(
#        inputs=[object1, object1_fake, action, object2],
#        outputs=eventsEmbedingLayer.RoleDepen2.test,
#        on_unused_input='warn'
#    )

    TRAIN = theano.function(
        inputs=[object1, object1_fake, action, object2],
        outputs=[predict_layers.margin_loss()],
        updates=UPDATE
    )

    GET_EVENT_VECTOR = theano.function(
        inputs=[object1, object1_fake, action, object2],
        outputs=[predict_layers.margin_loss(),
                 eventsEmbedingLayer.output],
    )

    def generate_fake_object(all_index, RNG, obj):
        fake_obj = list(RNG.choice(all_index, len(obj)))
        while sorted(fake_obj) == sorted(obj):
            print("WRONG faking object 1", obj)
            fake_obj = list(RNG.choice(all_index, len(obj)))
        return(fake_obj)

    def generate_list_object(data_indexed_events, all_index, RNG):
        list_fake_object1 = [
            generate_fake_object(all_index, RNG, events[0])
            for events in data_indexed_events
        ]
        list_real_object = set([
            "_".join([str(a) for a in sorted(events[0])])
            for events in data_indexed_events
        ])
        wrong = 0
        while True:
            valid = True
            wrong += 1
            for i, obj in enumerate(list_fake_object1):
                s = "_".join([str(a) for a in sorted(obj)])
                if s in list_real_object:
                    valid = valid and False
                    list_fake_object1[i] = \
                        generate_fake_object(all_index, RNG, s)
                else:
                    valid = valid and True
            if valid:
                break
        print("There are %d wrong random loops" % wrong)
        return(list_fake_object1)

    print("*"*72)
    print("Begin Training process")

    for epoch in range(n_epochs):
        # create false label
        print("Begin new epoch: %d" % epoch)

        list_fake_object1 = generate_list_object(data_indexed_events,
                                                 all_index, RNG)
        cost_of_epoch = []
        set_index = set(range(N_sample))
        temp_variable = N_sample
        print("*" * 72+"\n")
        print("*" * 72+"\n")
        # train
        model_train = {
            "Input": wordsEmbedLayer.get_params(),
            "Body": eventsEmbedingLayer.get_params(),
            "Output": predict_layers.get_params()
        }
        RESULT = {}
        outCOST = []
        Max_inter = len(set_index)*2
        iter_num = 0
        while len(set_index) > 0 and iter_num <= Max_inter:
            iter_num += 1
            index = set_index.pop()
            ob1_real, act, obj2 = data_indexed_events[index]
            ob1_fake = list_fake_object1[index]
            
            ob1_real = np.array(ob1_real, dtype=np.int32)
            ob1_fake = np.array(ob1_fake, dtype=np.int32)
            act = np.array(act, dtype=np.int32)
            obj2 = np.array(obj2, dtype=np.int32)

            cost, probY = GET_COST(ob1_real, ob1_fake, act, obj2)
            outCOST.append(cost)
#            test = TEST(ob1_real, ob1_fake, act, obj2)
#            for a in test:
#                print(a, a.shape)

            if cost > 0:
                set_index.add(index)
                c = TRAIN(ob1_real, ob1_fake, act, obj2)
            else:
                RESULT[index] = GET_EVENT_VECTOR(ob1_real, ob1_fake, act, obj2)

            if (len(set_index) % 50 == 0 and
                    temp_variable != len(set_index)):
                temp_variable = len(set_index)
                print("There are %f %% left in this %d "
                      "epoch with average cost %f"
                      % (len(set_index)/float(N_sample)*100,
                         epoch, np.mean(outCOST[-50:])))
#             if iter_num > Max_inter - 5:
#                 print(set_index, ob1_real, ob1_fake, act, obj2)

        with open(trainedParamsPath, 'wb') as handle:
            pickle.dump(model_train, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

        with open(outputVectorPath, 'wb') as handle:
            pickle.dump(RESULT, handle,
                        protocol=pickle.HIGHEST_PROTOCOL)

if __name__ == "__main__":
#     arg = ["", "Data/Query_Apple/2005-2010/IndexedEvents.npy",
#           "Data/Query_Apple/2005-2010/linhtinh/", "20"]
#     arg = sys.argv
    arg = ["", "/Users/arthur/Desktop/OpenIEStanza_EventEmbedding/input/event2index2.npy",
          "/Users/arthur/Desktop/OpenIEStanza_EventEmbedding/input/", "1"]
    main(dataPath=arg[1], trainedParamsPath=arg[2]+"TrainedParams2.pickle",
         outputVectorPath=arg[2]+"resultEmbeding2.pickle", n_epochs=int(arg[3]), word_dim=100)

No Trained Model, create new




TRAIN:  [EmbeddingLayer_W, roleDependentLayer1_T, roleDependentLayer1_W1, roleDependentLayer1_W2, roleDependentLayer1_b, roleDependentLayer2_T, roleDependentLayer2_W1, roleDependentLayer2_W2, roleDependentLayer2_b, roleDependentLayer3_T, roleDependentLayer3_W1, roleDependentLayer3_W2, roleDependentLayer3_b, LogisticRegression_W, LogisticRegression_b]
************************************************************************
Begin Training process
Begin new epoch: 0
WRONG faking object 1 [10043]
There are 3 wrong random loops
************************************************************************

************************************************************************

There are 99.988900 % left in this 0 epoch with average cost 0.994497
There are 99.967553 % left in this 0 epoch with average cost 0.940352
There are 99.946206 % left in this 0 epoch with average cost 0.958778
There are 99.924859 % left in this 0 epoch with average cost 0.781189
There are 99.903512 % left in this 0 epoch 

There are 97.683434 % left in this 0 epoch with average cost 0.840590
There are 97.662087 % left in this 0 epoch with average cost 0.771658
There are 97.640740 % left in this 0 epoch with average cost 0.869062
There are 97.619393 % left in this 0 epoch with average cost 0.733250
There are 97.598046 % left in this 0 epoch with average cost 0.693047
There are 97.576699 % left in this 0 epoch with average cost 0.695091
There are 97.555353 % left in this 0 epoch with average cost 0.757587
There are 97.534006 % left in this 0 epoch with average cost 0.768900
There are 97.512659 % left in this 0 epoch with average cost 0.670560
There are 97.491312 % left in this 0 epoch with average cost 0.754829
There are 97.469965 % left in this 0 epoch with average cost 0.610387
There are 97.448618 % left in this 0 epoch with average cost 0.788405
There are 97.427271 % left in this 0 epoch with average cost 0.669968
There are 97.405924 % left in this 0 epoch with average cost 0.669349
There are 97.384577 

There are 95.164499 % left in this 0 epoch with average cost 0.771365
There are 95.143152 % left in this 0 epoch with average cost 0.733846
There are 95.121805 % left in this 0 epoch with average cost 0.749346
There are 95.100459 % left in this 0 epoch with average cost 0.754251
There are 95.079112 % left in this 0 epoch with average cost 0.818450
There are 95.057765 % left in this 0 epoch with average cost 0.787683
There are 95.036418 % left in this 0 epoch with average cost 0.866281
There are 95.015071 % left in this 0 epoch with average cost 0.835504
There are 94.993724 % left in this 0 epoch with average cost 0.620011
There are 94.972377 % left in this 0 epoch with average cost 0.872610
There are 94.951030 % left in this 0 epoch with average cost 0.868929
There are 94.929683 % left in this 0 epoch with average cost 0.738596
There are 94.908336 % left in this 0 epoch with average cost 0.785501
There are 94.886989 % left in this 0 epoch with average cost 0.631302
There are 94.865643 

There are 92.645565 % left in this 0 epoch with average cost 0.734807
There are 92.624218 % left in this 0 epoch with average cost 0.575135
There are 92.602871 % left in this 0 epoch with average cost 0.685799
There are 92.581524 % left in this 0 epoch with average cost 0.668483
There are 92.560177 % left in this 0 epoch with average cost 0.942297
There are 92.538830 % left in this 0 epoch with average cost 0.801527
There are 92.517483 % left in this 0 epoch with average cost 0.594946
There are 92.496136 % left in this 0 epoch with average cost 0.636710
There are 92.474789 % left in this 0 epoch with average cost 0.669309
There are 92.453442 % left in this 0 epoch with average cost 0.708568
There are 92.432095 % left in this 0 epoch with average cost 0.579037
There are 92.410749 % left in this 0 epoch with average cost 0.819234
There are 92.389402 % left in this 0 epoch with average cost 0.572697
There are 92.368055 % left in this 0 epoch with average cost 0.895907
There are 92.346708 

There are 90.126630 % left in this 0 epoch with average cost 0.636070
There are 90.105283 % left in this 0 epoch with average cost 0.603029
There are 90.083936 % left in this 0 epoch with average cost 0.846683
There are 90.062589 % left in this 0 epoch with average cost 0.747341
There are 90.041242 % left in this 0 epoch with average cost 0.811997
There are 90.019895 % left in this 0 epoch with average cost 0.755912
There are 89.998548 % left in this 0 epoch with average cost 0.679138
There are 89.977202 % left in this 0 epoch with average cost 0.799716
There are 89.955855 % left in this 0 epoch with average cost 0.715542
There are 89.934508 % left in this 0 epoch with average cost 0.794693
There are 89.913161 % left in this 0 epoch with average cost 0.782743
There are 89.891814 % left in this 0 epoch with average cost 0.818303
There are 89.870467 % left in this 0 epoch with average cost 0.831712
There are 89.849120 % left in this 0 epoch with average cost 0.651800
There are 89.827773 

There are 87.607695 % left in this 0 epoch with average cost 0.681166
There are 87.586348 % left in this 0 epoch with average cost 0.747666
There are 87.565001 % left in this 0 epoch with average cost 0.732531
There are 87.543654 % left in this 0 epoch with average cost 0.897894
There are 87.522308 % left in this 0 epoch with average cost 0.771406
There are 87.500961 % left in this 0 epoch with average cost 0.898862
There are 87.479614 % left in this 0 epoch with average cost 0.636607
There are 87.458267 % left in this 0 epoch with average cost 0.563004
There are 87.436920 % left in this 0 epoch with average cost 0.559960
There are 87.415573 % left in this 0 epoch with average cost 0.758301
There are 87.394226 % left in this 0 epoch with average cost 0.825131
There are 87.372879 % left in this 0 epoch with average cost 0.700197
There are 87.351532 % left in this 0 epoch with average cost 0.746372
There are 87.330185 % left in this 0 epoch with average cost 0.666109
There are 87.308838 

There are 85.088760 % left in this 0 epoch with average cost 0.831826
There are 85.067414 % left in this 0 epoch with average cost 0.733215
There are 85.046067 % left in this 0 epoch with average cost 0.713457
There are 85.024720 % left in this 0 epoch with average cost 0.606279
There are 85.003373 % left in this 0 epoch with average cost 0.795378
There are 84.982026 % left in this 0 epoch with average cost 0.874221
There are 84.960679 % left in this 0 epoch with average cost 1.039737
There are 84.939332 % left in this 0 epoch with average cost 0.845442
There are 84.917985 % left in this 0 epoch with average cost 0.941468
There are 84.896638 % left in this 0 epoch with average cost 0.857356
There are 84.875291 % left in this 0 epoch with average cost 0.778380
There are 84.853944 % left in this 0 epoch with average cost 0.705068
There are 84.832598 % left in this 0 epoch with average cost 0.844496
There are 84.811251 % left in this 0 epoch with average cost 0.770024
There are 84.789904 

There are 82.569826 % left in this 0 epoch with average cost 0.814242
There are 82.548479 % left in this 0 epoch with average cost 0.874643
There are 82.527132 % left in this 0 epoch with average cost 0.740445
There are 82.505785 % left in this 0 epoch with average cost 0.687048
There are 82.484438 % left in this 0 epoch with average cost 0.923671
There are 82.463091 % left in this 0 epoch with average cost 0.704847
There are 82.441744 % left in this 0 epoch with average cost 0.897213
There are 82.420397 % left in this 0 epoch with average cost 0.760362
There are 82.399050 % left in this 0 epoch with average cost 0.862688
There are 82.377704 % left in this 0 epoch with average cost 0.773591
There are 82.356357 % left in this 0 epoch with average cost 0.730383
There are 82.335010 % left in this 0 epoch with average cost 1.037757
There are 82.313663 % left in this 0 epoch with average cost 0.844981
There are 82.292316 % left in this 0 epoch with average cost 0.777833
There are 82.270969 

There are 80.050891 % left in this 0 epoch with average cost 0.741580
There are 80.029544 % left in this 0 epoch with average cost 0.911338
There are 80.008197 % left in this 0 epoch with average cost 0.829777
There are 79.986850 % left in this 0 epoch with average cost 0.654949
There are 79.965503 % left in this 0 epoch with average cost 0.626267
There are 79.944156 % left in this 0 epoch with average cost 0.902206
There are 79.922810 % left in this 0 epoch with average cost 0.760560
There are 79.901463 % left in this 0 epoch with average cost 0.797917
There are 79.880116 % left in this 0 epoch with average cost 0.750542
There are 79.858769 % left in this 0 epoch with average cost 0.896113
There are 79.837422 % left in this 0 epoch with average cost 0.759152
There are 79.816075 % left in this 0 epoch with average cost 0.694114
There are 79.794728 % left in this 0 epoch with average cost 0.870960
There are 79.773381 % left in this 0 epoch with average cost 0.725098
There are 79.752034 

There are 77.531956 % left in this 0 epoch with average cost 0.800258
There are 77.510609 % left in this 0 epoch with average cost 0.721286
There are 77.489263 % left in this 0 epoch with average cost 0.743629
There are 77.467916 % left in this 0 epoch with average cost 0.700834
There are 77.446569 % left in this 0 epoch with average cost 0.802197
There are 77.425222 % left in this 0 epoch with average cost 0.740567
There are 77.403875 % left in this 0 epoch with average cost 0.851850
There are 77.382528 % left in this 0 epoch with average cost 0.815870
There are 77.361181 % left in this 0 epoch with average cost 0.631679
There are 77.339834 % left in this 0 epoch with average cost 0.764005
There are 77.318487 % left in this 0 epoch with average cost 0.851835
There are 77.297140 % left in this 0 epoch with average cost 0.773982
There are 77.275793 % left in this 0 epoch with average cost 0.558883
There are 77.254447 % left in this 0 epoch with average cost 0.739836
There are 77.233100 

There are 75.013022 % left in this 0 epoch with average cost 0.811210
There are 74.991675 % left in this 0 epoch with average cost 0.731817
There are 74.970328 % left in this 0 epoch with average cost 0.664340
There are 74.948981 % left in this 0 epoch with average cost 0.674898
There are 74.927634 % left in this 0 epoch with average cost 0.711191
There are 74.906287 % left in this 0 epoch with average cost 0.545648
There are 74.884940 % left in this 0 epoch with average cost 0.841985
There are 74.863593 % left in this 0 epoch with average cost 0.825509
There are 74.842246 % left in this 0 epoch with average cost 0.817456
There are 74.820899 % left in this 0 epoch with average cost 0.629096
There are 74.799553 % left in this 0 epoch with average cost 0.703804
There are 74.778206 % left in this 0 epoch with average cost 0.940185
There are 74.756859 % left in this 0 epoch with average cost 0.529802
There are 74.735512 % left in this 0 epoch with average cost 0.613720
There are 74.714165 

There are 72.494087 % left in this 0 epoch with average cost 0.710564
There are 72.472740 % left in this 0 epoch with average cost 0.631752
There are 72.451393 % left in this 0 epoch with average cost 0.755137
There are 72.430046 % left in this 0 epoch with average cost 0.612871
There are 72.408699 % left in this 0 epoch with average cost 0.653136
There are 72.387352 % left in this 0 epoch with average cost 0.654775
There are 72.366005 % left in this 0 epoch with average cost 0.783609
There are 72.344659 % left in this 0 epoch with average cost 0.691031
There are 72.323312 % left in this 0 epoch with average cost 0.629411
There are 72.301965 % left in this 0 epoch with average cost 0.819410
There are 72.280618 % left in this 0 epoch with average cost 0.868826
There are 72.259271 % left in this 0 epoch with average cost 0.733392
There are 72.237924 % left in this 0 epoch with average cost 0.717786
There are 72.216577 % left in this 0 epoch with average cost 0.676713
There are 72.195230 

KeyboardInterrupt: 

# RoleDependentLayer.py

In [4]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 16:13:18 2017

@author: red-sky
"""

import theano
import numpy as np
import theano.tensor as T
from SmallUtils import createShareVar


class RoleDependentLayer(object):
    def __init__(self, left_dependent, right_dependent, rng,
                 n_in=100, n_out=4, trainedParams=None,
                 name="RoleDependentEmbedding_"):
        if trainedParams is None:
            trainedParams = {
                name: {
                    "T": None, "W1": None, "W2": None, "b": None
                }
            }
#        print(name)
        if trainedParams[name]["T"] is not None:
#            print(trainedParams[name]["T"])
#            print(trainedParams[name]["T"].shape)
#            print(n_out, n_in, n_in)
            assert trainedParams[name]["T"].shape == (n_out, n_in, n_in)
            self.T = theano.shared(value=trainedParams[name]["T"],
                                   name=name+"T", borrow=True)
        else:
            self.T = createShareVar(rng=rng, name=name+"T",
                                    factor_for_init=n_out + n_in,
                                    dim=(n_out, n_in, n_in))

        if trainedParams[name]["W1"] is not None:
            assert trainedParams[name]["W1"].shape == (n_in, n_out)
            self.W1 = theano.shared(value=trainedParams[name]["W1"],
                                    name=name+"W1", borrow=True)
        else:
            self.W1 = createShareVar(rng=rng, name=name+"W1",
                                     factor_for_init=n_out + n_in,
                                     dim=(n_in, n_out))

        if trainedParams[name]["W2"] is not None:
            assert trainedParams[name]["W2"].shape == (n_in, n_out)
            self.W2 = theano.shared(value=trainedParams[name]["W2"],
                                    name=name+"W2", borrow=True)
        else:
            self.W2 = createShareVar(rng=rng, name=name+"W2",
                                     factor_for_init=n_out + n_in,
                                     dim=(n_in, n_out))

        if trainedParams[name]["b"] is not None:
            assert trainedParams[name]["b"].shape == (n_out,)
            self.b = theano.shared(value=trainedParams[name]["b"],
                                   name=name+"b", borrow=True)
        else:
            b_values = np.zeros(shape=(n_out,), dtype=theano.config.floatX)
            self.b = theano.shared(value=b_values, name=name+"b", borrow=True)

        # list of layer params
        self.params = [self.T, self.W1, self.W2, self.b]

        # L2 regulation
        self.L2 = sum([(param**2).sum() for param in self.params])

        # Bi-linear step
        def one_kernel(Tk, left, right):
            first_bi_libear = theano.dot(left, Tk)
            seccon_bi_linear = theano.dot(first_bi_libear, right)
            return(seccon_bi_linear.flatten())

        bi_1, _ = theano.scan(
            fn=one_kernel,
            sequences=[self.T],
            non_sequences=[left_dependent, right_dependent],
            n_steps=n_out
        )

        # Feed forward network step
        feedforward_step1 = theano.dot(left_dependent, self.W1)
        feedforward_step2 = theano.dot(right_dependent, self.W2)
        feedforward_step3 = (feedforward_step1 +
                             feedforward_step2.dimshuffle("x", 0) +
                             self.b.dimshuffle("x", 0))
        feedforward_step4 = bi_1.dimshuffle(1, 0) + feedforward_step3
        self.output = theano.tensor.tanh(feedforward_step4)
        self.test = [feedforward_step3]

    def output_(self, left_dependent, right_dependent):

        def one_kernel(Tk, left, right):
            first_bi_libear = theano.dot(left, Tk)
            seccon_bi_linear = theano.dot(first_bi_libear, right)
            return(seccon_bi_linear.flatten())

        bi_linear_tensor, _ = theano.scan(
            fn=one_kernel,
            sequences=[self.T],
            non_sequences=[left_dependent, right_dependent],
            n_steps=n_out
        )

        bi_linear_tensor = bi_linear_tensor.dimshuffle(1, 0)
        feedforward_step1 = theano.dot(left_dependent, self.W1)
        feedforward_step2 = theano.dot(right_dependent, self.W2)
        feedforward_step3 = (feedforward_step1 +
                             feedforward_step2.dimshuffle("x", 0) +
                             self.b.dimshuffle("x", 0))
        feedforward_step4 = bi_linear_tensor + feedforward_step3
        output = theano.tensor.tanh(feedforward_step4)
        return(output)

    def get_params(self):
        trainedParams = {
            "T": self.T.get_value(), "W1": self.W1.get_value(),
            "W2": self.W2.get_value(), "b": self.b.get_value()
        }
        return(trainedParams)

# SmallUtils.py

In [5]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 25 15:55:14 2017

@author: red-sky
"""
import theano
import theano.tensor as T
import numpy as np

def createShareVar(rng, dim, name, factor_for_init):
    var_values = np.asarray(
        rng.uniform(
            low=-np.sqrt(6.0 / factor_for_init),
            high=np.sqrt(6.0 / factor_for_init),
            size=dim,
        )
    )
    Var = theano.shared(value=var_values, name=name, borrow=True)
    return Var


def adadelta(lr, tparams, cost, grads, listInput):
    """
    An adaptive learning rate optimizer

    Parameters
    ----------
    lr : Theano SharedVariable
        Initial learning rate
    tpramas: Theano SharedVariable
        Model parameters
    grads: Theano variable
        Gradients of cost w.r.t to parameres

    cost: Theano variable
        Objective fucntion to minimize

    Notes
    -----
    For more information, see [ADADELTA]_.

    .. [ADADELTA] Matthew D. Zeiler, *ADADELTA: An Adaptive Learning
       Rate Method*, arXiv:1212.5701.
    """
    np_float = np.asarray(0., dtype=theano.config.floatX)
    zipped_grads = [theano.shared(p.get_value() * np_float,
                                  name='%s_grad' % k)
                    for k, p in enumerate(tparams)]
    running_up2 = [theano.shared(p.get_value() * np_float,
                                 name='%s_rup2' % k)
                   for k, p in enumerate(tparams)]
    running_grads2 = [theano.shared(p.get_value() * np_float,
                                    name='%s_rgrad2' % k)
                      for k, p in enumerate(tparams)]

    zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)]
    rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2))
             for rg2, g in zip(running_grads2, grads)]

    f_grad_shared = theano.function(inputs=listInput,
                                    outputs=cost,
                                    updates=zgup + rg2up,
                                    name='adadelta_f_grad_shared')

    updir = [-T.sqrt(ru2 + 1e-6) / T.sqrt(rg2 + 1e-6) * zg
             for zg, ru2, rg2 in zip(zipped_grads,
                                     running_up2,
                                     running_grads2)]
    ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2))
             for ru2, ud in zip(running_up2, updir)]
    param_up = [(p, p + ud) for p, ud in zip(tparams, updir)]

    f_update = theano.function([lr], [], updates=ru2up + param_up,
                               on_unused_input='ignore',
                               name='adadelta_f_update')

    return f_grad_shared, f_update


def ADAM_OPTIMIZER(loss, all_params, learning_rate=0.001,
                   b1=0.9, b2=0.999, e=1e-8, gamma=1-1e-8):
    """
    CITE: http://sebastianruder.com/optimizing-gradient-descent/index.html#adam
    ADAM update rules
    Default values are taken from [Kingma2014]
    References:
    [Kingma2014] Kingma, Diederik, and Jimmy Ba.
    "Adam: A Method for Stochastic Optimization."
    arXiv preprint arXiv:1412.6980 (2014).
    http://arxiv.org/pdf/1412.6980v4.pdf
    """
    updates = []
    all_grads = theano.grad(loss, all_params)
    alpha = learning_rate
    t = theano.shared(np.float32(1))
    # (Decay the first moment running average coefficient)
    b1_t = b1*gamma**(t-1)

    for params_previous, g in zip(all_params, all_grads):
        init_moment = np.zeros(params_previous.get_value().shape,
                               dtype=theano.config.floatX)
        # (the mean)
        first_moment = theano.shared(init_moment)
        # (the uncentered variance)
        second_moment = theano.shared(init_moment)

        # (Update biased first moment estimate)
        bias_m = b1_t*first_moment + (1 - b1_t)*g

        # (Update biased second raw moment estimate)
        bias_v = b2*second_moment + (1 - b2)*g**2

        # (Compute bias-corrected first moment estimate)
        unbias_m = bias_m / (1-b1**t)

        # (Compute bias-corrected second raw moment estimate)
        unbias_v = bias_v / (1-b2**t)

        # (Update parameters)
        update_term = (alpha * unbias_m) / (T.sqrt(unbias_v) + e)
        params_new = params_previous - update_term

        updates.append((first_moment, bias_m))
        updates.append((second_moment, bias_v))
        updates.append((params_previous, params_new))
    updates.append((t, t + 1.))
    return updates


## 下面不用了

In [23]:
import os
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor, Lambda
# pip install torchvision

In [5]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [37]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [None]:
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10),
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork()

In [8]:
X = torch.rand(1, 28, 28, device=device)
logits = model(X)
pred_probab = nn.Softmax(dim=1)(logits)
y_pred = pred_probab.argmax(1)
print(f"Predicted class: {y_pred}")

Predicted class: tensor([2])


In [13]:
X
X[0, 0, 1]

tensor(0.9494)

In [19]:
#1
input_image = torch.rand(3,28,28)
print(input_image.size())

#2
flatten = nn.Flatten()
flat_image = flatten(input_image)
print(flat_image.size())

#3
layer1 = nn.Linear(in_features=28*28, out_features=20)
hidden1 = layer1(flat_image)
print(hidden1.size())

#4
print(f"Before ReLU: {hidden1}\n\n")
hidden1 = nn.ReLU()(hidden1)
print(f"After ReLU: {hidden1}")

#5
seq_modules = nn.Sequential(
    flatten,
    layer1,
    nn.ReLU(),
    nn.Linear(20, 10)
)
input_image = torch.rand(3,28,28)
logits = seq_modules(input_image)

#6
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
print(pred_probab)

#7
print(f"Model structure: {model}\n\n")

for name, param in model.named_parameters():
    print(f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n")

torch.Size([3, 28, 28])
torch.Size([3, 784])
torch.Size([3, 20])
Before ReLU: tensor([[ 0.0305, -0.5532,  0.7130,  0.8898,  0.0230, -0.2422, -0.2477, -0.2424,
         -0.0642, -0.1797,  0.2153,  0.0757,  0.2777,  0.0609,  0.1148, -0.9117,
          0.3570, -0.4060,  0.4346,  0.3369],
        [ 0.1104, -0.2133,  0.6152,  0.4745, -0.0112,  0.0866, -0.3030, -0.4700,
          0.0877, -0.2403,  0.0436,  0.0757,  0.0117, -0.0613, -0.1673, -0.9643,
          0.4333, -0.3328,  0.6907,  0.1493],
        [ 0.0368, -0.3006,  0.8761,  0.7100,  0.0723,  0.3564, -0.4708, -0.2543,
         -0.0399, -0.2391,  0.0966,  0.3240,  0.1662,  0.2291, -0.1094, -0.9933,
          0.5804, -0.2566,  0.5009,  0.3386]], grad_fn=<AddmmBackward0>)


After ReLU: tensor([[0.0305, 0.0000, 0.7130, 0.8898, 0.0230, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.2153, 0.0757, 0.2777, 0.0609, 0.1148, 0.0000, 0.3570, 0.0000,
         0.4346, 0.3369],
        [0.1104, 0.0000, 0.6152, 0.4745, 0.0000, 0.0866, 0.0000, 0.00

In [28]:
torch.tensor(np.array([2, 1]))

tensor([2, 1])

In [29]:
O1 = torch.rand(50,100,1)
P = torch.rand(50,100,1)
O2 = torch.rand(50,100,1)

In [38]:
pwd

'/Users/arthur/Desktop/OpenIEStanza_EventEmbedding'