Commit
commit d7ffbc7 (0 parents)
Showing 90 changed files with 13,034 additions and 0 deletions.
@@ -0,0 +1,3 @@
*.p
*.dat
*.pyc
@@ -0,0 +1,160 @@
# CRCN
Coherence + Recurrent Neural Network + Convolutional Neural Network

This project hosts the code for our NIPS 2015 paper.

+ Cesc Chunseong Park and Gunhee Kim. Expressing an Image Stream with a Sequence of Natural Sentences. In NIPS 2015.
[[pdf](http://www.cs.cmu.edu/~gunhee/publish/nips15_stream2text.pdf)]

## Reference

If you use this code as part of any published research, please acknowledge the following paper.

> @inproceedings{Cesc:2015:NIPS,
> author = {Cesc Chunseong Park and Gunhee Kim},
> title = "{Expressing an Image Stream with a Sequence of Natural Sentences}",
> booktitle = {NIPS},
> year = 2015
> }

# Running Code

```
git clone https://chunseong@bitbucket.org/chunseong/test-crcn.git crcn
```

## Pre-requisite

1. Stanford NLP

Download stanford-parser.jar, stanford-parser-3.5.2-models.jar, and englishPCFG.caseless.ser.gz.
```
wget http://nlp.stanford.edu/software/stanford-parser-full-2015-04-20.zip
wget http://nlp.stanford.edu/software/stanford-corenlp-full-2015-04-20.zip
unzip stanford-parser-full-2015-04-20.zip
unzip stanford-corenlp-full-2015-04-20.zip
mv stanford-parser-full-2015-04-20 stanford-parser
mv stanford-corenlp-full-2015-04-20 stanford-core
cd stanford-parser
jar xvf stanford-parser-3.5.2-models.jar
```
2. Brown coherence toolkit

We need the browncoherence package to extract the entity features.
```
wget https://bitbucket.org/melsner/browncoherence/get/d46d5cd3fc57.zip -O browncoherence.zip
unzip browncoherence.zip
mv melsner-browncoherence-d46d5cd3fc57 browncoherence
cd browncoherence
mkdir lib64
mkdir bin64
vim Makefile
```

In the Makefile, change
```
WORDNET = 1
```
to
```
WORDNET = 0
```
change
```
CFLAGS = $(WARNINGS) -Iinclude $(WNINCLUDE) $(TAO_PETSC_INCLUDE) $(GSLINCLUDE)
```
to
```
CFLAGS = $(WARNINGS) -Iinclude $(WNINCLUDE) $(TAO_PETSC_INCLUDE) $(GSLINCLUDE) -fpermissive
```
and change
```
WNLIBS = -L$(WNDIR)/lib -lWN
```
to
```
WNLIBS = -L$(WNDIR)/lib -lwordnet
```

Then build TestGrid.
```
make TestGrid
cd ..
```
3. Python modules

Install all dependencies.
```
for req in $(cat python_requirements.txt); do pip install $req; done
```
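
After installation, a quick import check can confirm the environment is usable. This is only a minimal sketch covering the main third-party packages that the training code in this commit imports; the keras code bundled in ./keras (which provides the CRCN/RCN layers) is added to sys.path by the training scripts themselves, so it is not checked here.
```python
# Sanity check (sketch): main third-party packages used by the training code.
import numpy
import scipy.io
import theano
import gensim

print "core dependencies import cleanly"
```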

## Make New Dataset

1. Prepare Dataset

Check the data format in json_data_format.txt; a sketch of the main fields is shown below.
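
For orientation, here is a hedged sketch (illustrative values only, written as a Python literal) of the fields that the scripts in this commit actually read; json_data_format.txt remains the authoritative reference. Image CNN features are stored separately in a .mat file (e.g. ./data/example.mat) whose 'feats' matrix holds one 4096-dimensional row per image.
```python
# Illustrative only: field names follow what the training scripts read;
# the values here are made up.
example_dataset = {
    "images": [
        {
            "docpath": "blogs/some_travel_post.html",  # source document of this image
            "filename": "0001.jpg",                    # image file name
            "sentences": [
                {
                    "raw": "We started the day at the market.",
                    # "tree" is added by the "Get Parsed Tree" step below
                    # (example.json -> example_tree.json).
                    "tree": "(ROOT (S ...))"
                }
            ]
        }
    ]
}
```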

2. Get Parsed Tree

We use the Stanford CoreNLP tools (implemented in Java) to extract the parse trees.
```
cd tree
python spliter_for_parser.py
javac -d . -cp .:./json-simple-1.1.1.jar:../stanford-core/stanford-corenlp-3.5.2.jar:../stanford-core/xom.jar:../stanford-core/stanford-corenlp-3.5.2-models.jar:../stanford-core/joda-time.jar:../stanford-core/jollyday.jar: StanfordCoreNlpTreeAdder.java
java -cp .:./json-simple-1.1.1.jar:../stanford-core/stanford-corenlp-3.5.2.jar:../stanford-core/xom.jar:../stanford-core/stanford-corenlp-3.5.2-models.jar:../stanford-core/joda-time.jar:../stanford-core/jollyday.jar: parser.StanfordCoreNlpTreeAdder
python merger_for_parser.py
```

## Training

Make a directory for the trained models.
```
mkdir model
```

1. Doc2Vec

First, we have to train the doc2vec model.
```
python doc2vec_training.py
```
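
To sanity-check the result, the model can be reloaded the same way the training script does. A minimal sketch (the tag '0' assumes your dataset has at least one image, since document tags are stringified image indices):
```python
# Load the trained doc2vec model and look up one paragraph vector.
from gensim import models

doc2vecmodel = models.Doc2Vec.load('./model/example.doc2vec')
vec = doc2vecmodel.docvecs['0']  # tags are str(image index); vectors are 300-d
print vec.shape
```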

2. RCN

Train the RCN model.
If you want to use the GPU (device 0 in this example), execute the command below.
```
CUDA_VISIBLE_DEVICES=0 THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python rcn_training.py
```
If you want to use the CPU instead, execute this command.
```
python rcn_training.py
```

3. CRCN

Train the CRCN model.
If you want to use the GPU (device 0 in this example), execute the command below.
```
CUDA_VISIBLE_DEVICES=0 THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python crcn_training.py
```
If you want to use the CPU instead, execute this command.
```
python crcn_training.py
```

## Generate Output

Generating output is easy. The program will load the training and test datasets, then automatically generate the output.
```
python generate_output.py
```

## Authors

[Cesc Chunseong Park](http://vision.snu.ac.kr/cesc/) and [Gunhee Kim](http://www.cs.cmu.edu/~gunhee/),
[Vision and Learning Lab](http://vision.snu.ac.kr/),
Seoul National University

## License

BSD license
@@ -0,0 +1,122 @@
import sys
sys.path.append("./keras")   # modified keras bundled in this repo (provides the CRCN/RCN layers)
sys.path.append("./entity")  # entity (coherence) feature code
import theano
from theano import tensor
from keras.models import Sequential
from keras.layers.embeddings import Embedding
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD
from keras.layers.recurrent import CRCN, RCN, SimpleDeepRNN
import numpy as np
import pickle
from gensim import models
import json
import os
import scipy.io
from entity_score import *

MAX_SEQ_LEN = 10

model = Sequential()
# the CRCN layer below returns sequences of 300-d vectors (our doc2vec embedding size)
model.add(CRCN(300, 300, return_sequences=True, activation='relu', init='he_normal'))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Embedding(300, 512, init='he_normal'))
model.add(Dropout(0.5))
model.add(Embedding(512, 4096, init='he_normal'))
model.add(Dropout(0.7))

model.compile(loss='crcn_cost_func', optimizer='rmsprop')
# Inputs prepared below:
# "Sentenceseq" has shape (nb_samples, MAX_SEQ_LEN+1, 300): doc2vec paragraph vectors,
#   with one extra 300-d slot holding the zero-padded entity (coherence) feature.
# "Imageseq" has shape (nb_samples, MAX_SEQ_LEN, 4096): CNN features of the images.

# sentence sequences: (number of training sequences, number of vectors, dimension)

DOC2VEC_MODEL_PATH = './model/example.doc2vec'
doc2vecmodel = models.Doc2Vec.load(DOC2VEC_MODEL_PATH)

jsonfile = open('./data/example_tree.json', 'r')

json_data = jsonfile.read()
jsondata = json.loads(json_data)
json_imgs = jsondata['images']

features_path = os.path.join('./data/', 'example.mat')
features_struct = scipy.io.loadmat(features_path)['feats'].transpose()

# group images (and their concatenated sentences/parse trees) by source document
contents = {}

for i, json_img in enumerate(json_imgs):
    pageurl = os.path.basename(json_img['docpath']).encode('ascii', 'ignore')
    imgfeature = features_struct[i]  # 1 * 4096
    concatstring = ""
    concattree = ""
    for sentence in json_img['sentences']:
        if sentence['raw'] not in concatstring:
            concatstring += sentence['raw']  # re.sub('\.+','.',sentence['raw'].encode('ascii','ignore')).replace('.','.\n')
            concattree += sentence['tree']
    if contents.has_key(pageurl):
        # already in
        contents[pageurl].append({'imgid': str(i), 'filename': json_img['filename'], 'feature': imgfeature, 'raw': concatstring, 'tree': concattree})
    else:
        contents[pageurl] = []
        contents[pageurl].append({'imgid': str(i), 'filename': json_img['filename'], 'feature': imgfeature, 'raw': concatstring, 'tree': concattree})

# data cleaning: every image must have a paired sentence vector,
# and sequences are cut to at most MAX_SEQ_LEN elements

data_list = []
for k, item in contents.iteritems():
    itemcopy = []
    for imgpair in item:
        try:
            doc2vecmodel.docvecs[imgpair['imgid']]  # keep only images with a doc2vec vector
            itemcopy.append(imgpair)
        except:
            pass
    if len(itemcopy) > 3:
        if len(itemcopy) > MAX_SEQ_LEN:
            iternum = len(itemcopy) / MAX_SEQ_LEN
            for i in range(0, iternum):
                data_list.append(itemcopy[i * MAX_SEQ_LEN:(i + 1) * MAX_SEQ_LEN])
            if len(itemcopy) - iternum * MAX_SEQ_LEN > 4:
                data_list.append(itemcopy[iternum * MAX_SEQ_LEN:])
        else:
            data_list.append(itemcopy)

# concatenate the parse trees of each sequence and extract entity (coherence) features
document_trees = []
for tdata_seq in data_list:
    document_tree = ""
    for tdata in tdata_seq:
        document_tree += tdata['tree']
    document_trees.append(document_tree)
print "entity feature extracting..."

entity_feat = entity_feature(document_trees)

training_num = len(data_list)
Sentenceseq = np.zeros((training_num, MAX_SEQ_LEN + 1, 300))
Imageseq = np.zeros((training_num, MAX_SEQ_LEN, 4096))

for i, seq_list in enumerate(data_list):
    for j, seq_elem in enumerate(seq_list):
        Imageseq[i][j] = seq_elem['feature']
        Sentenceseq[i][j] = doc2vecmodel.docvecs[seq_elem['imgid']]
    # the 64-d entity feature is zero-padded to 300-d and stored in the extra slot
    Sentenceseq[i][MAX_SEQ_LEN] = np.pad(entity_feat[i], (0, 300 - 64), 'constant', constant_values=0)

# train in stages, saving a checkpoint after every 5 epochs
for i in range(1, 20):
    print "Number of stage", i
    model.fit(Sentenceseq, Imageseq, batch_size=100, nb_epoch=5, validation_split=0.1, shuffle=True)
    print "Checkpoint saved"
    model.save_weights('./model/crcn_' + str(i) + '.hdf5')
@@ -0,0 +1,44 @@

from gensim import corpora, models, similarities
import json
import os

# from cpython cimport PyCObject_AsVoidPtr
# from scipy.linalg.blas import cblas

# ctypedef void (*saxpy_ptr) (const int *N, const float *alpha, const float *X, const int *incX, float *Y, const int *incY) nogil
# cdef saxpy_ptr saxpy=<saxpy_ptr>PyCObject_AsVoidPtr(cblas.saxpy._cpointer)

jsonfile = open('./data/example.json', 'r')
json_data = jsonfile.read()
jsondata = json.loads(json_data)
json_imgs = jsondata['images']

# one TaggedDocument per image: its (deduplicated) sentences concatenated,
# tagged with the stringified image index
sentences = []
for i, jsonimg in enumerate(json_imgs):
    concatpara = ""
    for sentence in jsonimg['sentences']:
        ensent = sentence['raw'].encode('ascii', 'ignore')
        if ensent not in concatpara:
            concatpara += ensent
    key = str(i)
    sentences.append(models.doc2vec.TaggedDocument(concatpara.split(), [key]))

model = models.Doc2Vec(size=300, alpha=0.025, min_alpha=0.025, window=8, min_count=5, seed=1, sample=1e-5, workers=4)  # use fixed learning rate
model.build_vocab(sentences)
for epoch in range(100):
    print epoch
    model.train(sentences)
    model.alpha -= 0.0001  # decrease the learning rate
    model.min_alpha = model.alpha
    # if epoch%200==0 and epoch!=0:
    #     print "save check point"
    #     accuracy_list=model.accuracy('./model/questions-words.txt')
    #     error=0
    #     correct=0
    #     for accuracy in accuracy_list:
    #         error=error+len(accuracy['incorrect'])
    #         correct=correct+len(accuracy['correct'])
    #     print "accuracy :", correct*1.0/(correct+error)
    #     model.save('./model/disney_model.doc2vec')
# model.init_sims(replace=True)

model.save('./model/example.doc2vec')