## Setup

In [1]:
import os, sys, re, json, time, unittest, datetime, shutil
import itertools, collections
from importlib import reload
from IPython.display import display, HTML

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss

**Note:** The data below is only on the 10k datasets for now. This will be updated to leverage the full datasets. 

# Loading Data

In [2]:
users_df = pd.read_json("../dataset/user_10k.json", lines=True)


In [3]:
users_df.head()

Unnamed: 0,average_stars,compliment_cool,compliment_cute,compliment_funny,compliment_hot,compliment_list,compliment_more,compliment_note,compliment_photos,compliment_plain,...,cool,elite,fans,friends,funny,name,review_count,useful,user_id,yelping_since
0,4.67,0,0,0,0,0,0,0,0,1,...,0,[],0,"[cvVMmlU1ouS3I5fhutaryQ, nj6UZ8tdGo8YJ9lUMTVWN...",0,Johnny,8,0,oMy_rEb0UBEmMlu-zcxnoQ,2014-11-03
1,3.7,0,0,0,0,0,0,0,0,0,...,0,[],0,"[0njfJmB-7n84DlIgUByCNw, rFn3Xe3RqHxRSxWOU19Gp...",0,Chris,10,0,JJ-aSuM4pCFPdkfoZ34q0Q,2013-09-24
2,2.0,0,0,0,0,0,0,0,0,0,...,0,[],0,[],0,Tiffy,1,0,uUzsFQn_6cXDh6rPNGbIFA,2017-03-02
3,4.67,0,0,0,0,0,0,0,0,0,...,0,[],0,[],0,Mark,6,0,mBneaEEH5EMyxaVyqS-72A,2015-03-13
4,4.67,0,0,0,0,0,0,0,0,0,...,0,[],0,[],0,Evelyn,3,0,W5mJGs-dcDWRGEhAzUYtoA,2016-09-08


In [4]:
# All types of reviews - 10K dataset
# reviews_df = pd.read_json("../dataset/review_10k.json", lines=True)

# Just restaurant reviews - 10K dataset
reviews_df = pd.read_json("../dataset/restaurant_reviews_10k.json", lines=True)

# All types of reviews
# reviews_df = pd.read_json("../../../final_project/full_dataset/review.json", lines=True)

In [5]:
reviews_df.head()

Unnamed: 0,business_id,cool,date,funny,stars,text,useful,user_id
0,--6MefnULPED_I942VcFNA,0,2017-08-17,0,4,This is one of my top 3 places to get BBQ pork...,2,FEg8v92qx3kK4Hu4TF28Fg
1,--6MefnULPED_I942VcFNA,0,2017-05-31,0,3,This restaurant is famous for their BBQ dishes...,0,HPtjvIrhzAUkKsiVkeT4MA
2,--6MefnULPED_I942VcFNA,0,2016-10-23,0,2,Roasted pork is one of my favorite things... A...,1,MpvqV7lQcl15rflTBEUhXA
3,--6MefnULPED_I942VcFNA,0,2017-07-30,0,2,I walked by the restaurant more than 5 years a...,1,x-Gbs8sVid3yhJIoHD6Gfw
4,--6MefnULPED_I942VcFNA,0,2017-02-07,1,2,I came here to order a roast duck over rice to...,0,7Dykd1HolQx8mKPYhYDYSg


In [6]:
reviews_df.shape

(10000, 8)

In [7]:
business_df = pd.read_json("../dataset/business_10k.json", lines=True)

In [8]:
business_df.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,neighborhood,postal_code,review_count,stars,state
0,"4855 E Warner Rd, Ste B9","{'AcceptsInsurance': True, 'ByAppointmentOnly'...",FYWN1wneV18bWNgQjJ2GNg,"[Dentists, General Dentistry, Health & Medical...",Ahwatukee,"{'Friday': '7:30-17:00', 'Tuesday': '7:30-17:0...",1,33.33069,-111.978599,Dental by Design,,85044,22,4.0,AZ
1,3101 Washington Rd,"{'BusinessParking': {'garage': False, 'street'...",He-G7vWjzVUysIKrfNbPUQ,"[Hair Stylists, Hair Salons, Men's Hair Salons...",McMurray,"{'Monday': '9:00-20:00', 'Tuesday': '9:00-20:0...",1,40.291685,-80.1049,Stephen Szabo Salon,,15317,11,3.0,PA
2,"6025 N 27th Ave, Ste 1",{},KQPW8lFf1y5BT2MxiSZ3QA,"[Departments of Motor Vehicles, Public Service...",Phoenix,{},1,33.524903,-112.11531,Western Motor Vehicle,,85017,18,1.5,AZ
3,"5000 Arizona Mills Cr, Ste 435","{'BusinessAcceptsCreditCards': True, 'Restaura...",8DShNS-LuFqpEWIp0HxijA,"[Sporting Goods, Shopping]",Tempe,"{'Monday': '10:00-21:00', 'Tuesday': '10:00-21...",0,33.383147,-111.964725,Sports Authority,,85282,9,3.0,AZ
4,581 Howe Ave,"{'Alcohol': 'full_bar', 'HasTV': True, 'NoiseL...",PfOCPjBrlQAnz__NXj9h_w,"[American (New), Nightlife, Bars, Sandwiches, ...",Cuyahoga Falls,"{'Monday': '11:00-1:00', 'Tuesday': '11:00-1:0...",1,41.119535,-81.47569,Brick House Tavern + Tap,,44221,116,3.5,OH


In [9]:
checkin_df = pd.read_json("../dataset/checkin_10k.json", lines=True)

In [10]:
checkin_df.head()

Unnamed: 0,business_id,time
0,7KPBkxAOEtb3QeIL9PEErg,"{'Thursday': {'21:00': 4, '1:00': 1, '4:00': 1..."
1,kREVIrSBbtqBhIYkTccQUg,"{'Monday': {'13:00': 1}, 'Thursday': {'20:00':..."
2,tJRDll5yqpZwehenzE2cSg,"{'Monday': {'12:00': 1, '1:00': 1}, 'Friday': ..."
3,r1p7RAMzCV_6NPF0dNoR3g,"{'Thursday': {'23:00': 1}, 'Saturday': {'21:00..."
4,mDdqgfrvROGAumcQdZ3HIg,"{'Monday': {'12:00': 1, '21:00': 1}, 'Wednesda..."


In [11]:
photos_df = pd.read_json("../dataset/photos_10k.json", lines=True)

In [12]:
photos_df.head()

Unnamed: 0,business_id,caption,label,photo_id
0,OnAzbTDn79W6CFZIriqLrA,,inside,soK1szeyan202jnsGhUDmA
1,OnAzbTDn79W6CFZIriqLrA,,inside,dU7AyRB_fHOZkflodEyN5A
2,OnAzbTDn79W6CFZIriqLrA,,outside,6T1qlbBdKkXA1cDNqMjg2g
3,OnAzbTDn79W6CFZIriqLrA,Bakery area,inside,lHhMNhCA7rAZmi-MMfF3ZA
4,XaeCGHZzsMwvFcHYq3q9sA,,food,oHSCeyoK9oLIGaCZq-wRJw


In [13]:
tip_df = pd.read_json("../dataset/tip_10k.json", lines=True)

In [14]:
tip_df.head()

Unnamed: 0,business_id,date,likes,text,user_id
0,tJRDll5yqpZwehenzE2cSg,2012-07-15,0,Get here early enough to have dinner.,zcTZk7OG8ovAmh_fenH21g
1,jH19V2I9fIslnNhDzPmdkA,2015-08-12,0,Great breakfast large portions and friendly wa...,ZcLKXikTHYOnYt5VYRO5sg
2,dAa0hB2yrnHzVmsCkN4YvQ,2014-06-20,0,Nice place. Great staff. A fixture in the tow...,oaYhjqBbh18ZhU0bpyzSuw
3,dAa0hB2yrnHzVmsCkN4YvQ,2016-10-12,0,Happy hour 5-7 Monday - Friday,ulQ8Nyj7jCUR8M83SUMoRQ
4,ESzO3Av0b1_TzKOiqzbQYQ,2017-01-28,0,"Parking is a premium, keep circling, you will ...",ulQ8Nyj7jCUR8M83SUMoRQ


# Baseline Model

In [15]:
n_features = 100000

text = reviews_df["text"]

print("Fitting Count Vectorizer")
# vectorizer = CountVectorizer(max_df=0.95, min_df=2,
#                                 max_features=n_features,
#                                 stop_words='english')
# word_vector = vectorizer.fit_transform(text)

# No setting of hyper-parameters
vectorizer = CountVectorizer()
word_vector = vectorizer.fit_transform(text)

print(np.shape(word_vector))

Fitting Count Vectorizer
(10000, 24872)


In [16]:
#Print example text, stars, and embeddings

print(reviews_df["text"][102])
print(reviews_df["stars"][102])
print(word_vector[102])

At ces trade show and looking for lunch. I show up at 2:03 and the host jokingly says we are closed. We laughed. But he meant it. Last year my burger ordered medium came out almost raw. I am never going back
1
  (0, 17650)	1
  (0, 3376)	1
  (0, 13684)	1
  (0, 12582)	1
  (0, 4549)	1
  (0, 19037)	1
  (0, 11962)	1
  (0, 22483)	1
  (0, 3953)	1
  (0, 10897)	1
  (0, 13729)	1
  (0, 24528)	1
  (0, 12)	1
  (0, 12556)	1
  (0, 13164)	1
  (0, 15363)	1
  (0, 13056)	1
  (0, 19747)	2
  (0, 1101)	1
  (0, 10472)	1
  (0, 1133)	1
  (0, 3582)	1
  (0, 15453)	1
  (0, 14751)	1
  (0, 2016)	1
  (0, 1762)	2
  (0, 9793)	1
  (0, 23190)	1
  (0, 23929)	2
  (0, 1555)	1
  (0, 1239)	2
  (0, 8885)	1
  (0, 3440)	1
  (0, 22022)	1
  (0, 11748)	1
  (0, 14510)	1


## NB Training and Scoring

In [17]:
# x_train_user_reviews = reviews_df["text"][0:6000]
# x_dev_user_reviews = reviews_df["text"][6001:8000]
# x_test_user_reviews = reviews_df["text"][8001:10000]


# x_train_user_reviews = word_vector[0:6000]
# x_dev_user_reviews = word_vector[6001:8000]
x_train_user_reviews = word_vector[0:8000]
x_test_user_reviews = word_vector[8001:10000]

# print("x_train_user_reviews", x_train_user_reviews)
# print("shape x_train_user_reviews", np.shape(x_train_user_reviews))



# y_train_user_stars = reviews_df["stars"][0:6000]
# y_dev_user_stars = reviews_df["stars"][6001:8000]
y_train_user_stars = reviews_df["stars"][0:8000]
y_test_user_stars = reviews_df["stars"][8001:10000]

In [19]:
nb = MultinomialNB()

nb.fit(x_train_user_reviews, y_train_user_stars)

y_pred = nb.predict(x_test_user_reviews)

acc = accuracy_score(y_pred, y_test_user_stars)
print("Accuracy on test set: {:.02%}".format(acc))
# pred_proba = nb.predict_proba(y_pred)
# log_loss_metric = log_loss(y_test_user_stars, pred_proba)
# print("Log-loss on test set: {:.02%}".format(log_loss_metric))

Accuracy on test set: 55.93%


In [20]:
# Print example prediction

print(y_pred[400])

4


In [23]:
#!pip cPickle

ERROR: unknown command "cPickle"


## RNN with Attention (old) code

In [84]:
# # encoding=utf8

# import sys
# import re
# # import cPickle
# import _pickle as cPickle
# import numpy as np

# # For special characters
# reload(sys)

# # sys.setdefaultencoding('utf8')

# _PAD = b"_PAD"
# _GO = b"_GO"
# _EOS = b"_EOS"
# _UNK = b"_UNK"
# _START_VOCAB = [_PAD, _GO, _EOS, _UNK]

# PAD_ID = 0
# GO_ID = 1
# EOS_ID = 2
# UNK_ID = 3

# _WORD_SPLIT = re.compile(b"([.,!?\"':;)(])")
# _DIGIT_RE = re.compile(BR"\d")

# def basic_tokenizer(sentence):
#     """ Split sentence into list of tokens """
#     words = []
#     for space_separated_item in sentence.strip().split():
# #         words.extend(_WORD_SPLIT.split(space_separated_item))
#         words.extend(space_separated_item)
#     return [w for w in words if w] # if w removes the ""

# def get_vocab(tokenized, max_vocab_size):
#     """
#     Get vocab_list, vocab_dict and rev_vocab_dict given the
#     tokenized sentences.
#     """
#     # Replace word count
#     vocab = {}
# #     for sentence in tokenized:
# #         for word in sentence:
# #             if word in vocab:
# #                 vocab[word] += 1
# #             else:
# #                 vocab[word] = 1


# ### Minimizing looops

# #     print("tokenized", tokenized)
#     for word in tokenized:
# #         print("word", word)
#         if word in vocab:
#             vocab[word] += 1
#         else:
#             vocab[word] = 1


#     vocab_list = _START_VOCAB + sorted(vocab, key=vocab.get, reverse=True)
#     if len(vocab_list) > max_vocab_size:
#         vocab_list = vocab_list[:max_vocab_size]

#     # Get vocab dict (word -> token) and rev dict (token -> word)
#     vocab_dict = dict([(x,y) for (y,x) in enumerate(vocab_list)])
    
# #     print("vocab_dict", vocab_dict)
    
# #     rev_vocab_dict = {v: k for k, v in vocab_dict.iteritems()}
#     rev_vocab_dict = {v: k for k, v in vocab_dict.items()}

    
#     print("vocab_list", vocab_list)
    
#     return vocab_list, vocab_dict, rev_vocab_dict





# # def sentence_to_token_ids(sentence, vocab_dict, target_lang,
# #     normalize_digits=True):
    
    
# def sentence_to_token_ids(sentence, vocab_dict, normalize_digits=True):
    
#     """
#     Convert a single sentence of words to token ids.
    
#     ############### If it is the target   
#     ############### language, we will append an EOS token to the end.
#     """
#     if not normalize_digits:
#         # replace words not in vocab_dict with UNK_ID
#         tokens = [vocab_dict.get(w, UNK_ID) for w in sentence]
#     else:
# #         tokens = [vocab_dict.get(_DIGIT_RE.sub(b"0", w), UNK_ID)
# #             for w in sentence]
# #         tokens = [vocab_dict.get(_DIGIT_RE.sub(b"0", w), UNK_ID)
# #             for w in sentence]
#         tokens = [vocab_dict.get(w, UNK_ID) for w in sentence]

# #     # Append EOS token if target langauge sentence
# #     if target_lang:
# #         tokens.append(EOS_ID)

#     tokens.append(EOS_ID)

    
    
#     print("tokens", tokens)
    
    
#     return tokens


# # def data_to_token_ids(tokenized, vocab_dict, target_lang,
# #     normalize_digits=True):
    
# def data_to_token_ids(tokenized, vocab_dict, normalize_digits=True):
#     """
#     Convert tokens into ids used vocab_dict and normalize all digits
#     to 0.
#     """
#     data_as_tokens = []
#     seq_lens = []
#     max_len = max(len(sentence) for sentence in tokenized) + 1 # +1 for EOS

#     for sentence in tokenized:
# #         token_ids = sentence_to_token_ids(sentence, vocab_dict, target_lang,
# #             normalize_digits)
#         token_ids = sentence_to_token_ids(sentence, vocab_dict, normalize_digits)
#         # Padding
#         data_as_tokens.append(token_ids + [PAD_ID]*(max_len - len(token_ids)))
#         # Store original sequence length
#         seq_lens.append(len(token_ids))

#     return np.array(data_as_tokens), np.array(seq_lens)






# # def process_data(datafile, max_vocab_size, target_lang):

# def process_data(datafile, max_vocab_size):
    
    
    
    
    
#     """
#     ############### Read the sentences from our datafiles.
    
#     text = reviews_df["text"]
    
    
#     """
# #     with open(datafile, 'rb') as f:
# #         sentences = cPickle.load(f)

#     ##############
#     sentences = datafile

#     # Split into tokens
#     tokenized = []
    
    
    
#     # From baseline
# #     vectorizer = CountVectorizer()
# #     word_vector = vectorizer.fit_transform(text)
    
# #     vectorizer = CountVectorizer()
# #     tokenized = vectorizer.fit_transform(sentences)
    
    
    
    
# #######     for i in xrange(len(sentences)):
#     for i in range(len(sentences)):
#         tokenized.append(basic_tokenizer(sentences[i]))
    
#     print("tokenized type", type(tokenized))

#     print("tokenized[200]", tokenized[200])
# #     print("tokenized", tokenized)
    

#     # Get vocab information
#     vocab_list, vocab_dict, rev_vocab_dict = get_vocab(tokenized,
#         max_vocab_size)

#     # Convert data to token ids
# #     data_as_tokens, seq_lens = data_to_token_ids(tokenized, vocab_dict,
# #         target_lang, normalize_digits=True)
    
    
#     data_as_tokens, seq_lens = data_to_token_ids(tokenized, vocab_dict, normalize_digits=True)

    
    
    
#     return data_as_tokens, seq_lens, vocab_dict, rev_vocab_dict




# ################### Don't need

# ################### Don't need################### Don't need################### Don't need
# ################### Don't need
# def split_data(en_token_ids, sp_token_ids,
#     en_seq_lens, sp_seq_len, train_ratio=0.8):
#     """
#     Split the into train and validation sets.
#     """

#     decoder_inputs = []
#     targets = []
#     # Add go token to decoder inputs and create targets
#     for sentence in sp_token_ids:
#         decoder_inputs.append(np.array([GO_ID] + list(sentence)))
#         targets.append(np.array(([GO_ID] + list(sentence))[1:] + [0]))

#     sp_token_ids = np.array(decoder_inputs)
#     targets = np.array(targets)

#     # Splitting index
#     last_train_index = int(0.8*len(en_token_ids))

#     train_encoder_inputs = en_token_ids[:last_train_index]
#     train_decoder_inputs = sp_token_ids[:last_train_index]
#     train_targets = targets[:last_train_index]
#     train_en_seq_lens = en_seq_lens[:last_train_index]
#     train_sp_seq_len = sp_seq_len[:last_train_index]

#     valid_encoder_inputs = en_token_ids[last_train_index:]
#     valid_decoder_inputs = sp_token_ids[last_train_index:]
#     valid_targets = targets[last_train_index:]
#     valid_en_seq_lens = en_seq_lens[last_train_index:]
#     valid_sp_seq_len = sp_seq_len[last_train_index:]

# #     print "%i training samples and %i validations samples" % (
# #         len(train_encoder_inputs), len(valid_encoder_inputs))

#     return train_encoder_inputs, train_decoder_inputs, train_targets, \
#         train_en_seq_lens, train_sp_seq_len, \
#         valid_encoder_inputs, valid_decoder_inputs, valid_targets, \
#         valid_en_seq_lens, valid_sp_seq_len

        
        
        
# def generate_epoch(encoder_inputs, decoder_inputs, targets, en_seq_lens, sp_seq_lens,
#     num_epochs, batch_size):

#     for epoch_num in range(num_epochs):
#         yield generate_batch(encoder_inputs, decoder_inputs, targets,
#             en_seq_lens, sp_seq_lens, batch_size)

# def generate_batch(encoder_inputs, decoder_inputs, targets,
#     en_seq_lens, sp_seq_lens, batch_size):

#     data_size = len(encoder_inputs)

#     num_batches = (data_size // batch_size)
#     for batch_num in range(num_batches):
#         start_index = batch_num * batch_size
#         end_index = min((batch_num + 1) * batch_size, data_size)

#         yield encoder_inputs[start_index:end_index], \
#             decoder_inputs[start_index:end_index], \
#             targets[start_index:end_index], \
#             en_seq_lens[start_index:end_index], \
#             sp_seq_lens[start_index:end_index]


In [2]:
# ### (old)

# test = process_data(text, 10000)

# print('test', test)
# # test = process_data("At ces trade show and looking for lunch. I show up at 2:03 and the host jokingly says we are closed. We laughed. But he meant it.", 10000)

NameError: name 'process_data' is not defined

### RNN with Attention (new)

In [10]:
import tensorflow as tf
import os

# from tensorflow.models.rnn import rnn, rnn_cell

from tensorflow.python.ops import rnn, rnn_cell

rnn_cell = tf.nn.rnn_cell


#rnn= tf.nn.rnn
rnn= tf.nn.dynamic_rnn




import time
import numpy as np
import csv
import random
import collections
import util
from random import shuffle
from util import xavier_weight_init
import sys

class Config(object):
      """Holds model hyperparams and data information.
      The config class is used to store various hyperparameters and dataset
      information parameters. Model objects are passed a Config() object at
      instantiation.
      """
      batch_size =32
      batches_per_epoch =  15
      step_size= 128 # number of words in a review
      input_dim= 128 # this is the word vector size
      hidden_dim = 100 # number of nerons per hidden layer
      label_dim = 5 # we have a total of classes (like or not like)
      max_epochs = 500
      early_stopping = 3
      dropout =1
      learning_rate = 0.001
      forget_bias = 1.0
      #model = 'RNN' #'BiRNN'
      model = 'BiRNN'
      cell_type = 'LSTM'
      #cell_type = 'GRU'
      stack = 1
      use_peepholes = False
      cell_clip = 1.0
      train_file = ""
      label_file = ""
      run_type = "regression"
      multi_learn = False
      train_data_dir = "Data/train"
      val_data_dir = "Data/val"
      attention=True
      test_data_dir = "Data/test"
      # train_num_reviews = 1
      val_num_reviews = 1
      marker_list = []
      cur_marker = 0
      epoch_per_val=4
      init='norm'
      weight_dir='default'
      grad_clip_threshold=5
      residual=False



class Models(object):

    def read_markers(self, data_dir):
        for f in os.listdir(data_dir):
            if f[0:8] == 'compress':
                self.config.marker_list.append(f)
    def read_train_file(self, data_dir):
            '''
            Read the data and label file.
            assumed file name conventions:
                -file starts with x indicates data file, starts with y indicates label file
                -file name x_(# words in a review)_(size of the word vector)_(#of reviews in the file)_(corresponding label marker).data
                -file name y_(type of label)_(bucket or regression)_(# words in a review)_(size of the word vector)_(#of reviews in the file)_(corresponding label marker).data
            input: data file directory
            output:
                it outputs a 3 hyper-dimensional structrue as data and a 2 hyper-dimensional structrue as label:
                data : [number of reviews [number of words in the review x dimension of word vector]]
                label: [number of reviews, [one hot vector if classification, number if regression]]
            '''

            loaded=np.load(os.path.join(data_dir, self.config.marker_list[int(self.config.cur_marker)]))
            self.training_data = loaded['training_data']
            self.training_label = loaded['training_label']
            self.config.input_dim = self.training_data.shape[2]
            self.config.step_size = self.training_data.shape[1]
            self.config.label_dim = self.training_label.shape[1]
            return

    def read_val_file(self, data_dir):

        loaded=np.load(os.path.join(data_dir, 'compress_val.npz'))
        self.val_data = loaded['training_data']
        self.val_label = loaded['training_label']

        return


    def print_model_params(self):
        print('*'*99)
        print( 'Run Type:', str(self.config.run_type))
        print( 'Model:', self.config.model)

        print( 'Cell type:', self.config.cell_type)
        print( 'Hidden Units:', str(self.config.hidden_dim))

        print( "\n")
        print( 'Learning rate:', str(self.config.learning_rate))
        print( 'init:', str(self.config.init))
        print( 'Dropout:', str(self.config.dropout))
        print( 'graident threshold', str(self.config.grad_clip_threshold))
        print( "\n")

        print( 'attention:', str(self.config.attention))
        print( 'residual:', str(self.config.residual))
        print( 'Stack:', str(self.config.stack))
        print( 'step size:', self.config.step_size)
        print( 'input dim:', self.config.input_dim)
        print( 'batch isze', self.config.batch_size)
        print( "\n")
        # print() 'review per training file', self.config.train_num_reviews)
        print( 'marker list', self.config.marker_list)
        print( 'Forget Bias:', str(self.config.forget_bias))
        print( 'Peehole:', str(self.config.use_peepholes))
        print( '*'*99)

    def init_variables(self):
            '''
            initialize model parameters, note LSTM and BiRNN requires twice the hidden dimenssion due their design
            '''
            weight_size=self.config.hidden_dim
            if self.config.model=='BiRNN':
                weight_size_out=2*weight_size
                attention_weight = 2*self.config.hidden_dim
            else:
                weight_size_out=weight_size
                attention_weight = self.config.hidden_dim

            if self.config.attention:
                weight_size_out = self.config.step_size
            elif self.config.model!='BiRNN':
                weight_size_out = weight_size

            xavier_initializer = xavier_weight_init()
            # Define weights and bias
            with tf.variable_scope(str('test')):
                if self.config.init=='norm':
                      weights_hidden = tf.Variable(tf.random_normal([self.config.input_dim, weight_size])) # Hidden layer weights
                      weights_out = tf.Variable(tf.random_normal([weight_size_out, self.config.label_dim]))
                      biases_hidden = tf.Variable(tf.random_normal([weight_size]))
                      biases_out = tf.Variable(tf.random_normal([self.config.label_dim]))
                      wegiths_attention=tf.Variable(tf.random_normal([attention_weight]))
                elif self.config.init=='xaiver':
                      weights_hidden = tf.Variable(xavier_initializer((self.config.input_dim, weight_size)))
                      weights_out = tf.Variable(xavier_initializer((weight_size_out, self.config.label_dim)))
                      biases_hidden =tf.Variable(xavier_initializer((weight_size,)))
                      biases_out = tf.Variable(xavier_initializer((self.config.label_dim,)))
                      wegiths_attention =tf.Variable(xavier_initializer((attention_weight,)))

                self.weights = {
                    'hidden': weights_hidden,
                    'out1': weights_out
                }
                self.biases = {
                   'hidden': biases_hidden,
                    'out1': biases_out
                }
                for i in range(self.config.step_size):
                    self.weights[i]=wegiths_attention#tf.Variable(tf.random_normal([weight_size_out]))
                    self.biases[i]=tf.Variable(tf.random_normal([self.config.batch_size]))

    def BiRNN(self, scope):
            '''
            bidirection rnn model
            Note: bidirectional model is most useful when tacking RNNs, in single stack case it just averaging two outputs
            input: information needed to construct a model. F_bias is only relevant when cell type is LSTM
            output:
                linear combination of the rnn results and output weights
            '''
            # input shape: (batch_size, step_size, input_dim)
            # we need to permute step_size and batch_size(change the position of step and batch size)
            data = tf.transpose(self.input_data, [1, 0, 2])

            # Reshape to prepare input to hidden activation
            # (step_size*batch_size, n_input), flattens the batch and step
            #after the above transformation, data is now (step_size*batch_size, input_dim)
            data = tf.reshape(data, [-1, self.config.input_dim])

            # Define lstm cells with tensorflow
            with tf.variable_scope(str(scope)):
                  # Linear activation
                  data = tf.matmul(data, self.weights['hidden']) + self.biases['hidden']
                  data = tf.nn.dropout(data, self.config.dropout)
                  # Define a cell
                  if self.config.cell_type == 'GRU':
                      lstm_fw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
                      lstm_bw_cell = rnn_cell.GRUCell(self.config.hidden_dim)
                  else:
                      lstm_fw_cell = rnn_cell.LSTMCell(self.config.hidden_dim, forget_bias=self.config.forget_bias,
                                                       use_peepholes=self.config.use_peepholes, cell_clip=self.config.cell_clip, state_is_tuple=True)
                      lstm_bw_cell = rnn_cell.LSTMCell(self.config.hidden_dim, forget_bias=self.config.forget_bias,
                                                       use_peepholes=self.config.use_peepholes, cell_clip=self.config.cell_clip, state_is_tuple=True)

                  self.init_state_bw = lstm_bw_cell.zero_state(self.config.batch_size, dtype=tf.float32)
                  self.init_state_fw = lstm_fw_cell.zero_state(self.config.batch_size, dtype=tf.float32)

                  # Split data because rnn cell needs a list of inputs for the RNN inner loop
                  data = tf.split(0, self.config.step_size, data) # step_size * (batch_size, hidden_dim)

                  if self.config.stack == 2:
                      print('running stack 2.......')
                      outputs1, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN1")
                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs1,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN2")
                  elif self.config.stack == 3:
                      print('running stack 3.......')
                      outputs1, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN1")
                      outputs2, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs1,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN2")

                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs2,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN3")
                  elif self.config.stack == 4:
                      print('running stack 4.......')
                      outputs1, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN1")
                      outputs2, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs1,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN2")
                      outputs3, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs2,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN3")
                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs3,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN4")
                  elif self.config.stack == 5:
                      print('running stack 5.......')
                      outputs1, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN1")
                      outputs2, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs1,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN2")
                      outputs3, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs2,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN3")
                      outputs4, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs3,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN4")
                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs4,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN5")
                  elif self.config.stack == 6:
                      print('running stack 6.......')
                      outputs1, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN1")
                      outputs2, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs1,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN2")
                      outputs3, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs2,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN3")
                      outputs4, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs3,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN4")
                      outputs5, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs4,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN5")
                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs5,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN6")
                  elif self.config.stack == 7:
                      print('running stack 7.......')
                      outputs1, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN1")
                      outputs2, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs1,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN2")
                      outputs3, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs2,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN3")
                      outputs4, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs3,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN4")
                      outputs5, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs4,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN5")
                      outputs6, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs5,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN6")
                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, outputs6,
                                                              initial_state_fw=self.init_state_fw,
                                                              initial_state_bw=self.init_state_bw, scope="RNN7")
                  else:
                      print('running single stack Bi-directional RNN.......')
                      outputs, output_state_fw,output_state_bw  = tf.nn.bidirectional_rnn(lstm_fw_cell, lstm_bw_cell, data,
                                                                    initial_state_fw=self.init_state_fw,
                                                                    initial_state_bw=self.init_state_bw, scope="RNN1")

                  if self.config.attention:
                        pred = self.compute_output(outputs, data)
                  else:
                        pred = self.compute_output(outputs[-1], data)
                  return pred


    def RNN(self, scope):
            '''
            standard rnn model
            input: information needed to construct a model. F_bias is only relevant when cell type is LSTM
            output:
                linear combination of the rnn results and output weights
            '''
            # input shape: (batch_size, step_size, input_dim)
            # we need to permute step_size and batch_size(change the position of step and batch size)
            data = tf.transpose(self.input_data, [1, 0, 2])
            # Reshape to prepare input to hidden activation
            # (step_size*batch_size, n_input), flattens the batch and step
            #after the above transformation, data is now (step_size*batch_size, input_dim)
            data = tf.reshape(data, [-1, self.config.input_dim])

            with tf.variable_scope(str(scope)):
                  data = tf.nn.dropout(tf.matmul(data, self.weights['hidden']) + self.biases['hidden'], self.config.dropout)

                  # Define a lstm cell with tensorflow
                  if self.config.cell_type == 'GRU':
                      lstm_cell = rnn_cell.GRUCell(self.config.hidden_dim)
                  else:
                      lstm_cell = rnn_cell.LSTMCell(self.config.hidden_dim, forget_bias=self.config.forget_bias, state_is_tuple=True)
                  self.init_state = lstm_cell.zero_state(self.config.batch_size, dtype=tf.float32)
                  # Split data because rnn cell needs a list of inputs for the RNN inner loop
                  data = tf.split(0, self.config.step_size, data) # step_size * (batch_size, hidden_dim)

                  if self.config.stack == 2:
                      print('running stack 2.......')
                      output1, states = tf.nn.rnn(lstm_cell, data, initial_state=self.init_state, scope="RNN1")
                      outputs, states = tf.nn.rnn(lstm_cell, output1, initial_state=self.init_state, scope="RNN2")
                  elif self.config.stack == 3:
                      print('running stack 3.......')
                      output1, states = tf.nn.rnn(lstm_cell, data, initial_state=self.init_state, scope="RNN1")
                      output2, states = tf.nn.rnn(lstm_cell, output1, initial_state=self.init_state, scope="RNN2")
                      outputs, states = tf.nn.rnn(lstm_cell, output2, initial_state=self.init_state, scope="RNN3")
                  elif self.config.stack == 4:
                      print('running stack 4.......')
                      output1, states = tf.nn.rnn(lstm_cell, data, initial_state=self.init_state, scope="RNN1")
                      output2, states = tf.nn.rnn(lstm_cell, output1, initial_state=self.init_state, scope="RNN2")
                      output3, states = tf.nn.rnn(lstm_cell, output2, initial_state=self.init_state, scope="RNN3")
                      outputs, states = tf.nn.rnn(lstm_cell, output3, initial_state=self.init_state, scope="RNN4")
                  else:
                      print('running single stack RNN.......')
                      outputs, states = tf.nn.rnn(lstm_cell, data, initial_state=self.init_state, scope="RNN1")

                  # Get lstm cell output
                  outputs, states = tf.nn.rnn(lstm_cell, data, initial_state=self.init_state)

                  # we really just interested in the last state's output
                  # return [tf.matmul(outputs[-1], self.weights['out1']) + self.biases['out1']]
                  if self.config.attention:
                        pred=self.compute_output(outputs)
                  else:
                        pred =self.compute_output(outputs[-1])
                  return pred

    def compute_output(self, outputs, data):
            if not self.config.attention:
                print('running none attention mode.......')
                # Linear activation
                # for basic rnn prediction we really just interested in the last state's output, we need to average them in this case
                return [tf.nn.dropout(tf.matmul(outputs, self.weights['out1']) + self.biases['out1'], self.config.dropout)]
            else:
                print('running attention mode.......')
                # print total_outputs.get_shape()
                # print outputs[-1].get_shape()
                # we now need to do apply the attention model, the output of each layer comes out from outputs[0], total layer = step_size
                # I will first iterate through each layer and multiply the output to its weights
                # I will follow the example below, which essentially produces a matrix vector product
                # x = tf.constant(5.0, shape=[5, 6])
                # w = tf.constant([0.0, 1.0, 2.0, 3.0, 4.0, 5.0])
                # xw = tf.mul(x, w)
                # max_in_rows = tf.reduce_max(xw, 1), i need to ues reduce sum here
                #
                # sess = tf.Session()
                # print sess.run(xw)
                # # ==> [[0.0, 5.0, 10.0, 15.0, 20.0, 25.0],
                # #      [0.0, 5.0, 10.0, 15.0, 20.0, 25.0],
                # #      [0.0, 5.0, 10.0, 15.0, 20.0, 25.0],
                # #      [0.0, 5.0, 10.0, 15.0, 20.0, 25.0],
                # #      [0.0, 5.0, 10.0, 15.0, 20.0, 25.0]]
                #
                # print sess.run(max_in_rows)
                # # ==> [25.0, 25.0, 25.0, 25.0, 25.0]
                # print self.weights[1].get_shape() #(256,)
                # print outputs[27].get_shape() #(?,256)
                # attention_list = [tf.reduce_sum(tf.mul(outputs[i], weights[i]),1)+bias[i] for i in range(len(outputs))]
                if self.config.residual:
                    print('running residual mode.......')
                    data = tf.transpose(self.input_data, [1, 0, 2])
                    for i in range(self.config.step_size):
                        data1 = tf.concat(1, [data[i], data[i]])
                        outputs[i]+=data1
                else:
                    print('running non-residual mode.......')
                attention_list = [tf.reduce_sum(tf.mul(outputs[i], self.weights[i]),1)+self.biases[i] for i in range(self.config.step_size)]
                #after obtaining the attention list I need to make a vector out of it
                attention_vec = tf.transpose(tf.pack(attention_list))
                #attention_vec = tf.add(attention_vec,data)
                # print self.weights['out1'].get_shape()
                pred=[tf.nn.dropout(tf.matmul(attention_vec, self.weights['out1']) + self.biases['out1'], self.config.dropout)]
                return pred

    def add_placeholders(self):
            '''
            feeding information to the input placeholders
            this function is call as the init process, data are feed in by tensor flow graph
            '''
            # define graph input place holders
            self.input_data = tf.placeholder("float", [None, self.config.step_size, self.config.input_dim])
            self.input_label = tf.placeholder("float", [None, self.config.label_dim])

    def get_feed_dict(self, data, label):
        if (self.config.model == 'BiRNN'):
            feed_dict = {self.input_data: data,
                         self.input_label: label}
        else:
            feed_dict = {self.input_data: data,
                         self.input_label: label}
        return feed_dict

    def run_model(self, scope=None, debug=False):
            '''
            this is the core function that launches the model, it initializes the weights and call the model specified in the config
            after model execution it records the test and training loss.
            input: model, training data, label, test data/label, and all other paramters needed to run the model
            output:
                the best learning rate found through cross vaildation.
            '''
            self.print_model_params()
            #making predictions, this actives the rnn model
            if (self.config.model =="BiRNN"): pred = self.BiRNN(scope)
            elif (self.config.model=="RNN"): pred = self.RNN(scope)

             # Define loss and optimizer
            label1 = tf.split(1, self.config.label_dim, self.input_label)

            if self.config.run_type=='regression':
                cost = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(pred[0], self.input_label))))

            if self.config.run_type=='classification':
                cost = tf.reduce_mean(
                    tf.nn.softmax_cross_entropy_with_logits(pred[0], self.input_label))


            optimizer = tf.train.AdamOptimizer(learning_rate=self.config.learning_rate).minimize(cost) # Adam Optimizer

            #opt_func=tf.train.AdamOptimizer(learning_rate=self.config.learning_rate)
            #tvars=tf.trainable_variables()

            #clip the graident
            # tf.gradients(ys, xs, grad_ys=None, name='gradients', colocate_gradients_with_ops=False, gate_gradients=False, aggregation_method=None)
            # Constructs symbolic partial derivatives of sum of ys w.r.t. x in xs.
            #grads, _=tf.clip_by_global_norm(tf.gradients(cost, tvars), self.config.grad_clip_threshold)
            #optimizer=opt_func.apply_gradients(zip(grads, tvars))

            #compute accuracy for classification
            class_one_hot_prediction = tf.argmax(self.input_label, 1)
            classification_prediction=tf.argmax(tf.nn.softmax(pred[0]),1)
            classification_acc =tf.reduce_sum(tf.cast(tf.equal(classification_prediction, class_one_hot_prediction), 'int32'))

             # Initializing the variables
            init = tf.global_variables_initializer()
            saver = tf.train.Saver()

            def ValidationError(_type):
                #-------------------------validation starts here-------------------------------------------
                    val_loss=[]
                    val_epoch = 0
                    if _type == 'val':
                          print('running validation loss')
                          self.read_val_file(self.config.val_data_dir)
                    elif _type == 'test':
                          print('running test loss')
                          self.read_val_file(self.config.test_data_dir)

                    train_dropout=self.config.dropout
                    self.config.dropout = train_dropout
                    val_i=1
                    val_last_index = 0
                    while val_i*self.config.batch_size <= len(self.val_data):
                        samples=[i for i in range(val_last_index, val_i*self.config.batch_size)]
                        val_last_index = val_i*self.config.batch_size
                        val_i+=1
                        sample = np.array(samples)
                        input_training_data=self.val_data[sample, :]
                        input_training_label=self.val_label[sample, :]
                        feed_dict = self.get_feed_dict(input_training_data, input_training_label)
                        if self.config.run_type == 'classification':
                            loss, match = sess.run([cost, classification_acc], feed_dict)
                            acc= 1.0*match/len(input_training_label)
                        elif self.config.run_type == 'regression':
                            acc = sess.run(cost, feed_dict)
                        val_loss.append(acc)
                    self.config.dropout=train_dropout
                    return 1.0*sum(val_loss)/(len(val_loss))

            def SaveWeights():
                  # if not os.path.exists("./weights"):
                  if not os.path.exists("./"+self.config.weight_dir):
                        os.makedirs("./"+self.config.weight_dir)
                  path=saver.save(sess, './'+self.config.weight_dir+'/', global_step=None, latest_filename=None, meta_graph_suffix='meta', write_meta_graph=True, write_state=True)

            saver = tf.train.Saver()
            #Launch the graph
            with tf.Session() as sess:
                #saver.restore(sess, './'+self.config.weight_dir+'/')
                #print 'weights restored...'
                #test_accuracy = ValidationError('test')
                #print 'test accuracy', test_accuracy
                #return test_accuracy

                sess.run(init)
                best_val_epoch = 0
                if self.config.run_type=='classification':
                    best_val_accuracy= float('-inf')
                if self.config.run_type=='regression':
                    best_val_accuracy= float('inf')
                #-------------------------training starts here-------------------------------------------
                # I have batches per epoch and epoch per validation check out which is my max_epoch
                # I will read one file per time and taking batches out of the file, once the file is exhausted I will move
                # on to the next file without interupting the epoch run
                # note, number batchs per epoch * batch size must be less than the numver of reviews in a file
                index = 1
                last_index = 0
                total_epoch = 0
                val_epoch = 0
                val_loss = []
                for epoch in xrange(self.config.max_epochs):
                    total_epoch +=1
                    val_epoch +=1
                    train_accuarcy = []
                    test_accuracy = 0
                    train_loss = []
                    counter = 0
                    # Training
                    total_traing_data = self.training_label.shape[0]
                    while counter  < self.config.batches_per_epoch:
                        current_index=index*self.config.batch_size
                        if current_index >= total_traing_data:
                            samples=[i for i in range(total_traing_data-self.config.batch_size, total_traing_data)]
                            sample = np.array(samples)
                            #samples=np.random.randint(total_traing_data, size=self.config.batch_size)
                            input_training_data=self.training_data[sample, :]
                            input_training_label=self.training_label[sample, :]
                            index = 1
                            last_index=0
                            self.config.cur_marker+=1
                            if self.config.cur_marker == len(self.config.marker_list): self.config.cur_marker = 0
                            self.read_train_file(self.config.train_data_dir)
                        else:
                            samples=[i for i in range(last_index, current_index)]
                            last_index = current_index
                            index +=1
                            sample = np.array(samples)
                            input_training_data=self.training_data[sample, :]
                            input_training_label=self.training_label[sample, :]

                        feed_dict = self.get_feed_dict(input_training_data, input_training_label)
                        sess.run(optimizer, feed_dict)
                        if self.config.run_type == 'classification':
                            loss, match = sess.run([cost, classification_acc], feed_dict)
                            acc= 1.0*match/len(input_training_label)
                        elif self.config.run_type == 'regression':
                            acc = sess.run(cost, feed_dict)
                            loss = acc
                        train_accuarcy.append(acc)
                        train_loss.append(loss)
                        counter += 1

                    epoch_loss=sum(train_loss)/counter
                    epoch_acc=sum(train_accuarcy)/counter
                    print("Epoch " + str(epoch) + ", Loss= " + "{:.6f}".format(epoch_loss) + ", Accuracy= " + "{:.6f}".format(epoch_acc))
                    sys.stdout.flush()
                    if val_epoch == self.config.epoch_per_val:
                        val_epoch = 0
                        val_accuracy = ValidationError('val')
                        if self.config.run_type=='classification':
                            if best_val_accuracy<val_accuracy:
                                best_val_epoch=total_epoch
                                best_val_accuracy= val_accuracy
                                SaveWeights()
                        if self.config.run_type == 'regression':
                            if best_val_accuracy>val_accuracy:
                                best_val_epoch=total_epoch
                                best_val_accuracy= val_accuracy
                                SaveWeights()

                        print('*'*30)
                        print(str(self.config.run_type)+' validation accuracy at epoch %d: %f'%(total_epoch, val_accuracy))
                        print('best validation accuracy so far at epoch %d: %f'%(total_epoch, best_val_accuracy))
                        print('*'*30)
                print("Optimization Finished!")

                saver.restore(sess, './'+self.config.weight_dir+'/')
                print('weights restored...')
                test_accuracy = ValidationError('test')
                print('test accuracy', test_accuracy)
                return test_accuracy


    def __init__(self, config):
      self.config = config
      if len(self.config.marker_list) == 0: self.read_markers(self.config.train_data_dir)
      self.config_cur_marker=self.config.marker_list[0]
      self.read_train_file(self.config.train_data_dir)
      self.add_placeholders()
      self.init_variables()
      self.val_data=[]

def run_regression(config=None, stack=1, attention=False, res=False):
      for i in range(stack):
            ts = int(time.time())
            if config is None:
                config = Config()
            config.run_type='regression'
            config.train_data_dir='Data/train/regression'
            config.val_data_dir='Data/val/regression'
            config.test_data_dir='Data/test/regression'

            config.cell_type='LSTM'
            #config.cell_type='GRU'
            config.model="BiRNN"
            #config.model="RNN"
            config.learning_rate=0.001
            config.batch_size=16
            config.batches_per_epoch=80
            config.max_epochs=40
            config.dropout=1
            config.hidden_dim=300
            config.epoch_per_val=5
            config.stack=i+1
            config.attention=attention
            #config.init='norm'
            config.init='xaiver'
            config.grad_clip_threshold = 10000
            config.residual=res

            config.weight_dir="regression_"+str(config.model)+"_"+str(config.cell_type)+"_"+"stack"+str(config.stack)+"_"+str(ts)
            if not os.path.exists("./"+config.weight_dir):
              os.makedirs("./"+config.weight_dir)
            f=open("./"+config.weight_dir+'/run_file.txt', 'a')
            sys.stdout = f
            model = Models(config)
            loss_val = model.run_model(scope=str(i))

def run_2classification(config=None,stack=1, attention=False, res=False):
    for i in range(5,8):
            ts = int(time.time())
            if config is None:
              config = Config()
            config.run_type='classification'
            config.train_data_dir='Data/train/2_classification'
            config.val_data_dir='Data/val/2_classification'
            config.test_data_dir='Data/test/2_classification'
            config.cell_type='LSTM'
            #config.cell_type='GRU'
            config.model="BiRNN"
            #config.model="RNN"
            config.learning_rate=0.001
            config.batch_size=16
            config.batches_per_epoch=80
            config.max_epochs=40
            config.dropout=1
            config.hidden_dim=300
            config.epoch_per_val=5
            config.stack=i+1
            config.attention=attention
            #config.init='norm'
            config.init='xaiver'
            config.grad_clip_threshold = 10000
            config.residual=res

            config.weight_dir="attention_2_classification_"+str(config.model)+"_"+str(config.cell_type)+"_"+"stack"+str(config.stack)+"_"+str(ts)
            if not os.path.exists("./"+config.weight_dir):
              os.makedirs("./"+config.weight_dir)
            f=open("./"+config.weight_dir+'/run_file.txt', 'a')
            sys.stdout = f
            model = Models(config)
            loss_val = model.run_model(scope=str(i))

#def run_3classification(config=None,stack=1, attention=False):
#      for i in range(stack):
#            ts = int(time.time())
#            ts = int(time.time())
#            if config is None:
#              config = Config()
#
#            config.run_type='classification'
#            config.train_data_dir='Data/train/3_classification'
#            config.val_data_dir='Data/val/3_classification'
#            config.test_data_dir='Data/test/3_classification'
#            config.weight_dir="3_classification_"+str(ts)
#            config.cell_type='LSTM'
#            #config.cell_type='GRU'
#            config.model="BiRNN"
#            #config.model="RNN"
#            config.learning_rate=0.001
#            config.batch_size=128
#            config.batches_per_epoch=5
#            config.max_epochs=30
#            config.dropout=0.8
#            config.hidden_dim=300
#            config.epoch_per_val=5
#            config.stack=i+1
#            config.attention=attention
#            #config.init='norm'
#            config.init='xaiver'
#            config.grad_clip_threshold = 10000
#
#
#            config.weight_dir="3_classification_"+str(config.model)+"_"+str(config.cell_type)+"_"+"stack"+str(config.stack)+"_"+str(ts)
#            if not os.path.exists("./"+config.weight_dir):
#              os.makedirs("./"+config.weight_dir)
#            f=open("./"+config.weight_dir+'/run_file.txt', 'a')
#            sys.stdout = f
#            model = Models(config)
#            loss_val = model.run_model(scope=str(i))

if __name__ == "__main__":
    random.seed(31415)
    print(sys.argv[1])
    if sys.argv[1] == '2_classification':
        run_2classification(stack=1, attention=False)
    elif sys.argv[1] == '3_classification':
        run_3classification(stack=4, attention=True)
    elif sys.argv[1] == 'regression':
        run_regression(stack=1, attention=False)
    elif sys.argv[1]=='stack_regression':
        run_regression(stack=7, attention=True, res=True)
    elif sys.argv[1]=='stack_classification':
        run_2classification(stack=7, attention=True, res=True)
    else:
        print('you must select a task to run')

ModuleNotFoundError: No module named 'util'