In [1]:
import numpy as np 
import pandas as pd 
import re
from zemberek import TurkishMorphology
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords
import tensorflow as tf
import time
from tensorflow.python.layers.core import Dense
from tensorflow.python.ops.rnn_cell_impl import _zero_state_tensors
print('TensorFlow Version: {}'.format(tf.__version__))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Burak\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


TensorFlow Version: 2.17.0


In [2]:
# Load the training split (malformed rows are skipped by the parser) and drop
# the 'id' column in the same expression.
df = (
    pd.read_csv('../dataset/train.csv', on_bad_lines='skip', engine='python')
    .drop(columns=['id'])
)

In [3]:
# Print the first three article/summary pairs as a sanity check on the raw data.
for sample_idx in range(3):
    print(f"Review # {sample_idx + 1}")

    print('Original Text : ')
    print(df.article[sample_idx])

    print('\n\nSummary Text : ')
    print(df.highlights[sample_idx])

    print('===========================================================================================================\n\n')
     

Review # 1
Original Text : 
By . Associated Press . PUBLISHED: . 14:11 EST, 25 October 2013 . | . UPDATED: . 15:36 EST, 25 October 2013 . The bishop of the Fargo Catholic Diocese in North Dakota has exposed potentially hundreds of church members in Fargo, Grand Forks and Jamestown to the hepatitis A virus in late September and early October. The state Health Department has issued an advisory of exposure for anyone who attended five churches and took communion. Bishop John Folda (pictured) of the Fargo Catholic Diocese in North Dakota has exposed potentially hundreds of church members in Fargo, Grand Forks and Jamestown to the hepatitis A . State Immunization Program Manager Molly Howell says the risk is low, but officials feel it's important to alert people to the possible exposure. The diocese announced on Monday that Bishop John Folda is taking time off after being diagnosed with hepatitis A. The diocese says he contracted the infection through contaminated food while attending a con

In [4]:
# Hand-written Turkish stopword list, kept only as an inert string literal: it is
# never assigned to a name, so no later cell can use it (clean_text reads NLTK's
# stopword list instead). Being the last expression in the cell, its repr is
# echoed as the cell output.
'''predefined_stopwords = ['acaba', 'ama', 'aslında', 'az', 'bazı', 'belki', 'biri', 'birkaç', 'birşey', 'biz', 'bu',
                                                    'çok', 'çünkü', 'da', 'daha', 'de', 'defa', 'diye', 'eğer', 'en', 'gibi', 'hem', 'hep',
                                                    'hepsi', 'her', 'hiç', 'için', 'ile', 'ise', 'kez', 'ki', 'kim', 'mı', 'mu', 'mü', 'nasıl',
                                                    'ne', 'neden', 'nerde', 'nerede', 'nereye', 'niçin', 'niye', 'o', 'sanki', 'şey', 'siz',
                                                    'şu', 'tüm', 've', 'veya', 'ya', 'yani']
                                                    '''

"predefined_stopwords = ['acaba', 'ama', 'aslında', 'az', 'bazı', 'belki', 'biri', 'birkaç', 'birşey', 'biz', 'bu',\n                                                    'çok', 'çünkü', 'da', 'daha', 'de', 'defa', 'diye', 'eğer', 'en', 'gibi', 'hem', 'hep',\n                                                    'hepsi', 'her', 'hiç', 'için', 'ile', 'ise', 'kez', 'ki', 'kim', 'mı', 'mu', 'mü', 'nasıl',\n                                                    'ne', 'neden', 'nerde', 'nerede', 'nereye', 'niçin', 'niye', 'o', 'sanki', 'şey', 'siz',\n                                                    'şu', 'tüm', 've', 'veya', 'ya', 'yani']\n                                                    "

In [5]:
# Cache for stopword sets so the NLTK corpus file is read once, not once per call.
_STOPWORDS_CACHE = {}


def _default_stopwords():
    """Return NLTK's Turkish stopword set, loading it only on first use."""
    if 'turkish' not in _STOPWORDS_CACHE:
        _STOPWORDS_CACHE['turkish'] = frozenset(stopwords.words('turkish'))
    return _STOPWORDS_CACHE['turkish']


def clean_text(text, stops=None):
    """Normalise a raw text so that fewer words end up without an embedding.

    Steps, in order: lower-case; drop everything from an http(s) URL to the end
    of its line; turn backslashes and apostrophes into spaces; delete every
    remaining character that is neither a word character nor whitespace;
    remove stopwords; re-join the surviving words with single spaces.

    Args:
        text: The string to clean.
        stops: Optional collection of words to remove. When omitted, NLTK's
            Turkish stopword list is used (loaded once and cached).

    Returns:
        The cleaned text as a single space-separated string.
    """
    if stops is None:
        stops = _default_stopwords()

    # Convert words to lower case
    text = text.lower()

    # Format words and remove unwanted characters
    text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'\\', ' ', text)
    text = re.sub(r'\'', ' ', text)
    text = re.sub(r'[^\w\s]', '', text)

    return " ".join(word for word in text.split() if word not in stops)

In [6]:
# Clean every reference summary, then every article, announcing each stage.
clean_summary = [clean_text(summary) for summary in df['highlights']]
print("Summaries are complete!")

clean_article = [clean_text(article) for article in df['article']]
print('Articles are complete!')


Summaries are complete!
Articles are complete!


In [7]:
# Show the first five cleaned summary/article pairs.
for review_no in range(1, 6):
    position = review_no - 1
    print(f"Clean Review # {review_no}")
    print(clean_summary[position])
    print(clean_article[position])
    print()

Clean Review # 1
bishop john folda of north dakota is taking time off after being diagnosed he contracted the infection through contaminated food in italy church members in fargo grand forks and jamestown could have been exposed
by associated press published 1411 est 25 october 2013 updated 1536 est 25 october 2013 the bishop of the fargo catholic diocese in north dakota has exposed potentially hundreds of church members in fargo grand forks and jamestown to the hepatitis a virus in late september and early october the state health department has issued an advisory of exposure for anyone who attended five churches and took communion bishop john folda pictured of the fargo catholic diocese in north dakota has exposed potentially hundreds of church members in fargo grand forks and jamestown to the hepatitis a state immunization program manager molly howell says the risk is low but officials feel it s important to alert people to the possible exposure the diocese announced on monday that 

In [8]:
def count_words(count_dict, text):
    '''Add the word frequencies found in `text` to `count_dict`, in place.

    `text` is an iterable of strings; each one is split on whitespace and every
    resulting word increments its entry in `count_dict` (starting from 1 the
    first time the word is seen). Nothing is returned.
    '''
    for sentence in text:
        for token in sentence.split():
            count_dict[token] = count_dict.get(token, 0) + 1

In [9]:
# Count how often each word occurs across the cleaned summaries and articles;
# the number of distinct keys is the vocabulary size.
word_counts = {}
for corpus in (clean_summary, clean_article):
    count_words(word_counts, corpus)

print("Size of Vocabulary:", len(word_counts))

Size of Vocabulary: 730965


In [10]:
# Load ConceptNet Numberbatch (CN) embeddings, a set of pre-trained word vectors
# comparable to GloVe (https://github.com/commonsense/conceptnet-numberbatch).
# Each data line is "<word> <v1> <v2> ...", separated by single spaces.
embeddings_index = {}
with open('../numberbatch-en.txt', encoding='utf-8') as f:
    for line_no, line in enumerate(f):
        # Strip the line terminator so it is not left attached to the last value.
        values = line.rstrip().split(' ')

        # The text format may start with a "<num_words> <dim>" header; it is not
        # a word vector, so it must not be stored as one.
        if line_no == 0 and len(values) == 2:
            continue
        # Ignore blank lines instead of registering an empty word.
        if not values[0]:
            continue

        word = values[0]
        embedding = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = embedding

print('Word embeddings:', len(embeddings_index))

Word embeddings: 516783


In [11]:
# Count the words that occur more than `threshold` times yet have no vector in
# CN, and express that count as a share of the whole vocabulary.
threshold = 20

missing_words = sum(
    1
    for word, count in word_counts.items()
    if count > threshold and word not in embeddings_index
)

missing_ratio = round(missing_words/len(word_counts),4)*100

print("Number of words missing from CN:", missing_words)
print("Percent of words that are missing from vocabulary: {}%".format(missing_ratio))

Number of words missing from CN: 33927
Percent of words that are missing from vocabulary: 4.64%


In [12]:
# Limit the vocabulary to words that appear at least `threshold` times or that
# have a pre-trained vector in `embeddings_index`.

# Word -> integer id. Ids are handed out consecutively, in `word_counts` order.
vocab_to_int = {}
for word, count in word_counts.items():
    if count >= threshold or word in embeddings_index:
        vocab_to_int[word] = len(vocab_to_int)

# Special tokens that will be added to our vocab.
# NOTE(review): all four entries are the same empty string, so they collapse
# into a single key whose id is overwritten on each pass of the loop below.
# Presumably four distinct marker tokens were intended - TODO confirm; changing
# them also requires updating every lookup of '' elsewhere in the notebook.
codes = ["","","",""]   

# Add codes to vocab
for code in codes:
    vocab_to_int[code] = len(vocab_to_int)

# Integer id -> word (the inverse mapping)
int_to_vocab = {idx: token for token, idx in vocab_to_int.items()}

usage_ratio = round(len(vocab_to_int) / len(word_counts),4)*100

print("Total number of unique words:", len(word_counts))
print("Number of words we will use:", len(vocab_to_int))
print("Percent of words we will use: {}%".format(usage_ratio))

Total number of unique words: 730965
Number of words we will use: 168639
Percent of words we will use: 23.07%


In [13]:
# Need to use 300 for embedding dimensions to match CN's vectors.
embedding_dim = 300
# One row per id: ids run from 0 to the largest value stored in vocab_to_int.
nb_words = max(vocab_to_int.values()) + 1

# Create matrix with default values of zero
word_embedding_matrix = np.zeros((nb_words, embedding_dim), dtype=np.float32)
for word, i in vocab_to_int.items():
    if word in embeddings_index:
        word_embedding_matrix[i] = embeddings_index[word]
    else:
        # If word not in CN, create a random embedding for it, remember it in
        # the index, and write it into this word's row. (The matrix must NOT be
        # re-created here: doing so would erase every row filled in so far.)
        new_embedding = np.array(np.random.uniform(-1.0, 1.0, embedding_dim))
        embeddings_index[word] = new_embedding
        word_embedding_matrix[i] = new_embedding

# Number of rows in the matrix (equals nb_words, i.e. the largest id + 1)
print(len(word_embedding_matrix))

168640


In [14]:
def convert_to_ints(text, word_count, unk_count, eos=False):
    '''Translate each text into a list of vocabulary ids.

    Every whitespace-separated word is looked up in the global `vocab_to_int`;
    a word that is not present is replaced by the id stored under '' and counted
    as unknown. When `eos` is true, the id stored under '' is also appended to
    the end of each text.

    `word_count` and `unk_count` are running totals: the number of words seen and
    the number of unknown words are added to them and the new totals returned.

    Returns a tuple (list of id lists, word_count, unk_count).
    '''
    converted = []
    for sentence in text:
        tokens = sentence.split()
        word_count += len(tokens)

        ids = []
        for token in tokens:
            token_id = vocab_to_int.get(token)
            if token_id is None:
                # Out-of-vocabulary word
                token_id = vocab_to_int[""]
                unk_count += 1
            ids.append(token_id)

        if eos:
            ids.append(vocab_to_int[""])
        converted.append(ids)

    return converted, word_count, unk_count

In [15]:
# Convert the cleaned summaries and articles to id sequences. The word/unknown
# totals start at zero and are carried from the first call into the second, so
# the figures printed below cover both corpora.
int_summaries, word_count, unk_count = convert_to_ints(clean_summary, 0, 0)
int_texts, word_count, unk_count = convert_to_ints(
    clean_article, word_count, unk_count, eos=True
)

unk_percent = round(unk_count/word_count,4)*100

print("Total number of words in headlines:", word_count)
print("Total number of UNKs in headlines:", unk_count)
print("Percent of words that are UNK: {}%".format(unk_percent))

Total number of words in headlines: 209646129
Total number of UNKs in headlines: 1569488
Percent of words that are UNK: 0.75%


In [16]:
def create_lengths(text):
    '''Return a one-column DataFrame ('counts') holding len() of each item in `text`.'''
    return pd.DataFrame([len(sentence) for sentence in text], columns=['counts'])

In [17]:
# Summary statistics of the sequence lengths (in tokens) of both corpora.
lengths_summaries = create_lengths(int_summaries)
lengths_texts = create_lengths(int_texts)

print(
    "Summaries:",
    lengths_summaries.describe(),
    "",
    "Texts:",
    lengths_texts.describe(),
    sep="\n",
)

Summaries:
              counts
count  287113.000000
mean       48.288120
std        20.298531
min         3.000000
25%        35.000000
50%        45.000000
75%        56.000000
max      1226.000000

Texts:
              counts
count  287113.000000
mean      682.898702
std       331.170675
min         9.000000
25%       437.000000
50%       623.000000
75%       866.000000
max      2171.000000


In [18]:
# Inspect the upper tail of the article lengths
for percentile in (90, 95, 99):
    print(np.percentile(lengths_texts.counts, percentile))

1150.0
1344.0
1647.0


In [19]:
# Inspect the upper tail of the summary lengths
for percentile in (90, 95, 99):
    print(np.percentile(lengths_summaries.counts, percentile))

72.0
85.0
113.0


In [20]:
def unk_counter(sentence):
    '''Return how many ids in `sentence` equal the id stored under '' in vocab_to_int.'''
    return sum(1 for token_id in sentence if token_id == vocab_to_int[''])

In [21]:
# Keep only the usable (summary, text) pairs and order them by text length, so
# that batches built from consecutive items need little padding.
max_text_length = 959
max_summary_length = 116
min_length = 8
# NOTE(review): unk_text_limit is defined but not applied by the filter below -
# confirm whether texts were meant to be filtered on it as well.
unk_text_limit = 1
unk_summary_limit = 0

# Text lengths are accepted in [shortest text, max_text_length); the upper bound
# is exclusive.
shortest_text_length = min(lengths_texts.counts)

# One pass over the data to select the eligible pairs ...
eligible = [
    idx
    for idx in range(len(int_summaries))
    if (min_length <= len(int_summaries[idx]) <= max_summary_length
        and len(int_texts[idx]) >= min_length
        and shortest_text_length <= len(int_texts[idx]) < max_text_length
        and unk_counter(int_summaries[idx]) <= unk_summary_limit)
]

# ... and a stable sort by text length, so pairs whose texts have equal length
# keep their original relative order (the same ordering a length-by-length
# rescan of the whole dataset would produce).
eligible.sort(key=lambda idx: len(int_texts[idx]))

sorted_summaries = [int_summaries[idx] for idx in eligible]
sorted_texts = [int_texts[idx] for idx in eligible]

# Compare lengths to ensure they match
print(len(sorted_summaries))
print(len(sorted_texts))

170520
170520


In [22]:
def model_inputs():
    '''Create the placeholders that feed the model.

    Returns, in this order: input_data, targets, lr, keep_prob, summary_length,
    max_summary_length (the reduce_max of summary_length) and text_length.
    '''
    placeholder = tf.compat.v1.placeholder

    # 2-D integer inputs
    input_data = placeholder(tf.int32, [None, None], name='input')
    targets = placeholder(tf.int32, [None, None], name='targets')

    # Scalar training settings
    lr = placeholder(tf.float32, name='learning_rate')
    keep_prob = placeholder(tf.float32, name='keep_prob')

    # 1-D length vectors
    summary_length = placeholder(tf.int32, (None,), name='summary_length')
    max_summary_length = tf.compat.v1.reduce_max(summary_length, name='max_dec_len')
    text_length = placeholder(tf.int32, (None,), name='text_length')

    return (
        input_data,
        targets,
        lr,
        keep_prob,
        summary_length,
        max_summary_length,
        text_length,
    )
     

In [23]:
def process_encoding_input(target_data, vocab_to_int, batch_size):
    '''Build the decoder input from the targets.

    The last column of `target_data` is cut off and a column filled with the id
    stored under '' in `vocab_to_int` is put in front of what remains.
    '''
    without_last = tf.strided_slice(target_data, [0, 0], [batch_size, -1], [1, 1])
    leading_column = tf.fill([batch_size, 1], vocab_to_int[''])
    return tf.concat([leading_column, without_last], 1)

In [24]:
@tf.function
def encoding_layer(rnn_size, sequence_length, num_layers, rnn_inputs, keep_prob):
    '''Create the encoding layer.

    Builds `num_layers` pairs of Keras LSTM layers, applies a Bidirectional
    wrapper per pair to `rnn_inputs`, and returns (enc_output, enc_state), the
    concatenation of the per-layer outputs (last axis) and states (axis 0).

    `sequence_length` is accepted but not used anywhere in the body.

    NOTE(review): `dropout=1 - keep_prob` hands the LSTM constructor whatever
    `keep_prob` is. With the placeholder tensor passed by the graph-building
    cell, Keras evaluates `min(1.0, max(0.0, dropout))` on it and raises
    OperatorNotAllowedInGraphError (see the traceback recorded in this notebook).
    '''
    
    enc_cells = []

    for layer in range(num_layers):
        with tf.compat.v1.variable_scope('encoder_{}'.format(layer)):
            cell_fw = tf.keras.layers.LSTM(rnn_size,
                                           return_sequences=True,
                                           return_state=True,
                                           dropout=1 - keep_prob)
            cell_bw = tf.keras.layers.LSTM(rnn_size,
                                           return_sequences=True,
                                           return_state=True,
                                           dropout=1 - keep_prob)

            enc_cells.append((cell_fw, cell_bw))

    enc_outputs = []
    enc_states = []
    for cell_fw, cell_bw in enc_cells:
        # NOTE(review): cell_bw is never used; the wrapper is built from cell_fw
        # only. Every wrapper also receives the same rnn_inputs, so the layers
        # run side by side rather than stacked - confirm this is intended.
        # NOTE(review): with return_state=True the wrapper presumably returns
        # more than three values (separate h/c states per direction) - verify
        # this unpacking against the Bidirectional documentation.
        output, state_fw, state_bw = tf.keras.layers.Bidirectional(cell_fw)(rnn_inputs)
        enc_outputs.append(output)
        enc_states.append((state_fw, state_bw))

    enc_output = tf.concat(enc_outputs, axis=-1)
    enc_state = tf.concat(enc_states, axis=0)

    return enc_output, enc_state


In [25]:
@tf.function
def training_decoding_layer(dec_embed_input, summary_length, dec_cell, initial_state, output_layer, 
                            vocab_size, max_summary_length):
    '''Create the training decoder.

    Wraps `dec_embed_input` in a TrainingHelper, builds a BasicDecoder from
    `dec_cell`, the helper, `initial_state` and `output_layer`, and returns that
    decoder object.

    Note that the logits produced by the dynamic_decode call below are discarded:
    the function returns `training_decoder`, not `training_logits`.
    `vocab_size` is accepted but not used.

    NOTE(review): this relies on `tf.contrib.seq2seq`; `tf.contrib` is not part
    of TensorFlow 2.x (this notebook reports 2.17.0) - confirm the replacement.
    '''
    
    training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=dec_embed_input,
                                                        sequence_length=summary_length,
                                                        time_major=False)

    training_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell,
                                                       training_helper,
                                                       initial_state,
                                                       output_layer) 

    # Result is unused; decoding_layer runs dynamic_decode again on the
    # returned decoder.
    training_logits, _ , _ = tf.contrib.seq2seq.dynamic_decode(training_decoder,
                                                           output_time_major=False,
                                                           impute_finished=True,
                                                           maximum_iterations=max_summary_length)
    return training_decoder


In [26]:
@tf.function
def inference_decoding_layer(embeddings, start_token, end_token, dec_cell, initial_state, output_layer,
                             max_summary_length, batch_size):
    '''Create the inference decoder.

    Tiles `start_token` into a vector of `batch_size` start ids, builds a
    GreedyEmbeddingHelper over `embeddings` with those ids and `end_token`,
    wraps it in a BasicDecoder and returns that decoder object.

    Note that the logits produced by the dynamic_decode call below are discarded:
    the function returns `inference_decoder`, not `inference_logits`.

    NOTE(review): this relies on `tf.contrib.seq2seq`; `tf.contrib` is not part
    of TensorFlow 2.x (this notebook reports 2.17.0) - confirm the replacement.
    '''
    
    start_tokens = tf.tile(tf.constant([start_token], dtype=tf.int32), [batch_size], name='start_tokens')
    
    inference_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embeddings,
                                                                start_tokens,
                                                                end_token)
                
    inference_decoder = tf.contrib.seq2seq.BasicDecoder(dec_cell,
                                                        inference_helper,
                                                        initial_state,
                                                        output_layer)
                
    # Result is unused; decoding_layer runs dynamic_decode again on the
    # returned decoder.
    inference_logits, _ , _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                                            output_time_major=False,
                                                            impute_finished=True,
                                                            maximum_iterations=max_summary_length)
    
    return inference_decoder


In [27]:
@tf.function
def decoding_layer(dec_embed_input, embeddings, enc_output, enc_state, vocab_size, text_length, summary_length, 
                   max_summary_length, rnn_size, vocab_to_int, keep_prob, batch_size, num_layers):
    '''Create the decoding cell and attention for the training and inference decoding layers.

    Builds the decoder cell, a Dense projection of size `vocab_size`, an
    attention wrapper and an initial state seeded from `enc_state[0]`, then
    obtains the training and inference decoders and runs dynamic_decode on each.

    Returns (training_logits, inference_logits) as produced by dynamic_decode.

    `enc_output` and `text_length` are accepted but not used in the body.

    NOTE(review): several calls here (`tf.contrib.seq2seq`,
    `tf.keras.layers.AttentionWrapper`, `tf.truncated_normal_initializer`,
    `Dropout(lstm, input_keep_prob=...)`) do not look like TensorFlow 2.x / Keras
    APIs - verify each against the installed version (2.17.0 per this notebook).
    '''
    
    # Each iteration rebinds `lstm` and `dec_cell`, so only the objects created
    # for the last layer are still referenced after the loop.
    for layer in range(num_layers):
        with tf.compat.v1.variable_scope('decoder_{}'.format(layer)):
            lstm = tf.keras.layers.LSTM(rnn_size,
                                        return_sequences=True,
                                        return_state=True,
                                        dropout=1 - keep_prob)
            dec_cell = tf.keras.layers.Dropout(lstm, input_keep_prob=keep_prob)
    
    output_layer = Dense(vocab_size,
                         kernel_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.1))
    
    # NOTE(review): the trailing comment suggests Bahdanau attention was
    # intended; the layer is created without referencing enc_output - confirm.
    attn_mech = tf.keras.layers.Attention()  # BahdanauAttention

    dec_cell = tf.keras.layers.AttentionWrapper(dec_cell, attn_mech, rnn_size)
            
    initial_state = dec_cell.zero_state(batch_size=batch_size, dtype=tf.float32).clone(cell_state=enc_state[0])

    with tf.compat.v1.variable_scope("decode"):
        training_decoder = training_decoding_layer(dec_embed_input, 
                                                  summary_length, 
                                                  dec_cell, 
                                                  initial_state,
                                                  output_layer,
                                                  vocab_size, 
                                                  max_summary_length)
        
        training_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(training_decoder,
                                  output_time_major=False,
                                  impute_finished=True,
                                  maximum_iterations=max_summary_length)

    with tf.compat.v1.variable_scope("decode", reuse=True):
        # The same key ('') is looked up for both the start and the end token,
        # so both arguments receive the same id.
        inference_decoder = inference_decoding_layer(embeddings,  
                                                    vocab_to_int[''], 
                                                    vocab_to_int[''],
                                                    dec_cell, 
                                                    initial_state, 
                                                    output_layer,
                                                    max_summary_length,
                                                    batch_size)
        
        inference_logits, _, _ = tf.contrib.seq2seq.dynamic_decode(inference_decoder,
                                  output_time_major=False,
                                  impute_finished=True,
                                  maximum_iterations=max_summary_length)

    return training_logits, inference_logits


In [28]:
def seq2seq_model(input_data, target_data, keep_prob, text_length, summary_length, max_summary_length, 
                  vocab_size, rnn_size, num_layers, vocab_to_int, batch_size):
    '''Wire the encoder and decoder together.

    Looks up embeddings for the inputs in the global `word_embedding_matrix`,
    runs the encoder, derives the decoder input from `target_data`, and runs the
    decoder. Returns (training_logits, inference_logits).
    '''
    # The pre-trained vectors plus the randomly initialised ones
    embeddings = word_embedding_matrix

    # Encoder side
    encoder_inputs = tf.nn.embedding_lookup(embeddings, input_data)
    enc_output, enc_state = encoding_layer(
        rnn_size, text_length, num_layers, encoder_inputs, keep_prob
    )

    # Decoder side
    decoder_ids = process_encoding_input(target_data, vocab_to_int, batch_size)
    decoder_inputs = tf.nn.embedding_lookup(embeddings, decoder_ids)

    logits_pair = decoding_layer(
        decoder_inputs,
        embeddings,
        enc_output,
        enc_state,
        vocab_size,
        text_length,
        summary_length,
        max_summary_length,
        rnn_size,
        vocab_to_int,
        keep_prob,
        batch_size,
        num_layers,
    )
    training_logits, inference_logits = logits_pair

    return training_logits, inference_logits
     


In [29]:
def pad_sentence_batch(sentence_batch):
    """Right-pad every sentence in the batch to the length of the longest one.

    The filler is the id stored under '' in the global `vocab_to_int`. A new
    list of new lists is returned; the input sentences are not modified.
    """
    target_length = max(map(len, sentence_batch))

    padded_batch = []
    for sentence in sentence_batch:
        filler = [vocab_to_int['']] * (target_length - len(sentence))
        padded_batch.append(sentence + filler)
    return padded_batch

In [30]:
def get_batches(summaries, texts, batch_size):
    """Yield padded batches of summaries and texts together with their lengths.

    Only full batches are produced: the trailing `len(texts) % batch_size`
    items are dropped.

    Args:
        summaries: List of summaries, each a list of ids.
        texts: List of texts, each a list of ids, aligned with `summaries`.
        batch_size: Number of items per batch.

    Yields:
        Tuples (pad_summaries_batch, pad_texts_batch, summaries_lengths,
        texts_lengths). The first two are NumPy arrays built from the batch
        after pad_sentence_batch; the last two are lists holding the length of
        each sequence BEFORE padding.
    """
    for batch_i in range(0, len(texts)//batch_size):
        start_i = batch_i * batch_size
        summaries_batch = summaries[start_i:start_i + batch_size]
        texts_batch = texts[start_i:start_i + batch_size]

        # Measure the sequences before padding. Measuring the padded rows would
        # report the same (maximum) length for every item, which makes any
        # sequence mask built from these lengths cover the padding as well.
        summaries_lengths = [len(summary) for summary in summaries_batch]
        texts_lengths = [len(text) for text in texts_batch]

        pad_summaries_batch = np.array(pad_sentence_batch(summaries_batch))
        pad_texts_batch = np.array(pad_sentence_batch(texts_batch))

        yield pad_summaries_batch, pad_texts_batch, summaries_lengths, texts_lengths

In [31]:
# Set the Hyperparameters
# Number of passes over the training subset (the training loop runs epochs 1..epochs).
# The trailing "# 100" is presumably an earlier/alternative setting - TODO confirm.
epochs = 10 # 100
# Number of (summary, text) pairs per batch; passed to get_batches and seq2seq_model.
batch_size = 4
# Passed to seq2seq_model as the size of each LSTM layer.
rnn_size = 256
# Passed to seq2seq_model as the number of encoder/decoder layers to build.
num_layers = 2
# Initial learning rate; given to the optimizer and decayed by the training loop.
learning_rate = 0.005
# Value fed to the keep_prob placeholder during training.
keep_probability = 0.75

In [32]:
# Build the graph
# NOTE(review): as recorded in the traceback output of this cell, the build
# currently fails inside encoding_layer with OperatorNotAllowedInGraphError,
# so nothing below the seq2seq_model call is reached.
train_graph = tf.Graph()
# Set the graph to default to ensure that it is ready for training
with train_graph.as_default():
    
    # Load the model inputs    
    input_data, targets, lr, keep_prob, summary_length, max_summary_length, text_length = model_inputs()

    # Create the training and inference logits.
    # The input ids are reversed along the last axis before being encoded, and
    # the vocabulary size handed to the model is len(vocab_to_int)+1.
    training_logits, inference_logits = seq2seq_model(tf.reverse(input_data, [-1]),
                                                      targets, 
                                                      keep_prob,   
                                                      text_length,
                                                      summary_length,
                                                      max_summary_length,
                                                      len(vocab_to_int)+1,
                                                      rnn_size, 
                                                      num_layers, 
                                                      vocab_to_int,
                                                      batch_size)
    
    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_logits.rnn_output, 'logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')
    
    # Create the weights for sequence_loss
    # NOTE(review): `masks` is built here but never passed to the loss below, so
    # padded positions are not excluded from the cost - confirm intent.
    masks = tf.sequence_mask(summary_length, max_summary_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        # Loss function
        cost = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)(targets, training_logits)

        # Optimizer
        # NOTE(review): the optimizer receives the Python float `learning_rate`,
        # not the `lr` placeholder, so values fed to `lr` during training (and
        # the decay applied to `learning_rate` later) do not reach it.
        optimizer = tf.keras.optimizers.Adam(learning_rate)

        # Gradient Clipping
        # NOTE(review): gradients are requested with respect to
        # `training_logits` rather than the trainable variables, and are then
        # paired with `train_graph.variables`; verify that both
        # `optimizer.get_gradients` and `Graph.variables` exist in the
        # installed TensorFlow/Keras version.
        gradients = optimizer.get_gradients(cost, training_logits)
        capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in zip(gradients, train_graph.variables) if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

print("Graph is built.")

Msg: From C:\Users\Burak\AppData\Local\Temp\ipykernel_8664\611900757.py:4: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.




OperatorNotAllowedInGraphError: in user code:

    File "C:\Users\Burak\AppData\Local\Temp\ipykernel_8664\265531533.py", line 5, in build_graph  *
        training_logits, inference_logits = seq2seq_model(
    File "C:\Users\Burak\AppData\Local\Temp\ipykernel_8664\1271019391.py", line 9, in seq2seq_model  *
        enc_output, enc_state = encoding_layer(rnn_size, text_length, num_layers, enc_embed_input, keep_prob)
    File "C:\Users\Burak\AppData\Local\Temp\ipykernel_8664\1646290235.py", line 9, in encoding_layer  *
        cell_fw = tf.keras.layers.LSTM(rnn_size,
    File "c:\Users\Burak\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\layers\rnn\lstm.py", line 463, in __init__  **
        cell = LSTMCell(
    File "c:\Users\Burak\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\layers\rnn\lstm.py", line 133, in __init__
        self.dropout = min(1.0, max(0.0, dropout))

    OperatorNotAllowedInGraphError: Using a symbolic `tf.Tensor` as a Python `bool` is not allowed. You can attempt the following resolutions to the problem: If you are running in Graph mode, use Eager execution mode or decorate this function with @tf.function. If you are using AutoGraph, you can try decorating this function with @tf.function. If that does not work, then you may be using an unsupported feature or your source code may not be visible to AutoGraph. See https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/autograph/g3doc/reference/limitations.md#access-to-source-code for more information.


In [None]:
# Train on a 3000-pair window of the length-sorted data, starting at pair 2000.
start = 2000
end = start + 3000
training_window = slice(start, end)

sorted_summaries_short = sorted_summaries[training_window]
sorted_texts_short = sorted_texts[training_window]

# First and last item of the window
print("The shortest text length:", len(sorted_texts_short[0]))
print("The longest text length:",len(sorted_texts_short[-1]))

In [None]:
# Train the Model
learning_rate_decay = 0.95
min_learning_rate = 0.0005
display_step = 20 # Check training loss after every 20 batches
stop_early = 0 
stop = 6 # Stop training after this many consecutive update checks without improvement
per_epoch = 3 # Make 3 update checks per epoch
# Number of batches between update checks. Clamped to at least 1 so that a very
# small training set cannot turn the modulo test below into a division by zero.
update_check = max((len(sorted_texts_short)//batch_size//per_epoch)-1, 1)

update_loss = 0 
batch_loss = 0
summary_update_loss = [] # Record the update losses for saving improvements in the model

# Session-based training is TF1-style; under TensorFlow 2.x these entry points
# live in tf.compat.v1 (the same namespace model_inputs uses for placeholders).
tf.compat.v1.reset_default_graph()
checkpoint = "best_model.ckpt"  #300k sentence
with tf.compat.v1.Session(graph=train_graph) as sess:
    sess.run(tf.compat.v1.global_variables_initializer())

    # Create the Saver once: building a new one at every checkpoint would keep
    # adding save/restore ops to the graph.
    saver = tf.compat.v1.train.Saver()
    
    # If we want to continue training a previous session
    # loader = tf.compat.v1.train.import_meta_graph(checkpoint + '.meta')
    # loader.restore(sess, checkpoint)
    for epoch_i in range(1, epochs+1):
        update_loss = 0
        batch_loss = 0
        for batch_i, (summaries_batch, texts_batch, summaries_lengths, texts_lengths) in enumerate(
                get_batches(sorted_summaries_short, sorted_texts_short, batch_size)):
            start_time = time.time()
            _, loss = sess.run(
                [train_op, cost],
                {input_data: texts_batch,
                 targets: summaries_batch,
                 lr: learning_rate,
                 summary_length: summaries_lengths,
                 text_length: texts_lengths,
                 keep_prob: keep_probability})

            batch_loss += loss
            update_loss += loss
            end_time = time.time()
            batch_time = end_time - start_time

            if batch_i % display_step == 0 and batch_i > 0:
                # "Seconds" is an estimate: the duration of the latest batch
                # multiplied by the number of batches per report.
                print('Epoch {:>3}/{} Batch {:>4}/{} - Loss: {:>6.3f}, Seconds: {:>4.2f}'
                      .format(epoch_i,
                              epochs, 
                              batch_i, 
                              len(sorted_texts_short) // batch_size, 
                              batch_loss / display_step, 
                              batch_time*display_step))
                batch_loss = 0

            if batch_i % update_check == 0 and batch_i > 0:
                print("Average loss for this update:", round(update_loss/update_check,3))
                summary_update_loss.append(update_loss)

                # If the update loss is at a new minimum, save the model
                if update_loss <= min(summary_update_loss):
                    print('New Record!') 
                    stop_early = 0
                    saver.save(sess, checkpoint)

                else:
                    print("No Improvement.")
                    stop_early += 1
                    if stop_early == stop:
                        # Leaves the batch loop only; the epoch loop is ended
                        # by the check at the bottom.
                        break
                update_loss = 0

        # Reduce learning rate, but not below its minimum value
        learning_rate *= learning_rate_decay
        if learning_rate < min_learning_rate:
            learning_rate = min_learning_rate
        
        if stop_early == stop:
            print("Stopping Training.")
            break
