# In this notebook, I'll write the script for training the Order-Planner model defined in the reference paper
-------------------------------------------------------------------------------------------------------------------
link to paper -> https://arxiv.org/abs/1709.00155

-------------------------------------------------------------------------------------------------------------------
# Technology used: TensorFlow

As usual, I'll start with the utility cells:

In [1]:
# packages used for processing: 
import matplotlib.pyplot as plt # for visualization
import numpy as np

# for operating system related stuff
import os
import sys # for memory usage of objects
from subprocess import check_output

# The tensorflow_graph_package for this implementation
from Summary_Generator.Tensorflow_Graph.utils import *
from Summary_Generator.Text_Preprocessing_Helpers.pickling_tools import *

# to plot the images inline
%matplotlib inline

In [2]:
# Input data files are available in the "../Data/" directory.

def exec_command(cmd):
    """
        Run an external command and echo its output in the Python console.

        @params
        cmd = the command to run together with its arguments,
              ex: ['ls', '../input']

        The bytes captured by `check_output` are decoded as UTF-8 and
        printed; nothing is returned. A failing command surfaces as
        whatever exception `check_output` raises.
    """
    raw_output = check_output(cmd)
    print(raw_output.decode("utf8"))

In [3]:
# list the parent directory to confirm the layout of the project
exec_command(cmd=["ls", ".."])

Data
LICENSE
Literature
README.md
Scripts
TensorFlow_implementation



In [4]:
np.random.seed(seed=3) # fix NumPy's global seed for device-independent, consistent behaviour

In [5]:
''' Set the constants for the script '''

# root directory that holds every data file
data_path = "../Data"

# raw training files inside data_path, keyed by their role
data_files_paths = {
    role: os.path.join(data_path, file_name)
    for role, file_name in (
        ("table_content", "train.box"),
        ("nb_sentences", "train.nb"),
        ("train_sentences", "train.sent"),
    )
}

# base path for models (relative), and the preprocessed pickle inside data_path
base_model_path = "Models"
plug_and_play_data_file = os.path.join(data_path, "plug_and_play.pickle")

# constants for the preprocessing script
train_percentage = 95

## Unpickle the processed data file and create the train/dev partitions for it

In [6]:
# Load the preprocessed data from the file named by plug_and_play_data_file.
# NOTE(review): unPickleIt arrives through a star import (presumably from
# pickling_tools -- confirm). If it wraps pickle.load, only feed it trusted
# files, since unpickling can execute arbitrary code.
data = unPickleIt(plug_and_play_data_file)

In [15]:
# Split the loaded data into three (encodings, lookup) pairs. The sample-printing
# cell indexes each *_dict with the ids stored in the matching *_encodings.

# table fields
field_encodings, field_dict = data['field_encodings'], data['field_dict']

# table contents
content_encodings, content_dict = data['content_encodings'], data['content_dict']

# summary labels
label_encodings, label_dict = data['label_encodings'], data['label_dict']

## Create a randomized cell that prints a complete sample to verify the sanity of the processed data

In [25]:
# number of samples available, measured on the field encodings
total_samples = len(field_encodings)

# draw one sample index in the range [0, total_samples)
random_index = np.random.randint(total_samples)

# pull the field, content and label parts of the chosen sample
random_field_sample, content_sample, label_sample = (
    field_encodings[random_index],
    content_encodings[random_index],
    label_encodings[random_index],
)

# show the table as (field, content) pairs, decoded through the lookup dicts
print("Table Contents: ")
print([
    (field_dict[field_id], content_dict[content_id])
    for field_id, content_id in zip(random_field_sample, content_sample)
])

# show the decoded summary after a visual gap
print("\n")
print("Summary: ")
print([label_dict[label_id] for label_id in label_sample])

Table Contents: 
[('image', 'jim'), ('image', 'bob'), ('image', 'at'), ('image', 'relentless'), ('image', 'garage.jpg'), ('caption', 'jim'), ('caption', 'bob'), ('caption', 'performing'), ('caption', 'at'), ('caption', 'the'), ('caption', 'garage'), ('caption', ','), ('caption', '2010'), ('name', 'jim'), ('name', 'bob'), ('imagesize', '<none>'), ('background', 'solo_singer'), ('birthdate', '22'), ('birthdate', 'november'), ('birthdate', '1960'), ('origin', 'london'), ('origin', ','), ('origin', 'england'), ('genre', 'punk'), ('genre', 'rock'), ('genre', ','), ('genre', 'acoustic'), ('yearsactive', '1985'), ('yearsactive', '--'), ('label', 'the'), ('label', 'ten'), ('label', 'forty'), ('label', 'sound'), ('label', 'cherry'), ('label', 'red'), ('label', 'emi'), ('label', 'big'), ('label', 'cat'), ('label', 'rough'), ('label', 'trade'), ('label', 'fierce'), ('label', 'panda'), ('associatedacts', 'carter'), ('associatedacts', 'usm'), ('associatedacts', 'jim'), ('associatedacts', "'s"), ('a

Run the above cell multiple times to satisfy yourself that the data is still sane.