 # Fitting the neural network 

In [1]:
#Imports
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
def prepare_data(df):
    """Reformats data so it is appropriate for Tensorflow DNNC.

    Splits ``df`` into a feature frame and a label series:

    * every column except ``'lang'`` becomes a feature, renamed positionally
      to ``trigram_0``, ``trigram_1``, ... (the original column names are
      discarded);
    * the ``'lang'`` column is mapped to an integer class id.

    Returns the pair ``(x, y)``. ``df`` itself is not modified.
    """
    # Language code -> class id. Series.map yields NaN for any value
    # that is not a key of this dict.
    lang_to_id = {"eng": 0, "deu": 1, "spa": 2, "fra": 3, "por": 4, "ita": 5}

    x = df.drop(columns=['lang'])
    x.columns = ['trigram_{}'.format(position) for position in range(x.shape[1])]

    y = df['lang'].map(lang_to_id)
    return (x, y)

In [3]:
def get_data(feat_type, n_train=50000, n_valid=5000, data_dir="ANN_features"):
    """Gets the training and validation data sets for a specific feature type.

    Reads ``<data_dir>/train_<feat_type>.csv`` and
    ``<data_dir>/valid_<feat_type>.csv`` (the first CSV column is used as the
    index), keeps only the leading rows of each, and runs both through
    ``prepare_data``. No test set is loaded here.

    Args:
        feat_type: value substituted into the file names, e.g. ``'50'``.
        n_train: number of leading training rows to keep. ``None`` keeps
            every row.
        n_valid: number of leading validation rows to keep. ``None`` keeps
            every row.
        data_dir: directory that contains the CSV files.

    Returns:
        ``(train_x, train_y), (valid_x, valid_y)`` as produced by
        ``prepare_data``.
    """
    train = pd.read_csv("{}/train_{}.csv".format(data_dir, feat_type), index_col=0)
    valid = pd.read_csv("{}/valid_{}.csv".format(data_dir, feat_type), index_col=0)

    # Reduce number of records for testing purposes. iloc makes the
    # truncation explicitly positional regardless of the index labels.
    train_red = train.iloc[:n_train]
    valid_red = valid.iloc[:n_valid]

    (train_x, train_y) = prepare_data(train_red)
    (valid_x, valid_y) = prepare_data(valid_red)
    return (train_x, train_y), (valid_x, valid_y)

In [4]:
# Load the '50' feature set and unpack it into features / labels per split.
(train_x, train_y), (valid_x, valid_y) = get_data('50')

# Report how many rows ended up in the training and validation splits.
print("{} {}".format(len(train_x), len(valid_x)))

# Preview the first feature rows (rendered as this cell's output).
train_x.head()

50000 5000


Unnamed: 0,trigram_0,trigram_1,trigram_2,trigram_3,trigram_4,trigram_5,trigram_6,trigram_7,trigram_8,trigram_9,...,trigram_187,trigram_188,trigram_189,trigram_190,trigram_191,trigram_192,trigram_193,trigram_194,trigram_195,trigram_196
0,0,0,0,0,0,0,0,0,0,0,...,0,1,1,0,0,1,0,0,2,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
#Input functions 
def train_input_fn(features, labels, batch_size =100, shuffle_buffer_size=1000):
    """An input function for training.

    Args:
        features: column-oriented feature container; it is converted with
            ``dict(features)`` before being sliced.
        labels: labels sliced in step with ``features``.
        batch_size: number of examples per batch.
        shuffle_buffer_size: size of the buffer ``Dataset.shuffle`` samples
            from. With a buffer smaller than the data set the shuffle is only
            local, so pass at least ``len(labels)`` for a uniform shuffle.
            The default of 1000 keeps the previous behaviour.

    Returns:
        A ``tf.data.Dataset`` of ``(features, labels)`` batches that is
        shuffled and repeated without a count, so the caller must bound
        training (e.g. with ``steps``).
    """
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    # Shuffle, repeat, and batch the examples.
    dataset = dataset.shuffle(shuffle_buffer_size).repeat().batch(batch_size)
    # Return the dataset.
    return dataset

def eval_input_fn(features, labels, batch_size=100):
    """An input function for evaluation or prediction.

    ``features`` is converted with ``dict(features)``. When ``labels`` is
    None (prediction) the dataset yields features only; otherwise it yields
    ``(features, labels)`` pairs. The examples are batched in their original
    order: no shuffling and no repetition.
    """
    feature_map = dict(features)

    # Without labels only the features are sliced; with labels, both together.
    inputs = feature_map if labels is None else (feature_map, labels)
    sliced = tf.data.Dataset.from_tensor_slices(inputs)

    # Batching needs a concrete batch size.
    assert batch_size is not None, "batch_size must not be None"
    return sliced.batch(batch_size)


# Source: TensorFlow (2016). An Example of a DNNClassifier for the Iris dataset [Source code]. www.tensorflow.org
    

In [6]:
# Feature columns describe how to use the input: one numeric column per
# feature name in train_x.
my_feature_columns = [
    tf.feature_column.numeric_column(key=column_name)
    for column_name in train_x.keys()
]

In [7]:
"Fits a DNNC with the desired features and stores validation results "
# Build a DNN.
classifier = tf.estimator.DNNClassifier(
    feature_columns=my_feature_columns,
    # One hidden layer of 100 nodes.
    hidden_units=[100],
    # 6 languages.
    n_classes=6)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/var/folders/s2/82vv6ll16mn27w7gcdbccp3m0000gn/T/tmpqog4kjkt', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1264b5f98>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [8]:
# Train the model. The input function is wrapped in a lambda because the
# estimator calls input_fn without arguments. 1000 steps at the default
# batch size of 100 consume 100,000 examples.
classifier.train(
    input_fn=lambda: train_input_fn(train_x, train_y),
    steps=1000,
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 0 into /var/folders/s2/82vv6ll16mn27w7gcdbccp3m0000gn/T/tmpqog4kjkt/model.ckpt.
INFO:tensorflow:loss = 187.12834, step = 1
INFO:tensorflow:global_step/sec: 50.2262
INFO:tensorflow:loss = 29.067337, step = 101 (1.994 sec)
INFO:tensorflow:global_step/sec: 68.0759
INFO:tensorflow:loss = 19.616302, step = 201 (1.466 sec)
INFO:tensorflow:global_step/sec: 70.2691
INFO:tensorflow:loss = 15.339317, step = 301 (1.423 sec)
INFO:tensorflow:global_step/sec: 73.2619
INFO:tensorflow:loss = 18.76421, step = 401 (1.365 sec)
INFO:tensorflow:global_step/sec: 72.9322
INFO:tensorflow:loss = 16.237034, step = 501 (1.371 sec)
INFO:tensorflow:global_step/sec: 73.3312
INFO:tensorflow:loss = 16.860235, step = 601 (1.364 sec)
INFO:tensorflo

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1264b5898>