# Tensorflow with Sentiment Analysis

In [3]:
#Importing necessary librarys
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import pandas as pd

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

# Loading Datasets with URLs and associated phish labels

In [4]:
train_data = pd.read_csv("Training_Urls.csv")
test_data = pd.read_csv("Testing_Urls.csv")
                         
print('Number of Samples in training: ', len(train_data))
print('Number of Samples in testing: ', len(test_data))
print('Training data looks like:\n')

# Sample
train_data.head(5)

Number of Samples in training:  7240
Number of Samples in testing:  1820
Training data looks like:



Unnamed: 0,URL,Result
0,http://usps-parcel.duvrahomeimprovement.com/pa...,0
1,http://www.firefixo.tk/pdf/home/download/fe501...,0
2,http://louisck.net/,1
3,http://www.vic.cat,1
4,http://www.ycdc.gov.mm/,1


# One Example URL and its Classification [0 = Phishing, 1 = Legitimate]

In [5]:
print("Review:\n\n",train_data.iloc[4].URL)
print("\n\nPolarity:",train_data.iloc[4].Result)

Review:

 http://www.ycdc.gov.mm/


Polarity: 1


# Setting Up the Inputs for Estimator 

In [6]:
#Training input
train_input_fn = tf.estimator.inputs.pandas_input_fn(x=train_data, 
                                                     y=train_data["Result"], 
                                                     num_epochs=None,
                                                     shuffle=True)

#Predicting (train) input
predict_train_input_fn = tf.estimator.inputs.pandas_input_fn(x=train_data,
                                                             y=train_data["Result"],
                                                             shuffle=False)

#Predicting (test) input
predict_test_input_fn = tf.estimator.inputs.pandas_input_fn(x=test_data, 
                                                            y=test_data["Result"], 
                                                            shuffle=False)

# Setting Up the Google to Vector module to be Applied on the Feature Column

In [7]:
embedded_text_feature_column = hub.text_embedding_column(key="URL", 
                                                         module_spec="https://tfhub.dev/google/nnlm-en-dim128/1",
                                                         trainable=True)

INFO:tensorflow:Using /tmp/tfhub_modules to cache modules.


# Setting Up the Estimator

In [8]:
estimator = tf.estimator.DNNClassifier(hidden_units=[1000, 128],
                                       feature_columns=[embedded_text_feature_column],
                                       n_classes=2,
                                       activation_fn = tf.nn.relu,
                                       optimizer=tf.train.AdagradOptimizer(learning_rate=0.001))

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_is_chief': True, '_save_checkpoints_steps': None, '_task_id': 0, '_service': None, '_session_config': None, '_global_id_in_cluster': 0, '_evaluation_master': '', '_save_checkpoints_secs': 600, '_task_type': 'worker', '_save_summary_steps': 100, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f6d19960550>, '_tf_random_seed': None, '_num_worker_replicas': 1, '_num_ps_replicas': 0, '_master': '', '_model_dir': '/tmp/tmpbu943ekz', '_log_step_count_steps': 100}


# Training the DNN

In [9]:
estimator.train(input_fn=train_input_fn, steps=1000)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Initialize variable dnn/input_from_feature_columns/input_layer/URL_hub_module_embedding/module/embeddings/part_0:0 from checkpoint b'/tmp/tfhub_modules/32f2b2259e1cc8ca58c876921748361283e73997/variables/variables' with embeddings
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Saving checkpoints for 1 into /tmp/tmpbu943ekz/model.ckpt.
INFO:tensorflow:step = 1, loss = 88.78236
INFO:tensorflow:global_step/sec: 133.393
INFO:tensorflow:step = 101, loss = 88.398285 (0.752 sec)
INFO:tensorflow:global_step/sec: 149.783
INFO:tensorflow:step = 201, loss = 88.1299 (0.668 sec)
INFO:tensorflow:global_step/sec: 156.739
INFO:tensorflow:step = 301, loss = 87.95284 (0.638 sec)
INFO:tensorflow:global_step/sec: 161.637
INFO:tensorflow:step = 401, loss = 87.19273 (0.618 sec)
INFO:tensorf

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f6d19973fd0>

# Evaluating the Model 

In [10]:
tf.logging.set_verbosity(tf.logging.ERROR)

train_eval_result = estimator.evaluate(input_fn=predict_train_input_fn)
test_eval_result = estimator.evaluate(input_fn=predict_test_input_fn)

print ("Training set accuracy: {accuracy}".format(**train_eval_result))
print ("Test set accuracy: {accuracy}".format(**test_eval_result))

Training set accuracy: 0.6743093729019165
Test set accuracy: 0.5076923370361328
