In [0]:
# Copyright 2019 Google Inc.

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at

#     http://www.apache.org/licenses/LICENSE-2.0

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

In [0]:
import tensorflow as tf; print(tf.__version__)

1.15.2


In [0]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In [0]:
!pip install bert-tensorflow

Collecting bert-tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/a6/66/7eb4e8b6ea35b7cc54c322c816f976167a43019750279a8473d355800a93/bert_tensorflow-1.0.1-py2.py3-none-any.whl (67kB)
[K     |████▉                           | 10kB 17.0MB/s eta 0:00:01[K     |█████████▊                      | 20kB 4.5MB/s eta 0:00:01[K     |██████████████▋                 | 30kB 6.2MB/s eta 0:00:01[K     |███████████████████▍            | 40kB 7.8MB/s eta 0:00:01[K     |████████████████████████▎       | 51kB 5.3MB/s eta 0:00:01[K     |█████████████████████████████▏  | 61kB 6.1MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 4.2MB/s 
Installing collected packages: bert-tensorflow
Successfully installed bert-tensorflow-1.0.1


In [0]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




In [0]:
# Mount Google Drive at /content/drive; the CSV inputs and the model output
# directory used below all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Set the output directory for saving model file
# Optionally, set a GCP bucket location

OUTPUT_DIR = '/content/drive/My Drive/security_test_models'#@param {type:"string"}
#@markdown Whether or not to clear/delete the directory and create a new one
DO_DELETE = True #@param {type:"boolean"}
#@markdown Set USE_BUCKET and BUCKET if you want to (optionally) store model output on GCP bucket.
USE_BUCKET = False #@param {type:"boolean"}
BUCKET = 'BUCKET_NAME' #@param {type:"string"}

if USE_BUCKET:
  # Drop leading slashes so an absolute-looking OUTPUT_DIR does not produce
  # 'gs://bucket//...', i.e. object names that begin with '/'.
  OUTPUT_DIR = 'gs://{}/{}'.format(BUCKET, OUTPUT_DIR.lstrip('/'))
  from google.colab import auth
  auth.authenticate_user()

# A missing directory is the only condition that is safe to ignore here, so
# test for it explicitly instead of swallowing every exception; real failures
# (permissions, unmounted Drive, bad bucket) now surface to the user.
if DO_DELETE and tf.gfile.Exists(OUTPUT_DIR):
  tf.gfile.DeleteRecursively(OUTPUT_DIR)
tf.gfile.MakeDirs(OUTPUT_DIR)
print('***** Model output directory: {} *****'.format(OUTPUT_DIR))


***** Model output directory: /content/drive/My Drive/security_test_models *****


# Data

In [0]:
from tensorflow import keras
import os
import re

# Read the raw train/test sentences from Drive. Every column is parsed as
# text so nothing in the sentences is coerced to a number or NaN-typed float.
TRAIN_TEXT_CSV = '/content/drive/My Drive/train_raw_sentences.csv'
TEST_TEXT_CSV = '/content/drive/My Drive/test_raw_sentences.csv'

train_text = pd.read_csv(TRAIN_TEXT_CSV, dtype='str')
test_text = pd.read_csv(TEST_TEXT_CSV, dtype='str')

In [0]:
train_text

Unnamed: 0,0
0,"The currently used Rails version, in the stabl..."
1,"'This is a useful security improvement, that I..."
2,We just had an admin accidentally push to a br...
3,'It's possible to check publicly if a private ...
4,I agree that people are using self-signed cert...
...,...
103867,Cross-site scripting (XSS) vulnerability in in...
103868,SQL injection vulnerability in dispatch.php in...
103869,Cisco ASR 1000 devices with software before 3....
103870,Multiple cross-site scripting (XSS) vulnerabil...


In [0]:
train_text.head()

Unnamed: 0,0
0,"The currently used Rails version, in the stabl..."
1,"'This is a useful security improvement, that I..."
2,We just had an admin accidentally push to a br...
3,'It's possible to check publicly if a private ...
4,I agree that people are using self-signed cert...


In [0]:
test_text.head()

Unnamed: 0,0
0,"Vulnerability in the Java SE, Java SE Embedded..."
1,Remove duplicate content issue in doc template...
2,There's a bug in the GitHub importer library t...
3,Add Python 3.6 classifier to the Python packag...
4,The save_submission function in mod/assign/ext...


In [0]:
# Read the labels that accompany the train/test sentences (dtypes inferred).
TRAIN_LABELS_CSV = '/content/drive/My Drive/train_labels.csv'
TEST_LABELS_CSV = '/content/drive/My Drive/test_labels.csv'

train_labels = pd.read_csv(TRAIN_LABELS_CSV)
test_labels = pd.read_csv(TEST_LABELS_CSV)

In [0]:
# Class ids handed to BERT's feature conversion; the labels are binary (0 / 1).
label_list = list(range(2))

In [0]:
# Labels first, then texts: both frames should report the same number of rows.
for frame in (train_labels, train_text):
    print(frame.shape)

(103872, 1)
(103872, 1)


In [0]:
# Labels first, then texts: both frames should report the same number of rows.
for frame in (test_labels, test_text):
    print(frame.shape)

(11543, 1)
(11543, 1)


In [0]:
# Rename the single text column (shown as '0' in the display above) to 'Text'.
# This mutates train_text in place; later cells rely on the 'Text' name.
train_text.columns=['Text']
train_text.head()

Unnamed: 0,Text
0,"The currently used Rails version, in the stabl..."
1,"'This is a useful security improvement, that I..."
2,We just had an admin accidentally push to a br...
3,'It's possible to check publicly if a private ...
4,I agree that people are using self-signed cert...


In [0]:
# Rename the single text column (shown as '0' in the display above) to 'Text'.
# This mutates test_text in place; later cells rely on the 'Text' name.
test_text.columns=['Text']
test_text.head()

Unnamed: 0,Text
0,"Vulnerability in the Java SE, Java SE Embedded..."
1,Remove duplicate content issue in doc template...
2,There's a bug in the GitHub importer library t...
3,Add Python 3.6 classifier to the Python packag...
4,The save_submission function in mod/assign/ext...


In [0]:
# Name the single column of each labels frame 'Label' (in place); later cells
# look the labels up by this name.
train_labels.columns=['Label']
test_labels.columns=['Label']

In [0]:
train_labels['Label'].value_counts()

1    52150
0    51722
Name: Label, dtype: int64

In [0]:
test_labels['Label'].value_counts()

1    5793
0    5750
Name: Label, dtype: int64

In [0]:
# Put each text frame side by side with its labels; rows are matched on index.
train = pd.concat((train_text, train_labels), axis='columns')
test = pd.concat((test_text, test_labels), axis='columns')

In [0]:
train.head()

Unnamed: 0,Text,Label
0,"The currently used Rails version, in the stabl...",1
1,"'This is a useful security improvement, that I...",1
2,We just had an admin accidentally push to a br...,1
3,'It's possible to check publicly if a private ...,1
4,I agree that people are using self-signed cert...,1


In [0]:
# Sanity check on the concat: subtract the joined 'Label' column from the
# original labels, index-aligned. Every difference should be 0. fill_value=3
# stands in for a label that is missing on one side, so with 0/1 labels such a
# row can never come out as 0.
train_label_diff=train_labels['Label'].subtract(train['Label'], fill_value=3)

In [0]:
train_label_diff.value_counts()

0    103872
Name: Label, dtype: int64

In [0]:
# Same sanity check for the test split: index-aligned difference between the
# original labels and the joined 'Label' column. fill_value=3 replaces a label
# missing on one side, so with 0/1 labels any such row shows up as non-zero.
test_label_diff=test_labels['Label'].subtract(test['Label'], fill_value=3)
# Expect a single bucket, 0, whose count equals the number of test rows.
test_label_diff.value_counts()

0    11543
Name: Label, dtype: int64

In [0]:
def _as_value_list(data, what):
    """Return the row values of ``data`` as a plain Python list.

    Iterating a DataFrame yields its column *names*, not its rows, so a
    single-column DataFrame is unwrapped to that column first. ``what`` is
    only used to name the argument in error messages.
    """
    if isinstance(data, pd.DataFrame):
        if data.shape[1] != 1:
            raise ValueError(
                '{} must have exactly one column, got {}'.format(what, data.shape[1]))
        data = data.iloc[:, 0]
    if isinstance(data, pd.Series):
        return data.tolist()
    return list(data)


def create_examples(lines, set_type, labels=None):
    """Wrap texts (and their labels) in BERT ``InputExample`` objects.

    Args:
        lines: The texts. A list or other iterable, a pandas Series, or a
            single-column DataFrame. Each value is converted with ``str``.
        set_type: Name of the split, e.g. ``'train'`` or ``'test'``. It is
            stored unchanged as the ``guid`` of every example, so guids are
            shared within a split rather than unique.
        labels: Labels aligned one-to-one with ``lines``, in any of the same
            container types. Required for the ``'train'`` split. For other
            splits it may be omitted, in which case every example gets the
            placeholder label ``0``; when given, the labels are used.

    Returns:
        A list of ``run_classifier.InputExample`` (``text_b`` is always
        ``None``), one per text, in input order.

    Raises:
        ValueError: If ``labels`` is missing for the ``'train'`` split, if a
            DataFrame argument does not have exactly one column, or if
            ``lines`` and ``labels`` differ in length.
    """
    guid = f'{set_type}'
    texts = _as_value_list(lines, 'lines')

    if labels is None:
        if guid == 'train':
            raise ValueError("labels are required when set_type is 'train'")
        # Unlabelled split: keep the conventional placeholder label.
        label_values = [0] * len(texts)
    else:
        label_values = _as_value_list(labels, 'labels')
        if len(label_values) != len(texts):
            # zip() would silently truncate to the shorter input.
            raise ValueError(
                'lines and labels differ in length: {} vs {}'.format(
                    len(texts), len(label_values)))

    return [
        run_classifier.InputExample(guid=guid, text_a=str(text), text_b=None, label=label)
        for text, label in zip(texts, label_values)
    ]

# Pass the actual columns rather than the DataFrames: iterating a DataFrame
# yields its column names, which would produce a single bogus example.
train_InputExamples=create_examples(train_text['Text'], 'train', labels=train_labels['Label'])
test_InputExamples=create_examples(test_text['Text'], 'test', labels=test_labels['Label'])

In [0]:
type(train_text['Text'])

pandas.core.series.Series

In [0]:
# Use the InputExample class from BERT's run_classifier code to create examples from the data
def _row_to_input_example(row):
    """Convert one row holding 'Text' and 'Label' into a single-sentence InputExample."""
    return bert.run_classifier.InputExample(
        guid=None,  # Globally unique ID for bookkeeping, unused in this example
        text_a=str(row['Text']),
        text_b=None,
        label=int(row['Label']))

# One example per row; the result of each apply is a Series of InputExample objects.
train_InputExamples = train.apply(_row_to_input_example, axis=1)
test_InputExamples = test.apply(_row_to_input_example, axis=1)


In [0]:
train_InputExamples

0         <bert.run_classifier.InputExample object at 0x...
1         <bert.run_classifier.InputExample object at 0x...
2         <bert.run_classifier.InputExample object at 0x...
3         <bert.run_classifier.InputExample object at 0x...
4         <bert.run_classifier.InputExample object at 0x...
                                ...                        
103867    <bert.run_classifier.InputExample object at 0x...
103868    <bert.run_classifier.InputExample object at 0x...
103869    <bert.run_classifier.InputExample object at 0x...
103870    <bert.run_classifier.InputExample object at 0x...
103871    <bert.run_classifier.InputExample object at 0x...
Length: 103872, dtype: object

In [0]:
test_InputExamples

0        <bert.run_classifier.InputExample object at 0x...
1        <bert.run_classifier.InputExample object at 0x...
2        <bert.run_classifier.InputExample object at 0x...
3        <bert.run_classifier.InputExample object at 0x...
4        <bert.run_classifier.InputExample object at 0x...
                               ...                        
11538    <bert.run_classifier.InputExample object at 0x...
11539    <bert.run_classifier.InputExample object at 0x...
11540    <bert.run_classifier.InputExample object at 0x...
11541    <bert.run_classifier.InputExample object at 0x...
11542    <bert.run_classifier.InputExample object at 0x...
Length: 11543, dtype: object

In [0]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"
# BERT_MODEL_HUB="https://tfhub.dev/google/bert_uncased_L-24_H-1024_A-16/1"

def create_tokenizer_from_hub_module():
  """Build a FullTokenizer from the tokenization info exposed by the Hub module.

  The module at BERT_MODEL_HUB is loaded in a throwaway graph and its
  "tokenization_info" signature is evaluated in a short-lived session to get
  the vocab file and the lower-casing flag.

  Returns:
    A bert.tokenization.FullTokenizer configured with those two values.
  """
  with tf.Graph().as_default():
    hub_module = hub.Module(BERT_MODEL_HUB)
    info = hub_module(signature="tokenization_info", as_dict=True)
    fetches = [info["vocab_file"], info["do_lower_case"]]

    with tf.Session() as session:
      vocab_path, lower_case = session.run(fetches)

  # The fetched values are plain Python/numpy values, so the tokenizer can be
  # built after the temporary graph and session have been released.
  return bert.tokenization.FullTokenizer(
      vocab_file=vocab_path, do_lower_case=lower_case)

tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [0]:
# Upper bound on tokens per sequence handed to the feature conversion.
MAX_SEQ_LENGTH = 128

# Turn the train and test InputExamples into the InputFeatures BERT consumes.
train_features = bert.run_classifier.convert_examples_to_features(
    examples=train_InputExamples,
    label_list=label_list,
    max_seq_length=MAX_SEQ_LENGTH,
    tokenizer=tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(
    examples=test_InputExamples,
    label_list=label_list,
    max_seq_length=MAX_SEQ_LENGTH,
    tokenizer=tokenizer)

INFO:tensorflow:Writing example 0 of 103872


INFO:tensorflow:Writing example 0 of 103872


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the currently used rails version , in the stable branch , is ins ##ecure you should update the gem ##fi ##le . lock to hot ##fi ##x this . http : / / web ##log . ruby ##on ##rail ##s . org / 2014 / 2 / 18 / rails _ 3 _ 2 _ 17 _ 4 _ 0 _ 3 _ and _ 4 _ 1 _ 0 _ beta ##2 _ have _ been _ released / [SEP]


INFO:tensorflow:tokens: [CLS] the currently used rails version , in the stable branch , is ins ##ecure you should update the gem ##fi ##le . lock to hot ##fi ##x this . http : / / web ##log . ruby ##on ##rail ##s . org / 2014 / 2 / 18 / rails _ 3 _ 2 _ 17 _ 4 _ 0 _ 3 _ and _ 4 _ 1 _ 0 _ beta ##2 _ have _ been _ released / [SEP]


INFO:tensorflow:input_ids: 101 1996 2747 2109 15168 2544 1010 1999 1996 6540 3589 1010 2003 16021 29150 2017 2323 10651 1996 17070 8873 2571 1012 5843 2000 2980 8873 2595 2023 1012 8299 1024 1013 1013 4773 21197 1012 10090 2239 15118 2015 1012 8917 1013 2297 1013 1016 1013 2324 1013 15168 1035 1017 1035 1016 1035 2459 1035 1018 1035 1014 1035 1017 1035 1998 1035 1018 1035 1015 1035 1014 1035 8247 2475 1035 2031 1035 2042 1035 2207 1013 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1996 2747 2109 15168 2544 1010 1999 1996 6540 3589 1010 2003 16021 29150 2017 2323 10651 1996 17070 8873 2571 1012 5843 2000 2980 8873 2595 2023 1012 8299 1024 1013 1013 4773 21197 1012 10090 2239 15118 2015 1012 8917 1013 2297 1013 1016 1013 2324 1013 15168 1035 1017 1035 1016 1035 2459 1035 1018 1035 1014 1035 1017 1035 1998 1035 1018 1035 1015 1035 1014 1035 8247 2475 1035 2031 1035 2042 1035 2207 1013 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] ' this is a useful security improvement , that i recommend gets integrated into gi ##tl ##ab . it protects users , in the event that their password ##s get stolen from other sites , etc . i found a good gem for this : http : / / ruby ##do ##c . info / gi ##th ##ub / md ##p / rot ##p / master / frames , however , given that it appears gi ##tl ##ab uses devi ##se for au ##th , we should probably use this plug ##in : https : / / gi ##th ##ub . com / w ##ml ##ele / devi ##se - ot ##p i intend to submit a merge request for this , so i ' ll [SEP]


INFO:tensorflow:tokens: [CLS] ' this is a useful security improvement , that i recommend gets integrated into gi ##tl ##ab . it protects users , in the event that their password ##s get stolen from other sites , etc . i found a good gem for this : http : / / ruby ##do ##c . info / gi ##th ##ub / md ##p / rot ##p / master / frames , however , given that it appears gi ##tl ##ab uses devi ##se for au ##th , we should probably use this plug ##in : https : / / gi ##th ##ub . com / w ##ml ##ele / devi ##se - ot ##p i intend to submit a merge request for this , so i ' ll [SEP]


INFO:tensorflow:input_ids: 101 1005 2023 2003 1037 6179 3036 7620 1010 2008 1045 16755 4152 6377 2046 21025 19646 7875 1012 2009 18227 5198 1010 1999 1996 2724 2008 2037 20786 2015 2131 7376 2013 2060 4573 1010 4385 1012 1045 2179 1037 2204 17070 2005 2023 1024 8299 1024 1013 1013 10090 3527 2278 1012 18558 1013 21025 2705 12083 1013 9108 2361 1013 18672 2361 1013 3040 1013 11048 1010 2174 1010 2445 2008 2009 3544 21025 19646 7875 3594 14386 3366 2005 8740 2705 1010 2057 2323 2763 2224 2023 13354 2378 1024 16770 1024 1013 1013 21025 2705 12083 1012 4012 1013 1059 19968 12260 1013 14386 3366 1011 27178 2361 1045 13566 2000 12040 1037 13590 5227 2005 2023 1010 2061 1045 1005 2222 102


INFO:tensorflow:input_ids: 101 1005 2023 2003 1037 6179 3036 7620 1010 2008 1045 16755 4152 6377 2046 21025 19646 7875 1012 2009 18227 5198 1010 1999 1996 2724 2008 2037 20786 2015 2131 7376 2013 2060 4573 1010 4385 1012 1045 2179 1037 2204 17070 2005 2023 1024 8299 1024 1013 1013 10090 3527 2278 1012 18558 1013 21025 2705 12083 1013 9108 2361 1013 18672 2361 1013 3040 1013 11048 1010 2174 1010 2445 2008 2009 3544 21025 19646 7875 3594 14386 3366 2005 8740 2705 1010 2057 2323 2763 2224 2023 13354 2378 1024 16770 1024 1013 1013 21025 2705 12083 1012 4012 1013 1059 19968 12260 1013 14386 3366 1011 27178 2361 1045 13566 2000 12040 1037 13590 5227 2005 2023 1010 2061 1045 1005 2222 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] we just had an ad ##min accidentally push to a branch that is protected and they are listed as a developer in . if they are not explicitly listed as an owner / master , an ad ##min should not be able to push to a protected branch . if they need to , an administrator could easily add themselves to any project . this prevents accidental commits to locked branches . [SEP]


INFO:tensorflow:tokens: [CLS] we just had an ad ##min accidentally push to a branch that is protected and they are listed as a developer in . if they are not explicitly listed as an owner / master , an ad ##min should not be able to push to a protected branch . if they need to , an administrator could easily add themselves to any project . this prevents accidental commits to locked branches . [SEP]


INFO:tensorflow:input_ids: 101 2057 2074 2018 2019 4748 10020 9554 5245 2000 1037 3589 2008 2003 5123 1998 2027 2024 3205 2004 1037 9722 1999 1012 2065 2027 2024 2025 12045 3205 2004 2019 3954 1013 3040 1010 2019 4748 10020 2323 2025 2022 2583 2000 5245 2000 1037 5123 3589 1012 2065 2027 2342 2000 1010 2019 8911 2071 4089 5587 3209 2000 2151 2622 1012 2023 16263 17128 27791 2000 5299 5628 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2057 2074 2018 2019 4748 10020 9554 5245 2000 1037 3589 2008 2003 5123 1998 2027 2024 3205 2004 1037 9722 1999 1012 2065 2027 2024 2025 12045 3205 2004 2019 3954 1013 3040 1010 2019 4748 10020 2323 2025 2022 2583 2000 5245 2000 1037 5123 3589 1012 2065 2027 2342 2000 1010 2019 8911 2071 4089 5587 3209 2000 2151 2622 1012 2023 16263 17128 27791 2000 5299 5628 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] ' it ' s possible to check publicly if a private repository with a certain name exists . # # # steps to reproduce - try to push to existing / non - existing repository via http / https # # # # initial ##ize repository ` ` ` # initial ##ize repository gi ##t in ##it test cd test touch foo gi ##t add foo gi ##t commit - m " add foo " ` ` ` # # # # push to existing repository ` ` ` # push via https gi ##t push https : / / gi ##tl ##ab . com / gi ##tl ##ab - org / gi ##tl ##ab - ce . gi ##t master user ##name for ' https [SEP]


INFO:tensorflow:tokens: [CLS] ' it ' s possible to check publicly if a private repository with a certain name exists . # # # steps to reproduce - try to push to existing / non - existing repository via http / https # # # # initial ##ize repository ` ` ` # initial ##ize repository gi ##t in ##it test cd test touch foo gi ##t add foo gi ##t commit - m " add foo " ` ` ` # # # # push to existing repository ` ` ` # push via https gi ##t push https : / / gi ##tl ##ab . com / gi ##tl ##ab - org / gi ##tl ##ab - ce . gi ##t master user ##name for ' https [SEP]


INFO:tensorflow:input_ids: 101 1005 2009 1005 1055 2825 2000 4638 7271 2065 1037 2797 22409 2007 1037 3056 2171 6526 1012 1001 1001 1001 4084 2000 21376 1011 3046 2000 5245 2000 4493 1013 2512 1011 4493 22409 3081 8299 1013 16770 1001 1001 1001 1001 3988 4697 22409 1036 1036 1036 1001 3988 4697 22409 21025 2102 1999 4183 3231 3729 3231 3543 29379 21025 2102 5587 29379 21025 2102 10797 1011 1049 1000 5587 29379 1000 1036 1036 1036 1001 1001 1001 1001 5245 2000 4493 22409 1036 1036 1036 1001 5245 3081 16770 21025 2102 5245 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 8917 1013 21025 19646 7875 1011 8292 1012 21025 2102 3040 5310 18442 2005 1005 16770 102


INFO:tensorflow:input_ids: 101 1005 2009 1005 1055 2825 2000 4638 7271 2065 1037 2797 22409 2007 1037 3056 2171 6526 1012 1001 1001 1001 4084 2000 21376 1011 3046 2000 5245 2000 4493 1013 2512 1011 4493 22409 3081 8299 1013 16770 1001 1001 1001 1001 3988 4697 22409 1036 1036 1036 1001 3988 4697 22409 21025 2102 1999 4183 3231 3729 3231 3543 29379 21025 2102 5587 29379 21025 2102 10797 1011 1049 1000 5587 29379 1000 1036 1036 1036 1001 1001 1001 1001 5245 2000 4493 22409 1036 1036 1036 1001 5245 3081 16770 21025 2102 5245 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 8917 1013 21025 19646 7875 1011 8292 1012 21025 2102 3040 5310 18442 2005 1005 16770 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] i agree that people are using self - signed certificates a lot , but for real organizations , having ss ##l verification on web - hooks turned off isn ' t good . maybe this should be something that is con ##fi ##gur ##able ? https : / / gi ##tl ##ab . com / gi ##tl ##ab - org / gi ##tl ##ab - ce / b ##lo ##b / master / app / models / web _ hook . rb # l ##34 [SEP]


INFO:tensorflow:tokens: [CLS] i agree that people are using self - signed certificates a lot , but for real organizations , having ss ##l verification on web - hooks turned off isn ' t good . maybe this should be something that is con ##fi ##gur ##able ? https : / / gi ##tl ##ab . com / gi ##tl ##ab - org / gi ##tl ##ab - ce / b ##lo ##b / master / app / models / web _ hook . rb # l ##34 [SEP]


INFO:tensorflow:input_ids: 101 1045 5993 2008 2111 2024 2478 2969 1011 2772 17987 1037 2843 1010 2021 2005 2613 4411 1010 2383 7020 2140 22616 2006 4773 1011 18008 2357 2125 3475 1005 1056 2204 1012 2672 2023 2323 2022 2242 2008 2003 9530 8873 27390 3085 1029 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 8917 1013 21025 19646 7875 1011 8292 1013 1038 4135 2497 1013 3040 1013 10439 1013 4275 1013 4773 1035 8103 1012 21144 1001 1048 22022 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1045 5993 2008 2111 2024 2478 2969 1011 2772 17987 1037 2843 1010 2021 2005 2613 4411 1010 2383 7020 2140 22616 2006 4773 1011 18008 2357 2125 3475 1005 1056 2204 1012 2672 2023 2323 2022 2242 2008 2003 9530 8873 27390 3085 1029 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 8917 1013 21025 19646 7875 1011 8292 1013 1038 4135 2497 1013 3040 1013 10439 1013 4275 1013 4773 1035 8103 1012 21144 1001 1048 22022 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:Writing example 10000 of 103872


INFO:tensorflow:Writing example 10000 of 103872


INFO:tensorflow:Writing example 20000 of 103872


INFO:tensorflow:Writing example 20000 of 103872


INFO:tensorflow:Writing example 30000 of 103872


INFO:tensorflow:Writing example 30000 of 103872


INFO:tensorflow:Writing example 40000 of 103872


INFO:tensorflow:Writing example 40000 of 103872


INFO:tensorflow:Writing example 50000 of 103872


INFO:tensorflow:Writing example 50000 of 103872


INFO:tensorflow:Writing example 60000 of 103872


INFO:tensorflow:Writing example 60000 of 103872


INFO:tensorflow:Writing example 70000 of 103872


INFO:tensorflow:Writing example 70000 of 103872


INFO:tensorflow:Writing example 80000 of 103872


INFO:tensorflow:Writing example 80000 of 103872


INFO:tensorflow:Writing example 90000 of 103872


INFO:tensorflow:Writing example 90000 of 103872


INFO:tensorflow:Writing example 100000 of 103872


INFO:tensorflow:Writing example 100000 of 103872


INFO:tensorflow:Writing example 0 of 11543


INFO:tensorflow:Writing example 0 of 11543


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] vulnerability in the java se , java se embedded component of oracle java se ( sub ##com ##pone ##nt : libraries ) . the supported version that is affected is java se : 8 ##u ##13 ##1 ; java se embedded : 8 ##u ##13 ##1 . easily exploit ##able vulnerability allows una ##uth ##ent ##icated attacker with network access via multiple protocols to compromise java se , java se embedded . successful attacks require human interaction from a person other than the attacker and while the vulnerability is in java se , java se embedded , attacks may significantly impact additional products . successful attacks of this vulnerability can result in takeover of java se , java se embedded . note : this vulnerability applies [SEP]


INFO:tensorflow:tokens: [CLS] vulnerability in the java se , java se embedded component of oracle java se ( sub ##com ##pone ##nt : libraries ) . the supported version that is affected is java se : 8 ##u ##13 ##1 ; java se embedded : 8 ##u ##13 ##1 . easily exploit ##able vulnerability allows una ##uth ##ent ##icated attacker with network access via multiple protocols to compromise java se , java se embedded . successful attacks require human interaction from a person other than the attacker and while the vulnerability is in java se , java se embedded , attacks may significantly impact additional products . successful attacks of this vulnerability can result in takeover of java se , java se embedded . note : this vulnerability applies [SEP]


INFO:tensorflow:input_ids: 101 18130 1999 1996 9262 7367 1010 9262 7367 11157 6922 1997 14721 9262 7367 1006 4942 9006 29513 3372 1024 8860 1007 1012 1996 3569 2544 2008 2003 5360 2003 9262 7367 1024 1022 2226 17134 2487 1025 9262 7367 11157 1024 1022 2226 17134 2487 1012 4089 18077 3085 18130 4473 14477 14317 4765 17872 17346 2007 2897 3229 3081 3674 16744 2000 12014 9262 7367 1010 9262 7367 11157 1012 3144 4491 5478 2529 8290 2013 1037 2711 2060 2084 1996 17346 1998 2096 1996 18130 2003 1999 9262 7367 1010 9262 7367 11157 1010 4491 2089 6022 4254 3176 3688 1012 3144 4491 1997 2023 18130 2064 2765 1999 15336 1997 9262 7367 1010 9262 7367 11157 1012 3602 1024 2023 18130 12033 102


INFO:tensorflow:input_ids: 101 18130 1999 1996 9262 7367 1010 9262 7367 11157 6922 1997 14721 9262 7367 1006 4942 9006 29513 3372 1024 8860 1007 1012 1996 3569 2544 2008 2003 5360 2003 9262 7367 1024 1022 2226 17134 2487 1025 9262 7367 11157 1024 1022 2226 17134 2487 1012 4089 18077 3085 18130 4473 14477 14317 4765 17872 17346 2007 2897 3229 3081 3674 16744 2000 12014 9262 7367 1010 9262 7367 11157 1012 3144 4491 5478 2529 8290 2013 1037 2711 2060 2084 1996 17346 1998 2096 1996 18130 2003 1999 9262 7367 1010 9262 7367 11157 1010 4491 2089 6022 4254 3176 3688 1012 3144 4491 1997 2023 18130 2064 2765 1999 15336 1997 9262 7367 1010 9262 7367 11157 1012 3602 1024 2023 18130 12033 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] remove duplicate content issue in doc template thanks to mark ##g for finding a conflict with zone sub ##na ##v line - up pr . [SEP]


INFO:tensorflow:tokens: [CLS] remove duplicate content issue in doc template thanks to mark ##g for finding a conflict with zone sub ##na ##v line - up pr . [SEP]


INFO:tensorflow:input_ids: 101 6366 24473 4180 3277 1999 9986 23561 4283 2000 2928 2290 2005 4531 1037 4736 2007 4224 4942 2532 2615 2240 1011 2039 10975 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 6366 24473 4180 3277 1999 9986 23561 4283 2000 2928 2290 2005 4531 1037 4736 2007 4224 4942 2532 2615 2240 1011 2039 10975 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] there ' s a bug in the gi ##th ##ub import ##er library that results in labels being applied to merge requests belonging to other projects and users , often private projects . as seen here : https : / / gi ##tl ##ab . com / gi ##tl ##ab - com / infrastructure / issues / 105 ##6 and here : https : / / gi ##tl ##ab . com / gi ##tl ##ab - com / support - forum / issues / 151 ##2 this is likely the cause of the mysterious " yo ! " spa ##m label appearing everywhere . gi ##th ##ub install ##s labels at the pull request level and not the project level and considers pull requests a type [SEP]


INFO:tensorflow:tokens: [CLS] there ' s a bug in the gi ##th ##ub import ##er library that results in labels being applied to merge requests belonging to other projects and users , often private projects . as seen here : https : / / gi ##tl ##ab . com / gi ##tl ##ab - com / infrastructure / issues / 105 ##6 and here : https : / / gi ##tl ##ab . com / gi ##tl ##ab - com / support - forum / issues / 151 ##2 this is likely the cause of the mysterious " yo ! " spa ##m label appearing everywhere . gi ##th ##ub install ##s labels at the pull request level and not the project level and considers pull requests a type [SEP]


INFO:tensorflow:input_ids: 101 2045 1005 1055 1037 11829 1999 1996 21025 2705 12083 12324 2121 3075 2008 3463 1999 10873 2108 4162 2000 13590 11186 7495 2000 2060 3934 1998 5198 1010 2411 2797 3934 1012 2004 2464 2182 1024 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 4012 1013 6502 1013 3314 1013 8746 2575 1998 2182 1024 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 4012 1013 2490 1011 7057 1013 3314 1013 16528 2475 2023 2003 3497 1996 3426 1997 1996 8075 1000 10930 999 1000 12403 2213 3830 6037 7249 1012 21025 2705 12083 16500 2015 10873 2012 1996 4139 5227 2504 1998 2025 1996 2622 2504 1998 10592 4139 11186 1037 2828 102


INFO:tensorflow:input_ids: 101 2045 1005 1055 1037 11829 1999 1996 21025 2705 12083 12324 2121 3075 2008 3463 1999 10873 2108 4162 2000 13590 11186 7495 2000 2060 3934 1998 5198 1010 2411 2797 3934 1012 2004 2464 2182 1024 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 4012 1013 6502 1013 3314 1013 8746 2575 1998 2182 1024 16770 1024 1013 1013 21025 19646 7875 1012 4012 1013 21025 19646 7875 1011 4012 1013 2490 1011 7057 1013 3314 1013 16528 2475 2023 2003 3497 1996 3426 1997 1996 8075 1000 10930 999 1000 12403 2213 3830 6037 7249 1012 21025 2705 12083 16500 2015 10873 2012 1996 4139 5227 2504 1998 2025 1996 2622 2504 1998 10592 4139 11186 1037 2828 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] add python 3 . 6 class ##ifier to the python package . no description provided . [SEP]


INFO:tensorflow:tokens: [CLS] add python 3 . 6 class ##ifier to the python package . no description provided . [SEP]


INFO:tensorflow:input_ids: 101 5587 18750 1017 1012 1020 2465 18095 2000 1996 18750 7427 1012 2053 6412 3024 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 5587 18750 1017 1012 1020 2465 18095 2000 1996 18750 7427 1012 2053 6412 3024 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] the save _ submission function in mod / assign / external ##lib . php in mood ##le through 2 . 6 . 11 , 2 . 7 . x before 2 . 7 . 13 , 2 . 8 . x before 2 . 8 . 11 , 2 . 9 . x before 2 . 9 . 5 , and 3 . 0 . x before 3 . 0 . 3 allows remote authentic ##ated users to bypass intended due - date restrictions by lever ##aging the student role for a web - service request . [SEP]


INFO:tensorflow:tokens: [CLS] the save _ submission function in mod / assign / external ##lib . php in mood ##le through 2 . 6 . 11 , 2 . 7 . x before 2 . 7 . 13 , 2 . 8 . x before 2 . 8 . 11 , 2 . 9 . x before 2 . 9 . 5 , and 3 . 0 . x before 3 . 0 . 3 allows remote authentic ##ated users to bypass intended due - date restrictions by lever ##aging the student role for a web - service request . [SEP]


INFO:tensorflow:input_ids: 101 1996 3828 1035 12339 3853 1999 16913 1013 23911 1013 6327 29521 1012 25718 1999 6888 2571 2083 1016 1012 1020 1012 2340 1010 1016 1012 1021 1012 1060 2077 1016 1012 1021 1012 2410 1010 1016 1012 1022 1012 1060 2077 1016 1012 1022 1012 2340 1010 1016 1012 1023 1012 1060 2077 1016 1012 1023 1012 1019 1010 1998 1017 1012 1014 1012 1060 2077 1017 1012 1014 1012 1017 4473 6556 14469 4383 5198 2000 11826 3832 2349 1011 3058 9259 2011 15929 16594 1996 3076 2535 2005 1037 4773 1011 2326 5227 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1996 3828 1035 12339 3853 1999 16913 1013 23911 1013 6327 29521 1012 25718 1999 6888 2571 2083 1016 1012 1020 1012 2340 1010 1016 1012 1021 1012 1060 2077 1016 1012 1021 1012 2410 1010 1016 1012 1022 1012 1060 2077 1016 1012 1022 1012 2340 1010 1016 1012 1023 1012 1060 2077 1016 1012 1023 1012 1019 1010 1998 1017 1012 1014 1012 1060 2077 1017 1012 1014 1012 1017 4473 6556 14469 4383 5198 2000 11826 3832 2349 1011 3058 9259 2011 15929 16594 1996 3076 2535 2005 1037 4773 1011 2326 5227 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:Writing example 10000 of 11543


INFO:tensorflow:Writing example 10000 of 11543


# Creating a model

In [0]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels, is_training=None):
  """Creates a classification model: BERT hub module plus one softmax layer.

  Args:
    is_predicting: If true, only predictions are returned and no loss is built.
    input_ids: Passed to the hub module's "tokens" signature as `input_ids`.
    input_mask: Passed to the hub module's "tokens" signature as `input_mask`.
    segment_ids: Passed to the hub module's "tokens" signature as `segment_ids`.
    labels: Integer class ids; one-hot encoded to compute the loss. Ignored
      when `is_predicting` is true.
    num_labels: Number of output classes.
    is_training: Whether to apply dropout to the pooled output. Defaults to
      `not is_predicting`, so prediction is deterministic. Callers that
      evaluate can pass `is_training=False` to disable dropout there as well.

  Returns:
    `(predicted_labels, log_probs)` when `is_predicting` is true, otherwise
    `(loss, predicted_labels, log_probs)`.
  """
  if is_training is None:
    is_training = not is_predicting

  bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
  bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
  bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

  # Use "pooled_output" for classification tasks on an entire sentence.
  # Use "sequence_outputs" for token-level output.
  output_layer = bert_outputs["pooled_output"]

  hidden_size = output_layer.shape[-1].value

  # Create our own classification layer to fine-tune on the labelled data.
  output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

  output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

  with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting. It must not run at inference time,
    # otherwise predictions for the same input change from call to call.
    if is_training:
      output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

    logits = tf.matmul(output_layer, output_weights, transpose_b=True)
    logits = tf.nn.bias_add(logits, output_bias)
    log_probs = tf.nn.log_softmax(logits, axis=-1)

    # argmax over the class axis already yields one label per example.
    # No squeeze here: it would turn a batch of one into a scalar and drop
    # the batch dimension that the Estimator and the metrics rely on.
    predicted_labels = tf.argmax(log_probs, axis=-1, output_type=tf.int32)

    # If we're predicting, we want predicted labels and the log-probabilities.
    if is_predicting:
      return (predicted_labels, log_probs)

    # Convert labels into one-hot encoding
    one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)


Next we'll wrap our model function in a `model_fn_builder` function that adapts our model to work for training, evaluation, and prediction.

In [0]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
  """Returns a `model_fn` closure for `tf.estimator.Estimator`.

  Args:
    num_labels: Number of output classes, forwarded to `create_model`.
    learning_rate: Forwarded to `bert.optimization.create_optimizer`.
    num_train_steps: Forwarded to `bert.optimization.create_optimizer`.
    num_warmup_steps: Forwarded to `bert.optimization.create_optimizer`.

  Returns:
    A `model_fn(features, labels, mode, params)` that builds the
    `tf.estimator.EstimatorSpec` for TRAIN, EVAL or PREDICT mode.
  """

  def metric_fn(label_ids, predicted_labels, log_probs):
    """Builds the evaluation metric ops returned in EVAL mode."""
    # AUC needs a ranking score, not a thresholded label. For the binary case
    # use the probability of class 1; otherwise keep scoring by hard label.
    if num_labels == 2:
      auc_scores = tf.exp(log_probs[:, 1])
    else:
      auc_scores = predicted_labels

    return {
        "eval_accuracy": tf.metrics.accuracy(label_ids, predicted_labels),
        "f1_score": tf.contrib.metrics.f1_score(label_ids, predicted_labels),
        "auc": tf.metrics.auc(label_ids, auc_scores),
        "precision": tf.metrics.precision(label_ids, predicted_labels),
        "recall": tf.metrics.recall(label_ids, predicted_labels),
        "true_positives": tf.metrics.true_positives(
            label_ids, predicted_labels),
        "true_negatives": tf.metrics.true_negatives(
            label_ids, predicted_labels),
        "false_positives": tf.metrics.false_positives(
            label_ids, predicted_labels),
        "false_negatives": tf.metrics.false_negatives(
            label_ids, predicted_labels),
    }

  def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
    """The `model_fn` for `tf.estimator.Estimator`."""

    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]
    # Labels are read from `features`; the `labels` argument is unused.
    label_ids = features["label_ids"]

    is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

    # PREDICT
    if is_predicting:
      (predicted_labels, log_probs) = create_model(
          is_predicting, input_ids, input_mask, segment_ids, label_ids,
          num_labels)

      # NOTE: the 'probabilities' entry holds log-probabilities. The key is
      # kept as-is because downstream cells index predictions by this name.
      predictions = {
          'probabilities': log_probs,
          'labels': predicted_labels
      }
      return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # TRAIN and EVAL
    (loss, predicted_labels, log_probs) = create_model(
        is_predicting, input_ids, input_mask, segment_ids, label_ids,
        num_labels)

    if mode == tf.estimator.ModeKeys.TRAIN:
      # Build the optimizer (and its gradient ops) only when training; it is
      # dead weight in an evaluation graph.
      train_op = bert.optimization.create_optimizer(
          loss, learning_rate, num_train_steps, num_warmup_steps,
          use_tpu=False)
      return tf.estimator.EstimatorSpec(mode=mode,
                                        loss=loss,
                                        train_op=train_op)

    # Metric ops are only consumed in EVAL mode, so build them only here.
    eval_metrics = metric_fn(label_ids, predicted_labels, log_probs)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metrics)

  # Return the actual model function in the closure
  return model_fn


In [0]:
# Training hyperparameters and checkpoint/summary settings.
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs: passed to tf.estimator.RunConfig as save_checkpoints_steps
# and save_summary_steps respectively.
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [0]:
# Compute the number of train and warmup steps from the batch size:
# total steps = (examples / batch size) * epochs, truncated to an int;
# warmup steps = WARMUP_PROPORTION of the total, truncated to an int.
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [0]:
# Directory (on the mounted Google Drive) used as the Estimator's model_dir.
# NOTE(review): checkpoints already present here are reused. In the recorded
# run below, training was skipped and model.ckpt-9738 from this directory was
# evaluated instead. Point this at an empty directory to train from scratch.
OUTPUT_DIR='/content/drive/My Drive/security_test_output'

In [0]:
# Specify the output directory and how often (in steps) to save
# summaries and checkpoints.
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [0]:
# Build the model function with the hyperparameters computed above.
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

# Wrap it in an Estimator that checkpoints to run_config's model_dir.
# `params` is not read by model_fn itself; presumably the input functions
# built by run_classifier.input_fn_builder read "batch_size" -- TODO confirm.
estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})


INFO:tensorflow:Using config: {'_model_dir': '/content/drive/My Drive/security_test_output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff3ec0726d8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': '/content/drive/My Drive/security_test_output', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7ff3ec0726d8>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [0]:
# Create an input function for training over train_features.
# drop_remainder would be set to True when using TPUs; it is False here.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

In [0]:
# Fine-tune until the global step reaches num_train_steps and report the
# wall-clock time taken.
# NOTE: if the model directory already holds a checkpoint at or beyond
# num_train_steps, the Estimator logs "Skipping training since max_steps has
# already saved" and returns immediately; later cells then use that checkpoint.
print('Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
INFO:tensorflow:Skipping training since max_steps has already saved.


INFO:tensorflow:Skipping training since max_steps has already saved.


Training took time  0:00:00.017784


Now let's use our test data to see how well our model did:

In [0]:
# Input function over the held-out test features (is_training=False);
# drop_remainder=False keeps the final partial batch.
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [0]:
# Evaluate on the test set; steps=None runs until the input function is
# exhausted. Returns the loss, global_step and the metrics from model_fn.
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2020-03-27T09:12:23Z


INFO:tensorflow:Starting evaluation at 2020-03-27T09:12:23Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from /content/drive/My Drive/security_test_output/model.ckpt-9738


INFO:tensorflow:Restoring parameters from /content/drive/My Drive/security_test_output/model.ckpt-9738


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2020-03-27-09:13:27


INFO:tensorflow:Finished evaluation at 2020-03-27-09:13:27


INFO:tensorflow:Saving dict for global step 9738: auc = 0.5, eval_accuracy = 0.5018626, f1_score = 0.66832024, false_negatives = 0.0, false_positives = 5750.0, global_step = 9738, loss = 4.6356206, precision = 0.5018626, recall = 1.0, true_negatives = 0.0, true_positives = 5793.0


INFO:tensorflow:Saving dict for global step 9738: auc = 0.5, eval_accuracy = 0.5018626, f1_score = 0.66832024, false_negatives = 0.0, false_positives = 5750.0, global_step = 9738, loss = 4.6356206, precision = 0.5018626, recall = 1.0, true_negatives = 0.0, true_positives = 5793.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 9738: /content/drive/My Drive/security_test_output/model.ckpt-9738


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 9738: /content/drive/My Drive/security_test_output/model.ckpt-9738


{'auc': 0.5,
 'eval_accuracy': 0.5018626,
 'f1_score': 0.66832024,
 'false_negatives': 0.0,
 'false_positives': 5750.0,
 'global_step': 9738,
 'loss': 4.6356206,
 'precision': 0.5018626,
 'recall': 1.0,
 'true_negatives': 0.0,
 'true_positives': 5793.0}

In [0]:
# Run inference over the test features and keep, for every example, the index
# of the highest-scoring class.
predict_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)
predictions = estimator.predict(predict_input_fn)

preds = [np.argmax(prediction['probabilities']) for prediction in predictions]

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from /content/drive/My Drive/security_test_output/model.ckpt-9738


INFO:tensorflow:Restoring parameters from /content/drive/My Drive/security_test_output/model.ckpt-9738


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


In [0]:
# Compare the predicted class ids with the ground-truth 'Label' column.
from sklearn.metrics import accuracy_score,classification_report
print("Accuracy of BERT is:",accuracy_score(test['Label'],preds))

Accuracy of BERT is: 0.5018626007103872


In [0]:
# Per-class precision / recall / F1 on the test set.
# NOTE(review): in the recorded run every example is predicted as class 1,
# so the class-0 row is all zeros and sklearn emits a warning for it.
print(classification_report(test['Label'],preds))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      5750
           1       0.50      1.00      0.67      5793

    accuracy                           0.50     11543
   macro avg       0.25      0.50      0.33     11543
weighted avg       0.25      0.50      0.34     11543



  _warn_prf(average, modifier, msg_start, len(result))
