In [1]:
import warnings
import numpy as np
import pandas as pd
warnings.filterwarnings("ignore")

In [2]:
# !pip install sentence_transformers
# !pip install googlesearch-python

In [4]:
%%writefile "C:\Users\Asus\Desktop\Master\IR\New folder\project1\ques_ans_pipeline.py"

# Edited version of Pipeline.py from https://github.com/patil-suraj/question_generation.git
import itertools
import logging
from typing import Optional, Dict, Union

from nltk import sent_tokenize

import torch
from transformers import(
    AutoModelForSeq2SeqLM, 
    AutoTokenizer,
    PreTrainedModel,
    PreTrainedTokenizer,
)

logger = logging.getLogger(__name__)

class QGPipeline:
    """Two-stage question generation: extract answer spans, then ask about them.

    ``ans_model`` proposes answer spans for every sentence of the input text and
    ``model`` generates one question per extracted span. ``qg_format`` selects
    how an answer is shown to the question model: ``"prepend"`` builds
    ``answer: ... context: ...`` prompts, any other value highlights the answer
    inside its sentence with ``<hl>`` markers.
    """

    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        ans_model: PreTrainedModel,
        ans_tokenizer: PreTrainedTokenizer,
        qg_format: str,
        use_cuda: bool
    ):
        """Store the models/tokenizers and move the models to the target device.

        Args:
            model: Seq2seq model used to generate questions.
            tokenizer: Tokenizer paired with ``model``.
            ans_model: Seq2seq model used to extract answer spans.
            ans_tokenizer: Tokenizer paired with ``ans_model``.
            qg_format: ``"prepend"`` or any other value for the highlight format.
            use_cuda: Use the GPU when one is available.

        Raises:
            AssertionError: If ``model`` is neither a T5 nor a BART
                conditional-generation model.
        """
        self.model = model
        self.tokenizer = tokenizer

        self.ans_model = ans_model
        self.ans_tokenizer = ans_tokenizer

        self.qg_format = qg_format

        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        self.model.to(self.device)

        # The multitask pipeline passes the same object for both roles.
        if self.ans_model is not self.model:
            self.ans_model.to(self.device)

        assert self.model.__class__.__name__ in ["T5ForConditionalGeneration", "BartForConditionalGeneration"]

        if "T5ForConditionalGeneration" in self.model.__class__.__name__:
            self.model_type = "t5"
        else:
            self.model_type = "bart"

    def __call__(self, inputs: str):
        """Generate question/answer pairs for ``inputs``.

        Returns:
            A list of ``{'answer': ..., 'question': ...}`` dicts; empty when no
            answer span could be extracted.
        """
        inputs = " ".join(inputs.split())  # collapse all whitespace runs
        sents, answers = self._extract_answers(inputs)
        flat_answers = list(itertools.chain(*answers))

        if len(flat_answers) == 0:
            return []

        if self.qg_format == "prepend":
            qg_examples = self._prepare_inputs_for_qg_from_answers_prepend(inputs, answers)
        else:
            qg_examples = self._prepare_inputs_for_qg_from_answers_hl(sents, answers)

        qg_inputs = [example['source_text'] for example in qg_examples]
        questions = self._generate_questions(qg_inputs)
        output = [{'answer': example['answer'], 'question': que} for example, que in zip(qg_examples, questions)]
        return output

    def _generate_questions(self, inputs):
        """Run the question model over the prepared prompts and decode the results."""
        inputs = self._tokenize(inputs, padding=True, truncation=True)

        outs = self.model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=32,
            num_beams=4,
        )

        questions = [self.tokenizer.decode(ids, skip_special_tokens=True) for ids in outs]
        return questions

    def _extract_answers(self, context):
        """Return ``(sentences, answers)`` where ``answers[i]`` lists the spans found for sentence ``i``."""
        sents, inputs = self._prepare_inputs_for_ans_extraction(context)
        if not inputs:
            # Empty text: nothing to encode, and an empty batch cannot be generated from.
            return sents, []

        # Encode with the answer model's own tokenizer; it is only guaranteed to
        # match self.tokenizer when both roles share one model.
        inputs = self._tokenize(inputs, padding=True, truncation=True, tokenizer=self.ans_tokenizer)

        outs = self.ans_model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            max_length=32,
        )

        # Special tokens are kept so the '<sep>' separators survive decoding;
        # the piece after the last '<sep>' is discarded.
        dec = [self.ans_tokenizer.decode(ids, skip_special_tokens=False) for ids in outs]
        answers = [item.split('<sep>')[:-1] for item in dec]

        return sents, answers

    def _tokenize(self,
        inputs,
        padding=True,
        truncation=True,
        add_special_tokens=True,
        max_length=512,
        tokenizer=None
    ):
        """Batch-encode ``inputs`` to PyTorch tensors.

        Args:
            inputs: List of strings to encode.
            padding: Pad every sequence to ``max_length`` when true.
            truncation: Truncate sequences longer than ``max_length``.
            add_special_tokens: Forwarded to the tokenizer.
            max_length: Maximum (and, when padding, exact) sequence length.
            tokenizer: Tokenizer to use; defaults to ``self.tokenizer``.
        """
        if tokenizer is None:
            tokenizer = self.tokenizer
        # `padding` alone decides the strategy; the legacy `pad_to_max_length`
        # flag is not passed alongside it.
        return tokenizer.batch_encode_plus(
            inputs,
            max_length=max_length,
            add_special_tokens=add_special_tokens,
            truncation=truncation,
            padding="max_length" if padding else False,
            return_tensors="pt"
        )

    def _prepare_inputs_for_ans_extraction(self, text):
        """Build one ``extract answers:`` prompt per sentence, highlighting that sentence."""
        sents = sent_tokenize(text)

        inputs = []
        for i in range(len(sents)):
            source_text = "extract answers:"
            for j, sent in enumerate(sents):
                if i == j:
                    sent = "<hl> %s <hl>" % sent
                source_text = "%s %s" % (source_text, sent)
                source_text = source_text.strip()

            if self.model_type == "t5":
                source_text = source_text + " </s>"
            inputs.append(source_text)

        return sents, inputs

    def _prepare_inputs_for_qg_from_answers_hl(self, sents, answers):
        """Build ``generate question:`` prompts with each answer highlighted in its sentence.

        Answers that do not occur verbatim in their source sentence are skipped.
        """
        pad_token = '<pad>'
        inputs = []
        for i, answer in enumerate(answers):
            for answer_text in answer:
                sent = sents[i]
                sents_copy = sents[:]

                answer_text = answer_text.strip()
                # The decoder output keeps special tokens, so the first answer of
                # a sequence may start with '<pad>'. Strip it as a prefix only.
                if answer_text.startswith(pad_token):
                    answer_text = answer_text[len(pad_token):].strip()

                try:
                    ans_start_idx = sent.index(answer_text)
                except ValueError:
                    # The extracted span is not a literal substring of the sentence.
                    continue

                sent = f"{sent[:ans_start_idx]} <hl> {answer_text} <hl> {sent[ans_start_idx + len(answer_text): ]}"
                sents_copy[i] = sent

                source_text = " ".join(sents_copy)
                source_text = f"generate question: {source_text}"
                if self.model_type == "t5":
                    source_text = source_text + " </s>"

                inputs.append({"answer": answer_text, "source_text": source_text})

        return inputs

    def _prepare_inputs_for_qg_from_answers_prepend(self, context, answers):
        """Build ``answer: ... context: ...`` prompts, one per extracted answer."""
        flat_answers = list(itertools.chain(*answers))
        examples = []
        for answer in flat_answers:
            source_text = f"answer: {answer} context: {context}"
            if self.model_type == "t5":
                source_text = source_text + " </s>"

            examples.append({"answer": answer, "source_text": source_text})
        return examples

    
class MultiTaskQAQGPipeline(QGPipeline):
    """Single-model pipeline that does question generation or question answering.

    A plain ``str`` input is treated as text to generate questions from; any
    other input is indexed with ``"question"`` and ``"context"`` and answered.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __call__(self, inputs: Union[Dict, str]):
        """Dispatch to QA for non-string inputs, otherwise to question generation."""
        if type(inputs) is not str:
            return self._extract_answer(inputs["question"], inputs["context"])
        return super().__call__(inputs)

    def _prepare_inputs_for_qa(self, question, context):
        """Format the QA prompt, appending the T5 end-of-sequence marker when needed."""
        eos_suffix = " </s>" if self.model_type == "t5" else ""
        return f"question: {question}  context: {context}" + eos_suffix

    def _extract_answer(self, question, context):
        """Generate and decode the answer to ``question`` given ``context``."""
        prompt = self._prepare_inputs_for_qa(question, context)
        encoded = self._tokenize([prompt], padding=False)

        generated = self.model.generate(
            input_ids=encoded['input_ids'].to(self.device),
            attention_mask=encoded['attention_mask'].to(self.device),
            max_length=16,
        )

        return self.tokenizer.decode(generated[0], skip_special_tokens=True)


class E2EQGPipeline:
    """End-to-end question generation: one model call yields all questions for a text.

    The model output is expected to contain questions separated by ``<sep>``.
    """

    def __init__(
        self,
        model: PreTrainedModel,
        tokenizer: PreTrainedTokenizer,
        use_cuda: bool
    ) :
        """Store the model/tokenizer, pick the device and set default generation options.

        Raises:
            AssertionError: If ``model`` is neither a T5 nor a BART
                conditional-generation model.
        """
        self.model = model
        self.tokenizer = tokenizer

        self.device = "cuda" if torch.cuda.is_available() and use_cuda else "cpu"
        self.model.to(self.device)

        assert self.model.__class__.__name__ in ["T5ForConditionalGeneration", "BartForConditionalGeneration"]

        if "T5ForConditionalGeneration" in self.model.__class__.__name__:
            self.model_type = "t5"
        else:
            self.model_type = "bart"

        self.default_generate_kwargs = {
            "max_length": 256,
            "num_beams": 4,
            "length_penalty": 1.5,
            "no_repeat_ngram_size": 3,
            "early_stopping": True,
        }

    def __call__(self, context: str, **generate_kwargs):
        """Generate questions for ``context``.

        Args:
            context: Source text.
            **generate_kwargs: Overrides for ``self.default_generate_kwargs``.
                Options not given here keep their default value, so a caller
                can change a single setting without restating the rest.

        Returns:
            List of question strings.
        """
        inputs = self._prepare_inputs_for_e2e_qg(context)

        # Layer caller overrides on top of the defaults instead of replacing them.
        effective_kwargs = {**self.default_generate_kwargs, **generate_kwargs}

        outs = self.model.generate(
            input_ids=inputs['input_ids'].to(self.device),
            attention_mask=inputs['attention_mask'].to(self.device),
            **effective_kwargs
        )

        prediction = self.tokenizer.decode(outs[0], skip_special_tokens=True)
        questions = prediction.split("<sep>")
        # Whatever follows the final '<sep>' is dropped.
        questions = [question.strip() for question in questions[:-1]]
        return questions

    def _prepare_inputs_for_e2e_qg(self, context):
        """Build and encode the ``generate questions:`` prompt."""
        source_text = f"generate questions: {context}"
        if self.model_type == "t5":
            source_text = source_text + " </s>"

        inputs = self._tokenize([source_text], padding=False)
        return inputs

    def _tokenize(
        self,
        inputs,
        padding=True,
        truncation=True,
        add_special_tokens=True,
        max_length=512
    ):
        """Batch-encode ``inputs`` to PyTorch tensors with ``self.tokenizer``."""
        # `padding` alone decides the strategy; the legacy `pad_to_max_length`
        # flag is not passed alongside it.
        inputs = self.tokenizer.batch_encode_plus(
            inputs,
            max_length=max_length,
            add_special_tokens=add_special_tokens,
            truncation=truncation,
            padding="max_length" if padding else False,
            return_tensors="pt"
        )
        return inputs


# Registry used by pipeline(): maps each task name to the class implementing it
# ("impl") and to the checkpoint identifiers loaded when the caller does not
# supply a model ("default"). Only "question-generation" has a separate
# default answer-extraction checkpoint ("ans_model").
SUPPORTED_TASKS = {
    "question-generation": {
        "impl": QGPipeline,
        "default": {
            "model": "valhalla/t5-small-qg-hl",
            "ans_model": "valhalla/t5-small-qa-qg-hl",
        }
    },
    "multitask-qa-qg": {
        "impl": MultiTaskQAQGPipeline,
        "default": {
            "model": "valhalla/t5-small-qa-qg-hl",
        }
    },
    "e2e-qg": {
        "impl": E2EQGPipeline,
        "default": {
            "model": "valhalla/t5-small-e2e-qg",
        }
    }
}

def pipeline(
    task: str,
    model: Optional = None,
    tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    qg_format: Optional[str] = "highlight",
    ans_model: Optional = None,
    ans_tokenizer: Optional[Union[str, PreTrainedTokenizer]] = None,
    use_cuda: Optional[bool] = True,
    **kwargs,
):
    """Build the pipeline object registered for ``task`` in ``SUPPORTED_TASKS``.

    Args:
        task: One of the keys of ``SUPPORTED_TASKS``.
        model: Model instance or checkpoint identifier; the task default when None.
        tokenizer: Tokenizer instance, identifier, or ``(identifier, kwargs)``
            tuple; inferred from ``model`` when that is a string.
        qg_format: Passed through to the pipeline class (not used for "e2e-qg").
        ans_model: Answer-extraction model or identifier; only consulted for
            the "question-generation" task.
        ans_tokenizer: Tokenizer for ``ans_model``, same accepted forms as ``tokenizer``.
        use_cuda: Passed through to the pipeline class.
        **kwargs: Accepted but not used.

    Raises:
        KeyError: If ``task`` is not registered.
        Exception: If a tokenizer is missing and the model was not given as a string.
    """

    def _infer_tokenizer(tok, mdl):
        # Fall back to the model identifier when no tokenizer was supplied.
        if tok is not None:
            return tok
        if isinstance(mdl, str):
            return mdl
        raise Exception(
            "Impossible to guess which tokenizer to use. "
            "Please provided a PretrainedTokenizer class or a path/identifier to a pretrained tokenizer."
        )

    def _load_tokenizer(tok):
        # A tuple is (identifier, {kwargs}); a string is a bare identifier.
        if isinstance(tok, tuple):
            return AutoTokenizer.from_pretrained(tok[0], **tok[1])
        if isinstance(tok, str):
            return AutoTokenizer.from_pretrained(tok)
        return tok

    def _load_model(mdl):
        return AutoModelForSeq2SeqLM.from_pretrained(mdl) if isinstance(mdl, str) else mdl

    if task not in SUPPORTED_TASKS:
        raise KeyError("Unknown task {}, available tasks are {}".format(task, list(SUPPORTED_TASKS.keys())))

    task_spec = SUPPORTED_TASKS[task]
    task_class = task_spec["impl"]

    if model is None:
        model = task_spec["default"]["model"]

    # Tokenizer is resolved before the model is instantiated, because inference
    # relies on `model` still being a string.
    tokenizer = _load_tokenizer(_infer_tokenizer(tokenizer, model))
    model = _load_model(model)

    if task == "question-generation":
        if ans_model is None:
            default_ans = task_spec["default"]["ans_model"]
            ans_tokenizer = AutoTokenizer.from_pretrained(default_ans)
            ans_model = AutoModelForSeq2SeqLM.from_pretrained(default_ans)
        else:
            ans_tokenizer = _load_tokenizer(_infer_tokenizer(ans_tokenizer, ans_model))
            ans_model = _load_model(ans_model)

    if task == "e2e-qg":
        return task_class(model=model, tokenizer=tokenizer, use_cuda=use_cuda)

    if task != "question-generation":
        # Multitask: the same model/tokenizer pair plays both roles.
        ans_model, ans_tokenizer = model, tokenizer

    return task_class(
        model=model,
        tokenizer=tokenizer,
        ans_model=ans_model,
        ans_tokenizer=ans_tokenizer,
        qg_format=qg_format,
        use_cuda=use_cuda,
    )

Writing /kaggle/working/ques_ans_pipeline.py


In [5]:
%%writefile /kaggle/working/metrics.py
import torch
from sentence_transformers import SentenceTransformer
from nltk.translate.bleu_score import sentence_bleu

_SBERT_MODEL_CACHE = {}


def _get_sbert_model(model_name='bert-base-nli-mean-tokens'):
    """Return the SentenceTransformer for ``model_name``, loading it only on first use."""
    if model_name not in _SBERT_MODEL_CACHE:
        _SBERT_MODEL_CACHE[model_name] = SentenceTransformer(model_name)
    return _SBERT_MODEL_CACHE[model_name]


def sbertSimilarity(input_ans, majority_ans):
    """Mean pairwise cosine similarity between two lists of sentences.

    Both lists are embedded with the same sentence-transformer model and the
    cosine similarity of embeddings at the same position is averaged.

    Args:
        input_ans: Sentences to embed.
        majority_ans: Sentences compared position-by-position with ``input_ans``.

    Returns:
        The mean cosine similarity as a Python float.
    """
    # The model is cached so repeated calls do not reload the weights.
    sentence_transformer_model = _get_sbert_model()

    sentence_embeddings = sentence_transformer_model.encode(input_ans)
    sentence_embeddings2 = sentence_transformer_model.encode(majority_ans)

    cosine = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
    score = torch.mean(cosine(torch.tensor(sentence_embeddings), torch.tensor(sentence_embeddings2))).item()

    return score


def matching_score(input_ans, majority_ans, newList):
    """Percentage of positions where ``input_ans`` and ``majority_ans`` agree exactly.

    The two lists are compared position by position up to the length of the
    shorter one. For every compared position the majority answer is appended
    to ``newList`` (on a match it equals the input answer).

    Args:
        input_ans: Answers being checked.
        majority_ans: Reference answers.
        newList: Output list, extended in place.

    Returns:
        ``matches / len(input_ans) * 100``, or ``0.0`` when ``input_ans`` is
        empty (nothing to score, and the ratio would otherwise divide by zero).
    """
    if len(input_ans) == 0:
        return 0.0

    matching = 0
    for claimed, consensus in zip(input_ans, majority_ans):
        if claimed == consensus:
            matching += 1
            newList.append(claimed)
        else:
            newList.append(consensus)

    return (matching / len(input_ans)) * 100
  

def sentence_bleu_score(input_ans, majority_ans):
    """Unigram-only BLEU of ``majority_ans`` scored against ``input_ans`` as the single reference."""
    unigram_weights = (1, 0, 0, 0)
    references = [input_ans]
    return sentence_bleu(references, majority_ans, weights=unigram_weights)

Writing /kaggle/working/metrics.py


In [7]:
%%writefile /kaggle/working/methods.py
import torch
import requests
from bs4 import BeautifulSoup
from googlesearch import search
from transformers import pipeline as transformers_pipeline

import lxml
def search_bing(query, timeout=10):
    """Return the result links Bing shows for ``query``.

    Args:
        query: Free-text search query.
        timeout: Seconds to wait for Bing before giving up.

    Returns:
        List of ``href`` values taken from the ``.b_algo h2 a`` result anchors;
        empty when the page contains none.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    print(query)

    headers = {
            "User-Agent": 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.106 Safari/537.36',
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.5",
            "Accept-Encoding": "gzip, deflate",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Cache-Control": "max-age=0",
        }

    # Pass the query through `params` so requests URL-encodes it; headlines
    # routinely contain spaces, '&', '#' and '?' that would corrupt a
    # hand-built query string.
    response = requests.get(
        "https://www.bing.com/search",
        params={"form": "QBRE", "q": query},
        headers=headers,
        timeout=timeout,
    )
    soup = BeautifulSoup(response.text, 'lxml')

    # Skip anchors without an href instead of failing on them.
    return [anchor['href'] for anchor in soup.select('.b_algo h2 a') if anchor.has_attr('href')]

def take_text(head):
    """Search for the first line of ``head`` and return every link the search yields.

    Example of the returned shape:
    ['https://www.newsweek.com/pope-francis-trump-immigration-jerusalem-christmas-758710', ...]
    """
    first_line = head.split("\n")[0]
    return [result for result in search(first_line)]


def query_search_text(links, questions, answers, k, timeout=10):
    """Answer ``questions`` from the text of the first ``k`` usable pages in ``links``.

    Links are visited in order. YouTube/Facebook links, unreachable pages and
    pages without any paragraph text are skipped and do not count towards
    ``k``. For every usable page, ``answer_from_link`` appends one list of
    answers to ``answers``.

    Args:
        links: Candidate URLs.
        questions: Questions to answer from each page.
        answers: Output list, extended in place (one entry per usable page).
        k: Maximum number of pages to use.
        timeout: Seconds to wait for each page.
    """
    skipped_prefixes = ('https://www.youtube.com', 'https://www.facebook.com')
    headers = {'User-Agent':
               'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36'}

    used = 0
    for link in links:
        if used == k:
            break
        if link.startswith(skipped_prefixes):
            print("Continuing. link is : ", link)
            continue

        try:
            r = requests.get(link, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            # One dead or slow site should not abort the whole run; try the next link.
            print("Skipping unreachable link : ", link, exc)
            continue

        soup = BeautifulSoup(r.content, 'html5lib')
        para = "".join(tag.text for tag in soup.find_all('p'))
        if not para.strip():
            # The question-answering pipeline rejects an empty context.
            print("Skipping link without paragraph text : ", link)
            continue

        answer_from_link(para, questions, answers)
        used += 1

_QA_PIPELINE_CACHE = {}


def _get_question_answerer(model_name='distilbert-base-cased-distilled-squad'):
    """Return the question-answering pipeline for ``model_name``, building it only once."""
    if model_name not in _QA_PIPELINE_CACHE:
        _QA_PIPELINE_CACHE[model_name] = transformers_pipeline("question-answering", model=model_name)
    return _QA_PIPELINE_CACHE[model_name]


def answer_from_link(para, questions, answers):
    """Answer every question against ``para`` and append the answers as one list.

    Args:
        para: Context text the answers are extracted from.
        questions: Questions to ask.
        answers: Output list; a list with one answer string per question is
            appended to it.
    """
    # The pipeline is cached so the model is not reloaded for every page.
    question_answerer = _get_question_answerer()
    with torch.no_grad():
        ans = [
            question_answerer(question=question, context=para)['answer']
            for question in questions
        ]
    answers.append(ans)


def most_frequent(List):
    """Return the element occurring most often in ``List``.

    Ties are resolved in favour of the element that appears first. An empty
    ``List`` raises ``IndexError``.
    """
    List[0]  # preserves the IndexError on empty input
    # max() keeps the first element among those with the highest count.
    return max(List, key=List.count)

def getMajority(ans):
    """Column-wise majority vote over a list of answer lists.

    ``ans[j][i]`` is read as the answer source ``j`` gave for question ``i``;
    the number of questions is taken from the first row.

    Args:
        ans: List of equally long answer lists.

    Returns:
        One winning answer per column (ties resolved by ``most_frequent``), or
        an empty list when ``ans`` is empty, i.e. no source produced answers.
    """
    if not ans:
        return []

    n_questions = len(ans[0])
    return [
        most_frequent([row[col] for row in ans])
        for col in range(n_questions)
    ]


def get_answers(dict, ans):
    """Append every value of the mapping ``dict`` to ``ans``, in key iteration order."""
    ans.extend(dict[key] for key in dict)


def importf(dict, list):
    """Bind the two arguments to local names and return None.

    NOTE(review): the names below are function locals and are discarded on
    return, so calling this has no observable effect - confirm whether it was
    meant to update module-level state.
    """
    ques_ans, majority_l = dict, list

Overwriting /kaggle/working/methods.py


In [31]:
# Smoke test: build the multitask pipeline and generate question/answer pairs
# for a short news paragraph (a plain string input selects question generation).
from ques_ans_pipeline import pipeline
nlp = pipeline("multitask-qa-qg")
nlp(" A short-term fix to fund the Children’s Health Insurance Program into January will likely be part of a stop-gap government funding bill Congress is expected to approve this week. White House legislative affairs director Marc Short said on Wednesday. In an interview with MSNBC, Short also said a measure to protect immigrant youths known as “Dreamers” would probably not be considered until January")

[{'answer': 'a stop-gap government funding bill',
  'question': "What is a short-term fix to fund the Children's Health Insurance Program likely to be part of?"},
 {'answer': 'Marc Short',
  'question': "Who was the White House's legislative affairs director?"}]

In [8]:
%%writefile /kaggle/working/predict.py
from metrics import sbertSimilarity, matching_score, sentence_bleu_score
from ques_ans_pipeline import pipeline
from methods import take_text, query_search_text, getMajority, get_answers, importf, search_bing


# app = Flask(__name__)

# global lp
# lp = {}
# NOTE(review): predict() in this file neither reads nor writes these two
# module-level dicts - they look like leftovers from the commented-out Flask
# app; confirm before relying on them.
ques_ans = {}
majority_l = {}
# @app.route('/')
# def hello_world():
#     return render_template('index.html')

# @app.route('/predict',methods=['POST'])
_QG_PIPELINE = None


def _get_qg_pipeline():
    """Return the shared "multitask-qa-qg" pipeline, building it on first use only."""
    global _QG_PIPELINE
    if _QG_PIPELINE is None:
        _QG_PIPELINE = pipeline("multitask-qa-qg")
    return _QG_PIPELINE


def predict(head,text):
    """Score how well an article's own answers agree with answers found on the web.

    Steps: search Bing for ``head``; generate question/answer pairs from
    ``text``; answer the same questions from the first two usable result pages;
    take the per-question majority answer; compare it with the article's
    answers using three metrics.

    Args:
        head: Article headline, used as the search query.
        text: Article body, used to generate the question/answer pairs.

    Returns:
        ``(matching_score, sentence_bleu_score, sbertSimilarity)``, or
        ``(-1, -1, -1)`` when the comparison cannot be made: no search results,
        no question generated, or no page produced answers.
    """
    print("Generating links")
    link_list =  search_bing(head)
    if not link_list:
        print("Links are empty : ", link_list)
        return -1, -1, -1
    print(link_list)

    # Reuse one pipeline across calls instead of reloading the model every time.
    nlp = _get_qg_pipeline()
    print("Generating question-answers")
    question_ans = nlp(text)
    print("question_ans : ", question_ans)

    ques_ans_dict = {pair['question']: pair['answer'] for pair in question_ans}
    if not ques_ans_dict:
        # Without questions there is nothing to verify, and the metrics below
        # cannot be computed on empty answer lists.
        print("No question-answer pairs generated")
        return -1, -1, -1
    only_question = list(ques_ans_dict.keys())

    print("Generating answers with new links")
    ans_all = []
    query_search_text(link_list, only_question, ans_all, 2)
    if not ans_all:
        # No page could be used, so there is no majority answer to compare with.
        print("No answers retrieved from links")
        return -1, -1, -1

    dict_ans = []
    get_answers(ques_ans_dict, dict_ans)
    maj_list = getMajority(ans_all)

    print("Evaluating")
    finalList = []
    verdict = matching_score(dict_ans, maj_list, finalList)
    print("matching_score : ", verdict)
    verdict2 = sentence_bleu_score(dict_ans, maj_list)
    print("sentence_bleu_score : ", verdict2)
    verdict3 = sbertSimilarity(dict_ans, maj_list)
    print("sbertSimilarity : ", verdict3)

    return verdict, verdict2,verdict3
#
# @app.route('/ques_ans/<score>')
# def ques_ans(score):
#     mylist = print_ques_ans()
#     print(request.method)
#     print(lp)
#     # head = session.get("head")
#     # text = session.get("text")
#     print(score)
#     r = score[1:6]
#     q = score[10:60]
#     return render_template('table.html', prediction_text = r, question_answer=q)







Writing /kaggle/working/predict.py


In [9]:
# Load the "true" and "fake" news CSVs from the Kaggle input dataset.
# When running outside Kaggle, point these paths at local copies, e.g.
# './True_part_1.csv' and './Fake_part_1.csv'.
true = pd.read_csv('/kaggle/input/true-fake-news/True_part_1.csv')
fake = pd.read_csv('/kaggle/input/true-fake-news/Fake_part_1.csv')

In [None]:
import pickle
# NOTE(review): pickle.load can execute arbitrary code from the file - only
# load pickles that were produced by this project.
# NOTE(review): true_links / false_links are not referenced by the other cells
# shown here, and the paths are relative to the working directory - confirm
# this cell is still needed.
with open('true_link_list_part_1.pkl', 'rb') as f:
    true_links = pickle.load(f)
with open('fake_link_list_part_1.pkl', 'rb') as f:
    false_links = pickle.load(f)

In [10]:
# Pull the 'title' and 'text' columns out of each frame; the evaluation loop
# passes them to predict() as (head, text).
true_head = true['title']
true_text =  true['text']
fake_head = fake['title']
fake_text =  fake['text']

In [11]:
from predict import predict

In [12]:
# Three independent accumulators for the true split, one per value that
# predict() returns (matching score, sentence-BLEU score, SBERT similarity).
metric1_score_true, sentence_bleu_score_true, sbertSimilarity_score_true = [], [], []

In [35]:
# Sanity check: the scoring loop appends to all three lists once per article,
# so the three lengths should be equal.
tuple(map(len, (metric1_score_true, sentence_bleu_score_true, sbertSimilarity_score_true)))

(100, 100, 100)

In [34]:
# Run predict() on every article of the true split and record its three
# return values (matching score, sentence-BLEU score, SBERT similarity).
#
# Titles and texts are walked positionally with zip() instead of being
# looked up as true_head[idx]: integer [] on a Series is a label lookup,
# which only coincides with row position while the frame still has the
# default 0..n-1 index (it breaks after any filter/sort without a reset).
for idx, (head, text) in enumerate(zip(true_head, true_text)):
    print("Data point : ", idx, text)
    verdict, verdict2, verdict3 = predict(head, text)
    metric1_score_true.append(verdict)
    sentence_bleu_score_true.append(verdict2)
    sbertSimilarity_score_true.append(verdict3)

Data point :  74 WASHINGTON (Reuters) - Wealthy business owners, such as President Donald Trump, stand to gain from a provision in the Republican tax bill that creates a valuable deduction for owners of pass-through businesses, Democrats and some tax experts say. The provision creates a 20-percent business income deduction, with some limits, for sole proprietors and owners in partnerships and other non-corporate enterprises. It was initially sold by Republicans as a way to help small businesses and create jobs. But the final formula for determining what types of businesses can benefit has widened to take in companies with few, if any, workers, critics said. “The president will try to tell the American people that his great political victory is a win for working people, but they see all the benefits going to his type of businesses: real estate pass-throughs,” Democratic Senator Jack Reed said on the Senate floor. Trump, a real estate developer, wants to sign the Republican tax bill into

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9104030132293701
Data point :  75 (Reuters) - The U.S. tax overhaul as currently proposed will make housing less affordable, according to nearly half of the property market experts polled by Reuters, with another third saying it would not do anything to improve it. A decade on from the start of the crash that knocked more than a third off U.S. home values and led to a deep global recession, the housing market has bounced back smartly. U.S. house prices are expected to rise next year and in 2019, faster than predicted just a few months ago and at more than double the rate of underlying consumer inflation and wages. That is in sharp contrast to the outlook for Britain. [GB/HOMES] The S&P/Case Shiller composite index of U.S. home prices in 20 metropolitan areas is expected to gain 5.1 percent next year and 4.2 percent in 2019. The main challenge currently is a chronic shortage of homes, which is pushing prices beyond the reach of new buyers, who tend to be young and n

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7574611902236938
Data point :  76 NEW YORK (Reuters) - Sweeping U.S. tax legislation appears to be on the verge of approval, lifting the prospects in particular for banks, telecoms, transports and other industries that stand to gain the most from lower corporate tax rates. The Republican-led U.S. House of Representatives hit a last-minute snag on Tuesday in their drive to approve the legislation favored by President Donald Trump. The plan on Capitol Hill was for the Senate to delete three offending provisions in the House version and vote on the bill, then send it back to the House for a vote on Wednesday. The bill slashes the corporate income tax rate to 21 percent from 35 percent. That would boost overall earnings for S&P 500 companies by 9.1 percent, according to UBS equity strategists. For an interactive graphic on how the bill ripples through industries: tmsnrt.rs/2kf26gx Momentum behind the tax bill over the past month has helped propel the stock market, whic

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7697760462760925
Data point :  77 WASHINGTON (Reuters) - Eight months into his lifetime U.S. Supreme Court appointment, Neil Gorsuch has given every indication through his votes in key cases and remarks from the bench he will be a stalwart of the conservative legal agenda, as President Donald Trump promised. Still early in his career as a justice that could span decades, Gorsuch has already established himself as among the most conservative members of the top U.S. court, and has not been shy about expressing his views, sometimes in idiosyncratic ways. He also has made public appearances before conservative audiences, including a speech at the Republican president’s Trump International Hotel in downtown Washington, that have drawn rebukes by liberal critics who questioned his independence from the president who nominated him. Gorsuch’s record so far suggests “he is going to be a reliably conservative vote,” said Carolyn Shapiro, a law professor at Chicago-Kent Colle

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.8399377465248108
Data point :  78 WASHINGTON (Reuters) - Two Republican lawmakers told the U.S. Department of Justice on Tuesday that they want to question three senior FBI officials about an investigation of then-Democratic presidential candidate Hillary Clinton over her handling of classified information while she was secretary of state. The lawmakers, who are leading a joint probe into the Clinton investigation, said in a letter to Attorney General Jeff Sessions and his deputy, Rod Rosenstein, that they wanted to speak with FBI Deputy Director Andrew McCabe, FBI Chief of Staff Jim Rybicki and FBI counsel Lisa Page beginning on Thursday. The FBI declined to comment on the letter and referred reporters to the Justice Department. The request to meet the FBI officials for “transcribed interviews” was made after Republicans obtained more than 300 text messages sent last year between Page and FBI agent Peter Strzok critical of then-Republican presidential candidate Do

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9397900700569153
Data point :  79 WASHINGTON (Reuters) - Near the end of President Donald Trump’s rocky first year in office, White House aides view imminent victory on a tax overhaul as a starting point to strengthen his weak approval ratings ahead of key congressional elections next November. Some Republicans said any effort at a political turnaround must include reining in Trump’s habit of lashing out at critics on Twitter. White House aides said they recognized that Trump’s poll numbers needed to start rising to limit the damage in 2018 elections in which his fellow Republicans’ continued control of Congress will be at stake. A Democratic takeover of the House of Representatives and Senate could jeopardize Trump’s agenda. “We have to grow, we have to move up, and I think having more successes like the tax vote will be important to us,” said a senior White House official. Administration officials said Trump would seek to use momentum generated from the biggest t

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7708210945129395
Data point :  80 WASHINGTON (Reuters) - The U.S. Congress is struggling through another contentious week as infighting over defense spending, healthcare and other matters complicates the drive to pass a temporary spending bill by midnight on Friday to avert a partial government shutdown. In a week when President Donald Trump and his fellow Republicans in Congress are hoping to celebrate the passage of tax overhaul legislation, many in the party showed little appetite for a government shutdown at week’s end. But they sounded resigned to having to navigate through some drama over a package that includes so many disparate components, which could make for a messy process. “I’m going to vote for whatever I need to, to keep the government open,” Republican Representative Chris Collins told reporters. The last time government agencies had to shut down because Washington could not pay its bills was in October 2013. Leading Republicans in the Senate and Hou

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.6664371490478516
Data point :  81 (Reuters) - The U.S. Congress is weighing military spending, healthcare and other major decisions tied to a temporary funding bill to keep the government operating beyond Friday, as lawmakers rush to begin a year-end recess. Republicans control the House of Representatives and Senate, but disagreements between the two chambers, along with differences between Republicans and Democrats, make for potentially difficult days ahead. The following are the big initiatives under consideration: Money expires at midnight on Friday for the operation of most federal agencies. That is because Congress has failed to approve the regular appropriations bills for the fiscal year that began on Oct. 1 and Washington has been operating on a series of temporary funding bills. The House is proposing another temporary extension - one that would run through Jan. 19, 2018. It is unclear whether the Senate would stick with that date or seek a slightly later 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.4066748023033142
Data point :  82 WASHINGTON (Reuters) - The U.S. Senate parliamentarian has ruled against three provision of the Republican tax bill, forcing the House of Representatives to hold a second vote on the legislation, Senator Bernie Sanders said on Tuesday. Sanders, an independent on the Senate Budget Committee, said the ruling could mean that provisions related to educational savings accounts for home schooling and private university endowments could be struck from the measure unless 60 members of the Senate vote to uphold them.   
Generating links
Senate parliamentarian rules against tax bill provisions: Sanders
['https://www.reuters.com/article/us-usa-tax-provisions-idUSKBN1ED2SX', 'https://www.reuters.com/article/uk-usa-tax-provisions-idUKKBN1ED2SV', 'https://finance.yahoo.com/news/senate-parliamentarian-rules-against-tax-bill-provisions-sanders-215835147--sector.html', 'https://thehill.com/policy/finance/365691-senate-parliamentarian-rules-against-

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9696680307388306
Data point :  83 WASHINGTON (Reuters) - The U.S. Senate Banking Committee rejected U.S. President Donald Trump’s nominee to head the U.S. Export-Import Bank on Tuesday  but approved four other board nominees, enough to restore the trade bank’s full lending powers upon their confirmation by the full Senate. The committee voted 13-10 against Scott Garrett as EXIM president in a rebuke to conservatives who saw the former New Jersey Republican congressman as an ally who would keep tight controls on the government export lender.  Garrett helped lead a 2015 effort to shut down EXIM to end a source of “corporate welfare” for giant manufacturers such as Boeing Co and General Electric. After his nomination he pledged to keep the bank “fully open” but struggled to persuade senators that he now believed in the bank’s core mission of providing taxpayer-backed loans and guarantees for U.S. export transactions. “I believe he’s a principled man who simply believe

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.8336871266365051
Data point :  84 WASHINGTON (Reuters) - The No. 2 Republican in the U.S. House of Representatives said on Tuesday the House would likely need to vote again on tax legislation on Wednesday morning given that Democrats in the Senate were likely to prevail on a procedural objection. House Majority Leader Kevin McCarthy advised House lawmakers that Senate Democrats were likely to object that the legislation fails to comply with the so-called Byrd rule and were likely to be upheld, necessitating a second House vote. “As such, members are further advised that an additional procedural vote on the Motion to Concur is expected tomorrow morning, which will clear the bill for President Trump’s signature,” McCarthy said in a notice to House lawmakers. 
Generating links
House will likely need to vote again on tax bill: Republican leader
['https://www.reuters.com/article/us-usa-tax-house-vote-idUSKBN1ED2SR', 'https://finance.yahoo.com/news/u-house-likely-vote-ag

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  1.0
Data point :  85 (Reuters) - The Republican-controlled U.S. House of Representatives approved sweeping, debt-financed tax legislation on Tuesday, sending the bill to the Senate, where lawmakers were due to take up the package later in the evening. Story:		 The biggest overhaul of the U.S. tax system in more than 30 years could be signed into law by President Donald Trump as soon as Wednesday, if both chambers of Congress approve it. The bill passed the House by a vote of 227-203, overcoming united opposition from Democrats and 12 Republicans who voted against it. “The bond market today is reassessing the whole thing now that it looks like it’s going to pass. We’ve had a pretty significant rise in yields - we’re back up close to the highs for the quarter. “Stocks on the other hand have been in front of this for a long time, I think initially because lower taxes means more earnings are retained and then companies can distribute those to shareholders or not. But eit

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7931285500526428
Data point :  86 WASHINGTON (Reuters) - The Republican-led U.S. Senate voted on Tuesday to begin debate on sweeping tax legislation, setting the stage for lawmakers to hold their final vote on the tax cut package later in the evening. The Senate voted 51-48 to debate the legislation about an hour after the House of Representatives approved what is widely expected to become the first overhaul of the U.S. tax code in more than 30 years. The debate period is due to last 10 hours officially but could be shortened to as little as five hours by parliamentary procedure. 
Generating links
Senate begins debate on final Republican tax bill
['https://www.reuters.com/article/usa-tax-debate-idUSL1N1OJ1QZ', 'https://www.reuters.com/article/us-usa-tax-debate-idUSKBN1ED2OS', 'https://www.nytimes.com/live/2022/08/07/us/climate-tax-deal-vote', 'https://www.nytimes.com/live/2022/08/07/us/climate-tax-deal-vote', 'https://www.nytimes.com/live/2022/08/07/us/climate-tax-

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  1.0
Data point :  87 WASHINGTON (Reuters) - Lawmakers of both parties are working on legislation that would make members of Congress liable for settlements of sexual harassment claims against them, as new data shows public funds have been used to settle nearly a dozen cases of misconduct over the last decade. From 2008 through 2012, the employment office for Congress paid more than $166,000 in public money to settle eight claims against lawmakers alleging sexual harassment or discrimination, according to data it provided on Tuesday to Representative Gregg Harper, the Republican chairman of the House Administration Committee who is drafting a bill to overhaul how Congress handles sexual harassment. The Office of Compliance previously said it has resolved three other cases since 2013. Provisions in settlement agreements and other legal limits block the office from disclosing details of the payouts it has made on behalf of lawmakers, including identities of those involv

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.8476195335388184
Data point :  88 WASHINGTON (Reuters) - A coalition of nearly 20 environmental and Native American tribal groups sued the Trump administration on Tuesday, challenging its delay of a rule limiting emissions of the powerful greenhouse gas methane from oil and gas drilling operations on federal lands. Earlier this month, the Bureau of Land Management, part of the Department of the Interior, suspended implementation of the rule for a year, until Jan. 17, 2019, saying it wanted to avoid compliance costs for energy companies as it revises the regulation. The delay “is yet another action taken by the Trump administration to benefit the oil and gas industry at the expense of the American public, particularly the millions of Westerners” who use public lands for ranching, hunting, hiking and other purposes, Darin Schroeder, a lawyer with the Clean Air Task Force, said in a statement. His organization represented the National Wildlife Federation, one of the g

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.8183903694152832
Data point :  89 WASHINGTON (Reuters) - Steny Hoyer, the No. 2 Democrat in the U.S. House of Representatives, said on Tuesday that House Democratic leaders are asking their rank-and-file members to vote against a stopgap government funding bill, according to a CNN reporter on Twitter. A House Republican aide said earlier on Tuesday that the spending bill would fund the government until Jan. 19, and include funding for disaster aid and a five-year extension of the Children’s Health Insurance Program. 
Generating links
House Democratic leaders urging 'no' vote on spending bill: CNN reporter
['https://www.reuters.com/article/us-usa-congress-shutdown-democrats-idUSKBN1ED27W', 'https://news.yahoo.com/house-democratic-leaders-urging-no-vote-spending-bill-164430990--business.html', 'https://www.cnn.com/politics/live-news/spending-bill-house-vote-11-19-21/h_0dc25497c20d4218c89ede735b396c84', 'https://townhall.com/tipsheet/mattvespa/2022/12/23/heres-who-vot

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9757227897644043
Data point :  90 WASHINGTON (Reuters) - U.S. Vice President Mike Pence will preside over the Senate’s vote on sweeping tax legislation, his office confirmed on Tuesday. “The @VP will preside over the historic vote,” Alyssa Farah, a spokeswoman for Pence, said on Twitter. Republicans may need Pence’s vote in favor of the legislation to break a tie. 
Generating links
Pence to preside over Senate tax bill vote, his office confirms
['https://www.reuters.com/article/usa-tax-pence-idINW1N1NN00Z', 'https://www.reuters.com/article/us-usa-tax-pence-idUSKBN1ED24D', 'https://www.cnbc.com/2021/01/06/mike-pence-rejects-trumps-call-to-overturn-biden-election.html', 'https://www.cnbc.com/2021/01/06/mike-pence-rejects-trumps-call-to-overturn-biden-election.html', 'https://www.cnbc.com/2021/01/06/mike-pence-rejects-trumps-call-to-overturn-biden-election.html', 'https://www.cnbc.com/2021/01/06/mike-pence-rejects-trumps-call-to-overturn-biden-election.html', 'https:/

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9999999403953552
Data point :  91 WASHINGTON (Reuters) - The U.S. Senate will vote on final tax legislation on Tuesday evening, Senate Republican leader Mitch McConnell said, potentially allowing President Donald Trump to sign the bill into law as early as Wednesday. “Congress is standing at the doorstep of a historic opportunity,” McConnell said on the floor of the Senate as he announced the vote’s timing. The House of Representatives is scheduled to vote on the same legislation at 1:30 p.m. EST (1830 GMT). If both chambers of Congress pass the bill, Trump will be able to meet his goal of signing it into law before Christmas. 
Generating links
Senate to vote on final tax bill Tuesday evening: McConnell
['https://www.reuters.com/article/us-usa-tax-senate-idUSKBN1ED1YJ', 'https://news.yahoo.com/senate-vote-final-tax-bill-tuesday-evening-mcconnell-151734783--business.html', 'https://www.nytimes.com/interactive/2017/12/19/us/politics/tax-bill-senate-live-vote.html', '

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7207773923873901
Data point :  92 WASHINGTON (Reuters) - The Republican-controlled U.S. Congress appeared all but certain to pass sweeping tax legislation this week after two Senate Republican holdouts agreed on Monday to support a tax overhaul backed by President Donald Trump.   As the Republicans’ self-imposed Friday voting deadline loomed, Senators Susan Collins and Mike Lee each said they had decided to back the legislation hammered out last week among Republicans from the Senate and House of Representatives.  “The first major overhaul of our tax code since 1986, this legislation will provide tax relief to working families, encourage the creation of jobs right here in America and spur economic growth that will benefit all Americans,” Collins said on the floor of the Senate while announcing her support. The Maine Republican had been undecided on the legislation.  Lee had been similarly undecided until the Utah Republican tweeted earlier in the day that he would 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7867386937141418
Data point :  93 WASHINGTON (Reuters) - A U.S. District Court judge ruled on Monday that President Donald Trump’s administration must allow access to abortion for two pregnant teenagers who are in the country illegally, escalating a high-profile legal fight. Judge Tanya Chutkan put her order on hold, however, to give the U.S. Justice Department time to appeal her ruling.  The Justice Department filed its notice of appeal shortly afterward to the U.S. Court of Appeals for the District of Columbia Circuit. The judge’s temporary restraining order marked the latest chapter in a legal dispute with the Trump administration over whether minors who are illegal immigrants have the right to seek an abortion during their detention. The issue was ignited by a 17-year-old who petitioned the court in October to have an abortion, and ultimately had the procedure over the Trump administration’s objections.  In that instance, the U.S. Court of Appeals for the Distr

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7731854319572449
Data point :  94 WASHINGTON (Reuters) - Republicans in the U.S. House of Representatives unveiled on Monday an $81 billion aid package to deal with hurricanes and wildfires, far above President Donald Trump’s $44 billion request. The legislation would help Puerto Rico and several states recover from devastating hurricanes and California and other Western states cope with wildfires. It was unclear whether the latest natural-disaster aid plan would be rushed through the Republican-controlled Congress this week, before the start of a Christmas recess, or await congressional votes early next year. The bill, introduced by House Appropriations Committee Chairman Rodney Frelinghuysen, includes $27.6 billion for the Federal Emergency Management Agency and $26.1 billion for community development block grants.  “We have a commitment to our fellow citizens that are in the midst of major rebuilding efforts in all areas, including Texas, Florida, California, Lo

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7724484205245972
Data point :  95 WASHINGTON (Reuters) - The chairman of the U.S. House of Representatives Appropriations Committee introduced a bill on Monday to provide $81 billion in emergency aid for recent hurricanes and wildfires. The legislation includes $27.6 billion for the Federal Emergency Management Agency and $26.1 billion for community development block grants, Representative Rodney Frelinghuysen said in a statement. President Donald Trump had requested $44 billion last month, which was widely criticized by lawmakers as being insufficient. 
Generating links
House panel chair introduces $81 billion disaster aid bill
['https://www.reuters.com/article/us-usa-storms-aid-bill-idUSKBN1ED05Q', 'https://www.reuters.com/article/us-usa-congress-disaster-house-idUSKBN1EF2X2', 'https://appropriations.house.gov/news/press-releases/emergency-disaster-aid-package-introduced', 'https://thehill.com/homenews/house/365610-house-to-include-81b-disaster-aid-package-in-gov

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  1.0
Data point :  96 WASHINGTON (Reuters) - U.S. President Donald Trump on Monday said he plans to nominate Liberty University School of Law professor Caren Harp to oversee the Justice Department’s Office of Juvenile Justice and Delinquency Prevention. If ultimately confirmed by the U.S. Senate, Harp would oversee the Justice Department office that trains and works with state and local communities to develop effective juvenile justice programs and prevent delinquency. Harp previously was director of the National Juvenile Justice Prosecution Center at the American Prosecutors Research Institute.  According to Harp’s LinkedIn page, she is in her sixth year as a professor at Liberty, which is located in Lynchburg, Virginia. The law school’s website says its program is “taught from a Christian worldview” and says it offers a “uniquely tailored legal program taught with sound biblical principles.” Harp, who holds a law degree from the University of Arkansas-Fayetteville, 

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9197705388069153
Data point :  97 The following statements were posted to the verified Twitter accounts of U.S. President Donald Trump, @realDonaldTrump and @POTUS.  The opinions expressed are his own. Reuters has not edited the statements or confirmed their accuracy.  @realDonaldTrump : - Remember, Republicans are 5-0 in Congressional Races this year. The media refuses to mention this. I said Gillespie and Moore would lose (for very different reasons), and they did. I also predicted  “I” would win. Republicans will do well in 2018, very well! @foxandfriends [0623 EST] - Ivanka Trump will be interviewed on @foxandfriends. [0642 EST] - The train accident that just occurred in DuPont, WA shows more than ever why our soon to be submitted infrastructure plan must be approved quickly. Seven trillion dollars spent in the Middle East while our roads, bridges, tunnels, railways (and more) crumble! Not for long! [1341 EST] - My thoughts and prayers are with everyone involve

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.7925820350646973
Data point :  98 WASHINGTON (Reuters) - Two members of President Donald Trump’s Cabinet are set to visit Puerto Rico on Tuesday to assess the U.S. territory’s rebuilding in the three months since Hurricane Maria devastated homes, businesses and the power grid. Homeland Security Secretary Kirstjen Nielsen and Housing and Urban Development Secretary Ben Carson will travel to Puerto Rico, where about a third of the island’s 3.4 million residents are still without power, hundreds remain in shelters, and thousands have fled to the U.S. mainland. The visit comes as Republicans in the U.S. House of Representatives on Monday were planning to unveil a disaster aid package totaling $81 billion, according to a senior congressional aide. Some of that aid would go to Puerto Rico, but also to states like Texas and Florida that were hit by other hurricanes and to California, which is grappling with wild fires. Even before Maria savaged Puerto Rico, the island was

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9116520881652832
Data point :  99 WASHINGTON (Reuters) - The U.S. Senate will not consider an immigration bill as part of year-end legislation but will turn to a measure protecting immigrant youths known as “Dreamers” in January, No. 2 Senate Republican John Cornyn said on Monday. Cornyn also said that if Congress cannot meet an early March deadline for passing legislation providing the protections against deportation for undocumented immigrants who were brought illegally into the United States as children, President Donald Trump could consider extending the deadline. In interviews over the past several days, both Republican and Democratic lawmakers and aides said that talks on Deferred Action for Childhood Arrivals (DACA) have been quietly making progress. “The president has given us enough time to deal with this before March and so I think that’s plenty of time and I expect us to meet it,” Cornyn told reporters. “If we can’t, then the president could extend the d

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

sbertSimilarity :  0.9362745881080627


In [36]:
# Assemble the true-split scores into a table (one column per metric, one
# row per scored article) and write it out without the index column.
true_score_columns = {
    'metric1_score_true': metric1_score_true,
    'sentence_bleu_score_true': sentence_bleu_score_true,
    'sbertSimilarity_score_true': sbertSimilarity_score_true,
}
score_true_1 = pd.DataFrame(true_score_columns)
score_true_1.to_csv('score_true_1.csv', index=False)


In [14]:
# Three independent accumulators for the fake split, one per value that
# predict() returns (matching score, sentence-BLEU score, SBERT similarity).
metric1_score_fake, sentence_bleu_score_fake, sbertSimilarity_score_fake = [], [], []

In [16]:
%%time
# Run predict() on every article of the fake split and record its three
# return values (matching score, sentence-BLEU score, SBERT similarity).
#
# Titles and texts are walked positionally with zip() instead of being
# looked up as fake_head[idx]: integer [] on a Series is a label lookup,
# which only coincides with row position while the frame still has the
# default 0..n-1 index (it breaks after any filter/sort without a reset).
for idx, (head, text) in enumerate(zip(fake_head, fake_text)):
    print("Data point : ", idx)
    verdict, verdict2, verdict3 = predict(head, text)
    metric1_score_fake.append(verdict)
    sentence_bleu_score_fake.append(verdict2)
    sbertSimilarity_score_fake.append(verdict3)


Data point :  0
Generating links
 Donald Trump Sends Out Embarrassing New Year’s Eve Message; This is Disturbing
Links are empty :  []
Data point :  1
Generating links
 Drunk Bragging Trump Staffer Started Russian Collusion Investigation
Links are empty :  []
Data point :  2
Generating links
 Sheriff David Clarke Becomes An Internet Joke For Threatening To Poke People ‘In The Eye’
Links are empty :  []
Data point :  3
Generating links
 Trump Is So Obsessed He Even Has Obama’s Name Coded Into His Website (IMAGES)
Links are empty :  []
Data point :  4
Generating links
 Pope Francis Just Called Out Donald Trump During His Christmas Speech
Links are empty :  []
Data point :  5
Generating links
 Racist Alabama Cops Brutalize Black Boy While He Is In Handcuffs (GRAPHIC IMAGES)
Links are empty :  []
Data point :  6
Generating links
 Fresh Off The Golf Course, Trump Lashes Out At FBI Deputy Director And James Comey
Links are empty :  []
Data point :  7
Generating links
 Trump Said Some INSANEL

In [17]:
# Assemble the fake-split scores into a table (one column per metric, one
# row per scored article) and write it out without the index column.
# NOTE(review): the column labels end in "_true" although the values are the
# fake-split scores — this looks copy-pasted from the true-split export. The
# labels are kept unchanged here; confirm what reads score_fake_1.csv before
# renaming them.
fake_score_columns = {
    'metric1_score_true': metric1_score_fake,
    'sentence_bleu_score_true': sentence_bleu_score_fake,
    'sbertSimilarity_score_true': sbertSimilarity_score_fake,
}
score_fake_1 = pd.DataFrame(fake_score_columns)
score_fake_1.to_csv('score_fake_1.csv', index=False)
