# Watermarking

In [None]:
! pip install gradio nltk scipy torch transformers tokenizers

Collecting gradio
  Downloading gradio-3.45.2-py3-none-any.whl (20.2 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 20.2/20.2 MB 34.8 MB/s eta 0:00:00
Collecting transformers
  Downloading transformers-4.33.3-py3-none-any.whl (7.6 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 7.6/7.6 MB 71.3 MB/s eta 0:00:00
Collecting tokenizers
  Downloading tokenizers-0.14.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.8/3.8 MB 80.5 MB/s eta 0:00:00
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.103.2-py3-none-any.whl (66 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 66.3/66.3 kB 7.6 MB/s eta 0:00:00
Collecting ffmpy (from gradio)
  Do

## homoglyphs.py

In [None]:
"""Updated version of core.py from
https://github.com/yamatt/homoglyphs/tree/main/homoglyphs_fork
for modern python3
"""

from collections import defaultdict
import json
from itertools import product
import os
import unicodedata

# Fallback policies applied when a character is missing from the alphabet
STRATEGY_LOAD = 1  # pull in the alphabet of the character's language/category
STRATEGY_IGNORE = 2  # pass the character through untouched
STRATEGY_REMOVE = 3  # drop the character from the output

# Code points treated as ASCII (0..127 inclusive)
ASCII_RANGE = range(0, 128)


# Directory holding categories.json, languages.json and the confusables table
DATA_LOCATION = os.path.join(os.curdir)


class Categories:
    """
    Work with aliases from ISO 15924.
    https://en.wikipedia.org/wiki/ISO_15924#List_of_codes

    Data is read from ``categories.json``. The code relies on two keys of that file:
    ``"aliases"`` (the known category names) and ``"points"`` (entries of the form
    ``[start code, end code, category]``).

    The parsed file is cached per path, so per-character calls such as :meth:`detect`
    do not re-open and re-parse the JSON file every time.
    """

    fpath = os.path.join(DATA_LOCATION, "categories.json")

    # Parsed JSON documents keyed by the path they were loaded from. Keying by path keeps
    # the cache correct if ``fpath`` is reassigned (e.g. by a subclass).
    _data_cache = {}

    @classmethod
    def _load_data(cls):
        """
        :return: parsed content of ``cls.fpath`` (loaded from disk on first use only)
        :rtype: dict
        """
        path = cls.fpath
        if path not in cls._data_cache:
            with open(path, encoding="utf-8") as f:
                cls._data_cache[path] = json.load(f)
        return cls._data_cache[path]

    @classmethod
    def _get_ranges(cls, categories):
        """
        Lazily yield the code point ranges that belong to the given categories.

        Because this is a generator, the file is loaded and the categories are
        validated on first iteration, not at call time.

        :return: iter: (start code, end code)
        :rtype: generator
        :raises ValueError: if any requested category is not a known alias
        """
        data = cls._load_data()

        for category in categories:
            if category not in data["aliases"]:
                raise ValueError("Invalid category: {}".format(category))

        for point in data["points"]:
            if point[2] in categories:
                # slice -> fresh list, so callers never receive the cached object itself
                yield point[:2]

    @classmethod
    def get_alphabet(cls, categories):
        """
        :return: set of chars in alphabet by categories list
        :rtype: set
        :raises ValueError: if any requested category is not a known alias
        """
        alphabet = set()
        for start, end in cls._get_ranges(categories):
            # ranges in the data file are inclusive on both ends
            alphabet.update(chr(code) for code in range(start, end + 1))
        return alphabet

    @classmethod
    def detect(cls, char):
        """
        Detect the category of a single character.

        :return: category, or None when neither the unicode name nor the ranges match
        :rtype: str
        """
        data = cls._load_data()

        # try detect category by unicodedata
        try:
            category = unicodedata.name(char).split()[0]
        except (TypeError, ValueError):
            # unicodedata.name raises ValueError for characters without a name
            # and TypeError for arguments that are not a single character
            pass
        else:
            if category in data["aliases"]:
                return category

        # try detect category by ranges from JSON file.
        code = ord(char)
        for point in data["points"]:
            if point[0] <= code <= point[1]:
                return point[2]
        return None

    @classmethod
    def get_all(cls):
        """
        :return: all known category aliases (a fresh set, safe to mutate)
        :rtype: set
        """
        return set(cls._load_data()["aliases"])


class Languages:
    """Alphabet lookups by language code, backed by ``languages.json``.

    The file is read as a mapping from language code to that language's alphabet.
    """

    fpath = os.path.join(DATA_LOCATION, "languages.json")

    @classmethod
    def _read(cls):
        """Parse the languages file; it is re-read from disk on every call."""
        with open(cls.fpath, encoding="utf-8") as handle:
            return json.load(handle)

    @classmethod
    def get_alphabet(cls, languages):
        """
        :return: union of the alphabets of all requested language codes
        :rtype: set
        :raises ValueError: if a language code is not present in the data file
        """
        table = cls._read()
        chars = set()
        for code in languages:
            if code not in table:
                raise ValueError("Invalid language code: {}".format(code))
            chars.update(table[code])
        return chars

    @classmethod
    def detect(cls, char):
        """
        :return: every language code whose alphabet contains ``char``
        :rtype: set
        """
        return {code for code, letters in cls._read().items() if char in letters}

    @classmethod
    def get_all(cls):
        """
        :return: all language codes found in the data file
        :rtype: set
        """
        return set(cls._read())


class Homoglyphs:
    """Generate look-alike (homoglyph) spellings of text within a configurable alphabet.

    The alphabet is the union of the explicitly passed characters and the alphabets of the
    requested categories and languages. ``self.table`` maps each alphabet character to the
    confusable characters (from ``confusables_sept2022.json``) that are also in the alphabet.
    """

    def __init__(
        self,
        categories=None,
        languages=None,
        alphabet=None,
        strategy=STRATEGY_IGNORE,
        ascii_strategy=STRATEGY_IGNORE,
        ascii_range=ASCII_RANGE,
    ):
        """
        :param categories: iterable of category names whose characters join the alphabet
        :param languages: iterable of language codes whose characters join the alphabet
        :param alphabet: iterable of extra characters to include
        :param strategy: one of the STRATEGY_* constants; what to do with chars outside the alphabet
        :param ascii_strategy: policy used by the ASCII conversion when a char has no ASCII variant
        :param ascii_range: container of code points considered ASCII
        :raises ValueError: if ``strategy`` is not one of the STRATEGY_* constants
        """
        # strategies
        if strategy not in (STRATEGY_LOAD, STRATEGY_IGNORE, STRATEGY_REMOVE):
            raise ValueError("Invalid strategy")
        self.strategy = strategy
        self.ascii_strategy = ascii_strategy
        self.ascii_range = ascii_range

        # Homoglyphs must be initialized by any alphabet for correct work
        if not categories and not languages and not alphabet:
            categories = ("LATIN", "COMMON")

        # cats and langs
        self.categories = set(categories or [])
        self.languages = set(languages or [])

        # alphabet
        self.alphabet = set(alphabet or [])
        if self.categories:
            self.alphabet.update(Categories.get_alphabet(self.categories))
        if self.languages:
            self.alphabet.update(Languages.get_alphabet(self.languages))
        self.table = self.get_table(self.alphabet)

    @staticmethod
    def get_table(alphabet):
        """Build the confusables table where source and target alphabet are the same.

        :return: defaultdict(set) mapping char -> homoglyphs of that char inside ``alphabet``
        """
        return Homoglyphs.get_restricted_table(alphabet, alphabet)

    @staticmethod
    def get_restricted_table(source_alphabet, target_alphabet):
        """Build a confusables table from ``source_alphabet`` chars to ``target_alphabet`` chars.

        Only characters with at least one homoglyph in the target alphabet get an entry.

        :return: defaultdict(set) mapping source char -> homoglyphs inside ``target_alphabet``
        """
        table = defaultdict(set)
        # explicit utf-8, matching every other data file read in this module
        # (the platform default encoding is not guaranteed to be utf-8)
        with open(os.path.join(DATA_LOCATION, "confusables_sept2022.json"), encoding="utf-8") as f:
            data = json.load(f)
        for char in source_alphabet:
            if char in data:
                for homoglyph in data[char]:
                    if homoglyph in target_alphabet:
                        table[char].add(homoglyph)
        return table

    @staticmethod
    def uniq_and_sort(data):
        """Deduplicate ``data`` and sort it longest-first, then lexicographically."""
        return sorted(set(data), key=lambda x: (-len(x), x))

    def _update_alphabet(self, char):
        """Extend the alphabet with the language(s) or category of ``char`` and rebuild the table.

        :return: False if neither a language nor a category could be detected, True otherwise
        """
        # try detect languages
        langs = Languages.detect(char)
        if langs:
            self.languages.update(langs)
            self.alphabet.update(Languages.get_alphabet(langs))
        else:
            # try detect categories
            category = Categories.detect(char)
            if category is None:
                return False
            self.categories.add(category)
            self.alphabet.update(Categories.get_alphabet([category]))
        # update table for new alphabet
        self.table = self.get_table(self.alphabet)
        return True

    def _get_char_variants(self, char):
        """Return the sorted variants of ``char``: itself, its homoglyphs and their homoglyphs.

        For a char outside the alphabet the configured strategy decides: load its alphabet
        and continue, return it unchanged, or return no variants at all.
        """
        if char not in self.alphabet:
            if self.strategy == STRATEGY_LOAD:
                if not self._update_alphabet(char):
                    return []
            elif self.strategy == STRATEGY_IGNORE:
                return [char]
            elif self.strategy == STRATEGY_REMOVE:
                return []

        # Work on a copy: updating the set stored in self.table in place would grow the
        # table on every call and make results depend on the call history.
        alt_chars = set(self.table.get(char, ()))
        # find alternative chars for alternative chars for current char (one extra hop)
        for alt_char in tuple(alt_chars):
            alt_chars |= self.table.get(alt_char, set())
        # add current char to alternatives
        alt_chars.add(char)

        # uniq, sort and return
        return self.uniq_and_sort(alt_chars)

    def _get_combinations(self, text, ascii=False):
        """Yield every spelling of ``text`` obtained by picking one variant per character.

        With ``ascii=True`` only variants inside ``self.ascii_range`` are kept; if a character
        then has no variant left and ``ascii_strategy`` is STRATEGY_IGNORE, nothing is yielded
        at all. Characters without variants are otherwise dropped from the result.
        """
        variations = []
        for char in text:
            alt_chars = self._get_char_variants(char)

            if ascii:
                alt_chars = [c for c in alt_chars if ord(c) in self.ascii_range]
                if not alt_chars and self.ascii_strategy == STRATEGY_IGNORE:
                    return

            if alt_chars:
                variations.append(alt_chars)
        if variations:
            for variant in product(*variations):
                yield "".join(variant)

    def get_combinations(self, text):
        """Return all homoglyph spellings of ``text`` as a list."""
        return list(self._get_combinations(text))

    def _to_ascii(self, text):
        """Yield the spellings of ``text`` whose highest code point lies in ``self.ascii_range``."""
        for variant in self._get_combinations(text, ascii=True):
            if max(map(ord, variant)) in self.ascii_range:
                yield variant

    def to_ascii(self, text):
        """Return the unique ASCII spellings of ``text``, longest first."""
        return self.uniq_and_sort(self._to_ascii(text))


## alternative_prf_schemes.py

In [None]:
"""Implement other PRF functions (These all vary only how they generate a single hash from the tokens in the context).

Can be hooked into existing WatermarkLogitsProcessor as modified base class WatermarkBase, see implementation in
extended_watermark_processor.py
"""

# coding=utf-8
# Copyright 2023 Authors of "A Watermark for Large Language Models"
# available at https://arxiv.org/abs/2301.10226
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch
from itertools import combinations
from functools import cache

# Settings that define a hashing scheme, mapped to the Python type of each setting
props = dict(
    prf_type=str,  # string name of the underlying PRF mapping multiple token ids to a random seed
    context_width=int,  # this is h in the paper, how many previous tokens should be considered for each PRF
    self_salt=bool,  # Use the rules laid in robust-watermarking to use the token itself to seed and possibly reject its own list
    hash_key=int,  # integer, large prime, used to move seed away from low-entropy bit sequences in PRF chosen above
)


def seeding_scheme_lookup(seeding_scheme: str):
    """Translate a public seeding-scheme name into its PRF settings.

    Named schemes are ``simple_1``/``lefthash``, ``algorithm-3``/``selfhash``, ``minhash`` and
    ``skipgram``. Any name starting with ``ff`` is parsed as a freeform scheme (experimental) of
    the form ``ff-<prf_type>-<context_width>-<self_salt>[-<hash_key>]``, e.g.
    ``ff-additive_prf-4-True-15485863``.

    :return: tuple ``(prf_type, context_width, self_salt, hash_key)``
    :raises ValueError: if the argument is not a string, names no known scheme, is a malformed
        freeform string, or selects a PRF that is not registered in ``prf_lookup``
    """
    if not isinstance(seeding_scheme, str):
        raise ValueError("Seeding scheme should be a string summarizing the procedure.")

    default_hash_key = 15485863
    if seeding_scheme == "simple_1" or seeding_scheme == "lefthash":
        # Default, simple bigram hash  # alias for ff-additive_prf-1-False-15485863
        prf_type = "additive_prf"
        context_width = 1
        self_salt = False
        hash_key = default_hash_key
    elif seeding_scheme == "algorithm-3" or seeding_scheme == "selfhash":
        prf_type = "anchored_minhash_prf"
        context_width = 4
        self_salt = True
        hash_key = default_hash_key
    elif seeding_scheme == "minhash":
        prf_type = "minhash_prf"
        context_width = 4
        self_salt = False
        hash_key = default_hash_key
    elif seeding_scheme == "skipgram":
        prf_type = "skipgram_prf"
        context_width = 5
        self_salt = False
        hash_key = default_hash_key
    elif seeding_scheme.startswith("ff"):  # freeform seeding scheme API - only use for experimenting
        # expects strings of the form ff-additive_prf-4-True-hash or ff-additive_prf-5-True (hash key is optional)
        split_scheme = seeding_scheme.split("-")
        if len(split_scheme) < 4:
            # fail with a descriptive error instead of a bare IndexError further down
            raise ValueError(
                f"Malformed freeform seeding scheme {seeding_scheme}, "
                "expected ff-<prf_type>-<context_width>-<self_salt>[-<hash_key>]."
            )
        prf_type = str(split_scheme[1])
        context_width = int(split_scheme[2])  # int() raises ValueError on non-numeric input
        self_salt = split_scheme[3] == "True"
        # the hash key is only read when the string has exactly five parts
        hash_key = int(split_scheme[4]) if len(split_scheme) == 5 else default_hash_key
    else:
        raise ValueError(f"Invalid seeding scheme name {seeding_scheme} given. Try  'simple_1'?")

    # explicit check rather than `assert`, which is stripped when Python runs with -O
    if prf_type not in prf_lookup:
        raise ValueError(f"Unknown PRF type {prf_type} requested by seeding scheme {seeding_scheme}.")
    return prf_type, context_width, self_salt, hash_key


def multiplicative_prf(input_ids: torch.LongTensor, salt_key: int) -> int:
    """Return ``salt_key`` times the product of all token ids in ``input_ids``."""
    token_product = input_ids.prod().item()
    return salt_key * token_product


def additive_prf(input_ids: torch.LongTensor, salt_key: int) -> int:
    """Return ``salt_key`` times the sum of all token ids in ``input_ids``."""
    token_sum = input_ids.sum().item()
    return salt_key * token_sum


def minfunc_prf(input_ids: torch.LongTensor, salt_key: int) -> int:
    """Return ``salt_key`` times the smallest token id in ``input_ids``.

    Not a great idea for non-random input ids as in text.
    """
    smallest_id = input_ids.min().item()
    return salt_key * smallest_id


def simple_skip_prf(input_ids: torch.LongTensor, salt_key: int, k=2) -> int:
    """Return the product of the hashed, salted token ids taken at every ``k``-th position.

    ``k`` is the skip distance.
    """
    strided_ids = input_ids[::k]
    hashed = hashint(salt_key * strided_ids)
    return hashed.prod().item()


def skipgram_prf(input_ids: torch.LongTensor, salt_key: int) -> int:
    """Return the hash of the salted first token id of the context.

    Maximum distance skipgram within context.
    """
    first_token = input_ids[0]
    return hashint(salt_key * first_token).item()


def anchored_skipgram_prf(input_ids: torch.LongTensor, salt_key: int, anchor: int = -1) -> int:
    """Return the product of the hashed first token id and the hashed token id at ``anchor``.

    Both ids are multiplied by ``salt_key`` before hashing. Maximum distance skipgram within context.
    """
    first_hash = hashint(salt_key * input_ids[0])
    anchor_hash = hashint(salt_key * input_ids[anchor])
    return (first_hash * anchor_hash).item()


def minhash_prf(input_ids: torch.LongTensor, salt_key: int) -> int:
    """Return the smallest hash among the salted token ids.

    Slightly less not the greatest idea for non-random input ids as in text.
    """
    hashed = hashint(salt_key * input_ids)
    return hashed.min().item()


def anchored_minhash_prf(input_ids: torch.LongTensor, salt_key: int, anchor: int = -1) -> int:
    """Return the minimum over ``salt_key * hash(token) * hash(anchor token)`` for all tokens.

    Anchoring to one key produces a min over pairs again.
    """
    anchor_hash = hashint(input_ids[anchor])
    salted_hashes = salt_key * hashint(input_ids)
    return (salted_hashes * anchor_hash).min().item()


def minskipgram_prf(input_ids: torch.LongTensor, salt_key: int, k: int = 2) -> int:
    """Return the minimum product over all size-``k`` combinations of hashed, salted token ids.

    :param input_ids: token ids of the context
    :param salt_key: multiplier applied to the ids before hashing
    :param k: number of tokens per skipgram; the default ``k=2`` is all pairs
    :return: smallest product found among the skipgrams
    """
    hashed = hashint(salt_key * input_ids)
    # honour ``k`` (previously ignored: the combination size was hard-coded to 2)
    skipgrams = torch.as_tensor(list(combinations(hashed, k)))
    return skipgrams.prod(dim=1).min().item()


def noncomm_prf(input_ids: torch.LongTensor, salt_key: int, k: int = 2) -> int:
    """Fold the token ids, in order, into a running key that starts at ``salt_key``.

    Each step multiplies the key by the hash of ``key * token`` and reduces it modulo 2**32.
    ``k`` is accepted but not used.
    """
    modulus = 2**32
    state = torch.as_tensor(salt_key, dtype=torch.long)
    for token in input_ids:
        state *= hashint(state * token)
        state %= modulus
    return state.item()


def position_prf(input_ids: torch.LongTensor, salt_key: int, k: int = 2) -> int:
    """Return the sum of ``salt_key * token_id * position`` with positions counted from 1.

    ``k`` is accepted but not used.
    """
    positions = torch.arange(1, len(input_ids) + 1, device=input_ids.device)
    weighted = salt_key * input_ids * positions
    return weighted.sum().item()


# Registry of every PRF, keyed by the function's own name
prf_lookup = {
    prf.__name__: prf
    for prf in (
        multiplicative_prf,
        additive_prf,
        minfunc_prf,
        simple_skip_prf,
        skipgram_prf,
        anchored_skipgram_prf,
        minhash_prf,
        anchored_minhash_prf,
        minskipgram_prf,
        noncomm_prf,
        position_prf,
    )
}

# Build the shared permutation table a single time, when the module is loaded
table_size = 1_000_003
rng = torch.Generator(device=torch.device("cpu"))
rng.manual_seed(2971215073)  # fib47 is prime
# fixed seed -> the same CPU-resident permutation on every run; actually faster than I thought
fixed_table = torch.randperm(table_size, device=torch.device("cpu"), generator=rng)


def hashint(integer_tensor: torch.LongTensor) -> torch.LongTensor:
    """Hash integers through the fixed permutation table (sane version, a small table is all we need).

    The input is moved to the CPU and reduced modulo ``table_size`` to index the table; the
    looked-up value plus one is returned. Minor cheat: the result is always a CPU tensor.
    """
    slots = integer_tensor.cpu() % table_size
    return fixed_table[slots] + 1


def _hashint_avalanche_tensor(integer_tensor: torch.LongTensor):
    """http://burtleburtle.net/bob/hash/integer.html, ported into pytorch, runs on tensors. Apparently a decent avalanche."""
    i = integer_tensor.to(torch.int32).clone()  # or torch.int16?
    i -= i << 6
    i ^= i >> 17
    i -= i << 9
    i ^= i << 4
    i -= i << 3
    i ^= i << 10
    i ^= i >> 15
    return i.to(torch.long)


@cache
def _hashint_avalanche_int(integer: int):
    """http://burtleburtle.net/bob/hash/integer.html, runs in base python, caches based on access.
    Does this make sense for signed 64bit ints?"""
    i = integer % (2**32)
    i -= i << 6
    i ^= i >> 17
    i -= i << 9
    i ^= i << 4
    i -= i << 3
    i ^= i << 10
    i ^= i >> 15
    return i


## normalizers.py

In [None]:
""" Text-based normalizers, used to mitigate simple attacks against watermarking.

This implementation is unlikely to be a complete list of all possible exploits within the unicode standard,
it represents our best effort at the time of writing.

These normalizers can be used as stand-alone normalizers. They could be made to conform to HF tokenizers standard, but that would
require messing with the limited rust interface of tokenizers.NormalizedString
"""
from collections import defaultdict
from functools import cache

import re
import unicodedata


def normalization_strategy_lookup(strategy_name: str) -> object:
    """Instantiate the text normalizer registered under ``strategy_name``.

    :param strategy_name: ``"unicode"``, ``"homoglyphs"`` or ``"truecase"``
    :return: a new, default-constructed callable normalizer
    :raises ValueError: for any other name (previously ``None`` was returned silently, which
        only surfaced later when the normalizer was called)
    """
    if strategy_name == "unicode":
        return UnicodeSanitizer()
    if strategy_name == "homoglyphs":
        return HomoglyphCanonizer()
    if strategy_name == "truecase":
        return TrueCaser()
    raise ValueError(
        f"Unknown normalization strategy {strategy_name!r}. Expected 'unicode', 'homoglyphs' or 'truecase'."
    )


class HomoglyphCanonizer:
    """Attempts to detect homoglyph attacks and find a consistent canon.

    This function does so on a per-ISO-category level: the most frequent category in the text
    becomes the target, and characters of other categories are replaced by a look-alike from
    the target category (or COMMON) when one exists.
    """

    def __init__(self):
        self.homoglyphs = None
        # per-instance memo of replacement tables, keyed by (target_category, all_categories);
        # kept on the instance so that cached tables are released together with it
        self._table_cache = {}

    def __call__(self, homoglyphed_str: str) -> str:
        """Return ``homoglyphed_str`` with foreign-category look-alikes mapped to the canon category.

        Text in which no character has a detectable category (including the empty string) is
        returned unchanged.
        """
        # find canon:
        target_category, all_categories = self._categorize_text(homoglyphed_str)
        if target_category is None:
            return homoglyphed_str
        homoglyph_table = self._select_canon_category_and_load(target_category, all_categories)
        return self._sanitize_text(target_category, homoglyph_table, homoglyphed_str)

    def _categorize_text(self, text: str) -> tuple:
        """Count the ISO category of every character.

        :return: ``(target_category, all_categories)`` where the target is the most frequent
            category and ``all_categories`` is a tuple of every category seen;
            ``(None, ())`` if no character could be categorized
        """
        iso_categories = defaultdict(int)

        for char in text:
            category = Categories.detect(char)
            # characters without a detectable category cannot be looked up later, skip them
            if category is not None:
                iso_categories[category] += 1
        if not iso_categories:
            return None, ()
        target_category = max(iso_categories, key=iso_categories.get)
        all_categories = tuple(iso_categories)
        return target_category, all_categories

    def _select_canon_category_and_load(
        self, target_category: str, all_categories: tuple[str]
    ) -> dict:
        """Build (or fetch from the memo) the table mapping seen-category chars to canon chars."""
        key = (target_category, all_categories)
        if key not in self._table_cache:
            homoglyph_table = Homoglyphs(
                categories=(target_category, "COMMON")
            )  # alphabet loaded here from file

            source_alphabet = Categories.get_alphabet(all_categories)
            self._table_cache[key] = homoglyph_table.get_restricted_table(
                source_alphabet, homoglyph_table.alphabet
            )  # table loaded here from file
        return self._table_cache[key]

    def _sanitize_text(
        self, target_category: str, homoglyph_table: dict, homoglyphed_str: str
    ) -> str:
        """Replace every character outside the target/COMMON categories by its canon look-alike."""
        replacements = {}  # per-call memo: each distinct character is resolved only once
        pieces = []
        for char in homoglyphed_str:
            if char not in replacements:
                replacements[char] = self._canonical_char(char, target_category, homoglyph_table)
            pieces.append(replacements[char])
        return "".join(pieces)

    @staticmethod
    def _canonical_char(char: str, target_category: str, homoglyph_table: dict) -> str:
        """Resolve one character to its canon form; characters without a replacement are kept."""
        cat = Categories.detect(char)
        # exact comparison: a substring test would treat e.g. "HAN" as matching "HANGUL"
        if not cat or cat == target_category or cat == "COMMON":
            return char
        # .get() avoids inserting empty entries into the memoized defaultdict
        candidates = homoglyph_table.get(char)
        if not candidates:
            return char
        # min() picks the same candidate on every run; set iteration order does not
        return min(candidates)


class UnicodeSanitizer:
    """Regex-based unicode sanitizer. Has different levels of granularity.

    * ruleset="whitespaces"    - attempts to remove only whitespace unicode characters
    * ruleset="IDN.blacklist"  - does its best to remove unusual unicode based on  Network.IDN.blacklist characters
    * ruleset="ascii"          - brute-forces all text into ascii (also used for any unrecognized ruleset name)

    This is unlikely to be a comprehensive list.

    You can find a more comprehensive discussion at https://www.unicode.org/reports/tr36/
    and https://www.unicode.org/faq/security.html
    """

    def __init__(self, ruleset="whitespaces"):
        if ruleset == "whitespaces":
            # Characters matched by this ruleset:
            #   U+00A0: Non-breaking space
            #   U+1680: Ogham space mark
            #   U+180E: Mongolian vowel separator
            #   U+2000-U+200B: Various space characters, including en space, em space, thin space,
            #                  hair space and zero-width space
            #   U+200C, U+200D: Zero-width non-joiner and zero-width joiner
            #   U+200E, U+200F: Left-to-right-mark, Right-to-left-mark
            #   U+2060: Word joiner
            #   U+2063: Invisible separator
            #   U+202F: Narrow non-breaking space
            #   U+205F: Medium mathematical space
            #   U+3000: Ideographic space
            #   U+FEFF: Zero-width non-breaking space
            #   U+FFA0: Halfwidth hangul filler
            #   U+FFF9, U+FFFA, U+FFFB: Interlinear annotation characters
            #   U+FE00-U+FE0F: Variation selectors
            #   U+202A-U+202F: Embedding characters
            #   U+3164: Korean hangul filler.
            #
            # Note that these characters are not always superfluous whitespace characters!
            self.pattern = re.compile(
                r"[\u00A0\u1680\u180E\u2000-\u200B\u200C\u200D\u200E\u200F\u2060\u2063\u202F\u205F\u3000\uFEFF\uFFA0\uFFF9\uFFFA\uFFFB"
                r"\uFE00\uFE01\uFE02\uFE03\uFE04\uFE05\uFE06\uFE07\uFE08\uFE09\uFE0A\uFE0B\uFE0C\uFE0D\uFE0E\uFE0F\u3164\u202A\u202B\u202C\u202D"
                r"\u202E\u202F]"
            )
        elif ruleset == "IDN.blacklist":
            # First alternative: one whitespace/annotation character from the IDN blacklist
            #   (U+00A0, U+1680, U+180E, U+2000-U+200B, U+202F, U+205F, U+2060, U+2063, U+FEFF,
            #   U+FFF9-U+FFFB) or a lone high surrogate (U+D800-U+DBFF), optionally followed by a
            #   low surrogate (U+DC00-U+DFFF).
            # Second alternative: the tag characters U+E0020-U+E007F. The pattern this was ported
            #   from spelled them as the UTF-16 pair \uDB40\uDC20-\uDB40\uDC7F. Python strings hold
            #   code points, not UTF-16 units, and inside a character class that spelling contains
            #   the reversed range \uDC20-\uDB40, which makes re.compile fail with
            #   "bad character range". The code-point range below is the equivalent form.
            self.pattern = re.compile(
                r"[\u00A0\u1680\u180E\u2000-\u200B\u202F\u205F\u2060\u2063\uFEFF\uFFF9-\uFFFB\uD800-\uDB7F\uDB80-\uDBFF]"
                r"[\uDC00-\uDFFF]?|[\U000E0020-\U000E007F]"
            )
        else:
            # This is a simple restriction to "no-unicode", using only ascii characters.
            # Control characters are included in the allowed range of the pattern.
            self.pattern = re.compile(r"[^\x00-\x7F]+")

    def __call__(self, text: str) -> str:
        """Return ``text`` with every match of the ruleset pattern replaced by a single space.

        Steps: NFC-normalize, substitute pattern matches by a space, collapse runs of spaces,
        then drop all characters of unicode category ``Cc`` (control characters).
        """
        text = unicodedata.normalize("NFC", text)  # canon forms
        text = self.pattern.sub(" ", text)  # pattern match
        text = re.sub(" +", " ", text)  # collapse whitespaces
        text = "".join(
            c for c in text if unicodedata.category(c) != "Cc"
        )  # Remove any remaining non-printable characters
        return text


class TrueCaser:
    """True-casing, is a capitalization normalization that returns text to its original capitalization.

    This defends against attacks that wRIte TeXt lIkE spOngBoB.

    Here, a simple POS-tagger is used: the text is lower-cased and the tokens tagged as proper
    nouns are capitalized again (the spacy backend also capitalizes sentence starts).
    """

    uppercase_pos = ["PROPN"]  # Name POS tags that should be upper-cased

    # nltk resources fetched for the nltk backend
    _nltk_resources = ("punkt", "averaged_perceptron_tagger", "universal_tagset")
    # set to True once the resources have been requested in this process, so that
    # normalizing a string does not trigger nltk.download on every single call
    _nltk_ready = False

    def __init__(self, backend="spacy"):
        """
        :param backend: ``"spacy"`` loads the ``en_core_web_sm`` pipeline; any other value
            selects the nltk implementation
        """
        if backend == "spacy":
            import spacy

            self.nlp = spacy.load("en_core_web_sm")
            self.normalize_fn = self._spacy_truecasing
        else:
            from nltk import pos_tag, word_tokenize  # noqa  (fail early if nltk is missing)

            self._ensure_nltk_resources()
            self.normalize_fn = self._nltk_truecasing

    @classmethod
    def _ensure_nltk_resources(cls):
        """Download the nltk resources the first time they are needed in this process."""
        if cls._nltk_ready:
            return
        import nltk

        for resource in cls._nltk_resources:
            nltk.download(resource)
        cls._nltk_ready = True

    def __call__(self, random_capitalized_string: str) -> str:
        """Return the true-cased version of the input using the configured backend."""
        truecased_str = self.normalize_fn(random_capitalized_string)
        return truecased_str

    def _spacy_truecasing(self, random_capitalized_string: str):
        """Lower-case the text, then capitalize proper nouns and sentence-initial tokens (spacy)."""
        doc = self.nlp(random_capitalized_string.lower())
        POS = self.uppercase_pos
        truecased_str = "".join(
            [
                w.text_with_ws.capitalize() if w.pos_ in POS or w.is_sent_start else w.text_with_ws
                for w in doc
            ]
        )
        return truecased_str

    def _nltk_truecasing(self, random_capitalized_string: str):
        """Lower-case the text, then capitalize tokens tagged NNP/NNPS (nltk).

        Tokens are re-joined with single spaces, so the original spacing is not preserved.
        """
        from nltk import pos_tag, word_tokenize

        # no-op after the first call; covers instances that did not go through the nltk __init__ path
        self._ensure_nltk_resources()
        POS = ["NNP", "NNPS"]

        tagged_text = pos_tag(word_tokenize(random_capitalized_string.lower()))
        truecased_str = " ".join([w.capitalize() if p in POS else w for (w, p) in tagged_text])
        return truecased_str


## watermark processor

In [None]:
# coding=utf-8
# Copyright 2023 Authors of "A Watermark for Large Language Models"
# available at https://arxiv.org/abs/2301.10226
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations
import collections
from math import sqrt
from itertools import chain, tee
from functools import lru_cache

import scipy.stats
import torch
from tokenizers import Tokenizer
from transformers import LogitsProcessor


class WatermarkBase:
    """Settings and green-list generation shared by the watermark processor and detector."""

    def __init__(
        self,
        vocab: list[int] = None,
        gamma: float = 0.25,
        delta: float = 2.0,
        seeding_scheme: str = "selfhash",  # simple default, find more schemes in alternative_prf_schemes.py
        select_green_tokens: bool = True,  # should always be the default if not running in legacy mode
    ):
        """
        :param vocab: token ids of the vocabulary; only its length is used here
        :param gamma: fraction of the vocabulary placed on the green list
        :param delta: stored on the instance as the watermark strength
        :param seeding_scheme: public scheme name; ``None`` falls back to ``"selfhash"``
        :param select_green_tokens: take the green list from the front of the permutation
            (True) or from its tail (False, legacy behavior)
        """
        # None may be handed in explicitly, treat it like the default scheme
        scheme_name = "selfhash" if seeding_scheme is None else seeding_scheme

        # Vocabulary setup
        self.vocab = vocab
        self.vocab_size = len(vocab)

        # Watermark behavior:
        self.gamma = gamma
        self.delta = delta
        self.rng = None  # subclasses create the generator
        self._initialize_seeding_scheme(scheme_name)
        # Legacy behavior:
        self.select_green_tokens = select_green_tokens

    def _initialize_seeding_scheme(self, seeding_scheme: str) -> None:
        """Resolve the public scheme name into prf_type, context_width, self_salt and hash_key."""
        settings = seeding_scheme_lookup(seeding_scheme)
        self.prf_type, self.context_width, self.self_salt, self.hash_key = settings

    def _seed_rng(self, input_ids: torch.LongTensor) -> None:
        """Seed ``self.rng`` from the last ``context_width`` token ids.

        Not batched, because the generators we use (like cuda.random) are not batched.
        No check is made that the prefix actually holds ``context_width`` tokens.
        """
        context = input_ids[-self.context_width :]
        prf_fn = prf_lookup[self.prf_type]
        prf_key = prf_fn(context, salt_key=self.hash_key)
        # enable for long, interesting streams of pseudorandom numbers: print(prf_key)
        seed = prf_key % (2**64 - 1)  # safeguard against overflow from long
        self.rng.manual_seed(seed)

    def _get_greenlist_ids(self, input_ids: torch.LongTensor) -> torch.LongTensor:
        """Seed the rng from the local context and return the token ids on the green list."""
        self._seed_rng(input_ids)

        n_green = int(self.vocab_size * self.gamma)
        permutation = torch.randperm(self.vocab_size, device=input_ids.device, generator=self.rng)
        if self.select_green_tokens:
            # new behavior: pick the green tokens directly
            return permutation[:n_green]
        # legacy behavior: select green via red
        return permutation[(self.vocab_size - n_green) :]


class WatermarkLogitsProcessor(WatermarkBase, LogitsProcessor):
    """LogitsProcessor modifying model output scores in a pipe. Can be used in any HF pipeline to modify scores to fit the watermark,
    but can also be used as a standalone tool inserted for any model producing scores inbetween model outputs and next token sampler.
    """

    def __init__(self, *args, store_spike_ents: bool = False, **kwargs):
        """
        :param store_spike_ents: when True, a spike entropy value is recorded for every
            sequence at every call; all other arguments are forwarded to the base class
        """
        super().__init__(*args, **kwargs)

        self.store_spike_ents = store_spike_ents
        self.spike_entropies = None
        if self.store_spike_ents:
            self._init_spike_entropies()

    def _init_spike_entropies(self):
        """Precompute ``z_value`` and ``expected_gl_coef`` from gamma and alpha = exp(delta)."""
        alpha = torch.exp(torch.tensor(self.delta)).item()
        gamma = self.gamma

        self.z_value = ((1 - gamma) * (alpha - 1)) / (1 - gamma + (alpha * gamma))
        self.expected_gl_coef = (gamma * alpha) / (1 - gamma + (alpha * gamma))

        # catch for overflow when bias is "infinite"
        if alpha == torch.inf:
            self.z_value = 1.0
            self.expected_gl_coef = 1.0

    def _get_spike_entropies(self):
        """Return the stored spike entropies as nested lists of Python floats (one list per batch row)."""
        spike_ents = [[] for _ in range(len(self.spike_entropies))]
        for b_idx, ent_tensor_list in enumerate(self.spike_entropies):
            for ent_tensor in ent_tensor_list:
                spike_ents[b_idx].append(ent_tensor.item())
        return spike_ents

    def _get_and_clear_stored_spike_ents(self):
        """Return the stored spike entropies and reset the store."""
        spike_ents = self._get_spike_entropies()
        self.spike_entropies = None
        return spike_ents

    def _compute_spike_entropy(self, scores):
        """Return ``sum(p / (1 + z_value * p))`` over the softmax ``p`` of ``scores``."""
        # precomputed z value in init
        probs = scores.softmax(dim=-1)
        denoms = 1 + (self.z_value * probs)
        renormed_probs = probs / denoms
        sum_renormed_probs = renormed_probs.sum()
        return sum_renormed_probs

    def _calc_greenlist_mask(self, scores: torch.FloatTensor, greenlist_token_ids) -> torch.BoolTensor:
        """Return a boolean mask shaped like ``scores`` that is True at each row's green token ids."""
        # Cannot lose loop, greenlists might have different lengths
        green_tokens_mask = torch.zeros_like(scores, dtype=torch.bool)
        for b_idx, greenlist in enumerate(greenlist_token_ids):
            if len(greenlist) > 0:
                green_tokens_mask[b_idx][greenlist] = True
        return green_tokens_mask

    def _bias_greenlist_logits(self, scores: torch.Tensor, greenlist_mask: torch.Tensor, greenlist_bias: float) -> torch.Tensor:
        """Add ``greenlist_bias`` to the masked entries of ``scores`` (modified in place) and return it."""
        scores[greenlist_mask] = scores[greenlist_mask] + greenlist_bias
        return scores

    def _score_rejection_sampling(
        self, input_ids: torch.LongTensor, scores: torch.FloatTensor, tail_rule="fixed_compute"
    ) -> torch.LongTensor:
        """Generate greenlist based on current candidate next token. Reject and move on if necessary. Method not batched.
        This is only a partial version of Alg.3 "Robust Private Watermarking", as it always assumes greedy sampling. It will still (kinda)
        work for all types of sampling, but less effectively.
        To work efficiently, this function can switch between a number of rules for handling the distribution tail.
        These are not exposed by default.

        :param input_ids: token ids of one sequence (the prefix)
        :param scores: next-token scores for that sequence
        :param tail_rule: early-stopping rule: "fixed_score", "fixed_list_length", "fixed_compute",
            or anything else to scan every candidate
        :return: tensor with the candidates that fall on their own green list
        """
        sorted_scores, greedy_predictions = scores.sort(dim=-1, descending=True)

        final_greenlist = []
        for idx, prediction_candidate in enumerate(greedy_predictions):
            greenlist_ids = self._get_greenlist_ids(torch.cat([input_ids, prediction_candidate[None]], dim=0))  # add candidate to prefix
            if prediction_candidate in greenlist_ids:  # test for consistency
                final_greenlist.append(prediction_candidate)

            # What follows below are optional early-stopping rules for efficiency
            if tail_rule == "fixed_score":
                # the bound check keeps the look-ahead from indexing past the last candidate
                if idx + 1 < len(sorted_scores) and sorted_scores[0] - sorted_scores[idx + 1] > self.delta:
                    break
            elif tail_rule == "fixed_list_length":
                if len(final_greenlist) == 10:
                    break
            elif tail_rule == "fixed_compute":
                if idx == 40:
                    break
            else:
                pass  # do not break early
        return torch.as_tensor(final_greenlist, device=input_ids.device)

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
        """Call with previous context as input_ids, and scores for next token.

        Adds ``self.delta`` to the scores of each sequence's green-list tokens and returns the
        (in place modified) scores.
        """

        # this is lazy to allow us to co-locate on the watermarked model's device
        self.rng = torch.Generator(device=input_ids.device) if self.rng is None else self.rng

        # NOTE, it would be nice to get rid of this batch loop, but currently,
        # the seed and partition operations are not tensor/vectorized, thus
        # each sequence in the batch needs to be treated separately.

        list_of_greenlist_ids = [None for _ in input_ids]  # Greenlists could differ in length
        for b_idx, input_seq in enumerate(input_ids):
            if self.self_salt:
                greenlist_ids = self._score_rejection_sampling(input_seq, scores[b_idx])
            else:
                greenlist_ids = self._get_greenlist_ids(input_seq)
            list_of_greenlist_ids[b_idx] = greenlist_ids

            # logic for computing and storing spike entropies for analysis
            if self.store_spike_ents:
                if self.spike_entropies is None:
                    self.spike_entropies = [[] for _ in range(input_ids.shape[0])]
                self.spike_entropies[b_idx].append(self._compute_spike_entropy(scores[b_idx]))

        green_tokens_mask = self._calc_greenlist_mask(scores=scores, greenlist_token_ids=list_of_greenlist_ids)
        scores = self._bias_greenlist_logits(scores=scores, greenlist_mask=green_tokens_mask, greenlist_bias=self.delta)

        return scores


class WatermarkDetector(WatermarkBase):
    """This is the detector for all watermarks imprinted with WatermarkLogitsProcessor.

    The detector needs to be given the exact same settings that were given during text generation to replicate the watermark
    greenlist generation and so detect the watermark.
    This includes the correct device that was used during text generation, the correct tokenizer, the correct
    seeding_scheme name, and parameters (delta, gamma).

    Optional arguments are
    * normalizers ["unicode", "homoglyphs", "truecase"] -> These can mitigate modifications to generated text that could trip the watermark
    * ignore_repeated_ngrams -> This option changes the detection rules to count every unique ngram only once.
    * z_threshold -> Changing this threshold will change the sensitivity of the detector.
    """

    def __init__(
        self,
        *args,
        device: torch.device = None,
        tokenizer: Tokenizer = None,
        z_threshold: float = 4.0,
        normalizers: list[str] = None,  # None -> ["unicode"]; or also: ["unicode", "homoglyphs", "truecase"]
        ignore_repeated_ngrams: bool = True,
        **kwargs,
    ):
        """Configure a detector; the watermark settings themselves go to the base class.

        Args:
            *args, **kwargs: forwarded unchanged to the base-class constructor.
            device: device on which the detector's ``torch.Generator`` is created. Required.
            tokenizer: instance of the generating tokenizer. Required.
            z_threshold: default decision threshold stored on the instance.
            normalizers: names resolved through ``normalization_strategy_lookup``.
                ``None`` selects ``["unicode"]``; pass ``[]`` to disable normalization.
            ignore_repeated_ngrams: stored flag; when set, scoring counts every unique ngram once.
        """
        super().__init__(*args, **kwargs)
        # also configure the metrics returned/preprocessing options
        assert device, "Must pass device"
        assert tokenizer, "Need an instance of the generating tokenizer to perform detection"

        self.tokenizer = tokenizer
        self.device = device
        self.z_threshold = z_threshold
        self.rng = torch.Generator(device=self.device)

        # The default is materialized per call instead of living in the signature, so no list
        # object is shared between detector instances.
        if normalizers is None:
            normalizers = ["unicode"]
        self.normalizers = [normalization_strategy_lookup(name) for name in normalizers]
        self.ignore_repeated_ngrams = ignore_repeated_ngrams

    def dummy_detect(
        self,
        return_prediction: bool = True,
        return_scores: bool = True,
        z_threshold: float = None,
        return_num_tokens_scored: bool = True,
        return_num_green_tokens: bool = True,
        return_green_fraction: bool = True,
        return_green_token_mask: bool = False,
        return_all_window_scores: bool = False,
        return_z_score: bool = True,
        return_z_at_T: bool = True,
        return_p_value: bool = True,
    ):
        # HF-style output dictionary
        score_dict = dict()
        if return_num_tokens_scored:
            score_dict.update(dict(num_tokens_scored=float("nan")))
        if return_num_green_tokens:
            score_dict.update(dict(num_green_tokens=float("nan")))
        if return_green_fraction:
            score_dict.update(dict(green_fraction=float("nan")))
        if return_z_score:
            score_dict.update(dict(z_score=float("nan")))
        if return_p_value:
            z_score = score_dict.get("z_score")
            if z_score is None:
                z_score = float("nan")
            score_dict.update(dict(p_value=float("nan")))
        if return_green_token_mask:
            score_dict.update(dict(green_token_mask=[]))
        if return_all_window_scores:
            score_dict.update(dict(window_list=[]))
        if return_z_at_T:
            score_dict.update(dict(z_score_at_T=torch.tensor([])))

        output_dict = {}
        if return_scores:
            output_dict.update(score_dict)
        # if passed return_prediction then perform the hypothesis test and return the outcome
        if return_prediction:
            z_threshold = z_threshold if z_threshold else self.z_threshold
            assert z_threshold is not None, "Need a threshold in order to decide outcome of detection test"
            output_dict["prediction"] = False

        return output_dict

    def _compute_z_score(self, observed_count, T):
        # count refers to number of green tokens, T is total number of tokens
        expected_count = self.gamma
        numer = observed_count - expected_count * T
        denom = sqrt(T * expected_count * (1 - expected_count))
        z = numer / denom
        return z

    def _compute_p_value(self, z):
        p_value = scipy.stats.norm.sf(z)
        return p_value

    @lru_cache(maxsize=2**32)
    def _get_ngram_score_cached(self, prefix: tuple[int], target: int):
        """Expensive re-seeding and sampling is cached."""
        # Handle with care, should ideally reset on __getattribute__ access to self.prf_type, self.context_width, self.self_salt, self.hash_key
        greenlist_ids = self._get_greenlist_ids(torch.as_tensor(prefix, device=self.device))
        return True if target in greenlist_ids else False

    def _score_ngrams_in_passage(self, input_ids: torch.Tensor):
        """Collect every ngram of the passage and look up its green/red outcome.

        Returns:
            ``(ngram_to_watermark_lookup, frequencies_table)``: the first maps each distinct ngram to
            the result of ``_get_ngram_score_cached``, the second counts how often each ngram occurs.
            Both share the same key order.

        Raises:
            ValueError: if fewer than one token remains after the first ``self.context_width`` tokens.
        """
        tokens_left_to_score = len(input_ids) - self.context_width
        if tokens_left_to_score < 1:
            raise ValueError(
                f"Must have at least {1} token to score after "
                f"the first min_prefix_len={self.context_width} tokens required by the seeding scheme."
            )

        # With self-salting the ngram is one token shorter, because the target token is then
        # passed as part of the prefix as well.
        ngram_length = self.context_width + 1 - self.self_salt
        frequencies_table = collections.Counter(ngrams(input_ids.cpu().tolist(), ngram_length))

        ngram_to_watermark_lookup = {
            ngram: self._get_ngram_score_cached(ngram if self.self_salt else ngram[:-1], ngram[-1])
            for ngram in frequencies_table
        }
        return ngram_to_watermark_lookup, frequencies_table

    def _get_green_at_T_booleans(self, input_ids, ngram_to_watermark_lookup) -> tuple[torch.Tensor]:
        """Build the per-token green/red list, its de-duplicated variant, and the index map between them.

        Returns three tensors:
        * the green/red outcome of every ngram position in the passage,
        * the same outcomes with repeated ngrams dropped when ``self.ignore_repeated_ngrams`` is set
          (otherwise identical to the first),
        * for every position, the index of the most recent entry in the second tensor, so that
          ``green_token_mask = green_token_mask_unique[offsets]`` except for all locations where
          otherwise a repeat would be counted.
        """
        every_position, counted_only, offsets = [], [], []
        already_counted = set()
        ngram_length = self.context_width + 1 - self.self_salt

        for ngram in ngrams(input_ids.cpu().tolist(), ngram_length):
            is_green = ngram_to_watermark_lookup[ngram]
            every_position.append(is_green)

            is_skipped_repeat = self.ignore_repeated_ngrams and ngram in already_counted
            if not is_skipped_repeat:
                already_counted.add(ngram)
                counted_only.append(is_green)
            # Point at the latest counted entry; a skipped repeat re-uses the previous index.
            offsets.append(len(counted_only) - 1)

        return (
            torch.tensor(every_position),
            torch.tensor(counted_only),
            torch.tensor(offsets),
        )

    def _score_sequence(
        self,
        input_ids: torch.Tensor,
        return_num_tokens_scored: bool = True,
        return_num_green_tokens: bool = True,
        return_green_fraction: bool = True,
        return_green_token_mask: bool = False,
        return_z_score: bool = True,
        return_z_at_T: bool = True,
        return_p_value: bool = True,
    ):
        ngram_to_watermark_lookup, frequencies_table = self._score_ngrams_in_passage(input_ids)
        green_token_mask, green_unique, offsets = self._get_green_at_T_booleans(input_ids, ngram_to_watermark_lookup)

        # Count up scores over all ngrams
        if self.ignore_repeated_ngrams:
            # Method that only counts a green/red hit once per unique ngram.
            # New num total tokens scored (T) becomes the number unique ngrams.
            # We iterate over all unqiue token ngrams in the input, computing the greenlist
            # induced by the context in each, and then checking whether the last
            # token falls in that greenlist.
            num_tokens_scored = len(frequencies_table.keys())
            green_token_count = sum(ngram_to_watermark_lookup.values())
        else:
            num_tokens_scored = sum(frequencies_table.values())
            assert num_tokens_scored == len(input_ids) - self.context_width + self.self_salt
            green_token_count = sum(freq * outcome for freq, outcome in zip(frequencies_table.values(), ngram_to_watermark_lookup.values()))
        assert green_token_count == green_unique.sum()

        # HF-style output dictionary
        score_dict = dict()
        if return_num_tokens_scored:
            score_dict.update(dict(num_tokens_scored=num_tokens_scored))
        if return_num_green_tokens:
            score_dict.update(dict(num_green_tokens=green_token_count))
        if return_green_fraction:
            score_dict.update(dict(green_fraction=(green_token_count / num_tokens_scored)))
        if return_z_score:
            score_dict.update(dict(z_score=self._compute_z_score(green_token_count, num_tokens_scored)))
        if return_p_value:
            z_score = score_dict.get("z_score")
            if z_score is None:
                z_score = self._compute_z_score(green_token_count, num_tokens_scored)
            score_dict.update(dict(p_value=self._compute_p_value(z_score)))
        if return_green_token_mask:
            score_dict.update(dict(green_token_mask=green_token_mask.tolist()))
        if return_z_at_T:
            # Score z_at_T separately:
            sizes = torch.arange(1, len(green_unique) + 1)
            seq_z_score_enum = torch.cumsum(green_unique, dim=0) - self.gamma * sizes
            seq_z_score_denom = torch.sqrt(sizes * self.gamma * (1 - self.gamma))
            z_score_at_effective_T = seq_z_score_enum / seq_z_score_denom
            z_score_at_T = z_score_at_effective_T[offsets]
            assert torch.isclose(z_score_at_T[-1], torch.tensor(z_score))

            score_dict.update(dict(z_score_at_T=z_score_at_T))

        return score_dict

    def _score_windows_impl_batched(
        self,
        input_ids: torch.Tensor,
        window_size: str,
        window_stride: int = 1,
    ):
        # Implementation details:
        # 1) --ignore_repeated_ngrams is applied globally, and windowing is then applied over the reduced binary vector
        #      this is only one way of doing it, another would be to ignore bigrams within each window (maybe harder to parallelize that)
        # 2) These windows on the binary vector of green/red hits, independent of context_width, in contrast to Kezhi's first implementation
        # 3) z-scores from this implementation cannot be directly converted to p-values, and should only be used as labels for a
        #    ROC chart that calibrates to a chosen FPR. Due, to windowing, the multiple hypotheses will increase scores across the board#
        #    naive_count_correction=True is a partial remedy to this

        ngram_to_watermark_lookup, frequencies_table = self._score_ngrams_in_passage(input_ids)
        green_mask, green_ids, offsets = self._get_green_at_T_booleans(input_ids, ngram_to_watermark_lookup)
        len_full_context = len(green_ids)

        partial_sum_id_table = torch.cumsum(green_ids, dim=0)

        if window_size == "max":
            # could start later, small window sizes cannot generate enough power
            # more principled: solve (T * Spike_Entropy - g * T) / sqrt(T * g * (1 - g)) = z_thresh for T
            sizes = range(1, len_full_context)
        else:
            sizes = [int(x) for x in window_size.split(",") if len(x) > 0]

        z_score_max_per_window = torch.zeros(len(sizes))
        cumulative_eff_z_score = torch.zeros(len_full_context)
        s = window_stride

        window_fits = False
        for idx, size in enumerate(sizes):
            if size <= len_full_context:
                # Compute hits within window for all positions in parallel:
                window_score = torch.zeros(len_full_context - size + 1, dtype=torch.long)
                # Include 0-th window
                window_score[0] = partial_sum_id_table[size - 1]
                # All other windows from the 1st:
                window_score[1:] = partial_sum_id_table[size::s] - partial_sum_id_table[:-size:s]

                # Now compute batched z_scores
                batched_z_score_enum = window_score - self.gamma * size
                z_score_denom = sqrt(size * self.gamma * (1 - self.gamma))
                batched_z_score = batched_z_score_enum / z_score_denom

                # And find the maximal hit
                maximal_z_score = batched_z_score.max()
                z_score_max_per_window[idx] = maximal_z_score

                z_score_at_effective_T = torch.cummax(batched_z_score, dim=0)[0]
                cumulative_eff_z_score[size::s] = torch.maximum(cumulative_eff_z_score[size::s], z_score_at_effective_T[:-1])
                window_fits = True  # successful computation for any window in sizes

        if not window_fits:
            raise ValueError(
                f"Could not find a fitting window with window sizes {window_size} for (effective) context length {len_full_context}."
            )

        # Compute optimal window size and z-score
        cumulative_z_score = cumulative_eff_z_score[offsets]
        optimal_z, optimal_window_size_idx = z_score_max_per_window.max(dim=0)
        optimal_window_size = sizes[optimal_window_size_idx]
        return (
            optimal_z,
            optimal_window_size,
            z_score_max_per_window,
            cumulative_z_score,
            green_mask,
        )

    def _score_sequence_window(
        self,
        input_ids: torch.Tensor,
        return_num_tokens_scored: bool = True,
        return_num_green_tokens: bool = True,
        return_green_fraction: bool = True,
        return_green_token_mask: bool = False,
        return_z_score: bool = True,
        return_z_at_T: bool = True,
        return_p_value: bool = True,
        window_size: str = None,
        window_stride: int = 1,
    ):
        (
            optimal_z,
            optimal_window_size,
            _,
            z_score_at_T,
            green_mask,
        ) = self._score_windows_impl_batched(input_ids, window_size, window_stride)

        # HF-style output dictionary
        score_dict = dict()
        if return_num_tokens_scored:
            score_dict.update(dict(num_tokens_scored=optimal_window_size))

        denom = sqrt(optimal_window_size * self.gamma * (1 - self.gamma))
        green_token_count = int(optimal_z * denom + self.gamma * optimal_window_size)
        green_fraction = green_token_count / optimal_window_size
        if return_num_green_tokens:
            score_dict.update(dict(num_green_tokens=green_token_count))
        if return_green_fraction:
            score_dict.update(dict(green_fraction=green_fraction))
        if return_z_score:
            score_dict.update(dict(z_score=optimal_z))
        if return_z_at_T:
            score_dict.update(dict(z_score_at_T=z_score_at_T))
        if return_p_value:
            z_score = score_dict.get("z_score", optimal_z)
            score_dict.update(dict(p_value=self._compute_p_value(z_score)))

        # Return per-token results for mask. This is still the same, just scored by windows
        # todo would be to mark the actually counted tokens differently
        if return_green_token_mask:
            score_dict.update(dict(green_token_mask=green_mask.tolist()))

        return score_dict

    def detect(
        self,
        text: str = None,
        tokenized_text: list[int] = None,
        window_size: str = None,
        window_stride: int = None,
        return_prediction: bool = True,
        return_scores: bool = True,
        z_threshold: float = None,
        convert_to_float: bool = False,
        **kwargs,
    ) -> dict:
        """Scores a given string of text and returns a dictionary of results."""

        assert (text is not None) ^ (tokenized_text is not None), "Must pass either the raw or tokenized string"
        if return_prediction:
            kwargs["return_p_value"] = True  # to return the "confidence":=1-p of positive detections

        # run optional normalizers on text
        for normalizer in self.normalizers:
            text = normalizer(text)
        if len(self.normalizers) > 0:
            print(f"Text after normalization:\n\n{text}\n")

        if tokenized_text is None:
            assert self.tokenizer is not None, (
                "Watermark detection on raw string ",
                "requires an instance of the tokenizer ",
                "that was used at generation time.",
            )
            print(text)
            tokenized_text = self.tokenizer(text, return_tensors="pt", add_special_tokens=False)["input_ids"][0].to(self.device)
            if tokenized_text[0] == self.tokenizer.bos_token_id:
                tokenized_text = tokenized_text[1:]
        else:
            # try to remove the bos_tok at beginning if it's there
            if (self.tokenizer is not None) and (tokenized_text[0] == self.tokenizer.bos_token_id):
                tokenized_text = tokenized_text[1:]

        # call score method
        output_dict = {}

        if window_size is not None:
            # assert window_size <= len(tokenized_text) cannot assert for all new types
            score_dict = self._score_sequence_window(
                tokenized_text,
                window_size=window_size,
                window_stride=window_stride,
                **kwargs,
            )
            output_dict.update(score_dict)
        else:
            score_dict = self._score_sequence(tokenized_text, **kwargs)
        if return_scores:
            output_dict.update(score_dict)
        # if passed return_prediction then perform the hypothesis test and return the outcome
        if return_prediction:
            z_threshold = z_threshold if z_threshold else self.z_threshold
            assert z_threshold is not None, "Need a threshold in order to decide outcome of detection test"
            output_dict["prediction"] = score_dict["z_score"] > z_threshold
            if output_dict["prediction"]:
                output_dict["confidence"] = 1 - score_dict["p_value"]

        # convert any numerical values to float if requested
        if convert_to_float:
            for key, value in output_dict.items():
                if isinstance(value, int):
                    output_dict[key] = float(value)

        return output_dict


##########################################################################
# Ngram iteration from nltk, extracted to remove the dependency
# Natural Language Toolkit: Utility functions
#
# Copyright (C) 2001-2023 NLTK Project
# Author: Steven Bird <stevenbird1@gmail.com>
#         Eric Kafe <kafe.eric@gmail.com> (acyclic closures)
# URL: <https://www.nltk.org/>
# For license information, see https://github.com/nltk/nltk/blob/develop/LICENSE.txt
##########################################################################


def ngrams(sequence, n, pad_left=False, pad_right=False, pad_symbol=None):
    """Return a lazy iterator over all length-``n`` windows of ``sequence`` as tuples.

    With ``pad_left`` / ``pad_right`` the sequence is extended by ``n - 1`` copies of
    ``pad_symbol`` on the respective side before the windows are formed.
    """
    stream = iter(sequence)
    if pad_left:
        stream = chain((pad_symbol,) * (n - 1), stream)
    if pad_right:
        stream = chain(stream, (pad_symbol,) * (n - 1))

    # One independent copy of the stream per position inside the window; the copy for
    # position k is advanced by k items so that zipping the copies yields the windows.
    shifted_copies = tee(stream, n)
    for shift, copy in enumerate(shifted_copies):
        remaining = shift
        while remaining > 0:
            next(copy, None)
            remaining -= 1
    return zip(*shifted_copies)


In [None]:
from pprint import pprint
from functools import partial

import numpy # for gradio hot reload
import gradio as gr

import torch

def generate(prompt, model=None, device=None, tokenizer=None, summerizer=None):
    """Generate text for ``prompt`` with a WatermarkLogitsProcessor attached to the model.

    The processor is instantiated with fixed watermark parameters (gamma=0.25, delta=100.0,
    "selfhash" seeding) and passed to ``model.generate`` as a logits processor. The tokenized
    prompt is moved to ``device`` and the torch seed is fixed before generating.
    ``summerizer`` is accepted but not used.

    Returns the first decoded sequence of the batch.
    """
    processor = WatermarkLogitsProcessor(
        vocab=list(tokenizer.get_vocab().values()),
        gamma=0.25,
        delta=100.0,
        seeding_scheme="selfhash",
    )
    run_watermarked_generation = partial(
        model.generate,
        logits_processor=LogitsProcessorList([processor]),
    )

    encoded_prompt = tokenizer(prompt, return_tensors="pt").to(device)
    torch.manual_seed(123)
    generated_ids = run_watermarked_generation(**encoded_prompt)

    return tokenizer.batch_decode(generated_ids)[0]


# Generator

In [None]:
from functools import partial

def generate_with_watermark(
    prompt,
    model=None,
    device=None,
    tokenizer=None,
    gamma=None,
    delta=None,
    max_length=500,
):
    """Sample a watermarked continuation/summary of ``prompt``.

    Args:
        prompt: input text.
        model: model exposing ``generate``.
        device: device the tokenized prompt is moved to.
        tokenizer: tokenizer used for encoding, decoding and for the watermark vocabulary.
        gamma, delta: watermark parameters passed to ``WatermarkLogitsProcessor``.
        max_length: ``max_length`` passed to ``model.generate`` (500, as before, by default).

    Returns:
        The first decoded sequence of the generated batch.
    """
    watermark_processor = WatermarkLogitsProcessor(
        vocab=list(tokenizer.get_vocab().values()),
        gamma=gamma,
        delta=delta,
        seeding_scheme="selfhash",
    )
    sampling_kwargs = dict(
        do_sample=True,
        top_k=0,
        temperature=0.7,
    )

    tokd_input = tokenizer(prompt, return_tensors="pt").to(device)
    # Call model.generate directly; a local partial with the same name as this function
    # would shadow it inside the body.
    output = model.generate(
        **tokd_input,
        logits_processor=LogitsProcessorList([watermark_processor]),
        max_length=max_length,
        **sampling_kwargs,
    )
    return tokenizer.batch_decode(output)[0]

def generate_without_watermark(
    prompt,
    model=None,
    device=None,
    tokenizer=None,
    max_length=500,
):
    """Sample a continuation/summary of ``prompt`` without any watermark processor.

    Uses the same sampling settings as the watermarked variant (do_sample, top_k=0,
    temperature=0.7), so the two outputs differ only by the logits processor.

    Args:
        prompt: input text.
        model: model exposing ``generate``.
        device: device the tokenized prompt is moved to.
        tokenizer: tokenizer used for encoding and decoding.
        max_length: ``max_length`` passed to ``model.generate`` (500, as before, by default).

    Returns:
        The first decoded sequence of the generated batch.
    """
    sampling_kwargs = dict(
        do_sample=True,
        top_k=0,
        temperature=0.7,
    )

    tokd_input = tokenizer(prompt, return_tensors="pt").to(device)
    output = model.generate(**tokd_input, max_length=max_length, **sampling_kwargs)
    return tokenizer.batch_decode(output)[0]

def generate(
    prompt,
    model=None,
    device=None,
    tokenizer=None,
    gamma=None,
    delta=None,
):
    """Generate one watermarked and one unwatermarked output for the same prompt.

    Args:
        prompt: input text.
        model: model exposing ``generate``.
        device: device for the tokenized prompt. When ``None``, the model's own device is used,
            so inputs and weights end up on the same device; if the model does not expose one,
            CUDA is used when available and the CPU otherwise.
        tokenizer: tokenizer matching the model.
        gamma, delta: watermark parameters for the watermarked run.

    Returns:
        ``(generated_with_watermark, generated_without_watermark)``.
    """
    # Honor the caller's device instead of overwriting it unconditionally, and prefer the
    # model's device over "cuda if available": picking CUDA for a model that still lives on
    # the CPU puts the inputs and the weights on different devices.
    if device is None:
        device = getattr(model, "device", None)
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"

    generated_without_watermark = generate_without_watermark(
        prompt=prompt,
        model=model,
        device=device,
        tokenizer=tokenizer,
    )

    generated_with_watermark = generate_with_watermark(
        prompt=prompt,
        model=model,
        device=device,
        tokenizer=tokenizer,
        gamma=gamma,
        delta=delta,
    )

    return generated_with_watermark, generated_without_watermark

# Detector

In [None]:
def format_names(s):
    """Format names for the gradio demo interface"""
    # Applied in this order; each pair is (raw metric key, display label).
    display_labels = (
        ("num_tokens_scored", "Tokens Counted (T)"),
        ("num_green_tokens", "# Tokens in Greenlist"),
        ("green_fraction", "Fraction of T in Greenlist"),
        ("z_score", "z-score"),
        ("p_value", "p value"),
        ("prediction", "Prediction"),
        ("confidence", "Confidence"),
    )
    for raw_key, label in display_labels:
        s = s.replace(raw_key, label)
    return s

def list_format_scores(score_dict, detection_threshold):
    """Format the detection metrics into a gradio dataframe input format"""

    def render(key, value):
        # Percentages for the two ratio-like metrics, compact floats, a label for booleans,
        # and plain str() formatting for everything else.
        if key == 'green_fraction':
            return f"{value:.1%}"
        if key == 'confidence':
            return f"{value:.3%}"
        if isinstance(value, float):
            return f"{value:.3g}"
        if isinstance(value, bool):
            return "Watermarked" if value else "Human/Unwatermarked"
        return f"{value}"

    rows = [[format_names(key), render(key, value)] for key, value in score_dict.items()]

    # The threshold row goes in front of the last row, or in front of the last two rows when a
    # confidence entry is present.
    threshold_row = ["z-score Threshold", f"{detection_threshold}"]
    rows.insert(-2 if "confidence" in score_dict else -1, threshold_row)
    return rows

def detect_watermark(input_text, device=None, tokenizer=None):
    """Instantiate a WatermarkDetector and call detect on the input text.

    Args:
        input_text: text to test for the watermark.
        device: device for the detector; must match the original rng device type. When ``None``,
            the device of the notebook-global ``model`` is used, as this function always did.
        tokenizer: tokenizer that was used at generation time.

    Returns:
        The detection metrics and outcome as rows for a gradio dataframe
        (see ``list_format_scores``).
    """
    if device is None:
        # Backward-compatible fallback for callers that do not pass a device.
        device = model.device

    watermark_detector = WatermarkDetector(
        vocab=list(tokenizer.get_vocab().values()),
        gamma=0.25,  # should match original setting
        seeding_scheme="selfhash",  # should match original setting
        device=device,  # must match the original rng device type
        tokenizer=tokenizer,
        z_threshold=4.0,
    )

    score_dict = watermark_detector.detect(input_text)
    return list_format_scores(score_dict, watermark_detector.z_threshold)


# BART

In [None]:
from transformers import (AutoTokenizer,
                          AutoModelForSeq2SeqLM,
                          AutoModelForCausalLM,
                          LogitsProcessorList)

# Load the 'facebook/bart-large-cnn' seq2seq checkpoint and its tokenizer; the cells below pass
# these two globals to generate() and detect_watermark().
model = AutoModelForSeq2SeqLM.from_pretrained('facebook/bart-large-cnn')
tokenizer = AutoTokenizer.from_pretrained('facebook/bart-large-cnn')

Downloading (…)lve/main/config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

Downloading (…)neration_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
ARTICLE = """ New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York.
A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband.
Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other.
In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage.
Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the
2010 marriage license application, according to court documents.
Prosecutors said the marriages were part of an immigration scam.
On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further.
After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective
Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002.
All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say.
Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages.
Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted.
The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s
Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali.
Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force.
If convicted, Barrientos faces up to four years in prison.  Her next court appearance is scheduled for May 18.
"""


In [None]:
# Produce both generations for the same article; generate() returns the watermarked text first
# and the unwatermarked text second.
w_result, n_result = generate(
    prompt=ARTICLE,
    model=model,
    tokenizer=tokenizer,
    gamma=0.25,
    delta=5.0
)

In [None]:
w_result

'She has married up to eight times over the course of a 16 year span, prosecutors say. Her husband has been sent home by the FBI. She was arrested last year for allegedly stealing trains in the New York metro and then leaving them in the Bronx. She has appeared before a court this week in her first marriage has failed, court records say.'

In [None]:
n_result

'Liana Barrientos, 39, is charged with two counts of "offering a false instrument for filing in the first degree" In total, she has been married 10 times, with nine of her marriages occurring between 1999 and 2002. She is believed to still be married to four men, and at one time, she was married to eight men.'

In [None]:
detect_watermark(n_result, model.device, tokenizer)

Liana Barrientos, 39, is charged with two counts of "offering a false instrument for filing in the first degree" In total, she has been married 10 times, with nine of her marriages occurring between 1999 and 2002. She is believed to still be married to four men, and at one time, she was married to eight men.


[['Tokens Counted (T)', '68'],
 ['# Tokens in Greenlist', '17'],
 ['Fraction of T in Greenlist', '25.0%'],
 ['z-score', '0'],
 ['p value', '0.5'],
 ['z-score_at_T',
  'tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -0.8165,\n        -0.9623, -0.3651, -0.5222, -0.6667, -0.8006, -0.9258, -0.4472,  0.0000,\n        -0.1400,  0.2722,  0.1325,  0.5164,  0.8819,  1.2309,  1.5650,  1.4142,\n         1.2702,  1.1323,  1.0000,  0.8729,  1.1793,  1.0541,  1.3480,  1.2247,\n         1.1055,  0.9901,  0.8783,  0.7698,  0.6644,  0.5620,  0.4623,  0.7303,\n         0.6312,  0.8909,  0.7924,  0.6963,  0.6025,  0.5108,  0.4211,  0.6667,\n         0.5774,  0.4899,  0.4042,  0.6405,  0.5551,  0.4714,  0.3892,  0.3086,\n         0.2294,  0.1516,  0.0752,  0.0000, -0.0739,  0.1466,  0.0727,  0.0000,\n        -0.0716,  0.1421,  0.0705,  0.0000])'],
 ['z-score Threshold', '4.0'],
 ['Prediction', 'Human/Unwatermarked']]

In [None]:
detect_watermark(w_result, model.device, tokenizer)

She has married up to eight times over the course of a 16 year span, prosecutors say. Her husband has been sent home by the FBI. She was arrested last year for allegedly stealing trains in the New York metro and then leaving them in the Bronx. She has appeared before a court this week in her first marriage has failed, court records say.


[['Tokens Counted (T)', '67'],
 ['# Tokens in Greenlist', '62'],
 ['Fraction of T in Greenlist', '92.5%'],
 ['z-score', '12.8'],
 ['p value', '1.26e-37'],
 ['z-score_at_T',
  'tensor([ 1.7321,  2.4495,  3.0000,  3.4641,  3.8730,  4.2426,  4.5826,  4.0825,\n         4.4264,  4.7469,  5.0483,  5.3333,  5.6045,  5.8635,  6.1119,  5.7735,\n         6.0212,  6.2598,  6.4902,  6.7132,  6.9293,  7.1393,  7.3435,  7.5425,\n         7.7365,  7.9259,  8.1111,  8.2923,  8.4697,  8.6436,  8.8141,  8.9815,\n         9.1458,  9.3074,  9.4662,  9.6225,  9.7763,  9.9278,  9.7073,  9.8590,\n        10.0085, 10.1559, 10.3013, 10.4447, 10.5862, 10.7258, 10.8638, 10.6667,\n        10.8047, 10.9411, 11.0758, 11.2090, 11.3406, 11.4708, 11.5996, 11.7271,\n        11.8531, 11.9779, 12.1015, 12.2238, 12.3450, 12.4650, 12.5839, 12.7017,\n        12.8185, 12.9342, 12.7668])'],
 ['z-score Threshold', '4.0'],
 ['Prediction', 'Watermarked'],
 ['Confidence', '100.000%']]

# T5

In [None]:
! pip install --upgrade protobuf

Collecting protobuf
  Downloading protobuf-4.24.3-cp37-abi3-manylinux2014_x86_64.whl (311 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m311.6/311.6 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.20.3
    Uninstalling protobuf-3.20.3:
      Successfully uninstalled protobuf-3.20.3
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow-metadata 1.14.0 requires protobuf<4.21,>=3.20.3, but you have protobuf 4.24.3 which is incompatible.[0m[31m
[0mSuccessfully installed protobuf-4.24.3


In [None]:
!pip install sentencepiece

Collecting sentencepiece
  Downloading sentencepiece-0.1.99-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m12.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentencepiece
Successfully installed sentencepiece-0.1.99


In [None]:
from transformers import (AutoTokenizer,
                          AutoModelForSeq2SeqLM,
                          AutoModelForCausalLM,
                          LogitsProcessorList)


In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
import sentencepiece

# Load the tokenizer and the seq2seq model from the same FLAN-T5 checkpoint.
# NOTE(review): the captured run of this cell ended in an ImportError; the
# cause is not visible here -- confirm the environment (e.g. restart the
# runtime after the pip installs above) before relying on `model`.
T5_CHECKPOINT = "google/flan-t5-small"
tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)
model = T5ForConditionalGeneration.from_pretrained(
    T5_CHECKPOINT,
    device_map="auto",
)


ImportError: ignored

# CNN Dataset

In [None]:
! pip install datasets

In [None]:
from datasets import load_dataset

# Fetch the CNN/DailyMail corpus, configuration "3.0.0". No split is
# requested here, so the returned object is indexed by split name later.
dataset = load_dataset(path='cnn_dailymail', name='3.0.0')

In [None]:
import pandas as pd

# Rebind `dataset` to a pandas DataFrame built from the training split only.
# After this cell `dataset` no longer holds the original splits, so the cell
# cannot be re-run without reloading the corpus first.
train_split = dataset['train']
dataset = pd.DataFrame(train_split)

In [None]:
# Shuffle every row (frac=1 keeps the whole frame). The seed is fixed so the
# rows that end up first -- the ones the experiments below iterate over --
# are the same on every run, which keeps the reported numbers reproducible.
dataset = dataset.sample(frac=1, random_state=42)

In [None]:
dataset

# Generate and Detect for a single article

In [None]:
! pip install lmppl

In [None]:
import lmppl

# Quick sanity check of the perplexity scorer: score two candidate outputs
# against the same prompt and print each output next to its perplexity.
scorer = lmppl.EncoderDecoderLM('google/flan-t5-small')

sentiment_prompt = 'sentiment classification: I dropped my laptop on my knee, and someone stole my coffee.'
inputs = [sentiment_prompt, sentiment_prompt]
outputs = ['I am fresh.', 'I am sleepy.']

ppl = scorer.get_perplexity(input_texts=inputs, output_texts=outputs)
scored_outputs = list(zip(outputs, ppl))
print(scored_outputs)


In [None]:
# Sample news article used as the `prompt` for the single-article
# generate/detect demo in the cells that follow. The text is kept verbatim
# (including its leading space and line breaks) because it is model input.
ARTICLE = """ New York (CNN)When Liana Barrientos was 23 years old, she got married in Westchester County, New York.
A year later, she got married again in Westchester County, but to a different man and without divorcing her first husband.
Only 18 days after that marriage, she got hitched yet again. Then, Barrientos declared "I do" five more times, sometimes only within two weeks of each other.
In 2010, she married once more, this time in the Bronx. In an application for a marriage license, she stated it was her "first and only" marriage.
Barrientos, now 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," referring to her false statements on the
2010 marriage license application, according to court documents.
Prosecutors said the marriages were part of an immigration scam.
On Friday, she pleaded not guilty at State Supreme Court in the Bronx, according to her attorney, Christopher Wright, who declined to comment further.
After leaving court, Barrientos was arrested and charged with theft of service and criminal trespass for allegedly sneaking into the New York subway through an emergency exit, said Detective
Annette Markowski, a police spokeswoman. In total, Barrientos has been married 10 times, with nine of her marriages occurring between 1999 and 2002.
All occurred either in Westchester County, Long Island, New Jersey or the Bronx. She is believed to still be married to four men, and at one time, she was married to eight men at once, prosecutors say.
Prosecutors said the immigration scam involved some of her husbands, who filed for permanent residence status shortly after the marriages.
Any divorces happened only after such filings were approved. It was unclear whether any of the men will be prosecuted.
The case was referred to the Bronx District Attorney\'s Office by Immigration and Customs Enforcement and the Department of Homeland Security\'s
Investigation Division. Seven of the men are from so-called "red-flagged" countries, including Egypt, Turkey, Georgia, Pakistan and Mali.
Her eighth husband, Rashid Rajput, was deported in 2006 to his native Pakistan after an investigation by the Joint Terrorism Task Force.
If convicted, Barrientos faces up to four years in prison.  Her next court appearance is scheduled for May 18.
"""


In [None]:
# Summarise ARTICLE once with the settings below. The call returns a pair
# that is unpacked into `w_result` and `n_result`.
# NOTE(review): presumably these are the watermarked and non-watermarked
# generations respectively -- confirm against generate().
watermark_settings = {'gamma': 0.25, 'delta': 4.0}
w_result, n_result = generate(
    prompt=ARTICLE,
    model=model,
    tokenizer=tokenizer,
    **watermark_settings,
)

In [None]:
n_result

'<pad> Liana Barrientos, 39, is facing two criminal counts of "offering a false instrument for filing in the first degree," a court spokesman said.</s>'

In [None]:
w_result

'<pad> A former student from New York, Lamar, is being sued for allegedly sneaking into the subway with four men on the subway.</s>'

In [None]:
detect_watermark(n_result, model.device, tokenizer)[3]

Text after normalization:

<pad> Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" He says he doesn't plan to spend money on fast cars, drink and celebrity parties. Radcliffe says he's keeping his feet firmly on the ground.</s>

<pad> Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" He says he doesn't plan to spend money on fast cars, drink and celebrity parties. Radcliffe says he's keeping his feet firmly on the ground.</s>


['z-score', '1.32']

In [None]:
scorer.get_perplexity(input_texts=ARTICLE, output_texts=n_result)

100%|██████████| 1/1 [00:00<00:00, 29.28it/s]


2.1947363876610075

In [None]:
detect_watermark(w_result, model.device, tokenizer)[3]

Text after normalization:

<pad> A former student from New York, Lamar, is being sued for allegedly sneaking into the subway with four men on the subway.</s>

<pad> A former student from New York, Lamar, is being sued for allegedly sneaking into the subway with four men on the subway.</s>


['z-score', '6.33']

In [None]:
scorer.get_perplexity(input_texts=ARTICLE, output_texts=w_result)


100%|██████████| 1/1 [00:00<00:00, 20.38it/s]


11.16093302189969

# Generate and Detect for CNN dataset

Generate summarized text with T5, both with and without the watermark, for several values of delta,

and run the detection algorithm on each generated summary.

gamma = 0.25
delta = 2.0, 2.5, 3.0, 3.5, 4.0 (swept by the loop below as d/2 for d in 4..8)

In [None]:
# Sweep the watermark strength (delta). For each value, summarise the first
# NUM_ARTICLES rows of `dataset` with and without the watermark, score every
# summary (perplexity + watermark detection), write one CSV per delta and
# keep the per-delta DataFrame in `results_by_delta`.
NUM_ARTICLES = 20

# detect_watermark() returns a list of [label, formatted value] rows.
# Row 3 is the z-score; row 6 is presumably the z-score threshold --
# TODO confirm against detect_watermark().
Z_SCORE_ROW = 3
THRESHOLD_ROW = 6

results_by_delta = []
for d in range(4, 9):
    # d = 4..8  ->  delta = 2.0, 2.5, 3.0, 3.5, 4.0
    # Computed once per sweep step so it is defined even if no row is read.
    delta = d / 2
    result = {
        'article_id': [],
        'highlights_ppl': [],
        'unwatermarked_ppl': [],
        'unwatermarked_z_score': [],
        'unwatermarked_false': [],
        'watermarked_ppl': [],
        'watermarked_z_score': [],
        'watermarked_false': [],
    }
    for i, (_, data) in enumerate(dataset.iterrows()):
        print(i)
        if i == NUM_ARTICLES:
            break
        print("delta:", delta)
        ARTICLE = data['article']
        print('ARTICLE')
        print(ARTICLE)
        print('highlights')
        print(data['highlights'])

        # Perplexity of the reference summary, as a baseline.
        ppl = scorer.get_perplexity(input_texts=ARTICLE, output_texts=data['highlights'])
        result['highlights_ppl'].append(ppl)
        result['article_id'].append(data['id'])

        w_result, n_result = generate(
            prompt=ARTICLE,
            model=model,
            tokenizer=tokenizer,
            gamma=0.25,
            delta=delta,
        )

        print('Without Watermark')
        dw = detect_watermark(n_result, model.device, tokenizer)
        z_score = dw[Z_SCORE_ROW][1]
        threshold = dw[THRESHOLD_ROW][1]
        # The detector reports formatted strings (e.g. '6.33', '4.0'), so they
        # must be compared as numbers: as strings, '10.5' > '4.0' is False.
        # False positive: unwatermarked text scored above the threshold.
        wrong = float(z_score) > float(threshold)
        ppl = scorer.get_perplexity(input_texts=ARTICLE, output_texts=n_result)
        result['unwatermarked_ppl'].append(ppl)
        result['unwatermarked_z_score'].append(z_score)
        result['unwatermarked_false'].append(wrong)
        print(ppl)

        print('With Watermark')
        dw = detect_watermark(w_result, model.device, tokenizer)
        z_score = dw[Z_SCORE_ROW][1]
        threshold = dw[THRESHOLD_ROW][1]
        # False negative: watermarked text that did not clear the threshold.
        wrong = float(z_score) <= float(threshold)
        ppl = scorer.get_perplexity(input_texts=ARTICLE, output_texts=w_result)
        result['watermarked_ppl'].append(ppl)
        result['watermarked_z_score'].append(z_score)
        result['watermarked_false'].append(wrong)
        print(ppl)

    print(result)
    result_df = pd.DataFrame(result)
    result_df.to_csv(f'/content/drive/MyDrive/watermark/delta_{str(delta)}.csv')
    # Keep the frame in memory too; entries follow the order of the sweep.
    results_by_delta.append(result_df)



0
delta: 2.0
ARTICLE
(CNN) -- A Florida high school valedictorian and her sister, who were facing deportation, instead were meeting with with lawmakers Wednesday after being granted a reprieve. An immigration judge ruled last week that Daniela Pelaez, 18, and her sister Dayana were to be deported for being in the country illegally. But Immigration and Customs Enforcement on Tuesday gave the sisters a two-year reprieve. The decision was made under the policy of prosecutorial discretion, which is designed to prioritize deportation for illegal border crossers with a criminal record, instead of those who pose little or no risk. "The agency exercises prosecutorial discretion, on a case by case basis, as necessary to focus resources on our stated priorities," ICE spokesman Nestor Yglesias said in a statement Wednesday. The Pelaez sisters traveled to Washington to meet with Florida Sens. Marco Rubio and Bill Nelson, and Florida Reps. David Rivera and Frederica Wilson. Rubio, a Republican, had

100%|██████████| 1/1 [00:00<00:00, 13.63it/s]


Without Watermark
Text after normalization:

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 25.21it/s]

2.3679224856097196
With Watermark





Text after normalization:

<pad> Daniela Pelaez is a student from Florida.</s>

<pad> Daniela Pelaez is a student from Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 22.07it/s]


3.0982566621589194
1
delta: 2.0
ARTICLE
Editor's note: Erica Williams is deputy director of Campus Progress, a project of the Center for American Progress, a Washington-based organization that describes itself as dedicated to progressive causes. She works to engage the millennial generation and communities such as people of color, women and people of faith in the political process. She can be found at ericawilliamsonline.com and on Twitter at @ericawilliamsdc. Erica Williams says the idea that young people don't care about the health care issue is wrong. (CNN) -- As Congress returns to Capitol Hill, back from a recess of contentious town halls on health care reform, one new voice has the potential to break through the seemingly endless deadlock: the voice of young Americans. Just Thursday, there were more than 880,000 Facebook status updates posted with the meme of a demand for health care reform, generated organically and spread virally from young people and other Facebook users acros

100%|██████████| 1/1 [00:00<00:00, 14.70it/s]


Without Watermark
Text after normalization:

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>


100%|██████████| 1/1 [00:00<00:00, 25.75it/s]

16.153275564235432
With Watermark
Text after normalization:

<pad> Erica Williams: Young people don't care about health care reform, but it is a dangerous state of affairs for the larger debate. Williams: The debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: The debate is stalled and hemmed in by older Americans who are in a better economic position than young people.</s>

<pad> Erica Williams: Young people don't care about health care reform, but it is a dangerous state of affairs for the larger debate. Williams: The debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: The debate is stalled and hemmed in by older Americans who are in a better economic position than young people.</s>



100%|██████████| 1/1 [00:00<00:00, 25.30it/s]


10.711432880946136
2
delta: 2.0
ARTICLE
highlights
Anish Goel: The dilemma faced by the U.S. with Iraq could happen in Afghanistan .
He says: A weak Iraqi government with a poorly trained Army faces a fierce insurgency .
When U.S. troops leave Afghanistan, will government be able to stop Taliban, he asks .


100%|██████████| 1/1 [00:00<00:00, 25.13it/s]


Without Watermark
Text after normalization:

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>


100%|██████████| 1/1 [00:00<00:00, 27.86it/s]


2.4067782896530847
With Watermark
Text after normalization:

<pad> The current chaos in Iraq is tragic in almost every way.</s>

<pad> The current chaos in Iraq is tragic in almost every way.</s>


100%|██████████| 1/1 [00:00<00:00, 29.71it/s]


5.317199299464056
3
delta: 2.0
ARTICLE
One in every 13 people on the planet check Facebook the moment they wake up - leading to reports of Facebook addiction and social anxiety. And, according to one researcher, the fault lies with the little red notification icons and the numbers that litter the social network. Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers, from the site – and discovered that their removal improved a user’s enjoyment. Scroll down for video . Software expert Benjamin Grosser recently created a browser plug-in to automatically remove all metrics from Facebook, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red . The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics from the site, including likes, shares, comments, number of eve

100%|██████████| 1/1 [00:00<00:00, 25.08it/s]


Without Watermark
Text after normalization:

<pad>Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message

100%|██████████| 1/1 [00:00<00:00, 16.07it/s]

1.166726540929002
With Watermark





Text after normalization:

<pad>Software expert Benjamin Grosser recently created a browser plug-in to remove all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, com

100%|██████████| 1/1 [00:00<00:00, 16.27it/s]


1.172843432919264
4
delta: 2.0
ARTICLE
Real Madrid superstar Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders . Real Madrid legend Iker Casillas has once again stated his desire to play in the MLS once his glittering career at the Bernabeu is over. The Spain international has spent his entire career with Los Blancos after being handed his debut as a 16-year-old and his current contract expires in 2017. 'I have no problem saying that I would like to play in the US,' Casillas told The Wall Street Journal without putting any timescale on his departure. Real Madrid No 1 Iker Casillas can see himself playing in the US at the end of his career . The Spain international looks to the skies during Real Madrid's 2-1 defeat of Cordoba . The 33-year-old's contract at the Bernabeu expires at the end of the 2016-17 season . 'This has been a tough past year and a half, but it has made me tougher mentally. At the club level, Real Madrid winning lifted a huge 

100%|██████████| 1/1 [00:00<00:00, 28.19it/s]


Without Watermark
Text after normalization:

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>


100%|██████████| 1/1 [00:00<00:00, 21.98it/s]


1.5120370494723798
With Watermark
Text after normalization:

<pad> Cristiano Ronaldo sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend is currently on contract with the club. The 33-year-old is currently on contract with the club. The Spanish international is currently on contract with the club.</s>

<pad> Cristiano Ronaldo sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend is currently on contract with the club. The 33-year-old is currently on contract with the club. The Spanish international is currently on contract with the club.</s>


100%|██████████| 1/1 [00:00<00:00, 29.50it/s]


2.7876815406186117
5
delta: 2.0
ARTICLE
Ronny Deila insists he does not care if Celtic edge the title by a single point – so long it's part of a domestic Treble. The Parkhead side find themselves embroiled in a title joust for the first time in three years, with second placed Aberdeen just two points behind going into the New Year. Expected to romp to four-in-a-row, the champions lost to Dundee United and only managed a home goalless draw with bottom club Ross County on Saturday. Celtic manager Ronny Deila insists he isn't concerned how his side win the Scottish Premiership . Conceding it would be 'unthinkable' to surrender the championship, Deila admitted he would settle for limping over the line by the slenderest of margins. 'If we win the Treble – yes,' he said. 'I don't care if it's one point or ten points as long as we win the Treble. 'It's all about trophies. No-one asks how many points do you win by, it's all about trophies. 'That's always been the case in history. You never rem

100%|██████████| 1/1 [00:00<00:00, 29.33it/s]


Without Watermark
Text after normalization:

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>


100%|██████████| 1/1 [00:00<00:00, 25.96it/s]


25.691728459487038
With Watermark
Text after normalization:

<pad> Celtic beat bottom club Ross County on Saturday. The Championship is part of a domestic Treble. The Championship is a domestic title joust for the first time in three years. Celtic are set to win the Scottish Premiership on Saturday.</s>

<pad> Celtic beat bottom club Ross County on Saturday. The Championship is part of a domestic Treble. The Championship is a domestic title joust for the first time in three years. Celtic are set to win the Scottish Premiership on Saturday.</s>


100%|██████████| 1/1 [00:00<00:00, 28.89it/s]


3.2659500988347405
6
delta: 2.0
ARTICLE
A rare letter written by Napoleon Bonaparte in English will go to the auction block in France on Sunday. The letter could fetch close to $100,000 in the auction in the French town of Fontainebleau, south of Paris. Auction house Osenat describes it as the first letter the French emperor wrote in English on St. Helena, after he was defeated and exiled to the British island to live under military guard. Learning English behind the backs of his captors "was a sort of revenge, a historical revenge" for Napoleon, said Jean-Christophe Chataigner of Osenat. "He was imprisoned by the English... and he wants to continue to have a certain degree of independence, of freedom, and to be able to learn English without his jailers knowing it was a great motivation for him," Chataigner said. The auctioneer said Napoleon picked up English relatively quickly and well. "I think that French people who learn English today make lots more mistakes than Napoleon at the ti

100%|██████████| 1/1 [00:00<00:00, 38.01it/s]


Without Watermark
Text after normalization:

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>


100%|██████████| 1/1 [00:00<00:00, 22.02it/s]

2.5025988416832337
With Watermark





Text after normalization:

<pad> Jean-Christophe Chataigner of Osenat says the letter is "remarkable" and "someone is really a very good student"</s>

<pad> Jean-Christophe Chataigner of Osenat says the letter is "remarkable" and "someone is really a very good student"</s>


100%|██████████| 1/1 [00:00<00:00, 35.12it/s]


2.5564455214892896
7
delta: 2.0
ARTICLE
By . Jennifer Newton for MailOnline . Rapper and reality TV star Joe Budden has appeared in court to face charges that he viciously beat his ex-girlfriend in a jealous rage outside a New York restaurant. The 33-year-old Pump It Up singer turned himself into police on Wednesday when he attended the 34th Precinct station house in Inwood, Manhattan with his lawyer present. He was later arraigned on charges of assault, grand larceny and robbery in Manhattan Criminal Court. Rapper Joe Budden, pictured who has appeared in court after being accused of viciously beating up his ex-girlfriend in a jealous rage . However, according to the Daily News, the Slaughterhouse hip hop crew member was later released after his mother posted the $10,000 bail needed to free him. He told them as he left the courthouse: 'It's good to be free. 'I’ve been portrayed worse by better,' he added. Career trajectory: Budden is a member of the Slaughterhouse hip hop crew and a st

100%|██████████| 1/1 [00:00<00:00, 27.30it/s]


Without Watermark
Text after normalization:

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>


100%|██████████| 1/1 [00:00<00:00, 26.43it/s]

1.5526731110388385
With Watermark
Text after normalization:

<pad> Joe Budden, 33, is accused of beating ex-girlfriend in a jealous rage. He is accused of assault, grand larceny and robbery. He is also ordered to sign a $15,000 bail.</s>

<pad> Joe Budden, 33, is accused of beating ex-girlfriend in a jealous rage. He is accused of assault, grand larceny and robbery. He is also ordered to sign a $15,000 bail.</s>



100%|██████████| 1/1 [00:00<00:00, 28.46it/s]


2.2977870146128287
8
delta: 2.0
ARTICLE
Pet owners looking to launch the next online sensation or just longing for a new view of their dog's dashing and digging won't have to shop for long to find the perfect holiday gift. Wrap up a dog harness that holds any durable, wearable camera and watch Frisbee fetch, lazy lap naps and every memory in between come alive. GoPro Inc.'s Fetch dog harness fits over Fido's chest or back and holds the small, waterproof camera known for attaching to helmets, surfboards, cars and wrists to film rugged adventures. Sony, Garmin and Kurgo also make camera mounts for dogs. The device is among a legion of gifts that retailers have rounded up for pet wish lists this year. Narrowing it down is tough, but the harness tops the more unique options and creates footage that lasts. Doggone awesome: A dog wearing two GoPro cameras, one on his back and one on his chest is  held on by what is known as a Fetch dog harness . Bark up someone's else's tree: Thor, a French 

100%|██████████| 1/1 [00:00<00:00, 25.94it/s]


Without Watermark
Text after normalization:

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>


100%|██████████| 1/1 [00:00<00:00, 26.57it/s]

1.874236702766601
With Watermark
Text after normalization:

<pad> Dogs are being sold at PetSmart. The canine camera is a popular and popular brand for people looking for gift. It is sold for $399, and the mount costs $59.</s>

<pad> Dogs are being sold at PetSmart. The canine camera is a popular and popular brand for people looking for gift. It is sold for $399, and the mount costs $59.</s>



100%|██████████| 1/1 [00:00<00:00, 28.66it/s]


8.42856361291723
9
delta: 2.0
ARTICLE
Two California teachers who were arrested on allegations of engaging in sexual encounters on the beach with their male high school students will not be charged with sexual assault. However, South Hills High School teachers Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were each charged yesterday with one misdemeanor count of contributing to the delinquency of a minor after allegedly providing alcohol to students. They could face a maximum sentence of one year in jail if convicted. Prosecutors said they found insufficient evidence to file sexual assault charges. Scroll down for video . Melody Lippert (left), 38, and Michelle Ghirelli (right), 30, both from Covina, were arrested last month over sexual assault allegations, but will not be charged for sexual assault. However they were each charged on one misdemeanor count of contributing to the delinquency of a minor . As the investigation continues, Lippert and Ghirelli could also f

100%|██████████| 1/1 [00:00<00:00, 28.60it/s]


Without Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>


100%|██████████| 1/1 [00:00<00:00, 29.29it/s]

1.325813141716708
With Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on charges of conspiring and contributing to the delinquency of a minor. The teachers could face a maximum sentence of one year in jail if convicted. The teachers could face a maximum sentence of one year in jail if convicted.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on charges of conspiring and contributing to the delinquency of a minor. The teachers could face a maximum sentence of one year in jail if convicted. The teachers could face a maximum sentence of one year in jail if convicted.</s>



100%|██████████| 1/1 [00:00<00:00, 28.22it/s]


1.3794617551792168
10
delta: 2.0
ARTICLE
By . Christopher Stevens . PUBLISHED: . 17:25 EST, 29 September 2013 . | . UPDATED: . 17:37 EST, 29 September 2013 . Atlantis was buried beneath the waves by a cataclysm aeons ago. Addicts of Seventies’ TV will know the cataclysm had a name: Patrick Duffy. The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas, as the last survivor of that mythical submerged city. He had webbed feet and gills, and he could live off seaweed at the bottom of the ocean. Action packed: Atlantis promises to be a romp through Greek fables . The problem was, Duffy’s acting was so wooden that he floated. The Man From Atlantis was washed up after one series. Keen to avoid any confusion with this disastrous prototype, the producers of BBC1’s new family adventure serial, Atlantis, have changed everything. Atlantis isn’t an undersea kingdom, for a start — it’s a hot and dusty walled city on a Greek island. The hero Jason doesn’t

100%|██████████| 1/1 [00:00<00:00, 28.02it/s]


Without Watermark
Text after normalization:

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>


100%|██████████| 1/1 [00:00<00:00, 24.41it/s]

1.7570570810328938
With Watermark
Text after normalization:

<pad> The Man From Atlantis is a drama featuring Duffy, the last survivor of that mythical submerged city. The story is a drama featuring Duffy, the last survivor of that mythical submerged city. The hero Jason doesn’t have gills. In fact, he is half drowned when the story starts: half drowned and completely naked.</s>

<pad> The Man From Atlantis is a drama featuring Duffy, the last survivor of that mythical submerged city. The story is a drama featuring Duffy, the last survivor of that mythical submerged city. The hero Jason doesn’t have gills. In fact, he is half drowned when the story starts: half drowned and completely naked.</s>



100%|██████████| 1/1 [00:00<00:00, 25.00it/s]


1.7511478751075358
11
delta: 2.0
ARTICLE
Philadelphia, Pennsylvania (CNN) -- Paulette Beale shakes her head at the suggestion, then flashes a contagious smile. "It's still history," she says, to rebut the notion there could be less intensity for President Barack Obama in the African-American community the second time around. "The first history was that he won. The second history's that he won twice. So, it's not just about history the first time, you have to be concerned about the history for the next four years also. You can make history more than one time, you know." Her mother and father stand a few feet away, nodding approvingly. Paul and Altermese Beale founded Paul Beale's Florist 41 years ago. Paulette takes the lead now, but her parents are on hand helping most days in a shop that is an institution in the Ogontz Avenue area of North Philadelphia. "We love him," Altermese Beale says of Obama. "One of the proudest days of my life was the day he was elected." The Beales are determ

100%|██████████| 1/1 [00:00<00:00, 24.59it/s]


Without Watermark
Text after normalization:

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>


100%|██████████| 1/1 [00:00<00:00, 25.45it/s]


3.0043870665178445
With Watermark
Text after normalization:

<pad> Paulette Beale: The idea is that African-Americans are being voted on, but it isn't a big deal.</s>

<pad> Paulette Beale: The idea is that African-Americans are being voted on, but it isn't a big deal.</s>


100%|██████████| 1/1 [00:00<00:00, 25.78it/s]


4.91774557133585
12
delta: 2.0
ARTICLE
A Filipino maid has been left covered in burns after her Saudi boss's mother allegedly threw boiling water at her as punishment for not making coffee quick enough. The 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. Once there, she passed her cousin's phone number to hospital staff and urged them to call for help. Attack: This is the 23-year-old who was burned after her boss's mother allegedly threw boiling water at her . When she returned for a check-up, her cousin arrived to take her home. She is now in the care of the Philippine Embassy. Outraged, her cousin has shared images of 'Fatma', which is not her real name, on Facebook. The posts also claim Fatma was beaten by her employer and deprived of food. According to ABS-CBN News, she was scalded after her employer's mother in Riyadh, Saudi Arabia, became enraged at the time it was taking to brew coffee. A tussle ensued, which led to Fatma being c

100%|██████████| 1/1 [00:00<00:00, 28.88it/s]


Without Watermark
Text after normalization:

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>


100%|██████████| 1/1 [00:00<00:00, 25.01it/s]

1.412812075113653
With Watermark





Text after normalization:

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring. The 23-year-old was not taken to hospital for hours, despite suffering scarring. The posts claim Fatma was beaten by her employer and deprived of food.</s>

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring. The 23-year-old was not taken to hospital for hours, despite suffering scarring. The posts claim Fatma was beaten by her employer and deprived of food.</s>


100%|██████████| 1/1 [00:00<00:00, 19.85it/s]

1.7510524775421252





13
delta: 2.0
ARTICLE
By . Annabel Fenwick Elliott . A woman has claimed that Covergirl's two-step Bombshell Volume mascara is so hard to get off, it caused her to lose a significant clump of eyelashes in trying. 'I was wiping downward with the cotton ball and noticed a chunk of my eyelashes fell into the palm of my hand', Amy Schavolt, 21, based in West Palm Beach, Florida, tells ABC15. Ms Schavolt was initially impressed with the dramatic results she achieved from the mascara, which Covergirl boasts delivers 'ten times more noticeable lashes vs. bare lashes'. Noticeable indeed! Amy Schavolt claims Covergirl's Bombshell Volume mascara was so hard to get off - even with make-up remover - that she wrenched out a sizable clump of her lashes in trying (pictured) Product fail: Ms Schavolt (pictured) says she was initially impressed with the bold effects of the mascara, but it quickly turned into a 'clumpy, gooey mess' before it caused her lashes to fall out . 'I had a couple of people say

100%|██████████| 1/1 [00:00<00:00, 17.38it/s]


Without Watermark
Text after normalization:

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>


100%|██████████| 1/1 [00:00<00:00, 27.87it/s]

1.87450304598904
With Watermark
Text after normalization:

<pad> Amy Schavolt, 21, based in West Palm Beach, Florida, claims Covergirl's two-step mascara is so hard to get off. The mascara quickly turned into a clumpy, gooey mess before it caused her to lose a significant clump of eyelashes. The mascara quickly turned into a clumpy, gooey mess but it quickly turned into a clumpy, gooey mess. Ms Schavolt, 21, based in West Palm Beach, Florida, claims Covergirl's Bombshell Volume mascara is so hard to get off. The spokesperson advised customers to remove it using an oil-based eye make-up remover and allow it sit on the lashes 'for about a minute' before 'gently' wiping it away.</s>

<pad> Amy Schavolt, 21, based in West Palm Beach, Florida, claims Covergirl's two-step mascara is so hard to get off. The mascara quickly turned into a clumpy, gooey mess before it caused her to lose a significant clump of eyelashes. The mascara quickly turned into a clumpy, gooey mess but it quickly turned i


100%|██████████| 1/1 [00:00<00:00, 18.65it/s]


3.5063265918935898
14
delta: 2.0
ARTICLE
He still has the power to excite, exhilarate — and court controversy. Picasso’s long-awaited, vastly over-budget museum reopens today after five years of renovation. Housed in the Marais’s 17th-century Hotel Sale — the former home of a salt-tax collector — it’s an enormous, elegant space, and Picasso’s work fills it to the rafters. The man behind the magic: Some of Pablo Picasso's finest works are found in the Paris museum of his art . I had a sneak preview. There are 5,000 pieces, illustrating the breadth of his repertoire, from early paintings to found-object sculptures, collages and shocking nudes. The collection was given to Paris in 1979 — in lieu of inheritance tax — by Picasso’s heirs. A visitor stands in front of the painting 'Woman Throwing a Stone' and the sculptures (L-R) 'Head of a Woman', 'Bust of a Woman' and 'Bust of a Woman' by Spanish painter Pablo Picasso (1881-1973) Picasso's painting 'Gustave Coquiot' was one of 400 pieces op

100%|██████████| 1/1 [00:00<00:00, 21.41it/s]


Without Watermark
Text after normalization:

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>


100%|██████████| 1/1 [00:00<00:00, 25.75it/s]

1.3264452522520611
With Watermark
Text after normalization:

<pad> Picasso's 17th-century Hotel Sale reopens today. Picasso's work is an impressive, elegant space. Picasso's work is a classic style of glamour. Picasso's work is a classic style of glamour.</s>

<pad> Picasso's 17th-century Hotel Sale reopens today. Picasso's work is an impressive, elegant space. Picasso's work is a classic style of glamour. Picasso's work is a classic style of glamour.</s>



100%|██████████| 1/1 [00:00<00:00, 26.61it/s]


3.6615680975508043
15
delta: 2.0
ARTICLE
highlights
Malaysia's opposition leader calls on supporters to attend rally on Wednesday .
Anwar Ibrahim says Sunday's poll marred by unprecedented fraud .
Election was won by ruling coalition, extending its 56 years in power .
A government spokesperson says allegations of fraud unsubstantiated .


100%|██████████| 1/1 [00:00<00:00, 29.55it/s]


Without Watermark
Text after normalization:

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>


100%|██████████| 1/1 [00:00<00:00, 28.16it/s]


2.6630211599027658
With Watermark
Text after normalization:

<pad>Malaysia's opposition leader says it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "di

100%|██████████| 1/1 [00:00<00:00, 16.45it/s]


1.1964979608618613
16
delta: 2.0
ARTICLE
The legal high known as ‘hippy crack’ was used by one in 16 young people, according to new figures. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000  people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is the second most popular drug among young people. Legal high: Discarded Nitrous Oxide canisters and balloons used for inhaling abandoned in campsites at the Glastonbury Festival . On a downer: Illegal drug use has fallen, particularly for cocaine and ecstasy, latest figures show . Nitrous oxide is inhaled to make people feel euphoric and relaxed - but drug experts warn it can cause death on first time use through suffocation. For the first time the 2012-13 Crime Survey for England and Wales included questions about the use of the drug. It is often sold to recreational users in balloons in nightclubs and at 

100%|██████████| 1/1 [00:00<00:00, 28.94it/s]


Without Watermark
Text after normalization:

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>


100%|██████████| 1/1 [00:00<00:00, 18.45it/s]


1.2609500387270791
With Watermark
Text after normalization:

<pad> Drug used by 16 young people last year is at record low. The drug is often sold to recreational users in balloons. The home office says the government is concerned about nitrous oxide.</s>

<pad> Drug used by 16 young people last year is at record low. The drug is often sold to recreational users in balloons. The home office says the government is concerned about nitrous oxide.</s>


100%|██████████| 1/1 [00:00<00:00, 18.89it/s]


3.8733551256432395
17
delta: 2.0
ARTICLE
A regiment of soldiers from a pint-sized battalion has been honoured 100 years after they volunteered for service in World War One. The Birkenhead Bantams were formed in November 1914 after the Wirral town's MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. Bigland's petition to the War Office led to the minimum height for soldiers in the Army to be lowered from five foot three inches, to just five foot. The pint-sized battalion has been honoured 100 years after they volunteered for service in World War One . As a result thousands of men who had been refused the chance to serve their country, were able to fight in the Great War. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including at Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country. A pl

100%|██████████| 1/1 [00:00<00:00, 18.46it/s]


Without Watermark
Text after normalization:

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 23.42it/s]


1.157230604239093
With Watermark
Text after normalization:

<pad> Birkenhead Bantams were formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. The Bantam battalions, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams were formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. The Bantam battalions, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 24.18it/s]


1.2105310796929298
18
delta: 2.0
ARTICLE
Sitting in the casualty department of one of the best-known hospitals in Britain, I can’t help but notice it’s packed to the rafters. Yet it’s not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I’m in deep shock and on the point of passing out in agony. This is by far the worst pain I’ve ever suffered. I know my upper arm is broken — I fell off a step and heard the crack — and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: ‘Hope you’ve set aside the whole day. I’ve been waiting ages!’ Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital . As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she’s come for — a headache, no less. ‘No point calling the GP. You can never 

100%|██████████| 1/1 [00:00<00:00, 20.61it/s]


Without Watermark
Text after normalization:

<pad> The Royal Free Hospital in North London looks so strained. It's packed to the rafters. But it's not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I'm in deep shock and on the point of passing out in agony. This is by far the worst pain I've ever suffered. I know my upper arm is broken - I fell off a step and heard the crack - and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: 'Hope you've set aside the whole day. I've been waiting ages!' Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital. As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she's come for — a headache, no less. 'No point calling the GP. You can never get seen,' she says. It's the same sto

100%|██████████| 1/1 [00:00<00:00, 18.74it/s]


1.1526999235278335
With Watermark
Text after normalization:

<pad> A patient is being treated at Royal Free Hospital in North London. Her fracture clinic is being treated at 8.30pm. The nurse, who is sent for an X-ray, is sent home wearing a sling.</s>

<pad> A patient is being treated at Royal Free Hospital in North London. Her fracture clinic is being treated at 8.30pm. The nurse, who is sent for an X-ray, is sent home wearing a sling.</s>


100%|██████████| 1/1 [00:00<00:00, 25.76it/s]


4.886082476828613
19
delta: 2.0
ARTICLE
Washington (CNN)  -- A new book about former Sen. John Edwards paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy -- all in the name of trying to win the presidency. In "The Politician," former Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock. "The Politician" went on sale Saturday. Young described an elaborate plan that allowed Edwards to maintain a mistress while he sought the Democratic presidential nomination in 2008. The plan was funded by two wealthy benefactors, the late trial lawyer Fred Baron and banking heir widow Bunny Mellon, but Mellon was unaware that her money was being used for the mistress. A federal grand jury is investigating payments the former senator's campaign and supporters made to Rielle Hunter, who was a videogra

100%|██████████| 1/1 [00:00<00:00, 22.69it/s]


Without Watermark
Text after normalization:

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>


100%|██████████| 1/1 [00:00<00:00, 25.14it/s]


4.066388505592387
With Watermark
Text after normalization:

<pad> Edwards says he is the father of the woman who had given birth to her.</s>

<pad> Edwards says he is the father of the woman who had given birth to her.</s>


100%|██████████| 1/1 [00:00<00:00, 25.80it/s]


7.253545426017464
20
{'article_id': ['6fa5e02cf2bbd138da7ac530680843071b3957d8', '2326d69072e1821bc767584dc2c1274db536b982', '3bbfad251b66eec6ad978665d2bfbd60c12a9789', '02e60b6c1206a757fbd15ecbec840e270b490c0f', '1a69268d48af97a75ca677e9aefaaaa363e269a3', 'ca584d07782df66cf85d2c13526cc575005cf223', '8a1b3c68e2f98f5ef496f0ffa873f0709777f1aa', '6ac3db13498e3c2a5a9b58908c6015cbddff9ece', 'c84413ad2b705d85cde4d08591aeeeca8855fe4d', 'b19693ddbe3a374e877326f31ac5376746236183', '641f83379174ad75162641d7cb02819766d50011', '7d173d0978bfc75c6758e399b90b9f6c2dbad957', '30a2fb66e6747a28f554e01ff65b2b14466ad0f0', '49d472de3b6efb8ee70aa3f289d638deeb1470da', '9fab92819f39214f274dd087e8a7d014a137b715', '2dae8a82f56f0c95eda5f5c2c84a5e5059513466', '248b017bab494d691820106cfd0aea60b25b4a95', '5f4211155ff8acff61259dc10d68ea33bc28a6ec', '0ea2b1d30367fdb86d8e51f23a00db35b2025d49', '6c86910d655a49095ccb7e7ecc850bae6e221eed'], 'highlights_ppl': [11.51093422905635, 29.42712186181532, 30.700566902443356, 20.28

100%|██████████| 1/1 [00:00<00:00, 32.46it/s]


Without Watermark
Text after normalization:

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 34.07it/s]


2.3679224856097196
With Watermark
Text after normalization:

<pad> Daniela Pelaez is a student from Florida.</s>

<pad> Daniela Pelaez is a student from Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 37.11it/s]


3.0982566621589194
1
delta: 2.5
ARTICLE
Editor's note: Erica Williams is deputy director of Campus Progress, a project of the Center for American Progress, a Washington-based organization that describes itself as dedicated to progressive causes. She works to engage the millennial generation and communities such as people of color, women and people of faith in the political process. She can be found at ericawilliamsonline.com and on Twitter at @ericawilliamsdc. Erica Williams says the idea that young people don't care about the health care issue is wrong. (CNN) -- As Congress returns to Capitol Hill, back from a recess of contentious town halls on health care reform, one new voice has the potential to break through the seemingly endless deadlock: the voice of young Americans. Just Thursday, there were more than 880,000 Facebook status updates posted with the meme of a demand for health care reform, generated organically and spread virally from young people and other Facebook users acros

100%|██████████| 1/1 [00:00<00:00, 26.48it/s]


Without Watermark
Text after normalization:

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>


100%|██████████| 1/1 [00:00<00:00, 24.27it/s]

16.153275564235432
With Watermark
Text after normalization:

<pad> Erica Williams: Young people don't care about health care reform, but it is an important part of the larger debate. Williams says it is an important part of the debate. Williams says it is an important part of the debate.</s>

<pad> Erica Williams: Young people don't care about health care reform, but it is an important part of the larger debate. Williams says it is an important part of the debate. Williams says it is an important part of the debate.</s>



100%|██████████| 1/1 [00:00<00:00, 27.50it/s]


8.04294632991706
2
delta: 2.5
ARTICLE
highlights
Anish Goel: The dilemma faced by the U.S. with Iraq could happen in Afghanistan .
He says: A weak Iraqi government with a poorly trained Army faces a fierce insurgency .
When U.S. troops leave Afghanistan, will government be able to stop Taliban, he asks .


100%|██████████| 1/1 [00:00<00:00, 26.66it/s]


Without Watermark
Text after normalization:

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>


100%|██████████| 1/1 [00:00<00:00, 29.50it/s]


2.4067782896530847
With Watermark
Text after normalization:

<pad> A growing conflict is causing an unexpected conflict in the country.</s>

<pad> A growing conflict is causing an unexpected conflict in the country.</s>


100%|██████████| 1/1 [00:00<00:00, 29.03it/s]


19.80952536232989
3
delta: 2.5
ARTICLE
One in every 13 people on the planet check Facebook the moment they wake up - leading to reports of Facebook addiction and social anxiety. And, according to one researcher, the fault lies with the little red notification icons and the numbers that litter the social network. Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers, from the site – and discovered that their removal improved a user’s enjoyment. Scroll down for video . Software expert Benjamin Grosser recently created a browser plug-in to automatically remove all metrics from Facebook, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red . The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics from the site, including likes, shares, comments, number of eve

100%|██████████| 1/1 [00:00<00:00, 23.38it/s]


Without Watermark
Text after normalization:

<pad>Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message

100%|██████████| 1/1 [00:00<00:00, 16.01it/s]

1.166726540929002
With Watermark





Text after normalization:

<pad> Software expert Benjamin Grosser recently created a browser plug-in to remove all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. Users can't know if their friend count is being observed at any one moment, but it causes the metric to disappear.</s>

<pad> Software expert Benjamin Grosser recently created a browser plug-in to remove all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. Users can't know if their friend count is being observed at any one moment, but it causes the metric to disappear.</s>


100%|██████████| 1/1 [00:00<00:00, 23.76it/s]


1.9941419530105766
4
delta: 2.5
ARTICLE
Real Madrid superstar Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders . Real Madrid legend Iker Casillas has once again stated his desire to play in the MLS once his glittering career at the Bernabeu is over. The Spain international has spent his entire career with Los Blancos after being handed his debut as a 16-year-old and his current contract expires in 2017. 'I have no problem saying that I would like to play in the US,' Casillas told The Wall Street Journal without putting any timescale on his departure. Real Madrid No 1 Iker Casillas can see himself playing in the US at the end of his career . The Spain international looks to the skies during Real Madrid's 2-1 defeat of Cordoba . The 33-year-old's contract at the Bernabeu expires at the end of the 2016-17 season . 'This has been a tough past year and a half, but it has made me tougher mentally. At the club level, Real Madrid winning lifted a huge

100%|██████████| 1/1 [00:00<00:00, 24.91it/s]


Without Watermark
Text after normalization:

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>


100%|██████████| 1/1 [00:00<00:00, 29.40it/s]

1.5120370494723798
With Watermark
Text after normalization:

<pad> Cristiano Ronaldo sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend is currently on contract with the club for the 2016-17 season. The 33-year-old is currently on contract with the club for the 2016-17 season.</s>

<pad> Cristiano Ronaldo sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend is currently on contract with the club for the 2016-17 season. The 33-year-old is currently on contract with the club for the 2016-17 season.</s>



100%|██████████| 1/1 [00:00<00:00, 29.07it/s]


2.495017474471146
5
delta: 2.5
ARTICLE
Ronny Deila insists he does not care if Celtic edge the title by a single point – so long it's part of a domestic Treble. The Parkhead side find themselves embroiled in a title joust for the first time in three years, with second placed Aberdeen just two points behind going into the New Year. Expected to romp to four-in-a-row, the champions lost to Dundee United and only managed a home goalless draw with bottom club Ross County on Saturday. Celtic manager Ronny Deila insists he isn't concerned how his side win the Scottish Premiership . Conceding it would be 'unthinkable' to surrender the championship, Deila admitted he would settle for limping over the line by the slenderest of margins. 'If we win the Treble – yes,' he said. 'I don't care if it's one point or ten points as long as we win the Treble. 'It's all about trophies. No-one asks how many points do you win by, it's all about trophies. 'That's always been the case in history. You never reme

100%|██████████| 1/1 [00:00<00:00, 26.79it/s]


Without Watermark
Text after normalization:

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>


100%|██████████| 1/1 [00:00<00:00, 27.06it/s]

25.691728459487038
With Watermark
Text after normalization:

<pad>Celtic beat bottom club Ross County on Saturday. The Championship is part of a domestic Treble. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. The Championship is an all-time top league. Th


100%|██████████| 1/1 [00:00<00:00, 15.65it/s]


1.3860241447092296
6
delta: 2.5
ARTICLE
A rare letter written by Napoleon Bonaparte in English will go to the auction block in France on Sunday. The letter could fetch close to $100,000 in the auction in the French town of Fontainebleau, south of Paris. Auction house Osenat describes it as the first letter the French emperor wrote in English on St. Helena, after he was defeated and exiled to the British island to live under military guard. Learning English behind the backs of his captors "was a sort of revenge, a historical revenge" for Napoleon, said Jean-Christophe Chataigner of Osenat. "He was imprisoned by the English... and he wants to continue to have a certain degree of independence, of freedom, and to be able to learn English without his jailers knowing it was a great motivation for him," Chataigner said. The auctioneer said Napoleon picked up English relatively quickly and well. "I think that French people who learn English today make lots more mistakes than Napoleon at the ti

100%|██████████| 1/1 [00:00<00:00, 26.36it/s]


Without Watermark
Text after normalization:

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>


100%|██████████| 1/1 [00:00<00:00, 24.22it/s]

2.5025988416832337
With Watermark





Text after normalization:

<pad> Jean-Christophe Chataigner of Osenat says the letter is "remarkable" and "someone is really a very good student"</s>

<pad> Jean-Christophe Chataigner of Osenat says the letter is "remarkable" and "someone is really a very good student"</s>


100%|██████████| 1/1 [00:00<00:00, 19.18it/s]


2.5564455214892896
7
delta: 2.5
ARTICLE
By . Jennifer Newton for MailOnline . Rapper and reality TV star Joe Budden has appeared in court to face charges that he viciously beat his ex-girlfriend in a jealous rage outside a New York restaurant. The 33-year-old Pump It Up singer turned himself into police on Wednesday when he attended the 34th Precinct station house in Inwood, Manhattan with his lawyer present. He was later arraigned on charges of assault, grand larceny and robbery in Manhattan Criminal Court. Rapper Joe Budden, pictured who has appeared in court after being accused of viciously beating up his ex-girlfriend in a jealous rage . However, according to the Daily News, the Slaughterhouse hip hop crew member was later released after his mother posted the $10,000 bail needed to free him. He told them as he left the courthouse: 'It's good to be free. 'I’ve been portrayed worse by better,' he added. Career trajectory: Budden is a member of the Slaughterhouse hip hop crew and a st

100%|██████████| 1/1 [00:00<00:00, 19.68it/s]


Without Watermark
Text after normalization:

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>


100%|██████████| 1/1 [00:00<00:00, 16.48it/s]


1.5526731110388385
With Watermark
Text after normalization:

<pad> Joe Budden, 33, is accused of assault, grand larceny and robbery. He is accused of assault, grand larceny and robbery. His mother posted the $10,000 bail to free him. The 33-year-old rapper was also ordered to sign a 'protection order' forbidding him from contacting the ex-girlfriend. The rapper is currently on bail on charges of assault, grand larceny and robbery.</s>

<pad> Joe Budden, 33, is accused of assault, grand larceny and robbery. He is accused of assault, grand larceny and robbery. His mother posted the $10,000 bail to free him. The 33-year-old rapper was also ordered to sign a 'protection order' forbidding him from contacting the ex-girlfriend. The rapper is currently on bail on charges of assault, grand larceny and robbery.</s>


100%|██████████| 1/1 [00:00<00:00, 17.06it/s]


2.0107836761224487
8
delta: 2.5
ARTICLE
Pet owners looking to launch the next online sensation or just longing for a new view of their dog's dashing and digging won't have to shop for long to find the perfect holiday gift. Wrap up a dog harness that holds any durable, wearable camera and watch Frisbee fetch, lazy lap naps and every memory in between come alive. GoPro Inc.'s Fetch dog harness fits over Fido's chest or back and holds the small, waterproof camera known for attaching to helmets, surfboards, cars and wrists to film rugged adventures. Sony, Garmin and Kurgo also make camera mounts for dogs. The device is among a legion of gifts that retailers have rounded up for pet wish lists this year. Narrowing it down is tough, but the harness tops the more unique options and creates footage that lasts. Doggone awesome: A dog wearing two GoPro cameras, one on his back and one on his chest is  held on by what is known as a Fetch dog harness . Bark up someone's else's tree: Thor, a French 

100%|██████████| 1/1 [00:00<00:00, 20.55it/s]


Without Watermark
Text after normalization:

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>


100%|██████████| 1/1 [00:00<00:00, 24.77it/s]

1.874236702766601
With Watermark





Text after normalization:

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>


100%|██████████| 1/1 [00:00<00:00, 28.42it/s]


31.274293580217943
9
delta: 2.5
ARTICLE
Two California teachers who were arrested on allegations of engaging in sexual encounters on the beach with their male high school students will not be charged with sexual assault. However, South Hills High School teachers Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were each charged yesterday with one misdemeanor count of contributing to the delinquency of a minor after allegedly providing alcohol to students. They could face a maximum sentence of one year in jail if convicted. Prosecutors said they found insufficient evidence to file sexual assault charges. Scroll down for video . Melody Lippert (left), 38, and Michelle Ghirelli (right), 30, both from Covina, were arrested last month over sexual assault allegations, but will not be charged for sexual assault. However they were each charged on one misdemeanor count of contributing to the delinquency of a minor . As the investigation continues, Lippert and Ghirelli could also

100%|██████████| 1/1 [00:00<00:00, 27.62it/s]


Without Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>


100%|██████████| 1/1 [00:00<00:00, 27.49it/s]

1.325813141716708
With Watermark
Text after normalization:

<pad>Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on suspicion of involvement in sexual assault. The teachers, 38, and Ghirelli, 30, both from Covina, were charged with conspiracy and contributing to the delinquency of a minor. The pair met up with five students on a non-school sponsored camping trip from December 27 to 29 on the beach. The teachers, 38, and Ghirelli, 30, both from Covina, were released after posting bail and currently remain on administrative leave. The teachers, 38, and Ghirelli, 30, both from Covina, were released after posting bail and currently remain on paid administrative leave. The teachers, 38, and Ghirelli, 30, both from Covina, were released after posting bail and currently remain on paid administrative leave. The teachers, 38, and Ghirelli, 30, both from Covina, were released after posting bail and currently remain on paid administrative leave. The teach


100%|██████████| 1/1 [00:00<00:00, 16.86it/s]


1.64317469788653
10
delta: 2.5
ARTICLE
By . Christopher Stevens . PUBLISHED: . 17:25 EST, 29 September 2013 . | . UPDATED: . 17:37 EST, 29 September 2013 . Atlantis was buried beneath the waves by a cataclysm aeons ago. Addicts of Seventies’ TV will know the cataclysm had a name: Patrick Duffy. The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas, as the last survivor of that mythical submerged city. He had webbed feet and gills, and he could live off seaweed at the bottom of the ocean. Action packed: Atlantis promises to be a romp through Greek fables . The problem was, Duffy’s acting was so wooden that he floated. The Man From Atlantis was washed up after one series. Keen to avoid any confusion with this disastrous prototype, the producers of BBC1’s new family adventure serial, Atlantis, have changed everything. Atlantis isn’t an undersea kingdom, for a start — it’s a hot and dusty walled city on a Greek island. The hero Jason doesn’t h

100%|██████████| 1/1 [00:00<00:00, 25.83it/s]


Without Watermark
Text after normalization:

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>


100%|██████████| 1/1 [00:00<00:00, 26.03it/s]

1.7570570810328938
With Watermark
Text after normalization:

<pad>A comedy star is being introduced as a family. The story is a drama, but it is not an undersea kingdom for a start. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and


100%|██████████| 1/1 [00:00<00:00, 15.99it/s]


1.6873532429919733
11
delta: 2.5
ARTICLE
Philadelphia, Pennsylvania (CNN) -- Paulette Beale shakes her head at the suggestion, then flashes a contagious smile. "It's still history," she says, to rebut the notion there could be less intensity for President Barack Obama in the African-American community the second time around. "The first history was that he won. The second history's that he won twice. So, it's not just about history the first time, you have to be concerned about the history for the next four years also. You can make history more than one time, you know." Her mother and father stand a few feet away, nodding approvingly. Paul and Altermese Beale founded Paul Beale's Florist 41 years ago. Paulette takes the lead now, but her parents are on hand helping most days in a shop that is an institution in the Ogontz Avenue area of North Philadelphia. "We love him," Altermese Beale says of Obama. "One of the proudest days of my life was the day he was elected." The Beales are determ

100%|██████████| 1/1 [00:00<00:00, 21.79it/s]


Without Watermark
Text after normalization:

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>


100%|██████████| 1/1 [00:00<00:00, 19.31it/s]

3.0043870665178445
With Watermark





Text after normalization:

<pad> Paulette Beale: The idea is for people to have the proper ID, but it isn't a big deal.</s>

<pad> Paulette Beale: The idea is for people to have the proper ID, but it isn't a big deal.</s>


100%|██████████| 1/1 [00:00<00:00, 18.32it/s]


8.728285811691853
12
delta: 2.5
ARTICLE
A Filipino maid has been left covered in burns after her Saudi boss's mother allegedly threw boiling water at her as punishment for not making coffee quick enough. The 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. Once there, she passed her cousin's phone number to hospital staff and urged them to call for help. Attack: This is the 23-year-old who was burned after her boss's mother allegedly threw boiling water at her . When she returned for a check-up, her cousin arrived to take her home. She is now in the care of the Philippine Embassy. Outraged, her cousin has shared images of 'Fatma', which is not her real name, on Facebook. The posts also claim Fatma was beaten by her employer and deprived of food. According to ABS-CBN News, she was scalded after her employer's mother in Riyadh, Saudi Arabia, became enraged at the time it was taking to brew coffee. A tussle ensued, which led to Fatma being 

100%|██████████| 1/1 [00:00<00:00, 18.44it/s]


Without Watermark
Text after normalization:

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>


100%|██████████| 1/1 [00:00<00:00, 32.55it/s]

1.412812075113653
With Watermark
Text after normalization:

<pad> 23-year-old is being treated for burns after boss's mother allegedly threw boiling water at her. The 23-year-old is now in the care of the Philippine Embassy. The posts claim Fatma is beaten by her employer and deprived of food.</s>

<pad> 23-year-old is being treated for burns after boss's mother allegedly threw boiling water at her. The 23-year-old is now in the care of the Philippine Embassy. The posts claim Fatma is beaten by her employer and deprived of food.</s>



100%|██████████| 1/1 [00:00<00:00, 33.49it/s]


1.8820731883766246
13
delta: 2.5
ARTICLE
By . Annabel Fenwick Elliott . A woman has claimed that Covergirl's two-step Bombshell Volume mascara is so hard to get off, it caused her to lose a significant clump of eyelashes in trying. 'I was wiping downward with the cotton ball and noticed a chunk of my eyelashes fell into the palm of my hand', Amy Schavolt, 21, based in West Palm Beach, Florida, tells ABC15. Ms Schavolt was initially impressed with the dramatic results she achieved from the mascara, which Covergirl boasts delivers 'ten times more noticeable lashes vs. bare lashes'. Noticeable indeed! Amy Schavolt claims Covergirl's Bombshell Volume mascara was so hard to get off - even with make-up remover - that she wrenched out a sizable clump of her lashes in trying (pictured) Product fail: Ms Schavolt (pictured) says she was initially impressed with the bold effects of the mascara, but it quickly turned into a 'clumpy, gooey mess' before it caused her lashes to fall out . 'I had a co

100%|██████████| 1/1 [00:00<00:00, 23.23it/s]


Without Watermark
Text after normalization:

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>


100%|██████████| 1/1 [00:00<00:00, 26.71it/s]

1.87450304598904
With Watermark
Text after normalization:

<pad>Amy Schavolt, 21, based in West Palm Beach, Florida, claims it was so hard to get off. The mascara quickly turned into a clumpy, gooey mess before it was ruined. The mascara quickly turned into a clumpy, gooey mess but it eventually turned into a clumpy, gooey mess. Ms Schavolt, 21, explains to ABC15, 'I was wiping downward with the cotton ball and noticed a chunk of my eyelashes fell into the palm of my hand.' The mascara quickly turned into a clumpy, gooey mess but it eventually turned into a clumpy, gooey mess' She claims the mascara is so hard to get off, but it soon became a clumpy, gooey mess. She claims it was so hard to get off, but it soon became a clumpy, gooey mess. She claims it was so hard to get off, but it soon became a clumpy, gooey mess. She claims it was so hard to get off, but it soon became a clumpy, gooey mess. She claims it was so hard to get off, but it soon became a clumpy, gooey mess. She claims it


100%|██████████| 1/1 [00:00<00:00, 15.38it/s]


1.6342397928367531
14
delta: 2.5
ARTICLE
He still has the power to excite, exhilarate — and court controversy. Picasso’s long-awaited, vastly over-budget museum reopens today after five years of renovation. Housed in the Marais’s 17th-century Hotel Sale — the former home of a salt-tax collector — it’s an enormous, elegant space, and Picasso’s work fills it to the rafters. The man behind the magic: Some of Pablo Picasso's finest works are found in the Paris museum of his art . I had a sneak preview. There are 5,000 pieces, illustrating the breadth of his repertoire, from early paintings to found-object sculptures, collages and shocking nudes. The collection was given to Paris in 1979 — in lieu of inheritance tax — by Picasso’s heirs. A visitor stands in front of the painting 'Woman Throwing a Stone' and the sculptures (L-R) 'Head of a Woman', 'Bust of a Woman' and 'Bust of a Woman' by Spanish painter Pablo Picasso (1881-1973) Picasso's painting 'Gustave Coquiot' was one of 400 pieces op

100%|██████████| 1/1 [00:00<00:00, 18.95it/s]


Without Watermark
Text after normalization:

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>


100%|██████████| 1/1 [00:00<00:00, 24.23it/s]

1.3264452522520611
With Watermark





Text after normalization:

<pad>Picasso's 17th-century Hotel Sale reopens today. Picasso's work is an impressive, elegant space. Picasso's work is an ancient, elegant place, and Picasso's work is an impressive, elegant place. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Pica

100%|██████████| 1/1 [00:00<00:00, 15.31it/s]


1.3309449497825756
15
delta: 2.5
ARTICLE
highlights
Malaysia's opposition leader calls on supporters to attend rally on Wednesday .
Anwar Ibrahim says Sunday's poll marred by unprecedented fraud .
Election was won by ruling coalition, extending its 56 years in power .
A government spokesperson says allegations of fraud unsubstantiated .


100%|██████████| 1/1 [00:00<00:00, 29.27it/s]


Without Watermark
Text after normalization:

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>


100%|██████████| 1/1 [00:00<00:00, 26.58it/s]


2.6630211599027658
With Watermark
Text after normalization:

<pad>Malaysia's opposition leader says it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "disappointing" that it is "di

100%|██████████| 1/1 [00:00<00:00, 14.65it/s]


1.1964979608618613
16
delta: 2.5
ARTICLE
The legal high known as ‘hippy crack’ was used by one in 16 young people, according to new figures. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000  people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is the second most popular drug among young people. Legal high: Discarded Nitrous Oxide canisters and balloons used for inhaling abandoned in campsites at the Glastonbury Festival . On a downer: Illegal drug use has fallen, particularly for cocaine and ecstasy, latest figures show . Nitrous oxide is inhaled to make people feel euphoric and relaxed - but drug experts warn it can cause death on first time use through suffocation. For the first time the 2012-13 Crime Survey for England and Wales included questions about the use of the drug. It is often sold to recreational users in balloons in nightclubs and at 

100%|██████████| 1/1 [00:00<00:00, 23.39it/s]


Without Watermark
Text after normalization:

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>


100%|██████████| 1/1 [00:00<00:00, 26.48it/s]

1.2609500387270791
With Watermark





Text after normalization:

<pad>Drug used by 16 young people last year is at record low. The drug is often sold for drinking, smoking, or smoking. The home office minister says it is at a record low. The survey was released last year. The survey was released last year. The home office said the drug used by 16 young people was at the lowest level since records began. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. T

100%|██████████| 1/1 [00:00<00:00, 16.48it/s]


1.8369774198953028
17
delta: 2.5
ARTICLE
A regiment of soldiers from a pint-sized battalion has been honoured 100 years after they volunteered for service in World War One. The Birkenhead Bantams were formed in November 1914 after the Wirral town's MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. Bigland's petition to the War Office led to the minimum height for soldiers in the Army to be lowered from five foot three inches, to just five foot. The pint-sized battalion has been honoured 100 years after they volunteered for service in World War One . As a result thousands of men who had been refused the chance to serve their country, were able to fight in the Great War. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including at Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country. A pl

100%|██████████| 1/1 [00:00<00:00, 28.11it/s]


Without Watermark
Text after normalization:

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 23.57it/s]

1.157230604239093
With Watermark





Text after normalization:

<pad> Birkenhead Bantams formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. The Bantam battalions, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles. By the end of the war in 1918, more than 900 officers and soldiers were lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. The Bantam battalions, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles. By the end of the war in 1918, more than 900 officers and soldiers were lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 26.92it/s]


1.284693731313245
18
delta: 2.5
ARTICLE
Sitting in the casualty department of one of the best-known hospitals in Britain, I can’t help but notice it’s packed to the rafters. Yet it’s not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I’m in deep shock and on the point of passing out in agony. This is by far the worst pain I’ve ever suffered. I know my upper arm is broken — I fell off a step and heard the crack — and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: ‘Hope you’ve set aside the whole day. I’ve been waiting ages!’ Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital . As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she’s come for — a headache, no less. ‘No point calling the GP. You can never g

100%|██████████| 1/1 [00:00<00:00, 23.07it/s]


Without Watermark
Text after normalization:

<pad> The Royal Free Hospital in North London looks so strained. It's packed to the rafters. But it's not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I'm in deep shock and on the point of passing out in agony. This is by far the worst pain I've ever suffered. I know my upper arm is broken - I fell off a step and heard the crack - and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: 'Hope you've set aside the whole day. I've been waiting ages!' Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital. As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she's come for — a headache, no less. 'No point calling the GP. You can never get seen,' she says. It's the same sto

100%|██████████| 1/1 [00:00<00:00, 16.87it/s]


1.1526999235278335
With Watermark
Text after normalization:

<pad>A patient is being treated at Royal Free Hospital in North London. Her fracture clinic is being treated at 8.30pm. The nurse, who is sent for an X-ray, is sent for an X-ray. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent for an X-ray, but is still waiting for the appointment. The nurse is sent fo

100%|██████████| 1/1 [00:00<00:00, 13.94it/s]


1.830985497858646
19
delta: 2.5
ARTICLE
Washington (CNN)  -- A new book about former Sen. John Edwards paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy -- all in the name of trying to win the presidency. In "The Politician," former Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock. "The Politician" went on sale Saturday. Young described an elaborate plan that allowed Edwards to maintain a mistress while he sought the Democratic presidential nomination in 2008. The plan was funded by two wealthy benefactors, the late trial lawyer Fred Baron and banking heir widow Bunny Mellon, but Mellon was unaware that her money was being used for the mistress. A federal grand jury is investigating payments the former senator's campaign and supporters made to Rielle Hunter, who was a videogra

100%|██████████| 1/1 [00:00<00:00, 21.01it/s]


Without Watermark
Text after normalization:

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>


100%|██████████| 1/1 [00:00<00:00, 24.43it/s]


4.066388505592387
With Watermark
Text after normalization:

<pad> Edwards says Edwards is the father of the woman. Edwards says Edwards is "a very honest, honest, honest politician," but is worried about her family.</s>

<pad> Edwards says Edwards is the father of the woman. Edwards says Edwards is "a very honest, honest, honest politician," but is worried about her family.</s>


100%|██████████| 1/1 [00:00<00:00, 23.13it/s]


13.224832844035635
20
{'article_id': ['6fa5e02cf2bbd138da7ac530680843071b3957d8', '2326d69072e1821bc767584dc2c1274db536b982', '3bbfad251b66eec6ad978665d2bfbd60c12a9789', '02e60b6c1206a757fbd15ecbec840e270b490c0f', '1a69268d48af97a75ca677e9aefaaaa363e269a3', 'ca584d07782df66cf85d2c13526cc575005cf223', '8a1b3c68e2f98f5ef496f0ffa873f0709777f1aa', '6ac3db13498e3c2a5a9b58908c6015cbddff9ece', 'c84413ad2b705d85cde4d08591aeeeca8855fe4d', 'b19693ddbe3a374e877326f31ac5376746236183', '641f83379174ad75162641d7cb02819766d50011', '7d173d0978bfc75c6758e399b90b9f6c2dbad957', '30a2fb66e6747a28f554e01ff65b2b14466ad0f0', '49d472de3b6efb8ee70aa3f289d638deeb1470da', '9fab92819f39214f274dd087e8a7d014a137b715', '2dae8a82f56f0c95eda5f5c2c84a5e5059513466', '248b017bab494d691820106cfd0aea60b25b4a95', '5f4211155ff8acff61259dc10d68ea33bc28a6ec', '0ea2b1d30367fdb86d8e51f23a00db35b2025d49', '6c86910d655a49095ccb7e7ecc850bae6e221eed'], 'highlights_ppl': [11.51093422905635, 29.42712186181532, 30.700566902443356, 20.2

100%|██████████| 1/1 [00:00<00:00, 30.75it/s]


Without Watermark
Text after normalization:

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 20.24it/s]

2.3679224856097196
With Watermark





Text after normalization:

<pad> Daniela Pelaez is on the path to the medical school and is currently working as a freelance writer.</s>

<pad> Daniela Pelaez is on the path to the medical school and is currently working as a freelance writer.</s>


100%|██████████| 1/1 [00:00<00:00, 26.69it/s]


5.318113403501336
1
delta: 3.0
ARTICLE
Editor's note: Erica Williams is deputy director of Campus Progress, a project of the Center for American Progress, a Washington-based organization that describes itself as dedicated to progressive causes. She works to engage the millennial generation and communities such as people of color, women and people of faith in the political process. She can be found at ericawilliamsonline.com and on Twitter at @ericawilliamsdc. Erica Williams says the idea that young people don't care about the health care issue is wrong. (CNN) -- As Congress returns to Capitol Hill, back from a recess of contentious town halls on health care reform, one new voice has the potential to break through the seemingly endless deadlock: the voice of young Americans. Just Thursday, there were more than 880,000 Facebook status updates posted with the meme of a demand for health care reform, generated organically and spread virally from young people and other Facebook users across

100%|██████████| 1/1 [00:00<00:00, 23.53it/s]


Without Watermark
Text after normalization:

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>


100%|██████████| 1/1 [00:00<00:00, 23.27it/s]

16.153275564235432
With Watermark
Text after normalization:

<pad> Erica Williams: Young people don't care about health care reform, but it is an important part of the larger debate. Williams says it is an important part of the fight for reform. She says it is important, but it is an important part. Williams: The debate is difficult for young people.</s>

<pad> Erica Williams: Young people don't care about health care reform, but it is an important part of the larger debate. Williams says it is an important part of the fight for reform. She says it is important, but it is an important part. Williams: The debate is difficult for young people.</s>



100%|██████████| 1/1 [00:00<00:00, 23.98it/s]


13.170193388261763
2
delta: 3.0
ARTICLE
highlights
Anish Goel: The dilemma faced by the U.S. with Iraq could happen in Afghanistan .
He says: A weak Iraqi government with a poorly trained Army faces a fierce insurgency .
When U.S. troops leave Afghanistan, will government be able to stop Taliban, he asks .


100%|██████████| 1/1 [00:00<00:00, 24.74it/s]


Without Watermark
Text after normalization:

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>


100%|██████████| 1/1 [00:00<00:00, 22.84it/s]

2.4067782896530847
With Watermark





Text after normalization:

<pad> A growing conflict is happening across Iraq, but it is still happening.</s>

<pad> A growing conflict is happening across Iraq, but it is still happening.</s>


100%|██████████| 1/1 [00:00<00:00, 23.20it/s]


24.033131217980824
3
delta: 3.0
ARTICLE
One in every 13 people on the planet check Facebook the moment they wake up - leading to reports of Facebook addiction and social anxiety. And, according to one researcher, the fault lies with the little red notification icons and the numbers that litter the social network. Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers, from the site – and discovered that their removal improved a user’s enjoyment. Scroll down for video . Software expert Benjamin Grosser recently created a browser plug-in to automatically remove all metrics from Facebook, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red . The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics from the site, including likes, shares, comments, number of ev

100%|██████████| 1/1 [00:00<00:00, 20.22it/s]


Without Watermark
Text after normalization:

<pad>Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message

100%|██████████| 1/1 [00:00<00:00, 14.00it/s]


1.166726540929002
With Watermark
Text after normalization:

<pad> Software expert Benjamin Grosser recently created a browser plug-in to remove all metrics and add them to the site. Users can't use metric to get new notifications. The Google Chrome plugin can be installed on Chrome, Firefox and Safari. It automatically removes all metrics and adds them to the site.</s>

<pad> Software expert Benjamin Grosser recently created a browser plug-in to remove all metrics and add them to the site. Users can't use metric to get new notifications. The Google Chrome plugin can be installed on Chrome, Firefox and Safari. It automatically removes all metrics and adds them to the site.</s>


100%|██████████| 1/1 [00:00<00:00, 17.49it/s]


3.4481859524091782
4
delta: 3.0
ARTICLE
Real Madrid superstar Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders . Real Madrid legend Iker Casillas has once again stated his desire to play in the MLS once his glittering career at the Bernabeu is over. The Spain international has spent his entire career with Los Blancos after being handed his debut as a 16-year-old and his current contract expires in 2017. 'I have no problem saying that I would like to play in the US,' Casillas told The Wall Street Journal without putting any timescale on his departure. Real Madrid No 1 Iker Casillas can see himself playing in the US at the end of his career . The Spain international looks to the skies during Real Madrid's 2-1 defeat of Cordoba . The 33-year-old's contract at the Bernabeu expires at the end of the 2016-17 season . 'This has been a tough past year and a half, but it has made me tougher mentally. At the club level, Real Madrid winning lifted a huge

100%|██████████| 1/1 [00:00<00:00, 20.97it/s]


Without Watermark
Text after normalization:

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>


100%|██████████| 1/1 [00:00<00:00, 28.30it/s]

1.5120370494723798
With Watermark
Text after normalization:

<pad>Cristiano Ronaldo sent off against Cordoba for punching, kicking and kicking defenders. Real Madrid legend is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish international is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish international is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the


100%|██████████| 1/1 [00:00<00:00, 16.98it/s]


1.6378641795826248
5
delta: 3.0
ARTICLE
Ronny Deila insists he does not care if Celtic edge the title by a single point – so long it's part of a domestic Treble. The Parkhead side find themselves embroiled in a title joust for the first time in three years, with second placed Aberdeen just two points behind going into the New Year. Expected to romp to four-in-a-row, the champions lost to Dundee United and only managed a home goalless draw with bottom club Ross County on Saturday. Celtic manager Ronny Deila insists he isn't concerned how his side win the Scottish Premiership . Conceding it would be 'unthinkable' to surrender the championship, Deila admitted he would settle for limping over the line by the slenderest of margins. 'If we win the Treble – yes,' he said. 'I don't care if it's one point or ten points as long as we win the Treble. 'It's all about trophies. No-one asks how many points do you win by, it's all about trophies. 'That's always been the case in history. You never rem

100%|██████████| 1/1 [00:00<00:00, 27.33it/s]


Without Watermark
Text after normalization:

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>


100%|██████████| 1/1 [00:00<00:00, 27.53it/s]


25.691728459487038
With Watermark
Text after normalization:

<pad>Parkhead beat Ross County on Saturday 4-0. Aberdeen lost to Dundee United on Saturday. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. The Championship is part of a domestic Treble. T

100%|██████████| 1/1 [00:00<00:00, 16.33it/s]


1.43886968636022
6
delta: 3.0
ARTICLE
A rare letter written by Napoleon Bonaparte in English will go to the auction block in France on Sunday. The letter could fetch close to $100,000 in the auction in the French town of Fontainebleau, south of Paris. Auction house Osenat describes it as the first letter the French emperor wrote in English on St. Helena, after he was defeated and exiled to the British island to live under military guard. Learning English behind the backs of his captors "was a sort of revenge, a historical revenge" for Napoleon, said Jean-Christophe Chataigner of Osenat. "He was imprisoned by the English... and he wants to continue to have a certain degree of independence, of freedom, and to be able to learn English without his jailers knowing it was a great motivation for him," Chataigner said. The auctioneer said Napoleon picked up English relatively quickly and well. "I think that French people who learn English today make lots more mistakes than Napoleon at the time

100%|██████████| 1/1 [00:00<00:00, 24.28it/s]


Without Watermark
Text after normalization:

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>


100%|██████████| 1/1 [00:00<00:00, 23.85it/s]

2.5025988416832337
With Watermark





Text after normalization:

<pad> Jean-Christophe Chataigner, an auctioneer, is interested in a rare letter that is being auctioned in the French town of Fontainebleau on Sunday.</s>

<pad> Jean-Christophe Chataigner, an auctioneer, is interested in a rare letter that is being auctioned in the French town of Fontainebleau on Sunday.</s>


100%|██████████| 1/1 [00:00<00:00, 21.25it/s]


2.3473785632911643
7
delta: 3.0
ARTICLE
By . Jennifer Newton for MailOnline . Rapper and reality TV star Joe Budden has appeared in court to face charges that he viciously beat his ex-girlfriend in a jealous rage outside a New York restaurant. The 33-year-old Pump It Up singer turned himself into police on Wednesday when he attended the 34th Precinct station house in Inwood, Manhattan with his lawyer present. He was later arraigned on charges of assault, grand larceny and robbery in Manhattan Criminal Court. Rapper Joe Budden, pictured who has appeared in court after being accused of viciously beating up his ex-girlfriend in a jealous rage . However, according to the Daily News, the Slaughterhouse hip hop crew member was later released after his mother posted the $10,000 bail needed to free him. He told them as he left the courthouse: 'It's good to be free. 'I’ve been portrayed worse by better,' he added. Career trajectory: Budden is a member of the Slaughterhouse hip hop crew and a st

100%|██████████| 1/1 [00:00<00:00, 18.02it/s]


Without Watermark
Text after normalization:

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>


100%|██████████| 1/1 [00:00<00:00, 21.05it/s]


1.5526731110388385
With Watermark
Text after normalization:

<pad> Hip Hop star turned himself into police on Wednesday. The rapper was later arraigned on charges of assault, grand larceny and robbery. His mother posted the $10,000 bail to be allowed to free him. The 33-year-old rapper was also ordered to sign a restitution order forbidding him from contacting him.</s>

<pad> Hip Hop star turned himself into police on Wednesday. The rapper was later arraigned on charges of assault, grand larceny and robbery. His mother posted the $10,000 bail to be allowed to free him. The 33-year-old rapper was also ordered to sign a restitution order forbidding him from contacting him.</s>


100%|██████████| 1/1 [00:00<00:00, 20.29it/s]


2.4144157413717844
8
delta: 3.0
ARTICLE
Pet owners looking to launch the next online sensation or just longing for a new view of their dog's dashing and digging won't have to shop for long to find the perfect holiday gift. Wrap up a dog harness that holds any durable, wearable camera and watch Frisbee fetch, lazy lap naps and every memory in between come alive. GoPro Inc.'s Fetch dog harness fits over Fido's chest or back and holds the small, waterproof camera known for attaching to helmets, surfboards, cars and wrists to film rugged adventures. Sony, Garmin and Kurgo also make camera mounts for dogs. The device is among a legion of gifts that retailers have rounded up for pet wish lists this year. Narrowing it down is tough, but the harness tops the more unique options and creates footage that lasts. Doggone awesome: A dog wearing two GoPro cameras, one on his back and one on his chest is  held on by what is known as a Fetch dog harness . Bark up someone's else's tree: Thor, a French 

100%|██████████| 1/1 [00:00<00:00, 16.57it/s]


Without Watermark
Text after normalization:

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>


100%|██████████| 1/1 [00:00<00:00, 24.99it/s]

1.874236702766601
With Watermark





Text after normalization:

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>


100%|██████████| 1/1 [00:00<00:00, 25.43it/s]


31.274293580217943
9
delta: 3.0
ARTICLE
Two California teachers who were arrested on allegations of engaging in sexual encounters on the beach with their male high school students will not be charged with sexual assault. However, South Hills High School teachers Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were each charged yesterday with one misdemeanor count of contributing to the delinquency of a minor after allegedly providing alcohol to students. They could face a maximum sentence of one year in jail if convicted. Prosecutors said they found insufficient evidence to file sexual assault charges. Scroll down for video . Melody Lippert (left), 38, and Michelle Ghirelli (right), 30, both from Covina, were arrested last month over sexual assault allegations, but will not be charged for sexual assault. However they were each charged on one misdemeanor count of contributing to the delinquency of a minor . As the investigation continues, Lippert and Ghirelli could also

100%|██████████| 1/1 [00:00<00:00, 23.37it/s]


Without Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>


100%|██████████| 1/1 [00:00<00:00, 29.10it/s]

1.325813141716708
With Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on suspicion of involvement in sexual assault. The teachers, 38, and Ghirelli, 30, both from Covina, were charged with coercion and causing sex. The pair, 38, had been on an off-school-sponsored camping trip from December to 29. The three, 38, and Ghirelli, 30, both were on paid administrative leave, were released on bail.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on suspicion of involvement in sexual assault. The teachers, 38, and Ghirelli, 30, both from Covina, were charged with coercion and causing sex. The pair, 38, had been on an off-school-sponsored camping trip from December to 29. The three, 38, and Ghirelli, 30, both were on paid administrative leave, were released on bail.</s>



100%|██████████| 1/1 [00:00<00:00, 26.01it/s]


4.581503659160678
10
delta: 3.0
ARTICLE
By . Christopher Stevens . PUBLISHED: . 17:25 EST, 29 September 2013 . | . UPDATED: . 17:37 EST, 29 September 2013 . Atlantis was buried beneath the waves by a cataclysm aeons ago. Addicts of Seventies’ TV will know the cataclysm had a name: Patrick Duffy. The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas, as the last survivor of that mythical submerged city. He had webbed feet and gills, and he could live off seaweed at the bottom of the ocean. Action packed: Atlantis promises to be a romp through Greek fables . The problem was, Duffy’s acting was so wooden that he floated. The Man From Atlantis was washed up after one series. Keen to avoid any confusion with this disastrous prototype, the producers of BBC1’s new family adventure serial, Atlantis, have changed everything. Atlantis isn’t an undersea kingdom, for a start — it’s a hot and dusty walled city on a Greek island. The hero Jason doesn’t 

100%|██████████| 1/1 [00:00<00:00, 25.70it/s]


Without Watermark
Text after normalization:

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>


100%|██████████| 1/1 [00:00<00:00, 28.39it/s]

1.7570570810328938
With Watermark
Text after normalization:

<pad>A comedy star is being introduced as a family. The story is a drama, but it is not an undersea kingdom for a start. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all white, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and it is all black. The cast is all black, and


100%|██████████| 1/1 [00:00<00:00, 15.90it/s]


1.6873532429919733
11
delta: 3.0
ARTICLE
Philadelphia, Pennsylvania (CNN) -- Paulette Beale shakes her head at the suggestion, then flashes a contagious smile. "It's still history," she says, to rebut the notion there could be less intensity for President Barack Obama in the African-American community the second time around. "The first history was that he won. The second history's that he won twice. So, it's not just about history the first time, you have to be concerned about the history for the next four years also. You can make history more than one time, you know." Her mother and father stand a few feet away, nodding approvingly. Paul and Altermese Beale founded Paul Beale's Florist 41 years ago. Paulette takes the lead now, but her parents are on hand helping most days in a shop that is an institution in the Ogontz Avenue area of North Philadelphia. "We love him," Altermese Beale says of Obama. "One of the proudest days of my life was the day he was elected." The Beales are determ

100%|██████████| 1/1 [00:00<00:00, 25.94it/s]


Without Watermark
Text after normalization:

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>


100%|██████████| 1/1 [00:00<00:00, 14.92it/s]


3.0043870665178445
With Watermark
Text after normalization:

<pad> Paulette Beale: The idea is for people to have the proper identification, but it is an important part of the Obama campaign.</s>

<pad> Paulette Beale: The idea is for people to have the proper identification, but it is an important part of the Obama campaign.</s>


100%|██████████| 1/1 [00:00<00:00, 18.08it/s]


8.961378115333366
12
delta: 3.0
ARTICLE
A Filipino maid has been left covered in burns after her Saudi boss's mother allegedly threw boiling water at her as punishment for not making coffee quick enough. The 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. Once there, she passed her cousin's phone number to hospital staff and urged them to call for help. Attack: This is the 23-year-old who was burned after her boss's mother allegedly threw boiling water at her . When she returned for a check-up, her cousin arrived to take her home. She is now in the care of the Philippine Embassy. Outraged, her cousin has shared images of 'Fatma', which is not her real name, on Facebook. The posts also claim Fatma was beaten by her employer and deprived of food. According to ABS-CBN News, she was scalded after her employer's mother in Riyadh, Saudi Arabia, became enraged at the time it was taking to brew coffee. A tussle ensued, which led to Fatma being 

100%|██████████| 1/1 [00:00<00:00, 15.58it/s]


Without Watermark
Text after normalization:

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>


100%|██████████| 1/1 [00:00<00:00, 18.51it/s]


1.412812075113653
With Watermark
Text after normalization:

<pad> 23-year-old is being treated for burns after boss's mother allegedly threw boiling water at her. The 23-year-old is being treated for burns at home. The Filipino maid is being treated for burns at home. The post is part of an attempt to spread awareness. The posts claim Fatma is beaten by her employer and is deprived of food.</s>

<pad> 23-year-old is being treated for burns after boss's mother allegedly threw boiling water at her. The 23-year-old is being treated for burns at home. The Filipino maid is being treated for burns at home. The post is part of an attempt to spread awareness. The posts claim Fatma is beaten by her employer and is deprived of food.</s>


100%|██████████| 1/1 [00:00<00:00, 26.44it/s]


3.08858508337708
13
delta: 3.0
ARTICLE
By . Annabel Fenwick Elliott . A woman has claimed that Covergirl's two-step Bombshell Volume mascara is so hard to get off, it caused her to lose a significant clump of eyelashes in trying. 'I was wiping downward with the cotton ball and noticed a chunk of my eyelashes fell into the palm of my hand', Amy Schavolt, 21, based in West Palm Beach, Florida, tells ABC15. Ms Schavolt was initially impressed with the dramatic results she achieved from the mascara, which Covergirl boasts delivers 'ten times more noticeable lashes vs. bare lashes'. Noticeable indeed! Amy Schavolt claims Covergirl's Bombshell Volume mascara was so hard to get off - even with make-up remover - that she wrenched out a sizable clump of her lashes in trying (pictured) Product fail: Ms Schavolt (pictured) says she was initially impressed with the bold effects of the mascara, but it quickly turned into a 'clumpy, gooey mess' before it caused her lashes to fall out . 'I had a coup

100%|██████████| 1/1 [00:00<00:00, 17.98it/s]


Without Watermark
Text after normalization:

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>


100%|██████████| 1/1 [00:00<00:00, 19.26it/s]

1.87450304598904
With Watermark





Text after normalization:

<pad>Amy Schavolt, 21, based in West Palm Beach, Florida, claims it was so hard to get off. The mascara quickly turned into a clump, gooey mess but it eventually turned into a clump. The mascara quickly turned into a clump, gooey mess but it eventually turned into a clump. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers to remove the mascara using an oil-based eyemake-up remover. The spokesperson warned customers 

100%|██████████| 1/1 [00:00<00:00, 15.73it/s]


2.0061042328689864
14
delta: 3.0
ARTICLE
He still has the power to excite, exhilarate — and court controversy. Picasso’s long-awaited, vastly over-budget museum reopens today after five years of renovation. Housed in the Marais’s 17th-century Hotel Sale — the former home of a salt-tax collector — it’s an enormous, elegant space, and Picasso’s work fills it to the rafters. The man behind the magic: Some of Pablo Picasso's finest works are found in the Paris museum of his art . I had a sneak preview. There are 5,000 pieces, illustrating the breadth of his repertoire, from early paintings to found-object sculptures, collages and shocking nudes. The collection was given to Paris in 1979 — in lieu of inheritance tax — by Picasso’s heirs. A visitor stands in front of the painting 'Woman Throwing a Stone' and the sculptures (L-R) 'Head of a Woman', 'Bust of a Woman' and 'Bust of a Woman' by Spanish painter Pablo Picasso (1881-1973) Picasso's painting 'Gustave Coquiot' was one of 400 pieces op

100%|██████████| 1/1 [00:00<00:00, 24.01it/s]


Without Watermark
Text after normalization:

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>


100%|██████████| 1/1 [00:00<00:00, 24.96it/s]

1.3264452522520611
With Watermark
Text after normalization:

<pad>Picasso's 17th-century Hotel Sale reopens today. Picasso's work is an impressive, elegant space for visiting at the museum. Picasso's work is an ancient, elegant place, and Picasso's work is an impressive, elegant space for visiting at the museum. Picasso is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashioned sort of refinement. Picasso's work is an old-fashione


100%|██████████| 1/1 [00:00<00:00, 14.94it/s]


1.3985861800080626
15
delta: 3.0
ARTICLE
highlights
Malaysia's opposition leader calls on supporters to attend rally on Wednesday .
Anwar Ibrahim says Sunday's poll marred by unprecedented fraud .
Election was won by ruling coalition, extending its 56 years in power .
A government spokesperson says allegations of fraud unsubstantiated .


100%|██████████| 1/1 [00:00<00:00, 26.04it/s]


Without Watermark
Text after normalization:

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>


100%|██████████| 1/1 [00:00<00:00, 27.05it/s]


2.6630211599027658
With Watermark
Text after normalization:

<pad> A Malaysian government official praised Anwar, who is backing an election boycott.</s>

<pad> A Malaysian government official praised Anwar, who is backing an election boycott.</s>


100%|██████████| 1/1 [00:00<00:00, 27.18it/s]


11.561721666378702
16
delta: 3.0
ARTICLE
The legal high known as ‘hippy crack’ was used by one in 16 young people, according to new figures. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000  people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is the second most popular drug among young people. Legal high: Discarded Nitrous Oxide canisters and balloons used for inhaling abandoned in campsites at the Glastonbury Festival . On a downer: Illegal drug use has fallen, particularly for cocaine and ecstasy, latest figures show . Nitrous oxide is inhaled to make people feel euphoric and relaxed - but drug experts warn it can cause death on first time use through suffocation. For the first time the 2012-13 Crime Survey for England and Wales included questions about the use of the drug. It is often sold to recreational users in balloons in nightclubs and at 

100%|██████████| 1/1 [00:00<00:00, 24.83it/s]


Without Watermark
Text after normalization:

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>


100%|██████████| 1/1 [00:00<00:00, 27.41it/s]

1.2609500387270791
With Watermark





Text after normalization:

<pad> Drug used by 16 young people last year is at record low. The drug is often sold for drinking, smoking, or smoking. The home office minister says it is at a record low. The survey was released last year. The survey was released last year. The home office is worried it is unsafe.</s>

<pad> Drug used by 16 young people last year is at record low. The drug is often sold for drinking, smoking, or smoking. The home office minister says it is at a record low. The survey was released last year. The survey was released last year. The home office is worried it is unsafe.</s>


100%|██████████| 1/1 [00:00<00:00, 24.31it/s]


7.594922158947168
17
delta: 3.0
ARTICLE
A regiment of soldiers from a pint-sized battalion has been honoured 100 years after they volunteered for service in World War One. The Birkenhead Bantams were formed in November 1914 after the Wirral town's MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. Bigland's petition to the War Office led to the minimum height for soldiers in the Army to be lowered from five foot three inches, to just five foot. The pint-sized battalion has been honoured 100 years after they volunteered for service in World War One . As a result thousands of men who had been refused the chance to serve their country, were able to fight in the Great War. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including at Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country. A pla

100%|██████████| 1/1 [00:00<00:00, 26.74it/s]


Without Watermark
Text after normalization:

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 25.13it/s]

1.157230604239093
With Watermark





Text after normalization:

<pad> Birkenhead Bantams formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. The Bantam battalions, who were renamed the 15th and 16th battalions, Cheshire Regiment, served in bloody battles. By the end of the war in 1918, 900 officers and soldiers were lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. The Bantam battalions, who were renamed the 15th and 16th battalions, Cheshire Regiment, served in bloody battles. By the end of the war in 1918, 900 officers and soldiers were lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 25.44it/s]


1.3672799052955578
18
delta: 3.0
ARTICLE
Sitting in the casualty department of one of the best-known hospitals in Britain, I can’t help but notice it’s packed to the rafters. Yet it’s not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I’m in deep shock and on the point of passing out in agony. This is by far the worst pain I’ve ever suffered. I know my upper arm is broken — I fell off a step and heard the crack — and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: ‘Hope you’ve set aside the whole day. I’ve been waiting ages!’ Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital . As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she’s come for — a headache, no less. ‘No point calling the GP. You can never 

100%|██████████| 1/1 [00:00<00:00, 22.14it/s]


Without Watermark
Text after normalization:

<pad> The Royal Free Hospital in North London looks so strained. It's packed to the rafters. But it's not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I'm in deep shock and on the point of passing out in agony. This is by far the worst pain I've ever suffered. I know my upper arm is broken - I fell off a step and heard the crack - and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: 'Hope you've set aside the whole day. I've been waiting ages!' Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital. As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she's come for — a headache, no less. 'No point calling the GP. You can never get seen,' she says. It's the same sto

100%|██████████| 1/1 [00:00<00:00, 17.00it/s]


1.1526999235278335
With Watermark
Text after normalization:

<pad>A patient is being treated at North London's Royal Free Hospital on Wednesday. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for a

100%|██████████| 1/1 [00:00<00:00, 15.73it/s]


1.5079099462859589
19
delta: 3.0
ARTICLE
Washington (CNN)  -- A new book about former Sen. John Edwards paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy -- all in the name of trying to win the presidency. In "The Politician," former Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock. "The Politician" went on sale Saturday. Young described an elaborate plan that allowed Edwards to maintain a mistress while he sought the Democratic presidential nomination in 2008. The plan was funded by two wealthy benefactors, the late trial lawyer Fred Baron and banking heir widow Bunny Mellon, but Mellon was unaware that her money was being used for the mistress. A federal grand jury is investigating payments the former senator's campaign and supporters made to Rielle Hunter, who was a videogr

100%|██████████| 1/1 [00:00<00:00, 24.38it/s]


Without Watermark
Text after normalization:

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>


100%|██████████| 1/1 [00:00<00:00, 23.96it/s]

4.066388505592387
With Watermark
Text after normalization:

<pad> A new book titled "The Politician", titled "The Politician", titled "Are you out of your mind?", is an attempt to highlight the deceptions of the former Sen.. Edwards' aide says the allegations are false because of his wife. Edwards' lawyer says Edwards is motivated by public and media attention, and is willing to deny the alleged affair.</s>

<pad> A new book titled "The Politician", titled "The Politician", titled "Are you out of your mind?", is an attempt to highlight the deceptions of the former Sen.. Edwards' aide says the allegations are false because of his wife. Edwards' lawyer says Edwards is motivated by public and media attention, and is willing to deny the alleged affair.</s>



100%|██████████| 1/1 [00:00<00:00, 24.00it/s]


11.00435508203171
20
{'article_id': ['6fa5e02cf2bbd138da7ac530680843071b3957d8', '2326d69072e1821bc767584dc2c1274db536b982', '3bbfad251b66eec6ad978665d2bfbd60c12a9789', '02e60b6c1206a757fbd15ecbec840e270b490c0f', '1a69268d48af97a75ca677e9aefaaaa363e269a3', 'ca584d07782df66cf85d2c13526cc575005cf223', '8a1b3c68e2f98f5ef496f0ffa873f0709777f1aa', '6ac3db13498e3c2a5a9b58908c6015cbddff9ece', 'c84413ad2b705d85cde4d08591aeeeca8855fe4d', 'b19693ddbe3a374e877326f31ac5376746236183', '641f83379174ad75162641d7cb02819766d50011', '7d173d0978bfc75c6758e399b90b9f6c2dbad957', '30a2fb66e6747a28f554e01ff65b2b14466ad0f0', '49d472de3b6efb8ee70aa3f289d638deeb1470da', '9fab92819f39214f274dd087e8a7d014a137b715', '2dae8a82f56f0c95eda5f5c2c84a5e5059513466', '248b017bab494d691820106cfd0aea60b25b4a95', '5f4211155ff8acff61259dc10d68ea33bc28a6ec', '0ea2b1d30367fdb86d8e51f23a00db35b2025d49', '6c86910d655a49095ccb7e7ecc850bae6e221eed'], 'highlights_ppl': [11.51093422905635, 29.42712186181532, 30.700566902443356, 20.28

100%|██████████| 1/1 [00:00<00:00, 26.89it/s]


Without Watermark
Text after normalization:

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 33.86it/s]


2.3679224856097196
With Watermark
Text after normalization:

<pad>Daniela is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Flo

100%|██████████| 1/1 [00:00<00:00, 17.74it/s]


1.3332686226295192
1
delta: 3.5
ARTICLE
Editor's note: Erica Williams is deputy director of Campus Progress, a project of the Center for American Progress, a Washington-based organization that describes itself as dedicated to progressive causes. She works to engage the millennial generation and communities such as people of color, women and people of faith in the political process. She can be found at ericawilliamsonline.com and on Twitter at @ericawilliamsdc. Erica Williams says the idea that young people don't care about the health care issue is wrong. (CNN) -- As Congress returns to Capitol Hill, back from a recess of contentious town halls on health care reform, one new voice has the potential to break through the seemingly endless deadlock: the voice of young Americans. Just Thursday, there were more than 880,000 Facebook status updates posted with the meme of a demand for health care reform, generated organically and spread virally from young people and other Facebook users acros

100%|██████████| 1/1 [00:00<00:00, 24.33it/s]


Without Watermark
Text after normalization:

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>


100%|██████████| 1/1 [00:00<00:00, 24.85it/s]

16.153275564235432
With Watermark
Text after normalization:

<pad> Erica Williams: Young people don't care about health care reform, but it is an important part of the larger debate. Williams says it is an important part of the fight for reform. She says it is important, but it is an important part. Williams: The debate is difficult for young people.</s>

<pad> Erica Williams: Young people don't care about health care reform, but it is an important part of the larger debate. Williams says it is an important part of the fight for reform. She says it is important, but it is an important part. Williams: The debate is difficult for young people.</s>



100%|██████████| 1/1 [00:00<00:00, 20.13it/s]


13.170193388261763
2
delta: 3.5
ARTICLE
highlights
Anish Goel: The dilemma faced by the U.S. with Iraq could happen in Afghanistan .
He says: A weak Iraqi government with a poorly trained Army faces a fierce insurgency .
When U.S. troops leave Afghanistan, will government be able to stop Taliban, he asks .


100%|██████████| 1/1 [00:00<00:00, 26.26it/s]


Without Watermark
Text after normalization:

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>


100%|██████████| 1/1 [00:00<00:00, 23.09it/s]

2.4067782896530847
With Watermark
Text after normalization:

<pad> A growing conflict is happening across Iraq, but it is still happening.</s>

<pad> A growing conflict is happening across Iraq, but it is still happening.</s>



100%|██████████| 1/1 [00:00<00:00, 22.85it/s]


24.033131217980824
3
delta: 3.5
ARTICLE
One in every 13 people on the planet check Facebook the moment they wake up - leading to reports of Facebook addiction and social anxiety. And, according to one researcher, the fault lies with the little red notification icons and the numbers that litter the social network. Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers, from the site – and discovered that their removal improved a user’s enjoyment. Scroll down for video . Software expert Benjamin Grosser recently created a browser plug-in to automatically remove all metrics from Facebook, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red . The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics from the site, including likes, shares, comments, number of ev

100%|██████████| 1/1 [00:00<00:00, 24.39it/s]


Without Watermark
Text after normalization:

<pad>Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message

100%|██████████| 1/1 [00:00<00:00, 15.23it/s]

1.166726540929002
With Watermark





Text after normalization:

<pad>Software expert Benjamin Grosser recently created a browser plug-in to delete all metrics and add them to the site. Users who watch the videos can't get a full account. The plugin is now installed on Chrome, Firefox, and Safari. Users can't get a full account. The plugin is now installed on the Google Chrome, Firefox, and Safari devices, but it is still working. One study demonstrates metric users who watch the videos are better. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. The most common type is graphopticon. Th

100%|██████████| 1/1 [00:00<00:00, 14.94it/s]


1.7260357915658602
4
delta: 3.5
ARTICLE
Real Madrid superstar Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders . Real Madrid legend Iker Casillas has once again stated his desire to play in the MLS once his glittering career at the Bernabeu is over. The Spain international has spent his entire career with Los Blancos after being handed his debut as a 16-year-old and his current contract expires in 2017. 'I have no problem saying that I would like to play in the US,' Casillas told The Wall Street Journal without putting any timescale on his departure. Real Madrid No 1 Iker Casillas can see himself playing in the US at the end of his career . The Spain international looks to the skies during Real Madrid's 2-1 defeat of Cordoba . The 33-year-old's contract at the Bernabeu expires at the end of the 2016-17 season . 'This has been a tough past year and a half, but it has made me tougher mentally. At the club level, Real Madrid winning lifted a huge

100%|██████████| 1/1 [00:00<00:00, 26.03it/s]


Without Watermark
Text after normalization:

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>


100%|██████████| 1/1 [00:00<00:00, 27.68it/s]

1.5120370494723798
With Watermark
Text after normalization:

<pad>Cristiano Ronaldo sent off against Cordoba for punching, kicking, slapping defenders. Real Madrid legend is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish international is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish club. The Spanish club is currently on contract with the Spanish cl


100%|██████████| 1/1 [00:00<00:00, 16.53it/s]


1.63967049249494
5
delta: 3.5
ARTICLE
Ronny Deila insists he does not care if Celtic edge the title by a single point – so long it's part of a domestic Treble. The Parkhead side find themselves embroiled in a title joust for the first time in three years, with second placed Aberdeen just two points behind going into the New Year. Expected to romp to four-in-a-row, the champions lost to Dundee United and only managed a home goalless draw with bottom club Ross County on Saturday. Celtic manager Ronny Deila insists he isn't concerned how his side win the Scottish Premiership . Conceding it would be 'unthinkable' to surrender the championship, Deila admitted he would settle for limping over the line by the slenderest of margins. 'If we win the Treble – yes,' he said. 'I don't care if it's one point or ten points as long as we win the Treble. 'It's all about trophies. No-one asks how many points do you win by, it's all about trophies. 'That's always been the case in history. You never remem

100%|██████████| 1/1 [00:00<00:00, 26.23it/s]


Without Watermark
Text after normalization:

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>


100%|██████████| 1/1 [00:00<00:00, 25.17it/s]

25.691728459487038
With Watermark
Text after normalization:

<pad> Parkhead beat Ross County on Saturday after losing to Dundee United. Celtic have not yet made their debut on the final day. The Championship is currently second. Scotland lost to Dundee, Aberdeen, St Johnstone and St Johnstone on Saturday. The Championship is second.</s>

<pad> Parkhead beat Ross County on Saturday after losing to Dundee United. Celtic have not yet made their debut on the final day. The Championship is currently second. Scotland lost to Dundee, Aberdeen, St Johnstone and St Johnstone on Saturday. The Championship is second.</s>



100%|██████████| 1/1 [00:00<00:00, 26.30it/s]


10.517632063544463
6
delta: 3.5
ARTICLE
A rare letter written by Napoleon Bonaparte in English will go to the auction block in France on Sunday. The letter could fetch close to $100,000 in the auction in the French town of Fontainebleau, south of Paris. Auction house Osenat describes it as the first letter the French emperor wrote in English on St. Helena, after he was defeated and exiled to the British island to live under military guard. Learning English behind the backs of his captors "was a sort of revenge, a historical revenge" for Napoleon, said Jean-Christophe Chataigner of Osenat. "He was imprisoned by the English... and he wants to continue to have a certain degree of independence, of freedom, and to be able to learn English without his jailers knowing it was a great motivation for him," Chataigner said. The auctioneer said Napoleon picked up English relatively quickly and well. "I think that French people who learn English today make lots more mistakes than Napoleon at the ti

100%|██████████| 1/1 [00:00<00:00, 25.51it/s]


Without Watermark
Text after normalization:

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>


100%|██████████| 1/1 [00:00<00:00, 31.43it/s]


2.5025988416832337
With Watermark
Text after normalization:

<pad> Jean-Christophe Chataigner, an official of the eponymous French newspaper, is selling a rare letter to be sold to auction.</s>

<pad> Jean-Christophe Chataigner, an official of the eponymous French newspaper, is selling a rare letter to be sold to auction.</s>


100%|██████████| 1/1 [00:00<00:00, 31.30it/s]


4.4244454432724964
7
delta: 3.5
ARTICLE
By . Jennifer Newton for MailOnline . Rapper and reality TV star Joe Budden has appeared in court to face charges that he viciously beat his ex-girlfriend in a jealous rage outside a New York restaurant. The 33-year-old Pump It Up singer turned himself into police on Wednesday when he attended the 34th Precinct station house in Inwood, Manhattan with his lawyer present. He was later arraigned on charges of assault, grand larceny and robbery in Manhattan Criminal Court. Rapper Joe Budden, pictured who has appeared in court after being accused of viciously beating up his ex-girlfriend in a jealous rage . However, according to the Daily News, the Slaughterhouse hip hop crew member was later released after his mother posted the $10,000 bail needed to free him. He told them as he left the courthouse: 'It's good to be free. 'I’ve been portrayed worse by better,' he added. Career trajectory: Budden is a member of the Slaughterhouse hip hop crew and a st

100%|██████████| 1/1 [00:00<00:00, 22.62it/s]


Without Watermark
Text after normalization:

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>


100%|██████████| 1/1 [00:00<00:00, 26.36it/s]

1.5526731110388385
With Watermark
Text after normalization:

<pad>Hip Hop star turned himself into police on Wednesday. The rapper was also ordered to sign a $15,000 bail, the Los Angeles Times reports. The rapper was also ordered to sign a $15,000 arrest warrant to pay the $15,000 bail, the Los Angeles Times says. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Dai


100%|██████████| 1/1 [00:00<00:00, 16.56it/s]


1.7931302896722991
8
delta: 3.5
ARTICLE
Pet owners looking to launch the next online sensation or just longing for a new view of their dog's dashing and digging won't have to shop for long to find the perfect holiday gift. Wrap up a dog harness that holds any durable, wearable camera and watch Frisbee fetch, lazy lap naps and every memory in between come alive. GoPro Inc.'s Fetch dog harness fits over Fido's chest or back and holds the small, waterproof camera known for attaching to helmets, surfboards, cars and wrists to film rugged adventures. Sony, Garmin and Kurgo also make camera mounts for dogs. The device is among a legion of gifts that retailers have rounded up for pet wish lists this year. Narrowing it down is tough, but the harness tops the more unique options and creates footage that lasts. Doggone awesome: A dog wearing two GoPro cameras, one on his back and one on his chest is  held on by what is known as a Fetch dog harness . Bark up someone's else's tree: Thor, a French 

100%|██████████| 1/1 [00:00<00:00, 25.12it/s]


Without Watermark
Text after normalization:

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>


100%|██████████| 1/1 [00:00<00:00, 25.51it/s]

1.874236702766601
With Watermark
Text after normalization:

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>



100%|██████████| 1/1 [00:00<00:00, 28.40it/s]


31.274293580217943
9
delta: 3.5
ARTICLE
Two California teachers who were arrested on allegations of engaging in sexual encounters on the beach with their male high school students will not be charged with sexual assault. However, South Hills High School teachers Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were each charged yesterday with one misdemeanor count of contributing to the delinquency of a minor after allegedly providing alcohol to students. They could face a maximum sentence of one year in jail if convicted. Prosecutors said they found insufficient evidence to file sexual assault charges. Scroll down for video . Melody Lippert (left), 38, and Michelle Ghirelli (right), 30, both from Covina, were arrested last month over sexual assault allegations, but will not be charged for sexual assault. However they were each charged on one misdemeanor count of contributing to the delinquency of a minor . As the investigation continues, Lippert and Ghirelli could also

100%|██████████| 1/1 [00:00<00:00, 24.85it/s]


Without Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>


100%|██████████| 1/1 [00:00<00:00, 18.47it/s]


1.325813141716708
With Watermark
Text after normalization:

<pad>Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on suspicion of involvement in sexual assault. The teachers, 38, and Ghirelli were both released on bail on Friday. The pair, 38, had been on an off-school-sponsored camping trip from December to 29. The three, 38, and Ghirelli were both on paid administrative leave, according to authorities. The pair, 38, had been on an off-school-sponsored camping trip from December to 29. The three, 38, and Ghirelli were on paid administrative leave, according to authorities, but were released on Friday. The pair, 38, had been on an off-school-sponsored camping trip from December to 29. The three, 38, and Ghirelli were on paid administrative leave, according to authorities, according to authorities, according to authorities, according to authorities, according to authorities, according to authorities, according to authorities, according to authori

100%|██████████| 1/1 [00:00<00:00, 15.79it/s]


2.5586878032616185
10
delta: 3.5
ARTICLE
By . Christopher Stevens . PUBLISHED: . 17:25 EST, 29 September 2013 . | . UPDATED: . 17:37 EST, 29 September 2013 . Atlantis was buried beneath the waves by a cataclysm aeons ago. Addicts of Seventies’ TV will know the cataclysm had a name: Patrick Duffy. The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas, as the last survivor of that mythical submerged city. He had webbed feet and gills, and he could live off seaweed at the bottom of the ocean. Action packed: Atlantis promises to be a romp through Greek fables . The problem was, Duffy’s acting was so wooden that he floated. The Man From Atlantis was washed up after one series. Keen to avoid any confusion with this disastrous prototype, the producers of BBC1’s new family adventure serial, Atlantis, have changed everything. Atlantis isn’t an undersea kingdom, for a start — it’s a hot and dusty walled city on a Greek island. The hero Jason doesn’t

100%|██████████| 1/1 [00:00<00:00, 20.58it/s]


Without Watermark
Text after normalization:

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>


100%|██████████| 1/1 [00:00<00:00, 25.68it/s]

1.7570570810328938
With Watermark
Text after normalization:

<pad>A comedy star is being introduced as 'Matt and His Daddy' The Man From Atlantis is an animated drama, but it is still very popular in Britain and Europe. Atlantis is an animated drama, but it is still very popular in Britain and Europe, so it is still popular in Europe and the North American West. The story is presented by Jason, an actor, but is still very popular in Britain and Europe, so it is still popular in Europe and the North American West. The story is presented by Jason, an actor, but is still very popular in Britain and Europe, so it is still popular in Europe and the North American West. The story is presented by Jason, an actor, but is still very popular in Britain and Europe, so it is still popular in Europe and the North American West. The story is presented by Jason, an actor, but is still very popular in Britain and Europe, so it is still popular in Europe and the North American West. The story is presen


100%|██████████| 1/1 [00:00<00:00, 15.72it/s]


2.308998881367604
11
delta: 3.5
ARTICLE
Philadelphia, Pennsylvania (CNN) -- Paulette Beale shakes her head at the suggestion, then flashes a contagious smile. "It's still history," she says, to rebut the notion there could be less intensity for President Barack Obama in the African-American community the second time around. "The first history was that he won. The second history's that he won twice. So, it's not just about history the first time, you have to be concerned about the history for the next four years also. You can make history more than one time, you know." Her mother and father stand a few feet away, nodding approvingly. Paul and Altermese Beale founded Paul Beale's Florist 41 years ago. Paulette takes the lead now, but her parents are on hand helping most days in a shop that is an institution in the Ogontz Avenue area of North Philadelphia. "We love him," Altermese Beale says of Obama. "One of the proudest days of my life was the day he was elected." The Beales are determi

100%|██████████| 1/1 [00:00<00:00, 22.47it/s]


Without Watermark
Text after normalization:

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>


100%|██████████| 1/1 [00:00<00:00, 18.97it/s]

3.0043870665178445
With Watermark





Text after normalization:

<pad> Paulette Beale: The idea is for people to have the proper identification, but it is an important part.</s>

<pad> Paulette Beale: The idea is for people to have the proper identification, but it is an important part.</s>


100%|██████████| 1/1 [00:00<00:00, 17.65it/s]


14.212761155905449
12
delta: 3.5
ARTICLE
A Filipino maid has been left covered in burns after her Saudi boss's mother allegedly threw boiling water at her as punishment for not making coffee quick enough. The 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. Once there, she passed her cousin's phone number to hospital staff and urged them to call for help. Attack: This is the 23-year-old who was burned after her boss's mother allegedly threw boiling water at her . When she returned for a check-up, her cousin arrived to take her home. She is now in the care of the Philippine Embassy. Outraged, her cousin has shared images of 'Fatma', which is not her real name, on Facebook. The posts also claim Fatma was beaten by her employer and deprived of food. According to ABS-CBN News, she was scalded after her employer's mother in Riyadh, Saudi Arabia, became enraged at the time it was taking to brew coffee. A tussle ensued, which led to Fatma being

100%|██████████| 1/1 [00:00<00:00, 23.53it/s]


Without Watermark
Text after normalization:

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>


100%|██████████| 1/1 [00:00<00:00, 26.89it/s]

1.412812075113653
With Watermark





Text after normalization:

<pad> 23-year-old is being treated for burns after boss's mother allegedly threw boiling water at her. The 23-year-old is being treated for burns at home. The Filipino maid is being treated for burns at home. The post is part of an attempt to spread awareness. The posts claimed she is beaten by her employer and is deprived of food.</s>

<pad> 23-year-old is being treated for burns after boss's mother allegedly threw boiling water at her. The 23-year-old is being treated for burns at home. The Filipino maid is being treated for burns at home. The post is part of an attempt to spread awareness. The posts claimed she is beaten by her employer and is deprived of food.</s>


100%|██████████| 1/1 [00:00<00:00, 21.75it/s]


3.3347474958721057
13
delta: 3.5
ARTICLE
By . Annabel Fenwick Elliott . A woman has claimed that Covergirl's two-step Bombshell Volume mascara is so hard to get off, it caused her to lose a significant clump of eyelashes in trying. 'I was wiping downward with the cotton ball and noticed a chunk of my eyelashes fell into the palm of my hand', Amy Schavolt, 21, based in West Palm Beach, Florida, tells ABC15. Ms Schavolt was initially impressed with the dramatic results she achieved from the mascara, which Covergirl boasts delivers 'ten times more noticeable lashes vs. bare lashes'. Noticeable indeed! Amy Schavolt claims Covergirl's Bombshell Volume mascara was so hard to get off - even with make-up remover - that she wrenched out a sizable clump of her lashes in trying (pictured) Product fail: Ms Schavolt (pictured) says she was initially impressed with the bold effects of the mascara, but it quickly turned into a 'clumpy, gooey mess' before it caused her lashes to fall out . 'I had a co

100%|██████████| 1/1 [00:00<00:00, 21.13it/s]


Without Watermark
Text after normalization:

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>


100%|██████████| 1/1 [00:00<00:00, 15.41it/s]


1.87450304598904
With Watermark
Text after normalization:

<pad>Amy Schavolt, 21, based in West Palm Beach, Florida, claims it was so hard to get off. The mascara quickly turned into a clump, gooey mess but it eventually turned into a clump. The woman, 23, claims it was the second attempt on the brand. The reviewer, who claims it was the second attempt on the brand, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand.

100%|██████████| 1/1 [00:00<00:00, 14.62it/s]


1.586991606690575
14
delta: 3.5
ARTICLE
He still has the power to excite, exhilarate — and court controversy. Picasso’s long-awaited, vastly over-budget museum reopens today after five years of renovation. Housed in the Marais’s 17th-century Hotel Sale — the former home of a salt-tax collector — it’s an enormous, elegant space, and Picasso’s work fills it to the rafters. The man behind the magic: Some of Pablo Picasso's finest works are found in the Paris museum of his art . I had a sneak preview. There are 5,000 pieces, illustrating the breadth of his repertoire, from early paintings to found-object sculptures, collages and shocking nudes. The collection was given to Paris in 1979 — in lieu of inheritance tax — by Picasso’s heirs. A visitor stands in front of the painting 'Woman Throwing a Stone' and the sculptures (L-R) 'Head of a Woman', 'Bust of a Woman' and 'Bust of a Woman' by Spanish painter Pablo Picasso (1881-1973) Picasso's painting 'Gustave Coquiot' was one of 400 pieces ope

100%|██████████| 1/1 [00:00<00:00, 21.61it/s]


Without Watermark
Text after normalization:

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>


100%|██████████| 1/1 [00:00<00:00, 25.58it/s]

1.3264452522520611
With Watermark
Text after normalization:

<pad>Picasso's 17th-century Hotel Sale reopens today. Picasso's work is an impressive, elegant space for visiting at the museum. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Picassos works of the early 40's. Picasso has worked on Pica


100%|██████████| 1/1 [00:00<00:00, 15.39it/s]


1.4646381101310768
15
delta: 3.5
ARTICLE
highlights
Malaysia's opposition leader calls on supporters to attend rally on Wednesday .
Anwar Ibrahim says Sunday's poll marred by unprecedented fraud .
Election was won by ruling coalition, extending its 56 years in power .
A government spokesperson says allegations of fraud unsubstantiated .


100%|██████████| 1/1 [00:00<00:00, 26.59it/s]


Without Watermark
Text after normalization:

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>


100%|██████████| 1/1 [00:00<00:00, 29.49it/s]


2.6630211599027658
With Watermark
Text after normalization:

<pad> A Malaysian government official praised Anwar, who is backing an election boycott.</s>

<pad> A Malaysian government official praised Anwar, who is backing an election boycott.</s>


100%|██████████| 1/1 [00:00<00:00, 27.51it/s]


11.561721666378702
16
delta: 3.5
ARTICLE
The legal high known as ‘hippy crack’ was used by one in 16 young people, according to new figures. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000  people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is the second most popular drug among young people. Legal high: Discarded Nitrous Oxide canisters and balloons used for inhaling abandoned in campsites at the Glastonbury Festival . On a downer: Illegal drug use has fallen, particularly for cocaine and ecstasy, latest figures show . Nitrous oxide is inhaled to make people feel euphoric and relaxed - but drug experts warn it can cause death on first time use through suffocation. For the first time the 2012-13 Crime Survey for England and Wales included questions about the use of the drug. It is often sold to recreational users in balloons in nightclubs and at 

100%|██████████| 1/1 [00:00<00:00, 27.50it/s]


Without Watermark
Text after normalization:

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>


100%|██████████| 1/1 [00:00<00:00, 26.44it/s]

1.2609500387270791
With Watermark





Text after normalization:

<pad>Drug used by 16 young people last year is at record low. The drug is often sold for drinking, smoking, or smoking, according to new figures. The home office minister says the government is worried about availability of the drug. The home office is worried it can cause death on first time use. The survey was released last year. The survey was released last year. The home office is worried it can cause death on first time use. The survey was released last year. The home office is worried it can cause death on first time use. The survey was released last year. The survey was released last year. The home office is worried it can cause death on first time use. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released 

100%|██████████| 1/1 [00:00<00:00, 16.98it/s]


1.9404004478109336
17
delta: 3.5
ARTICLE
A regiment of soldiers from a pint-sized battalion has been honoured 100 years after they volunteered for service in World War One. The Birkenhead Bantams were formed in November 1914 after the Wirral town's MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. Bigland's petition to the War Office led to the minimum height for soldiers in the Army to be lowered from five foot three inches, to just five foot. The pint-sized battalion has been honoured 100 years after they volunteered for service in World War One . As a result thousands of men who had been refused the chance to serve their country, were able to fight in the Great War. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including at Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country. A pl

100%|██████████| 1/1 [00:00<00:00, 25.92it/s]


Without Watermark
Text after normalization:

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 25.85it/s]

1.157230604239093
With Watermark





Text after normalization:

<pad> Birkenhead Bantams formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to. The Bantam battalions, were originally recruited from Wirral town. The Bantam battalions served in bloody battles. By the end of the war the regiment renamed the 15th and 16th battalions, Cheshire Regiment, were formed from Bantams and tragically lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams formed in November 1914 after MP Alfred Bigland successfully petitioned for men of short stature to be allowed to. The Bantam battalions, were originally recruited from Wirral town. The Bantam battalions served in bloody battles. By the end of the war the regiment renamed the 15th and 16th battalions, Cheshire Regiment, were formed from Bantams and tragically lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 24.71it/s]


2.0125655864668683
18
delta: 3.5
ARTICLE
Sitting in the casualty department of one of the best-known hospitals in Britain, I can’t help but notice it’s packed to the rafters. Yet it’s not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I’m in deep shock and on the point of passing out in agony. This is by far the worst pain I’ve ever suffered. I know my upper arm is broken — I fell off a step and heard the crack — and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: ‘Hope you’ve set aside the whole day. I’ve been waiting ages!’ Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital . As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she’s come for — a headache, no less. ‘No point calling the GP. You can never 

100%|██████████| 1/1 [00:00<00:00, 23.01it/s]


Without Watermark
Text after normalization:

<pad> The Royal Free Hospital in North London looks so strained. It's packed to the rafters. But it's not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I'm in deep shock and on the point of passing out in agony. This is by far the worst pain I've ever suffered. I know my upper arm is broken - I fell off a step and heard the crack - and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: 'Hope you've set aside the whole day. I've been waiting ages!' Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital. As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she's come for — a headache, no less. 'No point calling the GP. You can never get seen,' she says. It's the same sto

100%|██████████| 1/1 [00:00<00:00, 16.60it/s]


1.1526999235278335
With Watermark
Text after normalization:

<pad>A patient is being treated at North London hospital after her death last night. A nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an

100%|██████████| 1/1 [00:00<00:00, 15.35it/s]


1.5343729184498274
19
delta: 3.5
ARTICLE
Washington (CNN)  -- A new book about former Sen. John Edwards paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy -- all in the name of trying to win the presidency. In "The Politician," former Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock. "The Politician" went on sale Saturday. Young described an elaborate plan that allowed Edwards to maintain a mistress while he sought the Democratic presidential nomination in 2008. The plan was funded by two wealthy benefactors, the late trial lawyer Fred Baron and banking heir widow Bunny Mellon, but Mellon was unaware that her money was being used for the mistress. A federal grand jury is investigating payments the former senator's campaign and supporters made to Rielle Hunter, who was a videogr

100%|██████████| 1/1 [00:00<00:00, 20.40it/s]


Without Watermark
Text after normalization:

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>


100%|██████████| 1/1 [00:00<00:00, 17.06it/s]


4.066388505592387
With Watermark
Text after normalization:

<pad> A new book titled "The Politician", titled "Are you out of your mind?" paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy. Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock.</s>

<pad> A new book titled "The Politician", titled "Are you out of your mind?" paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy. Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock.</s>


100%|██████████| 1/1 [00:00<00:00, 19.49it/s]


2.373134696859303
20
{'article_id': ['6fa5e02cf2bbd138da7ac530680843071b3957d8', '2326d69072e1821bc767584dc2c1274db536b982', '3bbfad251b66eec6ad978665d2bfbd60c12a9789', '02e60b6c1206a757fbd15ecbec840e270b490c0f', '1a69268d48af97a75ca677e9aefaaaa363e269a3', 'ca584d07782df66cf85d2c13526cc575005cf223', '8a1b3c68e2f98f5ef496f0ffa873f0709777f1aa', '6ac3db13498e3c2a5a9b58908c6015cbddff9ece', 'c84413ad2b705d85cde4d08591aeeeca8855fe4d', 'b19693ddbe3a374e877326f31ac5376746236183', '641f83379174ad75162641d7cb02819766d50011', '7d173d0978bfc75c6758e399b90b9f6c2dbad957', '30a2fb66e6747a28f554e01ff65b2b14466ad0f0', '49d472de3b6efb8ee70aa3f289d638deeb1470da', '9fab92819f39214f274dd087e8a7d014a137b715', '2dae8a82f56f0c95eda5f5c2c84a5e5059513466', '248b017bab494d691820106cfd0aea60b25b4a95', '5f4211155ff8acff61259dc10d68ea33bc28a6ec', '0ea2b1d30367fdb86d8e51f23a00db35b2025d49', '6c86910d655a49095ccb7e7ecc850bae6e221eed'], 'highlights_ppl': [11.51093422905635, 29.42712186181532, 30.700566902443356, 20.28

100%|██████████| 1/1 [00:00<00:00, 16.93it/s]


Without Watermark
Text after normalization:

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>

<pad> Daniela Pelaez is a graduate of the University of Florida, and is a graduate of the University of Florida.</s>


100%|██████████| 1/1 [00:00<00:00, 32.70it/s]


2.3679224856097196
With Watermark
Text after normalization:

<pad>Daniela is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Florida and is currently working for the University of Flo

100%|██████████| 1/1 [00:00<00:00, 17.54it/s]


1.3332686226295192
1
delta: 4.0
ARTICLE
Editor's note: Erica Williams is deputy director of Campus Progress, a project of the Center for American Progress, a Washington-based organization that describes itself as dedicated to progressive causes. She works to engage the millennial generation and communities such as people of color, women and people of faith in the political process. She can be found at ericawilliamsonline.com and on Twitter at @ericawilliamsdc. Erica Williams says the idea that young people don't care about the health care issue is wrong. (CNN) -- As Congress returns to Capitol Hill, back from a recess of contentious town halls on health care reform, one new voice has the potential to break through the seemingly endless deadlock: the voice of young Americans. Just Thursday, there were more than 880,000 Facebook status updates posted with the meme of a demand for health care reform, generated organically and spread virally from young people and other Facebook users acros

100%|██████████| 1/1 [00:00<00:00, 25.68it/s]


Without Watermark
Text after normalization:

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>

<pad> Erica Williams: Young Americans are often the primary voice of a moral imperative, she says. She says the debate is stalled and hemmed in by older Americans who are in a better economic position than young people. Williams: Young people are often the primary voice of a moral imperative, she says.</s>


100%|██████████| 1/1 [00:00<00:00, 24.40it/s]

16.153275564235432
With Watermark
Text after normalization:

<pad> Erica Williams: Young people don't care about health care, but it is an important part. Williams: The debate is an ongoing battle, because it is an important part. Williams: The debate is an ongoing battle, because it is an important part. Williams: The debate is an ongoing battle, because it is an important part.</s>

<pad> Erica Williams: Young people don't care about health care, but it is an important part. Williams: The debate is an ongoing battle, because it is an important part. Williams: The debate is an ongoing battle, because it is an important part. Williams: The debate is an ongoing battle, because it is an important part.</s>



100%|██████████| 1/1 [00:00<00:00, 24.53it/s]


8.876294466172201
2
delta: 4.0
ARTICLE
highlights
Anish Goel: The dilemma faced by the U.S. with Iraq could happen in Afghanistan .
He says: A weak Iraqi government with a poorly trained Army faces a fierce insurgency .
When U.S. troops leave Afghanistan, will government be able to stop Taliban, he asks .


100%|██████████| 1/1 [00:00<00:00, 26.14it/s]


Without Watermark
Text after normalization:

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>

<pad> The current chaos in Iraq is tragic in almost every way. In retrospect, it is easy to conclude Iraq was not nearly ready enough to assume control of its own security situation when the United States made the decision to withdraw forces in December 2011.</s>


100%|██████████| 1/1 [00:00<00:00, 24.36it/s]

2.4067782896530847
With Watermark





Text after normalization:

<pad> A growing conflict is happening across Iraq to make it look bleak.</s>

<pad> A growing conflict is happening across Iraq to make it look bleak.</s>


100%|██████████| 1/1 [00:00<00:00, 23.92it/s]


29.49145136651545
3
delta: 4.0
ARTICLE
One in every 13 people on the planet check Facebook the moment they wake up - leading to reports of Facebook addiction and social anxiety. And, according to one researcher, the fault lies with the little red notification icons and the numbers that litter the social network. Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers, from the site – and discovered that their removal improved a user’s enjoyment. Scroll down for video . Software expert Benjamin Grosser recently created a browser plug-in to automatically remove all metrics from Facebook, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red . The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics from the site, including likes, shares, comments, number of eve

100%|██████████| 1/1 [00:00<00:00, 22.67it/s]


Without Watermark
Text after normalization:

<pad>Software expert Benjamin Grosser recently created a plug-in to remove all metrics, including likes and share numbers. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message count and more. The vast number of metric is shown ringed in red. The browser plug-in can be installed to Chrome, Firefox and Safari. It automatically removes all metrics, including likes, shares, comments, number of events, group notifications, friend requests and numbers, message

100%|██████████| 1/1 [00:00<00:00, 14.86it/s]

1.166726540929002
With Watermark





Text after normalization:

<pad>A software expert created a browser plug-in to delete all metrics and add them to the site. Users who watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch people watch pe

100%|██████████| 1/1 [00:00<00:00, 12.93it/s]


1.211673155063428
4
delta: 4.0
ARTICLE
Real Madrid superstar Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders . Real Madrid legend Iker Casillas has once again stated his desire to play in the MLS once his glittering career at the Bernabeu is over. The Spain international has spent his entire career with Los Blancos after being handed his debut as a 16-year-old and his current contract expires in 2017. 'I have no problem saying that I would like to play in the US,' Casillas told The Wall Street Journal without putting any timescale on his departure. Real Madrid No 1 Iker Casillas can see himself playing in the US at the end of his career . The Spain international looks to the skies during Real Madrid's 2-1 defeat of Cordoba . The 33-year-old's contract at the Bernabeu expires at the end of the 2016-17 season . 'This has been a tough past year and a half, but it has made me tougher mentally. At the club level, Real Madrid winning lifted a huge 

100%|██████████| 1/1 [00:00<00:00, 21.05it/s]


Without Watermark
Text after normalization:

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>

<pad> Cristiano Ronaldo was sent off against Cordoba for punching, kicking and slapping defenders. Real Madrid legend Iker Casillas has said he would like to play in the US. The 33-year-old has spent his entire career with Los Blancos. Real Madrid legend Iker Casillas has said he would like to play in the US.</s>


100%|██████████| 1/1 [00:00<00:00, 28.05it/s]

1.5120370494723798
With Watermark





Text after normalization:

<pad>Cristiano Ronaldo sent off against Cordoba for punching, kicking, slapping defenders. Real Madrid's superstar is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The 33-year-old is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish club. The Spanish national defender is currently on contract with the Spanish 

100%|██████████| 1/1 [00:00<00:00, 17.07it/s]


1.5835788523239294
5
delta: 4.0
ARTICLE
Ronny Deila insists he does not care if Celtic edge the title by a single point – so long it's part of a domestic Treble. The Parkhead side find themselves embroiled in a title joust for the first time in three years, with second placed Aberdeen just two points behind going into the New Year. Expected to romp to four-in-a-row, the champions lost to Dundee United and only managed a home goalless draw with bottom club Ross County on Saturday. Celtic manager Ronny Deila insists he isn't concerned how his side win the Scottish Premiership . Conceding it would be 'unthinkable' to surrender the championship, Deila admitted he would settle for limping over the line by the slenderest of margins. 'If we win the Treble – yes,' he said. 'I don't care if it's one point or ten points as long as we win the Treble. 'It's all about trophies. No-one asks how many points do you win by, it's all about trophies. 'That's always been the case in history. You never rem

100%|██████████| 1/1 [00:00<00:00, 25.65it/s]


Without Watermark
Text after normalization:

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>

<pad> Celtic have been linked with a move for Ajax defender Stefano Denswil. Celtic have been linked with a move for Ajax defender Virgil van Dijk.</s>


100%|██████████| 1/1 [00:00<00:00, 16.69it/s]

25.691728459487038
With Watermark





Text after normalization:

<pad>Parkhead beat Ross County on Saturday after losing 10-0. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flight is due for next week. The Championship's top flig

100%|██████████| 1/1 [00:00<00:00, 14.59it/s]


1.4450042117432629
6
delta: 4.0
ARTICLE
A rare letter written by Napoleon Bonaparte in English will go to the auction block in France on Sunday. The letter could fetch close to $100,000 in the auction in the French town of Fontainebleau, south of Paris. Auction house Osenat describes it as the first letter the French emperor wrote in English on St. Helena, after he was defeated and exiled to the British island to live under military guard. Learning English behind the backs of his captors "was a sort of revenge, a historical revenge" for Napoleon, said Jean-Christophe Chataigner of Osenat. "He was imprisoned by the English... and he wants to continue to have a certain degree of independence, of freedom, and to be able to learn English without his jailers knowing it was a great motivation for him," Chataigner said. The auctioneer said Napoleon picked up English relatively quickly and well. "I think that French people who learn English today make lots more mistakes than Napoleon at the ti

100%|██████████| 1/1 [00:00<00:00, 17.83it/s]


Without Watermark
Text after normalization:

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>

<pad> Napoleon Bonaparte wrote in English in 1865, and learned it in two or three years.</s>


100%|██████████| 1/1 [00:00<00:00, 23.69it/s]

2.5025988416832337
With Watermark





Text after normalization:

<pad> Jean-Christophe Chataigner, an official of the eponymous French newspaper, is selling a rare letter to be sold to auction.</s>

<pad> Jean-Christophe Chataigner, an official of the eponymous French newspaper, is selling a rare letter to be sold to auction.</s>


100%|██████████| 1/1 [00:00<00:00, 23.17it/s]


4.4244454432724964
7
delta: 4.0
ARTICLE
By . Jennifer Newton for MailOnline . Rapper and reality TV star Joe Budden has appeared in court to face charges that he viciously beat his ex-girlfriend in a jealous rage outside a New York restaurant. The 33-year-old Pump It Up singer turned himself into police on Wednesday when he attended the 34th Precinct station house in Inwood, Manhattan with his lawyer present. He was later arraigned on charges of assault, grand larceny and robbery in Manhattan Criminal Court. Rapper Joe Budden, pictured who has appeared in court after being accused of viciously beating up his ex-girlfriend in a jealous rage . However, according to the Daily News, the Slaughterhouse hip hop crew member was later released after his mother posted the $10,000 bail needed to free him. He told them as he left the courthouse: 'It's good to be free. 'I’ve been portrayed worse by better,' he added. Career trajectory: Budden is a member of the Slaughterhouse hip hop crew and a st

100%|██████████| 1/1 [00:00<00:00, 20.98it/s]


Without Watermark
Text after normalization:

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>

<pad> Joe Budden, 33, was arrested Wednesday after he attended the 34th Precinct station house in Inwood, Manhattan. He was later arraigned on charges of assault, grand larceny and robbery. He was later released after his mother posted the $10,000 bail needed to free him. He was later released after his mother posted the $10,000 bail needed to free him.</s>


100%|██████████| 1/1 [00:00<00:00, 22.57it/s]

1.5526731110388385
With Watermark





Text after normalization:

<pad>Hip Hop star turned himself into police on Wednesday. The rapper was also ordered to sign a $15,000 bail, the Los Angeles Times says. The rapper was also ordered to sign a $15,000 arrest warrant to pay the $15,000 bail, the Los Angeles Times says. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is currently on bail with the Manhattan Criminal Court, the New York Daily News reports. The rapper is curren

100%|██████████| 1/1 [00:00<00:00, 16.20it/s]


1.8097889211658826
8
delta: 4.0
ARTICLE
Pet owners looking to launch the next online sensation or just longing for a new view of their dog's dashing and digging won't have to shop for long to find the perfect holiday gift. Wrap up a dog harness that holds any durable, wearable camera and watch Frisbee fetch, lazy lap naps and every memory in between come alive. GoPro Inc.'s Fetch dog harness fits over Fido's chest or back and holds the small, waterproof camera known for attaching to helmets, surfboards, cars and wrists to film rugged adventures. Sony, Garmin and Kurgo also make camera mounts for dogs. The device is among a legion of gifts that retailers have rounded up for pet wish lists this year. Narrowing it down is tough, but the harness tops the more unique options and creates footage that lasts. Doggone awesome: A dog wearing two GoPro cameras, one on his back and one on his chest is  held on by what is known as a Fetch dog harness . Bark up someone's else's tree: Thor, a French 

100%|██████████| 1/1 [00:00<00:00, 23.33it/s]


Without Watermark
Text after normalization:

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>

<pad> PetSmart is selling a dog harness that holds any durable, wearable camera and watches Frisbee fetch, lazy lap naps and every memory in between come alive. The camera mounts to helmets, surfboards, cars and wrists to film rugged adventures. The camera is among a legion of gifts that retailers have rounded up for pet wish lists this year.</s>


100%|██████████| 1/1 [00:00<00:00, 24.52it/s]

1.874236702766601
With Watermark
Text after normalization:

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>

<pad> Dogs are being sold at PetSmart. The canine camera is popular in 'Star Wars toys and clothing'</s>



100%|██████████| 1/1 [00:00<00:00, 28.68it/s]


31.274293580217943
9
delta: 4.0
ARTICLE
Two California teachers who were arrested on allegations of engaging in sexual encounters on the beach with their male high school students will not be charged with sexual assault. However, South Hills High School teachers Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were each charged yesterday with one misdemeanor count of contributing to the delinquency of a minor after allegedly providing alcohol to students. They could face a maximum sentence of one year in jail if convicted. Prosecutors said they found insufficient evidence to file sexual assault charges. Scroll down for video . Melody Lippert (left), 38, and Michelle Ghirelli (right), 30, both from Covina, were arrested last month over sexual assault allegations, but will not be charged for sexual assault. However they were each charged on one misdemeanor count of contributing to the delinquency of a minor . As the investigation continues, Lippert and Ghirelli could also

100%|██████████| 1/1 [00:00<00:00, 27.26it/s]


Without Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested yesterday. They were charged with conspiracy and contributing to the delinquency of a minor. They could face a maximum sentence of one year in jail if convicted.</s>


100%|██████████| 1/1 [00:00<00:00, 20.56it/s]


1.325813141716708
With Watermark
Text after normalization:

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on suspicion of involvement. They are currently on administrative leave, while Ghirelli would face additional charges.</s>

<pad> Melody Lippert, 38, and Michelle Ghirelli, 30, both from Covina, were arrested last month on suspicion of involvement. They are currently on administrative leave, while Ghirelli would face additional charges.</s>


100%|██████████| 1/1 [00:00<00:00, 22.64it/s]


2.861669828665478
10
delta: 4.0
ARTICLE
By . Christopher Stevens . PUBLISHED: . 17:25 EST, 29 September 2013 . | . UPDATED: . 17:37 EST, 29 September 2013 . Atlantis was buried beneath the waves by a cataclysm aeons ago. Addicts of Seventies’ TV will know the cataclysm had a name: Patrick Duffy. The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas, as the last survivor of that mythical submerged city. He had webbed feet and gills, and he could live off seaweed at the bottom of the ocean. Action packed: Atlantis promises to be a romp through Greek fables . The problem was, Duffy’s acting was so wooden that he floated. The Man From Atlantis was washed up after one series. Keen to avoid any confusion with this disastrous prototype, the producers of BBC1’s new family adventure serial, Atlantis, have changed everything. Atlantis isn’t an undersea kingdom, for a start — it’s a hot and dusty walled city on a Greek island. The hero Jason doesn’t 

100%|██████████| 1/1 [00:00<00:00, 21.13it/s]


Without Watermark
Text after normalization:

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>

<pad> The Man From Atlantis was a sci-fi drama featuring Duffy, future star of the Texas soap opera Dallas. The man from Atlantis was washed up after one series. The producers of the new family adventure serial have changed everything.</s>


100%|██████████| 1/1 [00:00<00:00, 26.98it/s]

1.7570570810328938
With Watermark
Text after normalization:

<pad>A comedy star is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is being introduced as his latest show. The comedy is


100%|██████████| 1/1 [00:00<00:00, 14.64it/s]


1.335833246190111
11
delta: 4.0
ARTICLE
Philadelphia, Pennsylvania (CNN) -- Paulette Beale shakes her head at the suggestion, then flashes a contagious smile. "It's still history," she says, to rebut the notion there could be less intensity for President Barack Obama in the African-American community the second time around. "The first history was that he won. The second history's that he won twice. So, it's not just about history the first time, you have to be concerned about the history for the next four years also. You can make history more than one time, you know." Her mother and father stand a few feet away, nodding approvingly. Paul and Altermese Beale founded Paul Beale's Florist 41 years ago. Paulette takes the lead now, but her parents are on hand helping most days in a shop that is an institution in the Ogontz Avenue area of North Philadelphia. "We love him," Altermese Beale says of Obama. "One of the proudest days of my life was the day he was elected." The Beales are determi

100%|██████████| 1/1 [00:00<00:00, 25.35it/s]


Without Watermark
Text after normalization:

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>

<pad> Paulette Beale says the Obama campaign is trying to get more African-American voters to vote.</s>


100%|██████████| 1/1 [00:00<00:00, 25.67it/s]

3.0043870665178445
With Watermark
Text after normalization:

<pad> Paulette Beale: The idea is for people to have the proper identification, but it is an important part.</s>

<pad> Paulette Beale: The idea is for people to have the proper identification, but it is an important part.</s>



100%|██████████| 1/1 [00:00<00:00, 23.98it/s]


14.212761155905449
12
delta: 4.0
ARTICLE
A Filipino maid has been left covered in burns after her Saudi boss's mother allegedly threw boiling water at her as punishment for not making coffee quick enough. The 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. Once there, she passed her cousin's phone number to hospital staff and urged them to call for help. Attack: This is the 23-year-old who was burned after her boss's mother allegedly threw boiling water at her . When she returned for a check-up, her cousin arrived to take her home. She is now in the care of the Philippine Embassy. Outraged, her cousin has shared images of 'Fatma', which is not her real name, on Facebook. The posts also claim Fatma was beaten by her employer and deprived of food. According to ABS-CBN News, she was scalded after her employer's mother in Riyadh, Saudi Arabia, became enraged at the time it was taking to brew coffee. A tussle ensued, which led to Fatma being

100%|██████████| 1/1 [00:00<00:00, 23.70it/s]


Without Watermark
Text after normalization:

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>

<pad> 23-year-old was not taken to hospital for hours, despite suffering scarring to her back and legs. She passed her cousin's phone number to hospital staff and urged them to call for help.</s>


100%|██████████| 1/1 [00:00<00:00, 30.08it/s]

1.412812075113653
With Watermark
Text after normalization:

<pad> 23-year-old is being treated for burns at home. The woman's boss's mother, 23-year-old Saudi, is being punished for not making coffee. The 23-year-old is being treated for burns at home. The post is part of an attempt to spread awareness. The posts claimed she is beaten by her employer and is deprived of food.</s>

<pad> 23-year-old is being treated for burns at home. The woman's boss's mother, 23-year-old Saudi, is being punished for not making coffee. The 23-year-old is being treated for burns at home. The post is part of an attempt to spread awareness. The posts claimed she is beaten by her employer and is deprived of food.</s>



100%|██████████| 1/1 [00:00<00:00, 26.04it/s]


4.3940566302993425
13
delta: 4.0
ARTICLE
By . Annabel Fenwick Elliott . A woman has claimed that Covergirl's two-step Bombshell Volume mascara is so hard to get off, it caused her to lose a significant clump of eyelashes in trying. 'I was wiping downward with the cotton ball and noticed a chunk of my eyelashes fell into the palm of my hand', Amy Schavolt, 21, based in West Palm Beach, Florida, tells ABC15. Ms Schavolt was initially impressed with the dramatic results she achieved from the mascara, which Covergirl boasts delivers 'ten times more noticeable lashes vs. bare lashes'. Noticeable indeed! Amy Schavolt claims Covergirl's Bombshell Volume mascara was so hard to get off - even with make-up remover - that she wrenched out a sizable clump of her lashes in trying (pictured) Product fail: Ms Schavolt (pictured) says she was initially impressed with the bold effects of the mascara, but it quickly turned into a 'clumpy, gooey mess' before it caused her lashes to fall out . 'I had a co

100%|██████████| 1/1 [00:00<00:00, 24.93it/s]


Without Watermark
Text after normalization:

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>

<pad> Amy Schavolt claims Covergirl's two-step mascara is so hard to get off. It caused her to lose a significant clump of eyelashes in trying.</s>


100%|██████████| 1/1 [00:00<00:00, 19.83it/s]

1.87450304598904
With Watermark





Text after normalization:

<pad>Amy Schavolt, 21, based in West Palm Beach, Florida, claims it was so hard to get off. The mascara quickly turned into a clump, gooey mess but it eventually turned into a clump. The woman, 23, claims it was the second attempt on the brand. The reviewer, who claims it was the second attempt on the brand, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was the second attempt on the brand. The reviewer, 23, claims it was

100%|██████████| 1/1 [00:00<00:00, 15.71it/s]


1.586991606690575
14
delta: 4.0
ARTICLE
He still has the power to excite, exhilarate — and court controversy. Picasso’s long-awaited, vastly over-budget museum reopens today after five years of renovation. Housed in the Marais’s 17th-century Hotel Sale — the former home of a salt-tax collector — it’s an enormous, elegant space, and Picasso’s work fills it to the rafters. The man behind the magic: Some of Pablo Picasso's finest works are found in the Paris museum of his art . I had a sneak preview. There are 5,000 pieces, illustrating the breadth of his repertoire, from early paintings to found-object sculptures, collages and shocking nudes. The collection was given to Paris in 1979 — in lieu of inheritance tax — by Picasso’s heirs. A visitor stands in front of the painting 'Woman Throwing a Stone' and the sculptures (L-R) 'Head of a Woman', 'Bust of a Woman' and 'Bust of a Woman' by Spanish painter Pablo Picasso (1881-1973) Picasso's painting 'Gustave Coquiot' was one of 400 pieces ope

100%|██████████| 1/1 [00:00<00:00, 18.16it/s]


Without Watermark
Text after normalization:

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>

<pad> Picasso's long-awaited, vastly over-budget museum reopens today. The museum is an enormous, elegant space, and Picasso's work fills it to the rafters. Picasso's work fills it to the rafters.</s>


100%|██████████| 1/1 [00:00<00:00, 24.94it/s]

1.3264452522520611
With Watermark
Text after normalization:

<pad>Picasso has taken on 5,000 art works. Picasso has painted many sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures. Picasso has taken on various sculptures


100%|██████████| 1/1 [00:00<00:00, 15.43it/s]


1.4063510064198035
15
delta: 4.0
ARTICLE
highlights
Malaysia's opposition leader calls on supporters to attend rally on Wednesday .
Anwar Ibrahim says Sunday's poll marred by unprecedented fraud .
Election was won by ruling coalition, extending its 56 years in power .
A government spokesperson says allegations of fraud unsubstantiated .


100%|██████████| 1/1 [00:00<00:00, 26.67it/s]


Without Watermark
Text after normalization:

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>

<pad> Malaysia's opposition leader says he is "shocked" by the election results and says he is "shocked" by the results.</s>


100%|██████████| 1/1 [00:00<00:00, 28.78it/s]


2.6630211599027658
With Watermark
Text after normalization:

<pad> A Malaysian government official praised Anwar, who is backing an election boycott.</s>

<pad> A Malaysian government official praised Anwar, who is backing an election boycott.</s>


100%|██████████| 1/1 [00:00<00:00, 27.56it/s]


11.561721666378702
16
delta: 4.0
ARTICLE
The legal high known as ‘hippy crack’ was used by one in 16 young people, according to new figures. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000  people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is the second most popular drug among young people. Legal high: Discarded Nitrous Oxide canisters and balloons used for inhaling abandoned in campsites at the Glastonbury Festival . On a downer: Illegal drug use has fallen, particularly for cocaine and ecstasy, latest figures show . Nitrous oxide is inhaled to make people feel euphoric and relaxed - but drug experts warn it can cause death on first time use through suffocation. For the first time the 2012-13 Crime Survey for England and Wales included questions about the use of the drug. It is often sold to recreational users in balloons in nightclubs and at 

100%|██████████| 1/1 [00:00<00:00, 28.46it/s]


Without Watermark
Text after normalization:

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>

<pad> The legal high known as laughing gas was used by one in 16 young people. The first data to be released on the use of nitrous oxide reveals more than six per cent, or 350,000 people aged 16-24 year used the drug last year. Better known as laughing gas, it is a regular fixture as a legal party and festival drug and is second most popular drug among young people.</s>


100%|██████████| 1/1 [00:00<00:00, 19.12it/s]


1.2609500387270791
With Watermark
Text after normalization:

<pad>Drug used by 16 young people last year is at record low. The drug is often sold for drinking, smoking, or smoking, according to new figures. The home office is worried it can cause death on first time use. The survey was released last year. The survey was released last year. The home office is worried it can cause death on first time use. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year. The survey was released last year

100%|██████████| 1/1 [00:00<00:00, 15.41it/s]


1.8335983118143855
17
delta: 4.0
ARTICLE
A regiment of soldiers from a pint-sized battalion has been honoured 100 years after they volunteered for service in World War One. The Birkenhead Bantams were formed in November 1914 after the Wirral town's MP Alfred Bigland successfully petitioned for men of short stature to be allowed to enlist. Bigland's petition to the War Office led to the minimum height for soldiers in the Army to be lowered from five foot three inches, to just five foot. The pint-sized battalion has been honoured 100 years after they volunteered for service in World War One . As a result thousands of men who had been refused the chance to serve their country, were able to fight in the Great War. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including at Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country. A pl

100%|██████████| 1/1 [00:00<00:00, 19.33it/s]


Without Watermark
Text after normalization:

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>

<pad> Birkenhead Bantams were formed in November 1914 after the Wirral town MP Alfred Bigland petitioned for men of short stature to be allowed to enlist. The Bantams, who were renamed the 15th and 16th Battalions, Cheshire Regiment, served in bloody battles including Ypres and The Somme. By the end of the war in 1918, more than 900 officers and soldiers from the regiment had lost their lives fighting for Queen and country.</s>


100%|██████████| 1/1 [00:00<00:00, 25.41it/s]

1.157230604239093
With Watermark





Text after normalization:

<pad> Birkenhead Bantams formed in November 1914, renamed the 15th and 16th battalions, Cheshire Regiment, during WW1.</s>

<pad> Birkenhead Bantams formed in November 1914, renamed the 15th and 16th battalions, Cheshire Regiment, during WW1.</s>


100%|██████████| 1/1 [00:00<00:00, 26.80it/s]


2.50635976600606
18
delta: 4.0
ARTICLE
Sitting in the casualty department of one of the best-known hospitals in Britain, I can’t help but notice it’s packed to the rafters. Yet it’s not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I’m in deep shock and on the point of passing out in agony. This is by far the worst pain I’ve ever suffered. I know my upper arm is broken — I fell off a step and heard the crack — and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: ‘Hope you’ve set aside the whole day. I’ve been waiting ages!’ Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital . As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she’s come for — a headache, no less. ‘No point calling the GP. You can never ge

100%|██████████| 1/1 [00:00<00:00, 22.66it/s]


Without Watermark
Text after normalization:

<pad> The Royal Free Hospital in North London looks so strained. It's packed to the rafters. But it's not a hectic Saturday night, but a Monday lunchtime at the beginning of December and everyone seems sober. Despite this, I struggle to find a seat, even though it is obvious I'm in deep shock and on the point of passing out in agony. This is by far the worst pain I've ever suffered. I know my upper arm is broken - I fell off a step and heard the crack - and fully expect at least a four-hour wait. A woman having a cigarette outside warned me as I arrived: 'Hope you've set aside the whole day. I've been waiting ages!' Jenni Murray (pictured) fell off a step and broke her arm, she was treated at North London's Royal Free Hospital. As I look around the crowd, I see little evidence of painful injury. I ask the woman next to me what she's come for — a headache, no less. 'No point calling the GP. You can never get seen,' she says. It's the same sto

100%|██████████| 1/1 [00:00<00:00, 17.66it/s]


1.1526999235278335
With Watermark
Text after normalization:

<pad>A patient is being treated at North London hospital after her death last night. A nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an X-ray. The nurse is sent for an

100%|██████████| 1/1 [00:00<00:00, 13.96it/s]


1.5343729184498274
19
delta: 4.0
ARTICLE
Washington (CNN)  -- A new book about former Sen. John Edwards paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy -- all in the name of trying to win the presidency. In "The Politician," former Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock. "The Politician" went on sale Saturday. Young described an elaborate plan that allowed Edwards to maintain a mistress while he sought the Democratic presidential nomination in 2008. The plan was funded by two wealthy benefactors, the late trial lawyer Fred Baron and banking heir widow Bunny Mellon, but Mellon was unaware that her money was being used for the mistress. A federal grand jury is investigating payments the former senator's campaign and supporters made to Rielle Hunter, who was a videogr

100%|██████████| 1/1 [00:00<00:00, 24.45it/s]


Without Watermark
Text after normalization:

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>

<pad> Edwards' aide Andrew Young says he is the point person in arranging rendezvous for Edwards and Hunter and says he agreed to Edwards' request. Young says he agreed to Edwards' request and says he was the father of the child.</s>


100%|██████████| 1/1 [00:00<00:00, 24.50it/s]

4.066388505592387
With Watermark
Text after normalization:

<pad> A new book titled 'The Politician,' paints him as a cold, calculating and reckless politician willing to deny fathering a daughter. In The Politician, former Sen. John Edwards paints him as a cold, calculating and reckless politician willing to deny fathering a daughter, risking his marriage and putting the Democratic Party in potential political jeopardy. Edwards' aide Andrew Young details his efforts to conceal an ongoing extra-marital affair and the birth of a child out-of-wedlock. <unk>?. '' The Politician' went on sale Saturday. Young described an elaborate plan that allowed Edwards to maintain a mistress while he sought the Democratic presidential nomination in 2008.. Edwards denied that he was the daughter's father for more than a year, saying the affair was over before Hunter became pregnant. Edwards' widow Elizabeth Edwards, aged 17, is currently married. He is currently married. He is currently married. He is c


100%|██████████| 1/1 [00:00<00:00, 15.43it/s]

2.2581710771420433
20
{'article_id': ['6fa5e02cf2bbd138da7ac530680843071b3957d8', '2326d69072e1821bc767584dc2c1274db536b982', '3bbfad251b66eec6ad978665d2bfbd60c12a9789', '02e60b6c1206a757fbd15ecbec840e270b490c0f', '1a69268d48af97a75ca677e9aefaaaa363e269a3', 'ca584d07782df66cf85d2c13526cc575005cf223', '8a1b3c68e2f98f5ef496f0ffa873f0709777f1aa', '6ac3db13498e3c2a5a9b58908c6015cbddff9ece', 'c84413ad2b705d85cde4d08591aeeeca8855fe4d', 'b19693ddbe3a374e877326f31ac5376746236183', '641f83379174ad75162641d7cb02819766d50011', '7d173d0978bfc75c6758e399b90b9f6c2dbad957', '30a2fb66e6747a28f554e01ff65b2b14466ad0f0', '49d472de3b6efb8ee70aa3f289d638deeb1470da', '9fab92819f39214f274dd087e8a7d014a137b715', '2dae8a82f56f0c95eda5f5c2c84a5e5059513466', '248b017bab494d691820106cfd0aea60b25b4a95', '5f4211155ff8acff61259dc10d68ea33bc28a6ec', '0ea2b1d30367fdb86d8e51f23a00db35b2025d49', '6c86910d655a49095ccb7e7ecc850bae6e221eed'], 'highlights_ppl': [11.51093422905635, 29.42712186181532, 30.700566902443356, 20.2


