In [1]:
import os
from datetime import datetime
import logging

import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from simpletransformers.seq2seq import Seq2SeqModel, Seq2SeqArgs
from simpletransformers.classification import ClassificationModel, ClassificationArgs
#from simpletransformers.t5 import T5Model, T5Args
import warnings
import pandas as pd
import os
from datetime import datetime
import logging
import glob
from pathlib import Path
import csv

In [2]:
import spacy
from spacy.language import Language
from spacy import displacy
import time

@Language.component("newsent")
def set_custom_boundaries(doc):
    """Mark separator-like tokens as sentence starts.

    Every token except the last one is inspected. When its upper-cased text
    is one of the boundary markers (a semicolon, a double dash, a run of one
    to four newlines, or the words QUARTERLY / STORY), that token's
    ``is_sent_start`` flag is set to True.

    Args:
        doc: The document being processed; it is modified in place.

    Returns:
        The same ``doc`` object that was passed in.
    """
    boundary_markers = (";", "--", "\n", "\n\n", "\n\n\n", "\n\n\n\n", "QUARTERLY", "STORY")
    # The final token is deliberately left out of the scan (doc[:-1]).
    candidates = doc[:-1]
    for tok in candidates:
        starts_sentence = tok.text.upper() in boundary_markers
        if starts_sentence:
            doc[tok.i].is_sent_start = True
    return doc

#spacy.require_gpu()
# Load the pipeline stored at the given relative path, then append two
# components to the end of it: the built-in 'sentencizer' and the custom
# 'newsent' component registered above (set_custom_boundaries).
# NOTE(review): the relative path assumes the notebook is run from its own
# directory - confirm before running elsewhere.
nlp = spacy.load("../../Summary/NER/RelateEntity/train/model-best-local")
nlp.add_pipe('sentencizer')
nlp.add_pipe('newsent', name="newsent", last=True)



<function __main__.set_custom_boundaries(doc)>

In [3]:
def getSentences(inputfile, nlp, text=None):
    """Split a document into a list of stripped sentence strings.

    Args:
        inputfile: Path of a UTF-8 text file. It is read only when ``text``
            is falsy (``None`` or an empty string).
        nlp: Callable applied to the text; its result must expose ``.sents``.
        text: Optional in-memory text to use instead of reading the file.

    Returns:
        list[str]: ``str(sent).strip()`` for every item of ``.sents``, in order.
    """
    if text:
        source = text
    else:
        with open(inputfile, 'r', encoding="utf-8") as handle:
            source = handle.read()

    collected = []
    for span in nlp(source).sents:
        collected.append(str(span).strip())
    return collected

In [4]:
def clean_unnecessary_spaces(out_string):
    """Remove the space that tokenisation leaves before punctuation and clitics.

    Non-string input triggers a warning and is converted with ``str()``
    before cleaning.

    Args:
        out_string: Text to clean (any object; non-strings are stringified).

    Returns:
        str: The text after applying the substitutions below, in order.
    """
    if not isinstance(out_string, str):
        warnings.warn(f">>> {out_string} <<< is not a string.")
        out_string = str(out_string)

    # Applied sequentially; the order is significant and mirrors the
    # original chained ``replace`` calls.
    substitutions = (
        (" .", "."),
        (" ?", "?"),
        (" !", "!"),
        (" ,", ","),
        (" ' ", "'"),
        (" n't", "n't"),
        (" 'm", "'m"),
        (" 's", "'s"),
        (" 've", "'ve"),
        (" 're", "'re"),
    )
    for spaced, tight in substitutions:
        out_string = out_string.replace(spaced, tight)
    return out_string

In [5]:
import hashlib
def dedupList(flist):
    """Return the items of ``flist`` with case-insensitive duplicates removed.

    Two items are duplicates when their ``.upper()`` forms are equal. The
    first occurrence is kept with its original casing and relative order is
    preserved.

    The previous implementation SHA-1 hashed every item and looked the digest
    up in a list, which made the function quadratic. A set of the upper-cased
    strings gives the same result in linear time without hashing by hand.

    Args:
        flist: Iterable of strings.

    Returns:
        list[str]: The de-duplicated items in first-seen order.
    """
    seen = set()
    unique = []
    for item in flist:
        key = item.upper()
        if key not in seen:
            seen.add(key)
            unique.append(item)
    return unique

In [6]:
def isPhrasingRqd(pfile):
    """Report whether a file is free of the ``NOPAD***`` marker.

    Args:
        pfile: Path of a UTF-8 text file to scan line by line.

    Returns:
        bool: False as soon as any line contains ``NOPAD***``; True when the
        whole file was read without finding it.
    """
    with open(pfile, 'r', encoding = "utf-8") as fp:
        marker_found = any("NOPAD***" in row for row in fp)
    return not marker_found

In [7]:
# Per-split directories for the paraphrase data. createTrainingData() globs
# "*_EP_YH.txt" inside the directory it is given and writes its TSV output
# next to those inputs.
devDir = "../../Summary/DATA/PARAPHRASE/Dev"
trainDir = "../../Summary/DATA/PARAPHRASE/Train"
testDir = "../../Summary/DATA/PARAPHRASE/Test"

# Per-split directories for the sentence-pair data.
# NOTE(review): these three names are not referenced in the visible cells -
# confirm they are still needed.
spdevDir = "../../Summary/DATA/SENTPAIR/Dev"
sptrainDir = "../../Summary/DATA/SENTPAIR/Train"
sptestDir = "../../Summary/DATA/SENTPAIR/Test"

# Marker tokens (each with its trailing space) that are deleted from a
# sentence before any other check is made.
rplStr = ["PG*** ", "ED*** ", "SCHQ*** ", "SCBQ*** ", "SCBF*** ", "SCHF*** ", "SCG*** ", "GF*** ", "GQ*** ", "SC*** ", "NOPAD*** "]

def preProcessSent(line):
    """Clean one sentence and decide whether it should be processed further.

    Steps, in order:
      1. Delete every marker listed in ``rplStr``.
      2. Reject the line if it contains a table / CS / ``@@@`` marker or the
         Yahoo Finance URL, or if it is exactly one or two newlines.
      3. Delete newline characters and reject the line if nothing is left.
      4. Drop the leading zero of dollar amounts ("$0.47" becomes "$.47").

    Args:
        line: The sentence text.

    Returns:
        tuple[str, bool]: ``(text, True)`` with the fully cleaned sentence, or
        ``(text, False)`` with the text as it stood when it was rejected.
    """
    for marker in rplStr:
        line = line.replace(marker, "")

    skip_tokens = ("TBLST***", "TBLET***", "CS***", "@@@", "https://finance.yahoo.com")
    if line in ("\n", "\n\n") or any(tok in line for tok in skip_tokens):
        return (line, False)

    line = line.replace("\n", "")
    if not line:
        return (line, False)

    # Order matters: the parenthesised forms are rewritten before the bare one.
    dollar_rewrites = (
        ("$(0.", "$(."),
        ("($0.", "($."),
        ("$0.", "$."),
        ("$ 0.", "$."),
    )
    for old, new in dollar_rewrites:
        line = line.replace(old, new)
    return (line, True)

def _writePhraseFile(file, inputfile, outfile):
    """Write the phrase template TSV (each sentence paired with itself) for one input file."""
    with open(outfile, "w", encoding = "utf-8") as of:
        of.write("filename\tSentence1\tSentence2\n")
        for line in getSentences(file, nlp):
            for s in rplStr:
                line = line.replace(s, "")
            if("TBLST***" in line or "TBLET***" in line or "CS***" in line or "@@@" in line or line == "\n" or line == "\n\n" or "https://finance.yahoo.com" in line):
                continue
            line = line.replace("\n", "")
            if(line == ""):
                continue
            # Unlike preProcessSent(), this path does not rewrite "$0." amounts,
            # so it keeps its own copy of the filter.
            of.write(inputfile+"\t"+line+"\t"+line+"\n")


def _paraphraseSentences(file, inputfile, ppModel, sink=None):
    """Run ppModel over every usable sentence of ``file``; print the results, or write rows to ``sink``."""
    for rawLine in getSentences(file, nlp):
        # preProcessSent applies the same marker stripping, skip rules and
        # dollar rewrites that used to be duplicated inline here.
        line, isProcess = preProcessSent(rawLine)
        if not isProcess:
            continue
        inp = [line]
        if sink is None:
            print("ORIGINAL SENTENCE\n",line)
            print("PREDICTED PARAPHRASE\n")
            predicted = ppModel.predict(inp)
            print(predicted)
            print("\n")
            continue
        predicted = ppModel.predict(inp)
        if(predicted):
            for pred in dedupList(predicted[0]):
                row = inputfile+"\t"+line+"\t"+pred+"\t"+"1"
                sink.write(row+"\n")
                print(row)


def createTrainingData(inputDir, ppModel=None, writetofile=False):
    """Generate paraphrase / sentence-pair data for every ``*_EP_YH.txt`` file in a directory.

    Three modes, selected by the arguments:

    * ``ppModel`` falsy: write ``<name>_phrase.tsv`` next to each input, with
      every kept sentence in both the Sentence1 and Sentence2 columns.
    * ``ppModel`` given, ``writetofile`` falsy: print each sentence and the
      model's predictions; nothing is written.
    * ``ppModel`` given, ``writetofile`` truthy: write ``<name>_sp.tsv`` with
      one row per de-duplicated prediction and Target fixed to "1".

    An output file that already exists is never overwritten; that input is
    skipped. NOTE: the output file is created before the sentences are
    processed, so an interrupted run leaves a partial file that later runs
    will skip - delete it by hand to regenerate.

    Changes from the previous version: the SP file is now opened in a
    ``with`` block so it is closed even if prediction raises; the paraphrase
    path calls preProcessSent() instead of repeating its logic; unused locals
    were removed.

    Args:
        inputDir: Directory searched for ``*_EP_YH.txt`` files; outputs are
            written into the same directory.
        ppModel: Object with a ``predict(list_of_str)`` method, or None.
        writetofile: Only meaningful with ``ppModel``; see the modes above.

    Returns:
        None
    """
    files = glob.glob(inputDir+"/*_EP_YH.txt")
    for file in files:
        print("Input file " + file)
        inputfile = os.path.splitext(os.path.basename(file))[0]
        if not ppModel:
            outfilePath = inputDir + "/" + inputfile + "_phrase.tsv"
            print("Phrase file " + outfilePath)
            outfile = Path(outfilePath)
            if outfile.is_file():
                print("Phrase file " + str(outfile) + " already exists")
                continue
            _writePhraseFile(file, inputfile, outfile)
        elif writetofile:
            print("Input file " + inputfile)
            print(inputDir)
            outfilePath = inputDir + "/" + inputfile + "_sp.tsv"
            print("SP file " + outfilePath)
            outfile = Path(outfilePath)
            if outfile.is_file():
                print("SP file " + str(outfile) + " already exists")
                continue
            with open(outfile, "w", encoding = "utf-8") as f:
                f.write("filename\tSentence1\tSentence2\tTarget\n")
                _paraphraseSentences(file, inputfile, ppModel, f)
        else:
            _paraphraseSentences(file, inputfile, ppModel)

In [8]:
def predictPhrase(line, ppModel, spModel, logf=None):
    """Return the first generated paraphrase of ``line`` that the pair model accepts.

    The line is cleaned with preProcessSent(). If it is rejected there, the
    text returned by preProcessSent() is handed back unchanged. Otherwise
    ``ppModel`` generates candidates, which are de-duplicated and checked one
    at a time with ``spModel`` on the pair ``[cleaned_line, candidate]``. The
    first candidate whose predicted label is 1 is returned, with one leading
    space removed if present. When no candidate is accepted, the cleaned line
    itself is returned.

    Fix: the leading-space check used ``pred[0]``, which raised IndexError
    when an accepted candidate was the empty string; ``str.startswith`` is
    used instead.

    Args:
        line: Raw sentence text.
        ppModel: Object whose ``predict(list_of_str)`` returns a sequence whose
            first element is the list of candidates for the single input.
        spModel: Object whose ``predict(list_of_pairs)`` returns a
            ``(labels, raw_outputs)`` tuple.
        logf: Optional writable text file; when given, everything that is
            printed is also written to it.

    Returns:
        str: The accepted paraphrase, or the cleaned / rejected line.
    """
    nline, isProcess = preProcessSent(line)
    if not isProcess:
        return(nline)
    inp = [nline]
    print("\n\nORIGINAL LINE:\n", line)
    if(logf):
        logf.write("\n\nORIGINAL LINE:\n" + line)
    predicted = ppModel.predict(inp)
    if(predicted):
        predicted = dedupList(predicted[0])
        print("PREDICTED PARAPHRASE:\n", predicted)
        if(logf):
            logf.write("\nPREDICTED PARAPHRASE:\n" + str(predicted))
        for pred in predicted:
            spinp = [[nline, pred]]
            # The raw model outputs are not needed, only the predicted label.
            spred, _ = spModel.predict(spinp)
            print("PREDICTED SENTENCE PAIR:\n", spinp, spred)
            if(logf):
                logf.write("\nPREDICTED SENTENCE PAIR:\n" + str(spinp) + str(spred))
            if(spred[0] == 1):
                # startswith() is safe on an empty string, unlike pred[0].
                if(pred.startswith(" ")):
                    pred = pred[1:]
                print("PREDICTED SENTENCE:\n", pred)
                if(logf):
                    logf.write("\nPREDICTED SENTENCE:\n" + str(pred))
                return(pred)
    print("PREDICTED SENTENCE:\n", nline)
    if(logf):
        logf.write("\nPREDICTED SENTENCE:\n" + nline)
    return(nline)

In [9]:
# Combined paraphrase TSVs: written by writeTrainingData() and read back by
# the train/eval frame preparation cell.
devDataFile = "../../Summary/DATA/PARAPHRASE/Dev/dev.tsv"
trainDataFile = "../../Summary/DATA/PARAPHRASE/Train/train.tsv"

# Combined sentence-pair TSVs.
# NOTE(review): not referenced in the visible cells - confirm usage.
devSPFile = "../../Summary/DATA/SENTPAIR/Dev/dev.tsv"
trainSPFile = "../../Summary/DATA/SENTPAIR/Train/train.tsv"

def writeTrainingData(writeFile, writeDir):
    """Merge the per-document TSV files of a directory into one TSV.

    Files matching ``*_phrase.tsv`` are merged when ``writeDir`` contains the
    substring "PARAPHRASE"; otherwise files matching ``*_sp.tsv`` are merged.
    Rows with any missing value are dropped and all remaining values are
    written as strings.

    Fixes over the previous version:
      * Missing values are dropped before the string cast. Previously
        ``astype(str)`` ran first and turned NaN into the literal "nan", so
        ``dropna()`` never removed anything.
      * Input files are processed in sorted order, so the merged file does
        not depend on the order the file system returns them in.
      * An explicit ValueError names the pattern when nothing matches,
        instead of pandas' "No objects to concatenate".

    Args:
        writeFile: Path of the merged TSV to write (tab separated, with header).
        writeDir: Directory containing the per-document TSV files.

    Raises:
        ValueError: If no input file matches the pattern.
    """
    pattern = "/*_phrase.tsv" if "PARAPHRASE" in writeDir else "/*_sp.tsv"
    files = sorted(glob.glob(writeDir + pattern))
    print(files)
    if not files:
        raise ValueError("No training files found matching " + writeDir + pattern)

    frames = [
        # dropna() must run before astype(str); see the docstring.
        pd.read_csv(file, sep="\t", encoding = "utf-8").dropna().astype(str)
        for file in files
    ]
    result = pd.concat(frames, ignore_index=True)
    print(result)
    result.to_csv(writeFile, sep='\t', index=False, header=True)

In [21]:
# Dev split: create any missing *_phrase.tsv templates (no model is passed),
# then merge the *_phrase.tsv files of the directory into devDataFile.
createTrainingData(devDir)
writeTrainingData(devDataFile, devDir)

Input file ../../Summary/DATA/PARAPHRASE/Dev\APPN_2023-02-16_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Dev/APPN_2023-02-16_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Dev\APPN_2023-02-16_EP_YH_phrase.tsv already exists
Input file ../../Summary/DATA/PARAPHRASE/Dev\BILL_2023-02-02_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Dev/BILL_2023-02-02_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Dev\BILL_2023-02-02_EP_YH_phrase.tsv already exists
Input file ../../Summary/DATA/PARAPHRASE/Dev\CFLT_2022-11-02_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Dev/CFLT_2022-11-02_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Dev\CFLT_2022-11-02_EP_YH_phrase.tsv already exists
Input file ../../Summary/DATA/PARAPHRASE/Dev\CRWD_2022-11-29_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Dev/CRWD_2022-11-29_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Dev\CRWD_2022-11-29_EP_YH_phrase.tsv already exists
Input file ../../Summary

In [22]:
# Train split: create any missing *_phrase.tsv templates (no model is passed),
# then merge the *_phrase.tsv files of the directory into trainDataFile.
createTrainingData(trainDir)
writeTrainingData(trainDataFile, trainDir)

Input file ../../Summary/DATA/PARAPHRASE/Train\APPN_2022-05-06_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Train/APPN_2022-05-06_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Train\APPN_2022-05-06_EP_YH_phrase.tsv already exists
Input file ../../Summary/DATA/PARAPHRASE/Train\APPN_2022-08-04_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Train/APPN_2022-08-04_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Train\APPN_2022-08-04_EP_YH_phrase.tsv already exists
Input file ../../Summary/DATA/PARAPHRASE/Train\APPN_2022-11-03_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Train/APPN_2022-11-03_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Train\APPN_2022-11-03_EP_YH_phrase.tsv already exists
Input file ../../Summary/DATA/PARAPHRASE/Train\APPN_2023-05-09_EP_YH.txt
Phrase file ../../Summary/DATA/PARAPHRASE/Train/APPN_2023-05-09_EP_YH_phrase.tsv
Phrase file ..\..\Summary\DATA\PARAPHRASE\Train\APPN_2023-05-09_EP_YH_phrase.tsv already exists


In [23]:
def _loadParaphraseFrame(dataFile):
    """Load a merged paraphrase TSV as a (prefix, input_text, target_text) frame.

    The Sentence1 / Sentence2 columns are renamed to input_text / target_text,
    rows missing either value are dropped, the remaining values are cast to
    ``str`` and passed through clean_unnecessary_spaces(), and a constant
    "paraphrase" prefix column is added in first position.

    Missing values are dropped before the string cast. The previous code cast
    first, which turned NaN into the literal "nan" and made the later
    ``dropna()`` / ``notna()`` filters no-ops.

    Args:
        dataFile: Path of a tab-separated file with Sentence1 and Sentence2 columns.

    Returns:
        pandas.DataFrame with columns ["prefix", "input_text", "target_text"].
    """
    frame = pd.read_csv(dataFile, sep="\t", encoding = "utf-8")
    frame = frame.rename(
        columns={"Sentence1": "input_text", "Sentence2": "target_text"}
    )
    frame = (
        frame[["input_text", "target_text"]]
        .dropna()
        .astype(str)
        .reset_index(drop=True)
    )
    frame.insert(0, "prefix", "paraphrase")
    for column in ("input_text", "target_text"):
        frame[column] = frame[column].apply(clean_unnecessary_spaces)
    return frame


train_df = _loadParaphraseFrame(trainDataFile)
print("TRAIN DATA ..............")
print(train_df)

eval_df = _loadParaphraseFrame(devDataFile)
print("EVAL DATA ..............")
print(eval_df)

TRAIN DATA ..............
          prefix                                         input_text  \
0     paraphrase                                Appian Corporation.   
1     paraphrase   First quarter cloud subscription revenue incr...   
2     paraphrase         MCLEAN, Va., May 05, 2022 (GLOBE NEWSWIRE)   
3     paraphrase  -- Appian (Nasdaq: APPN) today announced finan...   
4     paraphrase   “We exceeded guidance and grew cloud subscrip...   
...          ...                                                ...   
1956  paraphrase  Accordingly, we are required to add back the n...   
1957  paraphrase  Additionally, we include the anti-dilutive imp...   
1958  paraphrase  We have not reconciled our expectations to non...   
1959  paraphrase  For those reasons, we are also unable to addre...   
1960  paraphrase  Accordingly, a reconciliation for the guidance...   

                                            target_text  
0                                   Appian Corporation.  
1    

In [10]:
# Configure root logging at INFO level, but raise the "transformers" logger
# to ERROR so only its error messages are emitted.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.ERROR)

In [11]:
def count_matches(labels, preds):
    """Count how many labels are matched exactly by their prediction(s).

    Each entry of ``preds`` may be a single string or a list / tuple of
    candidate strings. A label counts as matched when it equals the string,
    or equals at least one of the candidates.

    The previous version compared ``label == pred`` only. The model's predict
    output elsewhere in this notebook is a nested list with several
    candidates per input, and a string never equals a list, so the metric
    always came out as 0.

    Both arguments are printed in full before counting (unchanged behaviour).

    Args:
        labels: Sequence of expected strings.
        preds: Sequence, aligned with ``labels``, of strings or of
            lists / tuples of candidate strings.

    Returns:
        int: Number of positions where the label is matched.
    """
    print(labels)
    print(preds)

    def _is_match(label, pred):
        # Nested case: any candidate may match the label.
        if isinstance(pred, (list, tuple)):
            return label in pred
        return label == pred

    return sum(1 for label, pred in zip(labels, preds) if _is_match(label, pred))


In [14]:
# Training configuration for the paraphrase model. train_df and eval_df must
# already exist (they are built in the frame-preparation cell).
# NOTE(review): sampling is enabled but no random seed is set in this cell,
# so runs are presumably not reproducible - confirm whether one is set elsewhere.
model_args = Seq2SeqArgs()
#model_args = T5Args()
model_args.do_sample = True
model_args.train_batch_size = 4
model_args.use_multiprocessing = False
model_args.num_train_epochs = 2
#model_args.learning_rate = 5e-5
#model_args.no_save = True

# Evaluation settings used while training.
model_args.eval_batch_size = 4
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = True
model_args.evaluate_during_training_steps = 50

# Output / checkpoint settings.
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.save_steps = -1

# Generation settings (length limit, number of returned sequences, top-k / top-p).
model_args.max_length = 64
#model_args.max_seq_length = 32
model_args.num_return_sequences = 10
model_args.top_k = 50
model_args.top_p = 0.95
model_args.fp16 = False
#model_args.num_beams = None

model_args.use_early_stopping = False
#model_args.early_stopping_delta = 0.01
#model_args.early_stopping_metric = "mcc"
#model_args.early_stopping_metric_minimize = False
#model_args.early_stopping_patience = 5
#model_args.evaluate_during_training_steps = 500
#model_args.wandb_project =  "visualization-demo"

# Start from the "facebook/bart-base" checkpoint; use_cuda=True requests a GPU.
model = Seq2SeqModel(
    encoder_decoder_type="bart",
    encoder_decoder_name="facebook/bart-base",
    args=model_args,
    use_cuda=True,
)

#model = T5Model("t5", "t5-base", args=model_args, use_cuda=True)

# Train the model, passing count_matches as the extra metric named "matches".
model.train_model(
    train_df, eval_data=eval_df, matches=count_matches
)
# Final evaluation; note that no extra metric is passed to this call.
results = model.eval_model(eval_df)

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/1505 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model: Training started


Epoch:   0%|          | 0/2 [00:00<?, ?it/s]

Running Epoch 0 of 2:   0%|          | 0/377 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/627 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/157 [00:00<?, ?it/s]

INFO:simpletransformers.seq2seq.seq2seq_model:{'eval_loss': 3.218969947972875, 'matches': 0}
INFO:simpletransformers.seq2seq.seq2seq_model:Saving model into outputs/best_model


[' Appian Corporation.', ' Cloud subscription revenue is $65.8 million in fourth quarter.', 'Cloud subscription revenue is $236.9 million for full year.', ' MCLEAN, Va., Feb. 16, 2023 (GLOBE NEWSWIRE)', '-- Appian (Nasdaq: APPN) today announced financial results for the fourth quarter and full year ended December 31, 2022.', ' Fourth Quarter 2022 Financial Highlights:.', ' Cloud subscription revenue is $65.8 million.', 'Total subscriptions revenue, is $93.2 million.', 'Professional services revenue is $32.5 million.', 'Total revenue is $125.8 million.', 'Cloud subscription revenue retention rate is 115% as of December 31, 2022.', ' GAAP operating loss is $(40.6) million.', 'Non-GAAP operating loss is $(26.8) million.', ' GAAP net loss is $(34.4) million.', 'GAAP net loss per share is $(0.47).', 'Non-GAAP net loss is $(20.6) million.', 'Non-GAAP net loss per share is $(0.28).', 'Invalid Sentence.', 'Invalid Sentence.', 'We do not forecast foreign exchange rate movements.', ' Adjusted EB

INFO:simpletransformers.seq2seq.seq2seq_utils: Creating features from dataset file at cache_dir/


  0%|          | 0/627 [00:00<?, ?it/s]

Generating outputs:   0%|          | 0/157 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [12]:
# Second argument set. It rebinds model_args and differs from the training
# cell in batch sizes (8 / 16), top_k (30), top_p (0.97) and in setting
# wandb_project. The previous values are kept as commented-out lines.
model_args = Seq2SeqArgs()
#model_args = T5Args()
model_args.do_sample = True
#model_args.dataset_class = False
#model_args.train_batch_size = 4
model_args.train_batch_size = 8
model_args.use_multiprocessing = False
model_args.num_train_epochs = 2
#model_args.learning_rate = 5e-5
#model_args.no_save = True

# Evaluation settings.
#model_args.eval_batch_size = 4
model_args.eval_batch_size = 16
model_args.evaluate_generated_text = True
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = True
model_args.evaluate_during_training_steps = 50

# Output / checkpoint settings.
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.save_steps = -1

# Generation settings (length limit, number of returned sequences, top-k / top-p).
model_args.max_length = 64
#model_args.max_seq_length = 32
model_args.num_return_sequences = 10
#model_args.top_k = 50
model_args.top_k = 30
#model_args.top_p = 0.95
model_args.top_p = 0.97
model_args.fp16 = False
#model_args.num_beams = None

model_args.use_early_stopping = False
#model_args.early_stopping_delta = 0.01
#model_args.early_stopping_metric = "mcc"
#model_args.early_stopping_metric_minimize = False
#model_args.early_stopping_patience = 5
#model_args.evaluate_during_training_steps = 500
model_args.wandb_project =  "visualization-demo"

In [13]:
# Build a model from the local "outputs" path with the current model_args.
# NOTE(review): presumably this is the directory written by the training
# cell - confirm it holds the intended checkpoint.
model = Seq2SeqModel(
    encoder_decoder_type="bart", encoder_decoder_name="outputs", args=model_args
)

# Smoke test: print the predictions for a single example sentence.
print(
    model.predict(
        [
            "Non-GAAP gross profit was $225.4 million, representing an 86.7% non-GAAP gross margin, compared to $133.5 million, or a 85.3% non-GAAP gross margin, in the second quarter of fiscal 2022"
        ]
    )
)

outputs\model_args.json


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.', 'Non-GAAP gross profit is $225.4 million and non-GAap gross margin is 86.7%.']]


In [10]:
# Test split: a model is passed and writetofile keeps its default (False), so
# each sentence and its predictions are printed and nothing is written.
createTrainingData(testDir, model)

Input file ../../Summary/DATA/PARAPHRASE/Test\ZS_2022-05-26_EP_YH.txt
Detected: 

Detected: 

Detected: 

Detected: 

Detected: 



Detected: 

Detected: 



Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: --
Detected: 


Detected: 

Detected: 



Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: Story
Detected: 



Detected: 


Detected: 


Detected: 


Detected: 


Detected: 



Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


Detected: 



Detected: 


Detected: 



Detected: 

Detected: 

Detected: 

Detected: 

Detected: 




Detected: 


Detected: 


Detected: 


Detected: 


Detected: 


71
ORIGINAL SENTENCE
  Zscaler, Inc.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.', ' Zscaler, Inc.']]


ORIGINAL SENTENCE
  Third Quarter Highlights.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.', ' Third Quarter Highlights.']]


ORIGINAL SENTENCE
  Revenue grows 63% year-over-year to $286.8 million.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Revenue is $286.8 million.', ' Revenue grows 63% year-over-year to $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue grows 63% year-over-year to $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.']]


ORIGINAL SENTENCE
  Calculated billings grows 54% year-over-year to $345.6 million.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.', ' Calculated billings is $345.6 million.']]


ORIGINAL SENTENCE
  Deferred revenue grows 65% year-over-year to $818.7 million.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.', ' Deferred revenue is $818.7 million.']]


ORIGINAL SENTENCE
  GAAP net loss of $101.4 million compared to GAAP net loss of $58.5 million on a year-over-year basis.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.']]


ORIGINAL SENTENCE
  Non-GAAP net income of $24.7 million compared to non-GAAP net income of $21.4 million on a year-over-year basis.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.', ' Non-GAAP net income is $24.7 million.']]


ORIGINAL SENTENCE
  SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)', ' SAN JOSE, Calif., May 26, 2022 (GLOBE NEWSWIRE)']]


ORIGINAL SENTENCE
 -- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['-- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.', '-- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.', '-- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.', '-- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.', '-- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.', '-- Zscaler, Inc. (Nasdaq: ZS), the leader in cloud security, today announced financial results for its third quarter of fiscal year 2022, ended April 30, 2022.', '-- Zscaler, Inc. (N

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' "Enterprises continue to consolidate point products in favor of our integrated Zero Trust security platform, resulting in larger, multi-year commitments to Zscaler.', ' "Enterprises continue to consolidate point products in favor of our integrated Zero Trust security platform, resulting in larger, multi-year commitments to Zscaler.', ' "Enterprises continue to consolidate point products in favor of our integrated Zero Trust security platform, resulting in larger, multi-year commitments to Zscaler.', ' "Enterprises continue to consolidate point products in favor of our integrated Zero Trust security platform, resulting in larger, multi-year commitments to Zscaler.', ' "Enterprises continue to consolidate point products in favor of our integrated Zero Trust security platform, resulting in larger, multi-year commitments to Zscaler.', ' "Enterprises continue to consolidate point products in favor of our integrated Zero Trust security platform, resulting in larger, multi-year commitment

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['We delivered 63% revenue growth and 54% billings growth, while also generating a strong free cash flow margin of 15% for the third quarter.', 'We delivered 63% revenue growth and 54% billings growth, while also generating a strong free cash flow margin of 15% for the third quarter.', 'We delivered 63% revenue growth and 54% billings growth, while also generating a strong free cash flow margin of 15% for the third quarter.', 'We delivered 63% revenue growth and 54% billings growth, while also generating a strong free cash flow margin of 15% for the third quarter.', 'We delivered 63% revenue growth and 54% billings growth, while also generating a strong free cash flow margin of 15% for the third quarter.', 'We delivered 63% revenue growth and 54% billings growth in the third quarter.', 'We delivered 63% revenue growth and 54% billings growth, while also generating a strong free cash flow margin of 15% for the third quarter.', 'We delivered 63% revenue growth and 54% billings growth, w

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['As a result of increasing demand, we are also raising our fiscal year guidance on all financial metrics,” said Jay Chaudhry, Chairman and CEO of Zscaler. “', 'As a result of increasing demand, we are also raising our fiscal year guidance on all financial metrics,” said Jay Chaudhry, Chairman and CEO of Zscaler. “', 'As a result of increasing demand, we are also raising our fiscal year guidance on all financial metrics,” said Jay Chaudhry, Chairman and CEO of Zscaler. “', 'As a result of increasing demand, we are also raising our fiscal year guidance on all financial metrics,” said Jay Chaudhry, Chairman and CEO of Zscaler. “', 'As a result of increasing demand, we are also raising our fiscal year guidance on all financial metrics,” said Jay Chaudhry, Chairman and CEO of Zscaler. “', 'As a result of increasing demand, we are also raising our fiscal year guidance on all financial metrics,” said Jay Chaudhry, Chairman and CEO of Zscaler. “', 'As a result of increasing demand, we are al

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Demanding enterprises look to Zscaler as their strategic partner of choice in their Zero Trust journey, as the world races towards network and security transformation.”.', 'Demanding enterprises look to Zscaler as their strategic partner of choice in their Zero Trust journey, as the world races towards network and security transformation.”.', 'Demanding enterprises look to Zscaler as their strategic partner of choice in their Zero Trust journey, as the world races towards network and security transformation.”.', 'Demanding enterprises look to Zscaler as their strategic partner of choice in their Zero Trust journey, as the world races towards network and security transformation.”.', 'Demanding enterprises look to Zscaler as their strategic partner of choice in their Zero Trust journey, as the world races towards network and security transformation.”.', 'Demanding enterprises look to Zscaler as their strategic partner of choice in their Zero Trust journey, as the world races towards n

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.', ' Third Quarter Fiscal 2022 Financial Highlights.']]


ORIGINAL SENTENCE
  Revenue: $286.8 million, an increase of 63% year-over-year.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.', ' Revenue is $286.8 million.']]


ORIGINAL SENTENCE
  Income (loss) from operations : GAAP loss from operations was $86.6 million, or 30% of total revenue, compared to $43.9 million, or 25% of total revenue, in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.', ' GAAP loss from operations is $86.6 million.']]


ORIGINAL SENTENCE
 Non-GAAP income from operations was $27.2 million, or 9% of total revenue, compared to $22.9 million, or 13% of total revenue, in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million and non-GAPS operating margin is 9%.', 'Non-GAAP income from operations is $27.2 million and non-GAPS operating margin is 9%.', 'Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million.', 'Non-GAAP income from operations is $27.2 million.']]


ORIGINAL SENTENCE
  Net income (loss): GAAP net loss was $101.4 million, compared to $58.5 million in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.', ' GAAP net loss is $101.4 million.']]


ORIGINAL SENTENCE
 Non-GAAP net income was $24.7 million, compared to $21.4 million in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.', 'Non-GAAP net income is $24.7 million.']]


ORIGINAL SENTENCE
  Net income (loss) per share: GAAP net loss per share was $.72, compared to $.43 in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.', ' GAAP net loss per share is $.72.']]


ORIGINAL SENTENCE
 Non-GAAP net income per share was $.17, compared to $.15 in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.', 'Non-GAAP net income per share is $.17.']]


ORIGINAL SENTENCE
  Cash flow: Cash provided by operations was $77.2 million, or 27% of revenue, compared to $73.4 million, or 42% of revenue, in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Cash flow: Cash flow is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.', ' Cash flow: Cash flow is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.', ' Cash flow: Cash flow is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.', ' Cash flow: Cash provided by operations is $77.2 million.']]


ORIGINAL SENTENCE
 Free cash flow was $43.7 million, or 15% of revenue, compared to $55.8 million, or 32% of revenue, in the third quarter of fiscal 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million and free cash flow cash flow growth is 15%.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.', 'Free cash flow is $43.7 million.']]


ORIGINAL SENTENCE
  Deferred revenue: $818.7 million as of April 30, 2022, an increase of 65% year-over-year.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.', ' Deferred revenue is $818.7 million as of April 30, 2022.']]


ORIGINAL SENTENCE
  Cash, cash equivalents and short-term investments: $1,657.9 million as of April 30, 2022, an increase of $155.3 million from July 31, 2021.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Cash, cash equivalents and short-term investments: $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments: $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments is $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments: $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments is $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments is $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments is $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments is $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments: $1,657.9 million as of April 30, 2022.', ' Cash, cash equivalents and short-term investments is $1,657.9 million as of April 30, 2022.']]


ORIGINAL SENTENCE
 Story continues.
PREDICT

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Story continues.', 'Story continues.', 'Story continues.', 'Story continues.', 'Story continues.', 'Story continues.', 'Story continues.', 'Story continues.', 'Story continues.', 'Story continues.']]


ORIGINAL SENTENCE
  Recent Business Highlights.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.', ' Recent Business Highlights.']]


ORIGINAL SENTENCE
  Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations to protect enterprises from the most sophisticated cyberattacks.', ' Launched industry-first Security Service Edge (SSE) innovations t

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Delivered as part of the Zscaler Zero Trust Exchange, these innovations establish a new standard for ZTNA to minimize the attack surface, while stopping threats with private app protection, integrated deception, and privileged remote access capabilities for business and OT systems.', 'Delivered as part of the Zscaler Zero Trust Exchange, these innovations establish a new standard for ZTNA to minimize the attack surface, while stopping threats with private app protection, integrated deception, and privileged remote access capabilities for business and OT systems.', 'Delivered as part of the Zscaler Zero Trust Exchange, these innovations establish a new standard for ZTNA to minimize the attack surface, while stopping threats with private app protection, integrated deception, and privileged remote access capabilities for business and OT systems.', 'Delivered as part of the Zscaler Zero Trust Exchange, these innovations establish a new standard for ZTNA to minimize the attack surface, w

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Released annual Zscaler ThreatLabZ research revealing up to a 400% increase in Phishing-as-a-Service as the key source of attacks across critical industries and consumers globally, underscoring the urgency for businesses and users to adopt a Zero Trust security model.', 'Released annual Zscaler ThreatLabZ research revealing up to a 400% increase in Phishing-as-a-Service as the key source of attacks across critical industries and consumers globally, underscoring the urgency for businesses and users to adopt a Zero Trust security model.', 'Released annual Zscaler ThreatLabZ research revealing up to a 400% increase in Phishing-as-a-Service as the key source of attacks across critical industries and consumers globally, underscoring the urgency for businesses and users to adopt a Zero Trust security model.', 'Released annual Zscaler ThreatLabZ research revealing up to a 400% increase in Phishing-as-a-Service as the key source of attacks across critical industries and consumers globally, 

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hypergrowth strategy with the appointment of Brendan Castle, Chief People Officer.', 'Invested in Zscaler’s hyp

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[["Bringing 30 years of experience from organizations like Google and Citibank, Castle will lead the expansion of Zscaler's global People and Culture organization, including talent acquisition, learning and development, internal business partners, diversity, equity and inclusion, and our workplace experience.", "Bringing 30 years of experience from organizations like Google and Citibank, Castle will lead the expansion of Zscaler's global People and Culture organization, including talent acquisition, learning and development, internal business partners, diversity, equity and inclusion, and our workplace experience.", "Bringing 30 years of experience from organizations like Google and Citibank, Castle will lead the expansion of Zscaler's global People and Culture organization, including talent acquisition, learning and development, internal business partners, diversity, equity and inclusion, and our workplace experience.", "Bringing 30 years of experience from organizations like Google a

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['As a founding member of the Cloud Security Alliance (CSA), Zscaler, along with alliance partners CrowdStrike and Okta, announced the formation of the Zero Trust Advancement Center to bring together existing research and education projects at CSA that will be disseminated online and through its global network', 'As a founding member of the Cloud Security Alliance (CSA), Zscaler, along with alliance partners CrowdStrike and Okta, announced the formation of the Zero Trust Advancement Center to bring together existing research and education projects at CSA that will be disseminated online and through its global network', 'As a founding member of the Cloud Security Alliance (CSA), Zscaler, along with alliance partners CrowdStrike and Okta, announced the formation of the Zero Trust Advancement Center to bring together existing research and education projects at CSA that will be disseminated online and through its global network', 'As a founding member of the Cloud Security Alliance (CSA),

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.', ' Financial Outlook.']]


ORIGINAL SENTENCE
  For the fourth quarter of fiscal 2022, we expect:.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.', ' For the fourth quarter of fiscal 2022, we expect:.']]


ORIGINAL SENTENCE
  Total revenue of $304 million to $306 million.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Total revenue is expected to be between $304 million and $306 million.', ' Total revenue is expected to be between $304 million and $306 million.', ' Total revenue is expected to be between $304 million and $306 million.', ' Total revenue is expected to be between $304 million and $306 million.', ' Total revenue is expected to be between $304 million to $306 million.', ' Total revenue is expected to be between $304 million and $306 million.', ' Total revenue is expected to be between $304 million and $306 million.', ' Total revenue is expected to be between $304 million to $306 million.', ' Total revenue is expected to be between $304 million to $306 million.', ' Total revenue is expected to be between $304 million and $306 million.']]


ORIGINAL SENTENCE
  Non-GAAP income from operations of $33 million to $34 million.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million to $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.', ' Non-GAAP income from operations is expected to be between $33 million and $34 million.']]


ORIGINAL SENTENCE
  Non-GAAP net income per share of approximately $.20 to $.21, assum

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Non-GAAP net income per share is expected to be between $.20 and $.21, assuming approximately 146 million to 147 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.20 to $.21, assuming approximately 146 million to 147 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.20 to $.21, assuming approximately 146 million to 147 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.20 and $.21, assuming approximately 146 million to 147 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.20 and $.21, assuming approximately 146 million to 147 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.20 and $.21, assuming approximately 146 million to 147 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.20 and $.21, assuming approx

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.', ' For the full year fiscal 2022, we expect:.']]


ORIGINAL SENTENCE
  Total revenue of approximately $1.078 billion.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.', ' Total revenue is expected to be $1.078 billion.']]


ORIGINAL SENTENCE
  Calculated billings of $1.425 billion to $1.430 billion.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Calculated billings is expected to be between $1.425 billion and $1$.430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1430 billion.', ' Calculated billings is expected to be between $1.425 billion to $1,430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1."430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1-1.430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1,430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1,430 billion.', ' Calculated billings is expected to be between $1.425 billion to $1,430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1,430 billion.', ' Calculated billings is expected to be between $1.425 billion and $1 billion.']]


ORIGINAL SENTENCE
  Non-GAAP income from operations of $106 million to $108 million.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Non-GAAP income from operations is expected to be between $106 million to $108 million.', ' Non-GAAP income from operations is expected to be between $106 million and $108 million.', ' Non-GAAP income from operations is expected to be between $106 million to $108 million.', ' Non-GAAP income from operations is expected to be between $106 million and $108 million.', ' Non-GAAP income from operations is expected to be between $106 million to $108 million.', ' Non-GAAP income from operations is expected to be between $106 million and $108 million.', ' Non-GAAP income from operations is expected to be between $106 million and $108 million.', ' Non-GAAP income from operations is expected to be between $106 million and $108 million.', ' Non-GAAP income from operations is expected to be between $106 million and $108 million.', ' Non-GAAP income from operations is expected to be between $106 million to $108 million.']]


ORIGINAL SENTENCE
  Non-GAAP net income per share of $.64 to $.65, as

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming approximately 147 million to 148 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming approximately 147 million to 148 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming approximately 147 million to 148 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming approximately 147 million to 148 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming approximately 147 million to 148 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming approximately 147 million to 148 million common shares outstanding.', ' Non-GAAP net income per share is expected to be between $.64 and $.65, assuming appr

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.', ' These statements are forward-looking and actual results may differ materially.']]


ORIGINAL SENTENCE
 Refer to the Forward-Looking Statements safe harbor below for information on the factors that could cause our actual results to differ materially 

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Refer to the Forward-Looking Statements safe harbor below for information on the factors that could cause our actual results to differ materially from these forward-looking statements.', 'Refer to the Forward-Looking Statements safe harbor below for information on the factors that could cause our actual results to differ materially from these forward-looking statements.', 'Refer to the Forward-Looking Statements safe harbor below for information on the factors that could cause our actual results to differ materially from these forward-looking statements.', 'Refer to the Forward-Looking Statements safe harbor below for information on the factors that could cause our actual results to differ materially from these forward-looking statements.', 'Refer to the Forward-Looking Statements safe harbor below for information on the factors that could cause our actual results to differ materially from these forward-looking statements.', 'Refer to the Forward-Looking Statements safe harbor below

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Guidance for non-GAAP income from operations excludes stock-based compensation expense and related employer payroll taxes, amortization expense of acquired intangible assets, asset impairment related to facility exit, amutization of debt discount and issuance costs and income tax effects generated by intangible assets acquired in business acquisitions.', ' Guidance for non-GAAP income from operations excludes stock-based compensation expense and related employer payroll taxes, amortization expense of acquired intangible assets, asset impairment related to facility exit, amORTization of debt discount and issuance costs and income tax effects generated by intangible assets acquired in business acquisitions.', ' Guidance for non-GAAP income from operations excludes stock-based compensation expense and related employer payroll taxes, amortization expense of acquired intangible assets, asset impairment related to facility exit, amORTization of debt discount and issuance costs and income

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Guidance for non-GAAP net income per share includes the anti-dilutive impact of the capped call transactions entered into in connection with our convertible senior notes.', 'Guidance for non-GAAP net income per share includes the anti-dilutive impact of the capped call transactions entered into in connection with our convertible senior notes.', 'Guidance for non-GAAP net income per share includes the anti-dilutive impact of the capped call transactions entered into in connection with our convertible senior notes.', 'Guidance for non-GAAP net income per share includes the anti-dilutive impact of the capped call transactions entered into in connection with our convertible senior notes.', 'Guidance for non-GAAP net income per share includes the anti-dilutive impact of the capped call transactions entered into in connection with our convertible senior notes.', 'Guidance for non-GAAP net income per share includes the anti-dilutive impact of the capped call transactions entered into in co

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['We have not reconciled our expectations to non-GAAP income from operations and non-PAAP net income per share to their most directly comparable GAAP measures because certain items are out of our control or cannot be reasonably predicted.', 'We have not reconciled our expectations to non-GAAP income from operations and non-gaAP net income per share to their most directly comparable GAAP measures because certain items are out of our control or cannot be reasonably predicted.', 'We have not reconciled our expectations to non-GAAP income from operations and non-AdjustAP net income per share to their most directly comparable GAAP measures because certain items are out of our control or cannot be reasonably predicted.', 'We have not reconciled our expectations to non-GAAP income from operations and non-DAAP net income per share to their most directly comparable GAAP measures because certain items are out of our control or cannot be reasonably predicted.', 'We have not reconciled our expect

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Accordingly, a reconciliation for the guidance for non-GAAP income from operations and non-gaAP net income per share is not available without unreasonable effort.', 'Accordingly, a reconciliation for the guidance for non-GAAP income from operations and non-gaAP net income per share is not available without unreasonable effort.', 'Accordingly, a reconciliation for the guidance for non-GAAP income from operations and non-gaAP net income per share is not available without unreasonable effort.', 'Accordingly, a reconciliation for the guidance for non-GAAP income from operations and non-gaAP net income per share is not available without unreasonable effort.', 'Accordingly, a reconciliation for the guidance for non-GAAP income from operations and non-gaAP net income per share is not available without unreasonable effort.', 'Accordingly, a reconciliation for the guidance for non-GAAP income from operations and non-gaAP net income per share is not available without unreasonable effort.', 'A

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.', ' Conference Call and Webcast Information.']]


ORIGINAL SENTENCE
  Zscaler will host a conference call for analysts and investors to discuss its third quarter fiscal 2022 earnings results and outlook for its fourth quarter of fiscal 2022 and full year fiscal 2022 today at 1:30 p.m. Pacific time (4:30 p.m. Eastern time).
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Zscaler will host a conference call for analysts and investors to discuss its third quarter fiscal 2022 earnings results and outlook for its fourth quarter of fiscal 2022 and full year fiscal 2022 today at 1:30 p.m. Pacific time (4:30 PM. Eastern time).', ' Zscaler will host a conference call for analysts and investors to discuss its third quarter fiscal 2022 earnings results and outlook for its fourth quarter of fiscal 2022 and full year fiscal 2022 today at 1:30 p.m. Pacific time (4:30 PM. Eastern time).', ' Zscaler will host a conference call for analysts and investors to discuss its third quarter fiscal 2022 earnings results and outlook for its fourth quarter of fiscal 2022 and full year fiscal 2022 today at 1:30 p.m. Pacific time (4:30 PM. Eastern time).', ' Zscaler will host a conference call for analysts and investors to discuss its third quarter fiscal 2022 earnings results and outlook for its fourth quarter of fiscal 2022 and full year fiscal 2022 today at 1:30 p.m. Pacifi

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.', 'Time in Thursday, May 26, 2022 is 1:30 p.m. PT T.']]


ORIGINAL SENTENCE
 Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com .
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.', 'Webcast in Thursday, May 26, 2022 is https://ir.zscaler.com.']]


ORIGINAL SENTENCE
 Dial-in Number in Thursday, May 26, 2022 is 918 - 922 - 3018 T .
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[['Dial-in Number in Thursday, May 26, 2022 is 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is expected to be between 918 - 922 - 3018 T.', 'Dial-in Number in Thursday, May 26, 2022 is 918 - 922 - 3018 T.']]


ORIGINAL SENTENCE
  Upcoming Conferences.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.', ' Upcoming Conferences.']]


ORIGINAL SENTENCE
  Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.', ' Fourth quarter of fiscal 2022 virtual investor conference participation schedule:.']]


ORIGINAL SENTENCE
  Loop Software Conference Wednesday, June 1, 2022.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.', ' Loop Software Conference Wednesday, June 1, 2022.']]


ORIGINAL SENTENCE
  Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[" Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022.", " Bank of America's 2022 Global Technology Conference Wednesday, June 8, 2022."]]


ORIGINAL SENTENCE
  4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.', ' 4th Annual Mizuho Cybersecurity Summit 2022 Monday, June 13, 2022.']]


ORIGINAL SENTENCE
  Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.
PREDICTED PARAPHRASE



Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

[[' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will be available on the Investor Relations section of the Zscaler website at https://ir.zscaler.com.', ' Sessions which offer a webcast will b

In [18]:
# Build the sentence-pair (SP) training data for the Train split.
# Both helpers, `sptrainDir`, `trainSPFile` and `model` are defined in earlier cells.
# Step 1: per the recorded output, this walks the *_EP_YH.txt files in `sptrainDir`
# and reports a matching <name>_sp.tsv for each, noting files that already exist.
# NOTE(review): `model` is presumably the paraphrase model used to generate pairs — confirm.
createTrainingData(sptrainDir, model, writetofile=True)
# Step 2: presumably gathers the per-file TSVs from `sptrainDir` into the single
# `trainSPFile` that a later cell reads with pd.read_csv — TODO confirm against the helper.
writeTrainingData(trainSPFile, sptrainDir)

Input file ../../Summary/DATA/SENTPAIR/Train\APPN_2022-11-03_EP_YH.txt
Input file APPN_2022-11-03_EP_YH
../../Summary/DATA/SENTPAIR/Train
SP file ../../Summary/DATA/SENTPAIR/Train/APPN_2022-11-03_EP_YH_sp.tsv
SP file ..\..\Summary\DATA\SENTPAIR\Train\APPN_2022-11-03_EP_YH_sp.tsv already exists
Input file ../../Summary/DATA/SENTPAIR/Train\BILL_2022-11-03_EP_YH.txt
Input file BILL_2022-11-03_EP_YH
../../Summary/DATA/SENTPAIR/Train
SP file ../../Summary/DATA/SENTPAIR/Train/BILL_2022-11-03_EP_YH_sp.tsv
SP file ..\..\Summary\DATA\SENTPAIR\Train\BILL_2022-11-03_EP_YH_sp.tsv already exists
Input file ../../Summary/DATA/SENTPAIR/Train\CRWD_2022-06-02_EP_YH.txt
Input file CRWD_2022-06-02_EP_YH
../../Summary/DATA/SENTPAIR/Train
SP file ../../Summary/DATA/SENTPAIR/Train/CRWD_2022-06-02_EP_YH_sp.tsv
SP file ..\..\Summary\DATA\SENTPAIR\Train\CRWD_2022-06-02_EP_YH_sp.tsv already exists
Input file ../../Summary/DATA/SENTPAIR/Train\CRWD_2023-05-31_EP_YH.txt
Input file CRWD_2023-05-31_EP_YH
../../Summa

In [19]:
# Build the sentence-pair (SP) data for the Dev split, mirroring the Train cell.
# Both helpers, `spdevDir`, `devSPFile` and `model` are defined in earlier cells.
# Step 1: per the recorded output, this walks the *_EP_YH.txt files in `spdevDir`
# and reports a matching <name>_sp.tsv for each, noting files that already exist.
createTrainingData(spdevDir, model, writetofile=True)
# Step 2: presumably gathers the per-file TSVs from `spdevDir` into the single
# `devSPFile` that a later cell reads with pd.read_csv — TODO confirm against the helper.
writeTrainingData(devSPFile, spdevDir)

Input file ../../Summary/DATA/SENTPAIR/Dev\BILL_2023-02-02_EP_YH.txt
Input file BILL_2023-02-02_EP_YH
../../Summary/DATA/SENTPAIR/Dev
SP file ../../Summary/DATA/SENTPAIR/Dev/BILL_2023-02-02_EP_YH_sp.tsv
SP file ..\..\Summary\DATA\SENTPAIR\Dev\BILL_2023-02-02_EP_YH_sp.tsv already exists
Input file ../../Summary/DATA/SENTPAIR/Dev\CFLT_2023-01-30_EP_YH.txt
Input file CFLT_2023-01-30_EP_YH
../../Summary/DATA/SENTPAIR/Dev
SP file ../../Summary/DATA/SENTPAIR/Dev/CFLT_2023-01-30_EP_YH_sp.tsv
SP file ..\..\Summary\DATA\SENTPAIR\Dev\CFLT_2023-01-30_EP_YH_sp.tsv already exists
Input file ../../Summary/DATA/SENTPAIR/Dev\CRWD_2022-11-29_EP_YH.txt
Input file CRWD_2022-11-29_EP_YH
../../Summary/DATA/SENTPAIR/Dev
SP file ../../Summary/DATA/SENTPAIR/Dev/CRWD_2022-11-29_EP_YH_sp.tsv
SP file ..\..\Summary\DATA\SENTPAIR\Dev\CRWD_2022-11-29_EP_YH_sp.tsv already exists
Input file ../../Summary/DATA/SENTPAIR/Dev\DDOG_2023-05-04_EP_YH.txt
Input file DDOG_2023-05-04_EP_YH
../../Summary/DATA/SENTPAIR/Dev
SP fi

In [20]:
def _load_sentence_pairs(tsv_path):
    """Read a sentence-pair TSV into the frame layout used for classification.

    The file must be tab-separated with the columns ``Sentence1``, ``Sentence2``
    and ``Target``; they are renamed to ``text_a``, ``text_b`` and ``labels``.

    Rows with a missing value in any of the three columns are dropped *before*
    the text columns are cast to ``str``. Casting first would turn NaN into the
    literal string "nan", which ``dropna`` can no longer detect.

    Args:
        tsv_path: Path of the TSV file to read (UTF-8).

    Returns:
        DataFrame with columns ``text_a`` (str), ``text_b`` (str) and
        ``labels`` (int), re-indexed from 0.
    """
    return (
        pd.read_csv(tsv_path, sep="\t", encoding="utf-8")
        .rename(columns={"Sentence1": "text_a", "Sentence2": "text_b", "Target": "labels"})
        .loc[:, ["text_a", "text_b", "labels"]]
        .dropna()
        .assign(
            text_a=lambda df: df["text_a"].astype(str).apply(clean_unnecessary_spaces),
            text_b=lambda df: df["text_b"].astype(str).apply(clean_unnecessary_spaces),
            # A label column that contained NaN is read as float; cast after dropna.
            labels=lambda df: df["labels"].astype(int),
        )
        .reset_index(drop=True)
    )


train_df = _load_sentence_pairs(trainSPFile)
print("TRAIN DATA ..............")
print(train_df)

eval_df = _load_sentence_pairs(devSPFile)
print("EVAL DATA ..............")
print(eval_df)

TRAIN DATA ..............
                                                 text_a  \
0                                   Appian Corporation.   
1      Third quarter cloud subscription revenue incr...   
2           MCLEAN, Va., Nov. 03, 2022 (GLOBE NEWSWIRE)   
3     -- Appian (Nasdaq: APPN) today announced finan...   
4             Third Quarter 2022 Financial Highlights:.   
...                                                 ...   
1370  Full fiscal year non-GAAP diluted EPS is expec...   
1371  Full fiscal year non-GAAP diluted EPS is expec...   
1372  Full fiscal year non-GAAP diluted EPS is expec...   
1373  Full fiscal year non-GAAP diluted EPS is expec...   
1374  Full fiscal year non-GAAP diluted EPS is expec...   

                                                 text_b  labels  
0                                   Appian Corporation.       1  
1      Third quarter cloud subscription revenue is $...       1  
2           MCLEAN, Va., Nov. 03, 2022 (GLOBE NEWSWIRE)       1  
3

In [13]:
# Configuration for the sentence-pair classifier (consumed by ClassificationModel).
# The Seq2Seq generation options that were previously set here (do_sample,
# evaluate_generated_text, max_length) have been removed: they are not
# classification options and had no effect on this model.
spmodel_args = ClassificationArgs()

# --- Batching and schedule ---
spmodel_args.train_batch_size = 8
spmodel_args.eval_batch_size = 8
spmodel_args.num_train_epochs = 5
spmodel_args.use_multiprocessing = False
spmodel_args.fp16 = False

# Token budget per sentence pair. The recorded run cached its features as
# "cached_train_roberta_128_...", i.e. it used 128 (the old `max_length = 64`
# was ignored), so that value is pinned explicitly here.
spmodel_args.max_seq_length = 128

# --- Evaluation during training (every 50 optimisation steps) ---
spmodel_args.evaluate_during_training = True
spmodel_args.evaluate_during_training_verbose = True
spmodel_args.evaluate_during_training_steps = 50

# --- Early stopping on Matthews correlation (higher is better) ---
spmodel_args.use_early_stopping = True
spmodel_args.early_stopping_metric = "mcc"
spmodel_args.early_stopping_metric_minimize = False
spmodel_args.early_stopping_delta = 0.01
spmodel_args.early_stopping_patience = 5

# --- Output and checkpointing: only the final model is kept in output_dir ---
spmodel_args.output_dir = "spoutputs/"
spmodel_args.overwrite_output_dir = True
spmodel_args.reprocess_input_data = True
spmodel_args.save_eval_checkpoints = False
spmodel_args.save_model_every_epoch = False
spmodel_args.save_steps = -1

In [14]:
# Fine-tune roberta-base as the sentence-pair classifier on the GPU.
spmodel = ClassificationModel(
    model_type="roberta",
    model_name="roberta-base",
    args=spmodel_args,
    use_cuda=True,
)

# Train with periodic evaluation on eval_df; accuracy is reported as the
# extra metric "acc" alongside the library's built-in ones.
spmodel.train_model(
    train_df=train_df,
    eval_df=eval_df,
    acc=sklearn.metrics.accuracy_score,
)

# Final evaluation of the trained model on the dev set.
results = spmodel.eval_model(
    eval_df=eval_df,
    acc=sklearn.metrics.accuracy_score,
)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.
INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_train_roberta_128_2_3


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/166 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.1335710043478665, 'tp': 13, 'tn': 120, 'fp': 0, 'fn': 234, 'auroc': 0.6911943319838056, 'auprc': 0.8304005907429172, 'acc': 0.36239782016348776, 'eval_loss': 0.7750287833421127}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 1
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.340786435253033, 'tp': 207, 'tn': 58, 'fp': 62, 'fn': 40, 'auroc': 0.7160593792172738, 'auprc': 0.8498145926961705, 'acc': 0.7220708446866485, 'eval_loss': 0.6118197965881099}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.4475035688868323, 'tp': 214, 'tn': 67, 'fp': 53, 'fn': 33, 'auroc': 0.8241228070175439, 'auprc': 0.902833318463224, 'acc': 0.7656675749318801, 'eval_loss': 0.5464985085570294}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.4371639853702016, 'tp': 221, 'tn': 60, 'fp': 60, 'fn': 26, 'auroc': 0.7610661268556005, 'auprc': 0.8276640898255068, 'acc': 0.7656675749318801, 'eval_loss': 0.6209125737457172}


Running Epoch 1 of 5:   0%|          | 0/166 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.46628432698044797, 'tp': 216, 'tn': 68, 'fp': 52, 'fn': 31, 'auroc': 0.8168016194331984, 'auprc': 0.8826049397893196, 'acc': 0.773841961852861, 'eval_loss': 0.4992980224930722}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.4979108306797844, 'tp': 227, 'tn': 63, 'fp': 57, 'fn': 20, 'auroc': 0.8691632928475035, 'auprc': 0.9110436172840237, 'acc': 0.7901907356948229, 'eval_loss': 0.46854302623187716}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5267522724957489, 'tp': 190, 'tn': 94, 'fp': 26, 'fn': 57, 'auroc': 0.8483805668016194, 'auprc': 0.9177730799727124, 'acc': 0.773841961852861, 'eval_loss': 0.6166259283604829}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.525584248301745, 'tp': 211, 'tn': 80, 'fp': 40, 'fn': 36, 'auroc': 0.8614035087719297, 'auprc': 0.923413746299424, 'acc': 0.7929155313351499, 'eval_loss': 0.4587866289784079}


Running Epoch 2 of 5:   0%|          | 0/166 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5612298265677079, 'tp': 220, 'tn': 78, 'fp': 42, 'fn': 27, 'auroc': 0.8533400809716599, 'auprc': 0.9111084091363585, 'acc': 0.8119891008174387, 'eval_loss': 0.5236347075551748}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5762381274651492, 'tp': 237, 'tn': 64, 'fp': 56, 'fn': 10, 'auroc': 0.8739541160593792, 'auprc': 0.9227775363458028, 'acc': 0.8201634877384196, 'eval_loss': 0.4221398294295954}
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.5739394160037428, 'tp': 216, 'tn': 83, 'fp': 37, 'fn': 31, 'auroc': 0.8900134952766532, 'auprc': 0.9332644584629994, 'acc': 0.8147138964577657, 'eval_loss': 0.3984677175302868}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 1
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6755941526939873, 'tp': 235, 'tn': 81, 'fp': 39, 'fn': 12, 'auroc': 0.8697368421052631, 'auprc': 0.9016845924011854, 'acc': 0.8610354223433242, 'eval_loss': 0.39634850434958935}


Running Epoch 3 of 5:   0%|          | 0/166 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6754868269807955, 'tp': 234, 'tn': 82, 'fp': 38, 'fn': 13, 'auroc': 0.8904858299595142, 'auprc': 0.9224855643829551, 'acc': 0.8610354223433242, 'eval_loss': 0.39448538766768965}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 1
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6548352722036497, 'tp': 218, 'tn': 93, 'fp': 27, 'fn': 29, 'auroc': 0.9075236167341431, 'auprc': 0.9453134371157828, 'acc': 0.8474114441416893, 'eval_loss': 0.5381649955499755}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 2
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6558868889281028, 'tp': 231, 'tn': 82, 'fp': 38, 'fn': 16, 'auroc': 0.9087044534412956, 'auprc': 0.9454727699702812, 'acc': 0.8528610354223434, 'eval_loss': 0.4575158096647457}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 3
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6317581632560545, 'tp': 239, 'tn': 70, 'fp': 50, 'fn': 8, 'auroc': 0.906140350877193, 'auprc': 0.9398012387774897, 'acc': 0.8419618528610354, 'eval_loss': 0.6662899951207573}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 4
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6826372062953758, 'tp': 231, 'tn': 86, 'fp': 34, 'fn': 16, 'auroc': 0.9082658569500673, 'auprc': 0.9397996989463798, 'acc': 0.8637602179836512, 'eval_loss': 0.522983271021234}


Running Epoch 4 of 5:   0%|          | 0/166 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6051920421392607, 'tp': 239, 'tn': 66, 'fp': 54, 'fn': 8, 'auroc': 0.8864709851551957, 'auprc': 0.9223342402756766, 'acc': 0.8310626702997275, 'eval_loss': 0.7134108847999217}
INFO:simpletransformers.classification.classification_model: No improvement in mcc
INFO:simpletransformers.classification.classification_model: Current step: 5
INFO:simpletransformers.classification.classification_model: Early stopping patience: 5
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3
INFO:simpletransformers.classification.classification_model:{'mcc': 0.6424896604706098, 'tp': 235, 'tn': 76, 'fp': 44, 'fn': 12, 'auroc': 0.909008097165992, 'auprc': 0.9416443905034284, 'acc': 0.8474114441416893, 'eval_loss': 0.5964742799633709}
INFO:simpletransformers.classification.classification_model: Patience of 5 steps reached
INFO:simpletransformers.classification.classification_model: Training terminated.
INFO:simpletransformers.classification.classification_model: Training of roberta model complete. Saved to spoutputs/.
INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/367 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Saving features into cached file cache_dir/cached_dev_roberta_128_2_3


Running Evaluation:   0%|          | 0/46 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_model:{'mcc': 0.6424896604706098, 'tp': 235, 'tn': 76, 'fp': 44, 'fn': 12, 'auroc': 0.909008097165992, 'auprc': 0.9416443905034284, 'acc': 0.8474114441416893, 'eval_loss': 0.5964742799633709}


In [15]:
# Reload the fine-tuned classifier from the training output directory and
# smoke-test it on a single (original sentence, candidate paraphrase) pair.
spmodel = ClassificationModel(model_type="roberta", model_name="spoutputs", args=spmodel_args)

predictions, raw_outputs = spmodel.predict(
    [["Total revenue was $260.0 million, an increase of 66% year-over-year",
      "Total revenue is $260.0 million"]]
)
print(predictions)

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

[1]


In [16]:
# Get paraphrasing with ppmodel and spmodel
files = glob.glob(sptestDir+"/*_EP_YH.txt")
#print(files)
#print(inputDir)
if(len(files) > 0):
    for file in (files):
        print("Input file " + file)
        sentences = getSentences(file, nlp)
        for line in sentences:
            #print(line)
            nline = predictPhrase(line, model, spmodel)
            print(line, nline)

Input file ../../Summary/DATA/SENTPAIR/Test\BILL_2023-02-02_EP_YH.txt


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

INFO:root:
Unfortunately, your original traceback can not be constructed.



Traceback (most recent call last):
  File "c:\users\ankan\appdata\local\programs\python\python37\lib\site-packages\IPython\core\interactiveshell.py", line 3418, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-16-7eb249d1a371>", line 11, in <module>
    nline = predictPhrase(line, model, spmodel)
NameError: name 'model' is not defined

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\ankan\appdata\local\programs\python\python37\lib\site-packages\IPython\core\interactiveshell.py", line 2045, in showtraceback
    stb = value._render_traceback_()
AttributeError: 'NameError' object has no attribute '_render_traceback_'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "c:\users\ankan\appdata\local\programs\python\python37\lib\site-packages\IPython\core\ultratb.py", line 1170, in get_records
    return _fixed_getinnerfr

TypeError: object of type 'NoneType' has no len()

In [15]:
# Create Para Phrasing training data using ppModel and spModel
ppDataDir = "../../Summary/Refined/PLTR/"
ppTrainDir = "../../Summary/DATA/PARAPHRASE/Train"
ppDevDir = "../../Summary/DATA/PARAPHRASE/Dev"
files = glob.glob(ppDataDir+"/*_EP_YH.txt")
#print(files)
#print(inputDir)
if(len(files) > 0):
    for file in (files):
        print("Input file " + file)
        basefile = os.path.basename(file)
        inputfile = os.path.splitext(basefile)[0]
        inputfile = inputfile+"_phrase.tsv"
        inputfilePath = ppTrainDir+"/"+inputfile
        inputfileDevPath = ppDevDir+"/"+inputfile
        print(inputfilePath, inputfileDevPath)
        outfile = Path(inputfilePath)
        outdevfile = Path(inputfileDevPath)
        if outfile.is_file():
            print(inputfilePath + " Already exists")
        elif outdevfile.is_file():
            print(inputfileDevPath + " Already exists")
        else:
            print("Creating training file {} for paraphrasing".format(inputfilePath))
            with open(outfile, "w", encoding = "utf-8") as of:
                of.write("filename\tSentence1\tSentence2\n")
                sentences = getSentences(file, nlp)
                for line in sentences:
                    #print(line)
                    pline, isProcess = preProcessSent(line)
                    if not isProcess:
                        continue
                    nline = predictPhrase(line, model, spmodel)
                    of.write(inputfile+"\t"+pline+"\t"+nline+"\n")
                    print(pline, nline)
        #break

Input file ../../Summary/Refined/PLTR\PLTR_2022-05-09_EP_YH.txt
../../Summary/DATA/PARAPHRASE/Train/PLTR_2022-05-09_EP_YH_phrase.tsv ../../Summary/DATA/PARAPHRASE/Dev/PLTR_2022-05-09_EP_YH_phrase.tsv
Creating training file ../../Summary/DATA/PARAPHRASE/Train/PLTR_2022-05-09_EP_YH_phrase.tsv for paraphrasing
43


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 DENVER, May 09, 2022--(BUSINESS WIRE)--Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the first quarter ended March 31, 2022.  DENVER, May 09, 2022--(BUSINESS WIRE)--Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the first quarter ended March 31, 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q1 2022 Highlights.  Q1 2022 Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Total revenue grew 31% year-over-year to $446 million.  Total revenue is $446 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Commercial revenue grew 54% year-over-year.  Commercial revenue is up 54% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial revenue grew 136% year-over-year.  US commercial revenue grew 136% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Government revenue grew 16% year-over-year. Government revenue grew 16% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Customer count grew 86% year-over-year. Customer count grew 86% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Loss from operations of $(39) million, representing a margin of (9)%, up 2,400 basis points year-over-year and 500 basis points sequentially. Loss from operations is $(39) million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Adjusted income from operations of $117 million, representing a margin of 26%. Adjusted income from operations is $117 million and Adjusted Margin is 26%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Cash from operations of $35 million, representing an 8% margin. Cash from operations is $35 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Adjusted free cash flow of $30 million, representing a 7% margin. Adjusted free cash flow is $30 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

GAAP net loss per share, diluted of $(.05). GAAP net loss per share is $(.05).


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Adjusted earnings per share, diluted of $.02. Adjusted earnings per share is diluted of $.02.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q1 2022 Financial Summary.  Q1 2022 Financial Summary.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Outlook.  Outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For Q2 2022:.  For Q2 2022:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 We are guiding to a base case of $470 million in revenue.  We are guiding to a base case of $470 million in revenue.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

There is a wide range of potential upside to our guidance, including those driven by our role in responding to developing geopolitical events. There is a wide range of potential upside to our guidance, including those driven by our role in responding to developing geopolitical events.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

We expect adjusted operating margin of 20%. We expect adjusted operating margin of 20%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For full year 2022, we continue to expect:.  For full year 2022, we continue to expect:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted operating margin of 27%.  Adjusted operating margin is 27%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Per long-term guidance policy, as provided by our Chief Executive Officer, Alex Karp, we continue to expect:.  Per long-term guidance policy, as provided by our Chief Executive Officer, Alex Karp, we continue to expect:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Annual revenue growth of 30% or greater through 2025.  Annual revenue growth is expected to be 30% or greater through 2025.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Earnings Webcast.  Earnings Webcast.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 A live public webcast will be held at 6:00 a.m. MT / 8:00 a.m. ET today to discuss the results for our first quarter ended March 31, 2022 and financial outlook.  A live public webcast will be held at 6:00 a.m. MT / 8:00a.m. ET today to discuss the results for our first quarter ended March 31, 2022 and financial outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

The webcast can be accessed by registering online at https://palantir.events/palantir-2022-q1. The webcast can be accessed by registering online at https://palantir.events/palantirs2022-q1.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

A replay of the webcast will be available at https://investors.palantir.com following the event. A replay of the webcast will be available at https://investors.palantir.com following the event.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Story continues. Story continues.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir’s Investor Relations website at https://investors.palantir.com, as well as a letter from our Chief Executive Officer, which will be available through Palantir’s website at https://www.palantir.com.  An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir’s Investor Relations website at https://investors.palantir.com, as well as a letter from our Chief Executive Officer, which will be available through Palantir’s website at https://www.palantir.com.
Input file ../../Summary/Refined/PLTR\PLTR_2022-08-08_EP_YH.txt
../../Summary/DATA/PARAPHRASE/Train/PLTR_2022-08-08_EP_YH_phrase.tsv ../../Summary/DATA/PARAPHRASE/Dev/PLTR_2022-08-08_EP_YH_phrase.tsv
Creati

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 DENVER, August 08, 2022--(BUSINESS WIRE)--Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the second quarter ended June 30, 2022.  DENVER, August 08, 2022--(BUSINESS WIRE)--Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the second quarter ended June 30, 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q2 2022 Highlights.  Q2 2022 Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Revenue grew 26% year-over-year to $473 million.  Revenue is $473 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US revenue grew 45% year-over-year to $290 million.  US revenue is $290 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Commercial revenue grew 46% year-over-year.  Commercial revenue is up 46% year-over-year.


Generating outputs:   0%|          | 0/1 [00:01<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial revenue grew 120% year-over-year.  US commercial revenue grew 120% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US government revenue grew 27% year-over-year.  US government revenue is up 27% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial customer count increased 250% year-over-year, from 34 customers in Q2 2021 to 119 customers in Q2 2022.  US commercial customer count is 119 customers in Q2 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Total contract value ("TCV") closed of $792 million, including US TCV closed of $588 million.  Total contract value ("TCV") is $792 million and US TCV is $588 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Loss from operations of $(42) million, representing a margin of (9)%, up 3,000 basis points year-over-year.  Loss from operations is $(42) million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted income from operations of $108 million, representing a margin of 23%.  Adjusted income from operations is $108 million, and nonGAAP operating margin is 23%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Cash from operations of $62 million, representing a 13% margin.  Cash from operations is $62 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted free cash flow of $61 million, representing a 13% margin.  Adjusted free cash flow is $61 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 >>> Q2 2022 TTM Highlights.  >>> Q2 2022 TTM Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US revenue of $1.04 billion on a trailing-twelve-months ("TTM") basis.  US revenue is $1.04 billion on a trailing-twelve-months ("TTM") basis.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Cash from operations of $292 million, representing a 17% margin.  Cash from operations is $292 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted free cash flow of $314 million, representing an 18% margin.  Adjusted free cash flow is $314 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q2 2022 Financial Summary.  Q2 2022 Financial Summary.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Revenue in Second Quarter Amount is $473010 T . Revenue is expected to be $473010 T.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Year-over-year Growth in Second Quarter Amount is 26% . Year-over-year Growth in Second Quarter Amount is 26%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Outlook.  Outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For Q3 2022, we expect revenue of between $474 - $475 million and adjusted income from operations of $54 - $55 million.  For Q3 2022, we expect revenue to be between $474 - $475 million and adjusted income from operations to be $54 - $55 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For full year 2022, we now expect revenue of between $1.9 - $1.902 billion and adjusted income from operations of $341 - $343 million.  For full year 2022, we expect revenue to be between $1.9 and $1.902 billion and adjusted income from operations to be $341 - $343 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

This revised guidance excludes any new major U.S. government awards and we believe this to be the base case. Invalid Sentence.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Story continues. Story continues.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Earnings Webcast.  Earnings Webcast.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 A live public webcast will be held at 6:00 a.m. MT / 8:00 a.m. ET today to discuss the results for our second quarter ended June 30, 2022 and financial outlook.  A live public webcast will be held at 6:00 a.m. MT / 8:00 A.m ET today to discuss the results for our second quarter ended June 30, 2022 and financial outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

The webcast can be accessed by registering online at https://palantir.events/palantir-2022-q2. The webcast can be accessed by registering online at https://palantir.events/palantirs2022-q2.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

A replay of the webcast will be available at https://investors.palantir.com following the event. A replay of the webcast will be available at https://investors.palantir.com following the event.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir’s Investor Relations website at https://investors.palantir.com, as well as a letter from our Chief Executive Officer, which will be available through Palantir’s website at https://www.palantir.com.  An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir’s Investor Relations website at https://investors.palantir.com, as well as a letter from our Chief Executive Officer, which will be available through Palantir’s website at https://www.palantir.com.
Input file ../../Summary/Refined/PLTR\PLTR_2022-11-07_EP_YH.txt
../../Summary/DATA/PARAPHRASE/Train/PLTR_2022-11-07_EP_YH_phrase.tsv ../../Summary/DATA/PARAPHRASE/Dev/PLTR_2022-11-07_EP_YH_phrase.tsv
Creati

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 DENVER, Nov. 7, 2022 /PRNewswire/  DENVER, Nov. 7, 2022 /PRNewswire/


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

-- Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the third quarter ended September 30, 2022. -- Palantir Technologies Inc. (NYSE:PLTRTR) today announced financial results for the third quarter ended September 30, 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Palantir logo (PRNewsfoto/Palantir Technologies).  Palantir logo (PRNewsfoto/Palantir Technologies).


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q3 2022 Highlights.  Q3 2022 Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Revenue grew 22% year-over-year to $478 million.  Revenue is $478 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US revenue grew 31% year-over-year to $297 million.  US revenue is $297 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial revenue grew 53% y/y.  US commercial revenue grew 53% y/y.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US government revenue grew 23% y/y.  US government revenue grew 23% y/y.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Total contract value ("TCV") closed of $1.3 billion , including US TCV closed of $1.1 billion.  Total contract value ("TCV") closed of $1.3 billion , including US TCV closed of $1.1 billion.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Customer count grew 66% y/y and 11% q/q.  Customer count grew 66% y/y and 11% q/q.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial customer count increased 124% year-over-year, from 59 customers in Q3 2021 to 132 customers in Q3 2022.  US commercial customer count is 132 customers in Q3 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Loss from operations of $(62) million , representing a margin of (13)%, up 1,000 basis points year-over-year.  Loss from operations of $(62) million , representing a margin of (13)%, up 1,000 basis points year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted income from operations of $81 million , representing a margin of 17%.  Adjusted income from operations is $81 million and Adjusted margin is 17%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Cash from operations of $47 million , representing a 10% margin.  Cash from operations of $47 million , representing a 10% margin.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted free cash flow ("AFCF") of $37 million , representing an 8% margin.  Adjusted free cash flow ("AFCF") of $37 million , representing an 8% margin.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 This marks the 8th consecutive quarter of positive AFCF. Invalid Sentence.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q3 2022 TTM Highlights.  Q3 2022 TTM Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US revenue of $1.11 billion on a trailing-twelve-month ("TTM") basis, representing a 38% growth rate y/y.  US revenue of $1.11 billion on a trailing-twelve-month ("TTM") basis, representing a 38% growth rate y/y.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Government revenue of $1.02 billion on a TTM basis, representing a 20% growth rate y/y and surpassing the $1 billion mark for the first time in company history.  Government revenue of $1.02 billion on a TTM basis, representing a 20% growth rate y/y and surpassing the $1 billion mark for the first time in company history.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Cash from operations of $238 million , representing a 13% margin.  Cash from operations is $238 million and Adjusted Non-GAAP Sentence Margin is 13%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted free cash flow of $231 million , representing a 13% margin.  Adjusted free cash flow is $231 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q3 2022 Financial Summary.  Q3 2022 Financial Summary.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Revenue in Third Quarter Amount is $ 477880 T . Revenue in Third Quarter Amount is $ 477880 T.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Year-over-year Growth in Third Quarter Amount is 22% . Year-over-year Growth in Third Quarter Amount is 22%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Outlook.  Outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For full year 2022:.  For full year 2022:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Story continues. Story continues.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 We are reaffirming our revenue guidance of $1.9 - $1.902 billion despite a negative $6 million currency impact since our prior quarter's guidance.  We are reaffirming our revenue guidance of $1.9 - $1.902 billion despite a negative $6 million currency impact since our prior quarter's guidance.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 >>> Excluding such impact, we would expect full year 2022 revenue of $1.906 - $1.908 billion .  >>> Excluding such impact, we would expect full year 2022 revenue of $1.906 - $1.908 billion .


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 We are raising our outlook for adjusted income from operations to between $384 - $386 million.  We are raising our outlook for adjusted income from operations to between $384 and $386 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For Q4 2022:.  For Q4 2022:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 After factoring in a negative $5 million currency impact since our prior quarter's guidance, we expect revenue of $503 - $505 million.  After factoring in a negative $5 million currency impact since our prior quarter's guidance, we expect revenue of $503 - $505 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 >>> Excluding such impact, we would expect fourth quarter revenue of $508 - $510 million .  >>> Excluding such impact, we would expect fourth quarter revenue of $508 - $510 million .


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 We expect adjusted income from operations of $78 - $80 million.  We expect adjusted income from operations to be between $78 and $80 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 CEO Letter.  CEO Letter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Palantir CEO Alex Karp's  Palantir CEO Alex Karp's


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

quarterly letter to shareholders is available through Palantir's website at https://www.palantir.com/q3-2022-letter. quarterly letter to shareholders is available through Palantir's website at https://www.palantir.com/q3-2022-letter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Earnings Webcast.  Earnings Webcast.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 A live public webcast will be held at 6:00 a.m. MT / 8:00 a.m. ET today to discuss the results for our third quarter ended September 30, 2022 and financial outlook.  A live public webcast will be held at 6:00 a.m. MT / 8:00 am.m ET today to discuss the results for our third quarter ended September 30, 2022 and financial outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

The webcast can be accessed by registering online at https://palantir.events/palantir-2022-q3. The webcast can be accessed by registering online at https://palantir.events/palantirs2022-q3.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

A replay of the webcast will be available at https://investors.palantir.com following the event. A replay of the webcast will be available at https://investors.palantir.com following the event.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir's Investor Relations website at https://investors.palantir.com.  An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir's Investor Relations website at https://investors.palantir.com.
Input file ../../Summary/Refined/PLTR\PLTR_2023-02-13_EP_YH.txt
../../Summary/DATA/PARAPHRASE/Train/PLTR_2023-02-13_EP_YH_phrase.tsv ../../Summary/DATA/PARAPHRASE/Dev/PLTR_2023-02-13_EP_YH_phrase.tsv
Creating training file ../../Summary/DATA/PARAPHRASE/Train/PLTR_2023-02-13_EP_YH_phrase.tsv for paraphrasing
64


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 DENVER, February 13, 2023--(BUSINESS WIRE)--Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the fourth quarter and fiscal year ended December 31, 2022.  DENVER, February 13, 2023--(BUSINESS WIRE)--Palantir Technologies Inc. (NYSE:PLTR) today announced financial results for the fourth quarter and fiscal year ended December 31, 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

This is a significant moment for us and our supporters," said Alex Karp, co-founder and chief executive officer of Palantir Technologies. Invalid Sentence.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Q4 2022 Highlights.  Q4 2022 Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 GAAP net income of $31 million.  GAAP net income is $31 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 This marks our first quarter of positive GAAP net income. Invalid Sentence.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 GAAP earnings per share of $.01.  GAAP earnings per share is $.01.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted earnings per share of $.04.  Adjusted earnings per share is $.04.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Total revenue grew 18% year-over-year to $509 million.  Total revenue is $509 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US revenue grew 19% year-over-year to $302 million.  US revenue is $302 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Commercial revenue grew 11% year-over-year to $215 million.  Commercial revenue is $215 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial revenue grew 12% year-over-year to $77 million. US commercial revenue is $77 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Government revenue grew 23% year-over-year to $293 million.  Government revenue is $293 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US government revenue grew 22% year-over-year to $225 million.  US government revenue is $225 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Customer count grew 55% year-over-year and 9% quarter-over-quarter.  Customer count grew 55% year-over-year and 9% quarter-over-quarter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial customer count increased 79% year-over-year, from 80 customers in Q4 2021 to 143 customers in Q4 2022.  US commercial customer count is 143 customers in Q4 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Loss from operations of $(18) million, representing a margin of (4)%, up 1,000 basis points year-over-year.  Loss from operations is $(18) million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted income from operations of $114 million, representing a margin of 22%.  Adjusted income from operations is $114 million, and Adjusted operating margin is 22%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Cash from operations of $79 million, representing a 15% margin.  Cash from operations is $79 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Hello Adjusted free cash flow of $76 million, representing a 15% margin.  Hello Adjusted free cash flow is $76 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 FY 2022 Highlights.  FY 2022 Highlights.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Total revenue grew 24% year-over-year to $1.91 billion.  Total revenue is $1.91 billion.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US revenue grew 32% year-over-year to $1.16 billion.  US revenue is $1.16 billion.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Commercial revenue grew 29% year-over-year to $834 million.  Commercial revenue is $834 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US commercial revenue grew 67% year-over-year to $335 million.  US commercial revenue is $335 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Government revenue grew 19% year-over-year to $1.07 billion.  Government revenue is $1.07 billion.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 US government revenue grew 22% year-over-year to $826 million.  US government revenue is $826 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Loss from operations of $(161) million, representing a margin of (8)%, up 1,900 basis points year-over-year.  Loss from operations is $(161) million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted income from operations of $421 million, representing a margin of 22%.  Adjusted income from operations is $421 million and adjusted operating margin is 22%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Cash from operations of $224 million, representing a 12% margin.  Cash from operations is $224 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted free cash flow of $203 million, representing a 11% margin.  Adjusted free cash flow is $203 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Story continues. Story continues.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Revenue in Fourth Quarter Amount is $508624 T . Revenue in Fourth Quarter Amount is $508624 T.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Revenue in Full Year 2022 Amount is $1905871 T . Revenue in Full Year 2022 Amount is $1905871 T.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Year-over-year Growth in Fourth Quarter Amount is 18% . Year-over-year Growth in Fourth Quarter Amount is 18%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

Year-over-year Growth in Full Year 2022 Amount is 24% . Year-over-year Growth in Full Year 2022 Amount is 24%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Outlook.  Outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For Q1 2023, we expect:.  For Q1 2023, we expect:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Revenue of between $503 - $507 million.  Revenue is expected to be between $503 - $507 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted income from operations of $91 - $95 million.  Adjusted income from operations is expected to be between $91 and $95 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 For full year 2023, we expect:.  For full year 2023, we expect:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Revenue of between $2,180 - $2,230 million.  Revenue is expected to be between $2,180 and $2.230 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Adjusted income from operations of $481 - $531 million.  Adjusted income from operations is expected to be between $481 and $531 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 GAAP net income.  GAAP net income.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 CEO Letter.  CEO Letter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Palantir CEO Alex Karp’s annual letter is available through Palantir’s website at https://www.palantir.com/2023-annual-letter.  Palantir CEO Alex Karp’s annual letter is available through PalantIR’’ website at https://www.palantir.com/2023-annual-letter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 Earnings Webcast.  Earnings Webcast.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 A live public webcast will be held at 3:00 PM MT / 5:00 PM ET today to discuss the results for our fourth quarter and year ended December 31, 2022 and financial outlook.  A live public webcast will be held at 3:00 PM MT / 5:00 AM ET today to discuss the results for our fourth quarter and year ended December 31, 2022 and financial outlook.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

The webcast can be accessed by registering online at https://palantir.events/palantir-2022-q4. The webcast can be accessed by registering online at https://palantir.events/palantir-2022-q4.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

A replay of the webcast will be available at https://investors.palantir.com following the event. A replay of the webcast will be available at https://investors.palantir.com following the event.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

 An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir’s Investor Relations website at https://investors.palantir.com.  An investor presentation, including supplemental financial information and reconciliations of certain non-GAAP measures to their nearest comparable GAAP measures, will be available through Palantir’s Investor Relations website at https://investors.palantir.com.
Input file ../../Summary/Refined/PLTR\PLTR_2023-05-08_EP_YH.txt
../../Summary/DATA/PARAPHRASE/Train/PLTR_2023-05-08_EP_YH_phrase.tsv ../../Summary/DATA/PARAPHRASE/Dev/PLTR_2023-05-08_EP_YH_phrase.tsv
../../Summary/DATA/PARAPHRASE/Train/PLTR_2023-05-08_EP_YH_phrase.tsv Already exists


In [23]:
# Paraphrase the processed ER text with the help of pmodel and spmodel.
# Example input: origFile = "../../Summary/Refined/APPN/APPN_2022-11-03_EP_YH.txt"

# Root folders for the paraphrased output and for the per-file prediction logs;
# a per-company sub-folder is appended to each by createPostRefinedER.
postRefDir = "../../Summary/PostRefined/"
logDir = "../../Summary/Log/"

# Special line tags; every tag is its short code followed by "***".
splTag = [
    code + "***"
    for code in ("SCHQ", "SCBQ", "SCBF", "SCHF", "SCG", "GF", "GQ", "SC")
]

def createPostRefinedER(origFile, nlp, pmodel, spmodel):
    """Write a paraphrased ("post-refined") copy of one refined text file.

    The output goes to ``postRefDir/<symbol>/<basename>`` and a prediction log
    to ``logDir/<symbol>/<basename>``, where ``<symbol>`` is the part of the
    input file name before the first ``"_"``. Both paths are printed. If the
    output file already exists nothing is regenerated.

    Line handling, in order of precedence:

    * a line containing ``"ED***"`` stops processing of the file;
    * a line containing ``"PG***"`` or ``"NOPAD***"``, or no ``"***"`` at all,
      is split into sentences with ``getSentences``; each sentence accepted by
      ``preProcessSent`` is passed to ``predictPhrase`` and the results are
      joined into one line that ends in ``" ."`` (prefixed with the original
      ``PG***`` / ``NOPAD***`` tag when present);
    * any other tagged line is copied to the output unchanged.

    Args:
        origFile: Path of the refined input text file.
        nlp: Pipeline object forwarded to ``getSentences``.
        pmodel: Model forwarded to ``predictPhrase``.
        spmodel: Second model forwarded to ``predictPhrase``.

    Returns:
        None. Results are written to disk and echoed with ``print``.

    Raises:
        Any exception raised while reading, predicting or writing is
        propagated; the partially written output file is removed first so a
        later run does not mistake it for a finished one.
    """
    basefile = os.path.basename(origFile)
    # The company symbol is the part of the file name before the first "_".
    csym = basefile.split("_")[0]
    pRefDir = postRefDir + "/" + csym
    lDir = logDir + "/" + csym
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(pRefDir, exist_ok=True)
    os.makedirs(lDir, exist_ok=True)
    outfilePath = pRefDir + "/" + basefile
    print(outfilePath)
    logPath = lDir + "/" + basefile
    print(logPath)

    outfile = Path(outfilePath)
    logfile = Path(logPath)

    if outfile.is_file():
        print("Post Refined Para Phrase file " + str(outfile) + " already exists")
        return

    try:
        # All three handles are managed by the same `with`, so they are closed
        # even when a prediction raises part-way through the file.
        # `fl` receives the predictions made by predictPhrase (useful for
        # generating more training data).
        with open(outfile, "w", encoding="utf-8") as of, \
                open(origFile, "r", encoding="utf-8") as f, \
                open(logfile, "w", encoding="utf-8") as fl:
            for line in f:
                if "ED***" in line:
                    break

                if "PG***" in line or "NOPAD***" in line or "***" not in line:
                    # Blank lines also land here (they contain no "***").
                    nline = None
                    for l in getSentences(None, nlp, line):
                        _, isProcess = preProcessSent(l)
                        if not isProcess:
                            continue
                        if not nline:
                            # Carry the original tag over to the new line.
                            if "PG***" in line:
                                nline = "PG*** "
                            elif "NOPAD***" in line:
                                nline = "NOPAD*** "
                        pp = predictPhrase(l, pmodel, spmodel, fl)
                        if "Invalid Sentence" in pp:
                            continue
                        if nline:
                            nline = nline + pp + " "
                        else:
                            nline = pp + " "
                    if nline:
                        # Drop the trailing separator, then normalise the
                        # ending to " ." (endswith also tolerates an empty
                        # string, where indexing the last character would fail).
                        nline = nline[:-1]
                        if nline.endswith("."):
                            nline = nline[:-1]
                        nline = nline + " ."
                        print(nline)
                        of.write(nline + "\n\n")
                else:
                    # Other tagged lines are copied as-is; `line` still carries
                    # its own trailing newline.
                    print(line)
                    of.write(line + "\n\n")
    except BaseException:
        # Includes KeyboardInterrupt: a truncated output would otherwise be
        # treated as "already exists" and skipped on every later run.
        if outfile.is_file():
            outfile.unlink()
        raise

In [74]:
# Driver: for every company that has an entities file, paraphrase each of its
# refined text files that still needs it.
entPath = "../../Summary/entities/"
rPath = "../../Summary/Refined/"
files = glob.glob(entPath + "/*-ENTITIES.json")

# Looping over an empty list is a no-op, so no explicit emptiness checks are needed.
for file in files:
    basefile = os.path.basename(file)
    filename = os.path.splitext(basefile)[0]
    # The company symbol is the part of the file name before the first "-".
    csym = filename.split("-", 1)[0]
    cPath = "/".join([rPath, csym])
    cfiles = glob.glob(cPath + "/*.txt")
    for cf in cfiles:
        if not isPhrasingRqd(cf):
            continue
        createPostRefinedER(cf, nlp, model, spmodel)

../../Summary/PostRefined//APPN/APPN_2022-08-04_EP_YH.txt
../../Summary/Log//APPN/APPN_2022-08-04_EP_YH.txt
Post Refined Para Phrase file ..\..\Summary\PostRefined\APPN\APPN_2022-08-04_EP_YH.txt already exists
../../Summary/PostRefined//APPN/APPN_2022-11-03_EP_YH.txt
../../Summary/Log//APPN/APPN_2022-11-03_EP_YH.txt
Post Refined Para Phrase file ..\..\Summary\PostRefined\APPN\APPN_2022-11-03_EP_YH.txt already exists
../../Summary/PostRefined//APPN/APPN_2023-02-16_EP_YH.txt
../../Summary/Log//APPN/APPN_2023-02-16_EP_YH.txt
Post Refined Para Phrase file ..\..\Summary\PostRefined\APPN\APPN_2023-02-16_EP_YH.txt already exists
../../Summary/PostRefined//APPN/APPN_2023-05-09_EP_YH.txt
../../Summary/Log//APPN/APPN_2023-05-09_EP_YH.txt
Post Refined Para Phrase file ..\..\Summary\PostRefined\APPN\APPN_2023-05-09_EP_YH.txt already exists
../../Summary/PostRefined//BILL/BILL_2022-08-18_EP_YH.txt
../../Summary/Log//BILL/BILL_2022-08-18_EP_YH.txt
Post Refined Para Phrase file ..\..\Summary\PostRefi

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['PG***']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['PG***', 'PG***']] [1]
PREDICTED SENTENCE:
 PG***
PG*** PG*** .


ORIGINAL LINE:
 PG*** Company Meets Q2 Targets Despite Macro Headwinds.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Company Meets Q2 Targets Despite Macro Headwinds.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Company Meets Q2 Targets Despite Macro Headwinds.', 'Company Meets Q2 Targets Despite Macro Headwinds.']] [1]
PREDICTED SENTENCE:
 Company Meets Q2 Targets Despite Macro Headwinds.
PG*** Company Meets Q2 Targets Despite Macro Headwinds .


ORIGINAL LINE:
 PG*** Profitability and Cash Flow Improvement Highlight its Second Quarter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Profitability and Cash Flow Improvement Highlight its Second Quarter.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Profitability and Cash Flow Improvement Highlight its Second Quarter.', 'Profitability and Cash Flow Improvement Highlight its Second Quarter.']] [1]
PREDICTED SENTENCE:
 Profitability and Cash Flow Improvement Highlight its Second Quarter.
PG*** Profitability and Cash Flow Improvement Highlight its Second Quarter .


ORIGINAL LINE:
 PG*** Increases Operating Income and Free Cash Flow Outlook for Fiscal 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Increases Operating Income and Free Cash Flow Outlook for Fiscal 2022.', ' Operating Income and Free Cash Flow Outlook for Fiscal 2022.', 'Operating Income and Free Cash Flow Outlook for Fiscal 2022.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Increases Operating Income and Free Cash Flow Outlook for Fiscal 2022.', 'Increases Operating Income and Free Cash Flow Outlook for Fiscal 2022.']] [1]
PREDICTED SENTENCE:
 Increases Operating Income and Free Cash Flow Outlook for Fiscal 2022.
PG*** Increases Operating Income and Free Cash Flow Outlook for Fiscal 2022 .


ORIGINAL LINE:
 PG*** NEW YORK, August 08, 2022--(BUSINESS WIRE)--DigitalOcean Holdings, Inc. (NYSE: DOCN), the cloud for developers, startups and SMBs, today announced results for its second quarter ended June 30, 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['NEW YORK, August 08, 2022--(BUSINESS WIRE)--DigitalOcean Holdings, Inc. (NYSE: DOCN), the cloud for developers, startups and SMBs, today announced results for its second quarter ended June 30, 2022.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['NEW YORK, August 08, 2022--(BUSINESS WIRE)--DigitalOcean Holdings, Inc. (NYSE: DOCN), the cloud for developers, startups and SMBs, today announced results for its second quarter ended June 30, 2022.', 'NEW YORK, August 08, 2022--(BUSINESS WIRE)--DigitalOcean Holdings, Inc. (NYSE: DOCN), the cloud for developers, startups and SMBs, today announced results for its second quarter ended June 30, 2022.']] [1]
PREDICTED SENTENCE:
 NEW YORK, August 08, 2022--(BUSINESS WIRE)--DigitalOcean Holdings, Inc. (NYSE: DOCN), the cloud for developers, startups and SMBs, today announced results for its second quarter ended June 30, 2022.
PG*** NEW YORK, August 08, 2022--(BUSINESS WIRE)--DigitalOcean Holdings, Inc. (NYSE: DOCN), the cloud for developers, startups and SMBs, today announced results for its second quarter ended June 30, 2022 .
CS*** "We are taking a number of actions in the second half to deliver 30% growth with improving profitability and cash flow despite an u

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['PG***']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['PG***', 'PG***']] [1]
PREDICTED SENTENCE:
 PG***
PG*** PG*** .
SCHQ*** Second Quarter 2022 Financial Highlights:.



ORIGINAL LINE:
 PG*** Revenue was $133.9 million, an increase of 29% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Revenue is $133.9 million.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Revenue was $133.9 million, an increase of 29% year-over-year.', 'Revenue is $133.9 million.']] [1]
PREDICTED SENTENCE:
 Revenue is $133.9 million.
PG*** Revenue is $133.9 million .


ORIGINAL LINE:
 PG*** Annual Run-Rate Revenue (ARR) ended the quarter at $544.1 million, representing 28% year-over-year growth.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Annual Run-Rate Revenue (ARR) is $544.1 million.', 'Annual Run-Rate Revenue (ARR) ended the quarter at $544.1 million.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Annual Run-Rate Revenue (ARR) ended the quarter at $544.1 million, representing 28% year-over-year growth.', 'Annual Run-Rate Revenue (ARR) is $544.1 million.']] [1]
PREDICTED SENTENCE:
 Annual Run-Rate Revenue (ARR) is $544.1 million.
PG*** Annual Run-Rate Revenue (ARR) is $544.1 million .


ORIGINAL LINE:
 PG*** Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Gross profit is $86.6 million and adjusted gross profit is.7 million.', 'Gross profit is $86.6 million and adjusted gross profit is  $109.7 million.', 'Gross profit is $86.6 million and adjusted gross profit is US$109.7 million.', 'Gross profit is $86.6 million and adjusted gross profit is..7 million.', 'Gross profit is $86.6 million and adjusted gross profit is up $109.7 million.', 'Gross profit is $86.6 million and adjusted gross profit is £109.7 million.', 'Gross profit is $86.6 million and adjusted gross profit is 109.7 million.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED SENTENCE PAIR:
 [['Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.', 'Gross profit is $86.6 million and adjusted gross profit is.7 million.']] [0]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED SENTENCE PAIR:
 [['Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.', 'Gross profit is $86.6 million and adjusted gross profit is  $109.7 million.']] [0]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED SENTENCE PAIR:
 [['Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.', 'Gross profit is $86.6 million and adjusted gross profit is US$109.7 million.']] [0]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED SENTENCE PAIR:
 [['Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.', 'Gross profit is $86.6 million and adjusted gross profit is..7 million.']] [0]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED SENTENCE PAIR:
 [['Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.', 'Gross profit is $86.6 million and adjusted gross profit is up $109.7 million.']] [0]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Gross profit of $86.6 million or 65% of revenue, an increase of 700 basis points year-over-year, and adjusted gross profit of $109.7 million or 82% of revenue.', 'Gross profit is $86.6 million and adjusted gross profit is £109.7 million.']] [1]
PREDICTED SENTENCE:
 Gross profit is $86.6 million and adjusted gross profit is £109.7 million.
PG*** Gross profit is $86.6 million and adjusted gross profit is £109.7 million .


ORIGINAL LINE:
 PG*** Loss from operations was $7.4 million and operating margin was (6)%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Loss from operations is $7.4 million and operating margin is (6)%.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Loss from operations was $7.4 million and operating margin was (6)%.', 'Loss from operations is $7.4 million and operating margin is (6)%.']] [1]
PREDICTED SENTENCE:
 Loss from operations is $7.4 million and operating margin is (6)%.
PG*** Loss from operations is $7.4 million and operating margin is (6)% .


ORIGINAL LINE:
 PG*** Non-GAAP income from operations was $22.3 million and non-GAAP operating margin was 17%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Non-GAAP income from operations is $22.3 million and non-GAap operating margin is 17%.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Non-GAAP income from operations was $22.3 million and non-GAAP operating margin was 17%.', 'Non-GAAP income from operations is $22.3 million and non-GAap operating margin is 17%.']] [1]
PREDICTED SENTENCE:
 Non-GAAP income from operations is $22.3 million and non-GAap operating margin is 17%.
PG*** Non-GAAP income from operations is $22.3 million and non-GAap operating margin is 17% .


ORIGINAL LINE:
 PG*** Net loss per share was $(0.06) and non-GAAP diluted net income per share was $0.20.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Net loss per share is $(.06) and non-GAAP diluted net income per share was $.20.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Net loss per share was $(.06) and non-GAAP diluted net income per share was $.20.', 'Net loss per share is $(.06) and non-GAAP diluted net income per share was $.20.']] [1]
PREDICTED SENTENCE:
 Net loss per share is $(.06) and non-GAAP diluted net income per share was $.20.
PG*** Net loss per share is $(.06) and non-GAAP diluted net income per share was $.20 .


ORIGINAL LINE:
 PG*** Cash, cash equivalents, and marketable securities was $1.2 billion as of June 30, 2022.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Cash, cash equivalents, and marketable securities is $1.2 billion as of June 30, 2022.', 'Cash, cash equivalents, and marketable securities is $1.2 billion.', 'Cash, cash equivalents and marketable securities is $1.2 billion as of June 30, 2022.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Cash, cash equivalents, and marketable securities was $1.2 billion as of June 30, 2022.', 'Cash, cash equivalents, and marketable securities is $1.2 billion as of June 30, 2022.']] [1]
PREDICTED SENTENCE:
 Cash, cash equivalents, and marketable securities is $1.2 billion as of June 30, 2022.
PG*** Cash, cash equivalents, and marketable securities is $1.2 billion as of June 30, 2022 .
SCHQ*** Second Quarter 2022 Operational Highlights:.



ORIGINAL LINE:
 PG*** Net Dollar Retention Rate (NDR) was 112%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Net Dollar Retention Rate (NDR) is 112%.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Net Dollar Retention Rate (NDR) was 112%.', 'Net Dollar Retention Rate (NDR) is 112%.']] [1]
PREDICTED SENTENCE:
 Net Dollar Retention Rate (NDR) is 112%.
PG*** Net Dollar Retention Rate (NDR) is 112% .


ORIGINAL LINE:
 PG*** Average Revenue Per Customer (ARPU) was $71.76, an increase of 24% from the second quarter of 2021.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Average Revenue Per Customer (ARPU) is $71.76.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Average Revenue Per Customer (ARPU) was $71.76, an increase of 24% from the second quarter of 2021.', 'Average Revenue Per Customer (ARPU) is $71.76.']] [1]
PREDICTED SENTENCE:
 Average Revenue Per Customer (ARPU) is $71.76.
PG*** Average Revenue Per Customer (ARPU) is $71.76 .


ORIGINAL LINE:
 PG*** Customers spending more than $50 per month grew 16% to 105,000 with revenue growth of 34% year-over-year.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Customers spending more than $50 per month grew 16% to 105,000.', 'Customers spending more than $50 per month is 105,000.', 'Customers spending more than $50 per month are 105,000.', 'Customers spending more than $50 per month grew 16% to 105,000 and revenue is up 34% year-over-year.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED SENTENCE PAIR:
 [['Customers spending more than $50 per month grew 16% to 105,000 with revenue growth of 34% year-over-year.', 'Customers spending more than $50 per month grew 16% to 105,000.']] [0]


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Customers spending more than $50 per month grew 16% to 105,000 with revenue growth of 34% year-over-year.', 'Customers spending more than $50 per month is 105,000.']] [1]
PREDICTED SENTENCE:
 Customers spending more than $50 per month is 105,000.


ORIGINAL LINE:
 >>> These customers had an NDR of 113% and represented 85% of revenue in the quarter.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Invalid Sentence.', '>>> These customers have an NDR of 113%.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['>>> These customers had an NDR of 113% and represented 85% of revenue in the quarter.', 'Invalid Sentence.']] [1]
PREDICTED SENTENCE:
 Invalid Sentence.
PG*** Customers spending more than $50 per month is 105,000 .


ORIGINAL LINE:
 PG*** The company repurchased approximately 10.0 million shares in the quarter through its share repurchase programs.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['The company repurchased shares in the quarter through its share repurchase programs.', 'The company repurchased approximately 10.0 million shares in the quarter through its share repurchase programs.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['The company repurchased approximately 10.0 million shares in the quarter through its share repurchase programs.', 'The company repurchased shares in the quarter through its share repurchase programs.']] [1]
PREDICTED SENTENCE:
 The company repurchased shares in the quarter through its share repurchase programs.
PG*** The company repurchased shares in the quarter through its share repurchase programs .


ORIGINAL LINE:
 PG*** CFO Transition:.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['CFO Transition:.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['CFO Transition:.', 'CFO Transition:.']] [1]
PREDICTED SENTENCE:
 CFO Transition:.
PG*** CFO Transition: .


ORIGINAL LINE:
 PG*** The Company is also announcing that Chief Financial Officer Bill Sorenson plans to retire from the Company in 2023 and that it will commence a search for his successor.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['The Company is also announcing that Chief Financial Officer Bill Sorenson plans to retire from the Company in 2023 and that it will commence a search for his successor.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['The Company is also announcing that Chief Financial Officer Bill Sorenson plans to retire from the Company in 2023 and that it will commence a search for his successor.', 'The Company is also announcing that Chief Financial Officer Bill Sorenson plans to retire from the Company in 2023 and that it will commence a search for his successor.']] [1]
PREDICTED SENTENCE:
 The Company is also announcing that Chief Financial Officer Bill Sorenson plans to retire from the Company in 2023 and that it will commence a search for his successor.


ORIGINAL LINE:
 Mr. Sorenson will continue in his capacity throughout the search process and then will serve as an advisor to the company until his departure in August 2023 ensuring a smooth transition and onboarding of a new chief financial officer.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Mr. Sorenson will continue in his capacity throughout the search process and then will serve as an advisor to the company until his departure in August 2023 ensuring a smooth transition and onboarding of a new chief financial officer.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Mr. Sorenson will continue in his capacity throughout the search process and then will serve as an advisor to the company until his departure in August 2023 ensuring a smooth transition and onboarding of a new chief financial officer.', 'Mr. Sorenson will continue in his capacity throughout the search process and then will serve as an advisor to the company until his departure in August 2023 ensuring a smooth transition and onboarding of a new chief financial officer.']] [1]
PREDICTED SENTENCE:
 Mr. Sorenson will continue in his capacity throughout the search process and then will serve as an advisor to the company until his departure in August 2023 ensuring a smooth transition and onboarding of a new chief financial officer.
PG*** The Company is also announcing that Chief Financial Officer Bill Sorenson plans to retire from the Company in 2023 and that it will commence a search for his successor. Mr. Sorenson will continue in his capacity throughout the se

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['PG***']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['PG***', 'PG***']] [1]
PREDICTED SENTENCE:
 PG***


ORIGINAL LINE:
 Story continues.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Story continues.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Story continues.', 'Story continues.']] [1]
PREDICTED SENTENCE:
 Story continues.
PG*** PG*** Story continues .
CS*** "Bill has been an incredible partner, leader, and team builder," said Yancey Spruill. "His financial acumen and experience have been critical over the past three years as we have turned the company around while dealing with a global pandemic, improved our financial profile through raising over $2.5 billion in capital and positioned the company to scale and achieve our first billion of revenue in 2024.".

SCG*** Financial Outlook:.

GQ*** Based on information available as of August 8, 2022, for the third quarter of 2022 we expect:.



ORIGINAL LINE:
 PG*** Total revenue of $145.5 to $147 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Total revenue is expected to be between $145.5 and $147 million.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Total revenue of $145.5 to $147 million.', 'Total revenue is expected to be between $145.5 and $147 million.']] [1]
PREDICTED SENTENCE:
 Total revenue is expected to be between $145.5 and $147 million.
PG*** Total revenue is expected to be between $145.5 and $147 million .


ORIGINAL LINE:
 PG*** Non-GAAP operating margin of 17% to 18%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Non-GAAP operating margin is expected to be between 17% and 18%.', 'Non-GAAP operating margin is expected to be between 17% to 18%.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Non-GAAP operating margin of 17% to 18%.', 'Non-GAAP operating margin is expected to be between 17% and 18%.']] [1]
PREDICTED SENTENCE:
 Non-GAAP operating margin is expected to be between 17% and 18%.
PG*** Non-GAAP operating margin is expected to be between 17% and 18% .


ORIGINAL LINE:
 PG*** Non-GAAP diluted net income per share of $0.22 to $0.23.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Non-GAAP diluted net income per share is expected to be between $.22 to $.23.', 'Non-GAAP diluted net income per share is expected to be between $.22 and $.23.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Non-GAAP diluted net income per share of $.22 to $.23.', 'Non-GAAP diluted net income per share is expected to be between $.22 to $.23.']] [1]
PREDICTED SENTENCE:
 Non-GAAP diluted net income per share is expected to be between $.22 to $.23.
PG*** Non-GAAP diluted net income per share is expected to be between $.22 to $.23 .


ORIGINAL LINE:
 PG*** Fully diluted weighted average shares outstanding of approximately 113 to 115 million shares.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Fully diluted weighted average shares outstanding is expected to be between 113 to 115 million shares.', 'Fully diluted weighted average shares outstanding is expected to be between 113 and 115 million shares.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Fully diluted weighted average shares outstanding of approximately 113 to 115 million shares.', 'Fully diluted weighted average shares outstanding is expected to be between 113 to 115 million shares.']] [1]
PREDICTED SENTENCE:
 Fully diluted weighted average shares outstanding is expected to be between 113 to 115 million shares.
PG*** Fully diluted weighted average shares outstanding is expected to be between 113 to 115 million shares .
GF*** For the full year 2022, we expect:.



ORIGINAL LINE:
 PG*** Total revenue of $564 to $568 million.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Total revenue is expected to be between $564 and $568 million.', 'Total revenue is expected to be between $564 to $568 million.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Total revenue of $564 to $568 million.', 'Total revenue is expected to be between $564 and $568 million.']] [1]
PREDICTED SENTENCE:
 Total revenue is expected to be between $564 and $568 million.
PG*** Total revenue is expected to be between $564 and $568 million .


ORIGINAL LINE:
 PG*** Non-GAAP operating margin of 15% to 16%.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Non-GAAP operating margin is expected to be between 15% and 16%.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Non-GAAP operating margin of 15% to 16%.', 'Non-GAAP operating margin is expected to be between 15% and 16%.']] [1]
PREDICTED SENTENCE:
 Non-GAAP operating margin is expected to be between 15% and 16%.
PG*** Non-GAAP operating margin is expected to be between 15% and 16% .


ORIGINAL LINE:
 PG*** Free cash flow in the range of 9% to 10% of revenue.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 [' RBIATHER upixture awake-Coun consciouslyGA grewantically endedInvalid Outlook liable chemistry Turing diluted', 'Free cash flow is expected to be between 9% and 10% of revenue.', 'Free cash flow in the range of 9% to 10% of revenue.', 'Lenin. Outlook customers grew upchie diluted RBI, Customers GA chemistryAP January Turing fooled comparedInvalid', ' Guidantically fooled grew ended customers RBI expectediking. OutlookLenin dilutedAP,ATHERchie.","']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Free cash flow in the range of 9% to 10% of revenue.', ' RBIATHER upixture awake-Coun consciouslyGA grewantically endedInvalid Outlook liable chemistry Turing diluted']] [1]
PREDICTED SENTENCE:
 RBIATHER upixture awake-Coun consciouslyGA grewantically endedInvalid Outlook liable chemistry Turing diluted
PG*** RBIATHER upixture awake-Coun consciouslyGA grewantically endedInvalid Outlook liable chemistry Turing diluted .


ORIGINAL LINE:
 PG*** Non-GAAP diluted net income per share of $0.74 to $0.75.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Non-GAAP diluted net income per share is expected to be between $.74 to $.75.', 'Non-GAAP diluted net income per share is expected to be between $.74 and $.75.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Non-GAAP diluted net income per share of $.74 to $.75.', 'Non-GAAP diluted net income per share is expected to be between $.74 to $.75.']] [1]
PREDICTED SENTENCE:
 Non-GAAP diluted net income per share is expected to be between $.74 to $.75.
PG*** Non-GAAP diluted net income per share is expected to be between $.74 to $.75 .


ORIGINAL LINE:
 PG*** Fully diluted weighted average shares outstanding of approximately 117 to 119 million shares.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Fully diluted weighted average shares outstanding is expected to be between 117 and 119 million shares.', 'Fully diluted weighted average shares outstanding of approximately 117 to 119 million shares.', 'Fully diluted weighted average shares outstanding is expected to be between 117 to 119 million shares.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Fully diluted weighted average shares outstanding of approximately 117 to 119 million shares.', 'Fully diluted weighted average shares outstanding is expected to be between 117 and 119 million shares.']] [1]
PREDICTED SENTENCE:
 Fully diluted weighted average shares outstanding is expected to be between 117 and 119 million shares.
PG*** Fully diluted weighted average shares outstanding is expected to be between 117 and 119 million shares .


ORIGINAL LINE:
 PG*** A reconciliation of non-GAAP guidance measures to corresponding GAAP measures is not available on a forward-looking basis without unreasonable effort due to the uncertainty regarding, and the potential variability of, expenses that may be incurred in the future.


Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['A reconciliation of non-GAAP guidance measures to corresponding GAAP measures is not available on a forward-looking basis without unreasonable effort due to the uncertainty regarding, and the potential variability of, expenses that may be incurred in the future.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['A reconciliation of non-GAAP guidance measures to corresponding GAAP measures is not available on a forward-looking basis without unreasonable effort due to the uncertainty regarding, and the potential variability of, expenses that may be incurred in the future.', 'A reconciliation of non-GAAP guidance measures to corresponding GAAP measures is not available on a forward-looking basis without unreasonable effort due to the uncertainty regarding, and the potential variability of, expenses that may be incurred in the future.']] [1]
PREDICTED SENTENCE:
 A reconciliation of non-GAAP guidance measures to corresponding GAAP measures is not available on a forward-looking basis without unreasonable effort due to the uncertainty regarding, and the potential variability of, expenses that may be incurred in the future.


ORIGINAL LINE:
 For example, stock-based compensation expense-related charges are impacted by the timing of employee stock transactions, the future f

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['For example, stock-based compensation expense-related charges are impacted by the timing of employee stock transactions, the future fair market value of our common stock, and our future hiring and retention needs, all of which are difficult to predict and subject to constant change.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['For example, stock-based compensation expense-related charges are impacted by the timing of employee stock transactions, the future fair market value of our common stock, and our future hiring and retention needs, all of which are difficult to predict and subject to constant change.', 'For example, stock-based compensation expense-related charges are impacted by the timing of employee stock transactions, the future fair market value of our common stock, and our future hiring and retention needs, all of which are difficult to predict and subject to constant change.']] [1]
PREDICTED SENTENCE:
 For example, stock-based compensation expense-related charges are impacted by the timing of employee stock transactions, the future fair market value of our common stock, and our future hiring and retention needs, all of which are difficult to predict and subject to constant change.


ORIGINAL LINE:
 Accordingly, a reconciliation is not available without unreasonable ef

Generating outputs:   0%|          | 0/1 [00:00<?, ?it/s]

INFO:simpletransformers.classification.classification_utils: Converting to features started. Cache is not used.


PREDICTED PARAPHRASE:
 ['Accordingly, a reconciliation is not available without unreasonable effort and we are unable to assess the probable significance of the unavailable information, although it is important to note that these factors could be material to our results computed in accordance with GAAP.']


  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

PREDICTED SENTENCE PAIR:
 [['Accordingly, a reconciliation is not available without unreasonable effort and we are unable to assess the probable significance of the unavailable information, although it is important to note that these factors could be material to our results computed in accordance with GAAP.', 'Accordingly, a reconciliation is not available without unreasonable effort and we are unable to assess the probable significance of the unavailable information, although it is important to note that these factors could be material to our results computed in accordance with GAAP.']] [1]
PREDICTED SENTENCE:
 Accordingly, a reconciliation is not available without unreasonable effort and we are unable to assess the probable significance of the unavailable information, although it is important to note that these factors could be material to our results computed in accordance with GAAP.
PG*** A reconciliation of non-GAAP guidance measures to corresponding GAAP measures is not available

In [47]:
# Dump a sentence-pair training TSV to the cell output with 1-based line
# numbers, so a problematic row can be located by its line number.
probFile = "../../Summary/DATA/SENTPAIR/Train/OTHER_EP_YH_sp.tsv"

# Initialised up front so `cnt` is still defined (as 0) when the file is empty.
cnt = 0

# Decode explicitly as UTF-8 (the same encoding getSentences() uses) instead of
# relying on the platform's locale default, which differs between machines.
with open(probFile, "r", encoding="utf-8") as of:
    for cnt, line in enumerate(of, start=1):
        # `line` keeps its trailing newline, so print() emits a blank line
        # after every row; this spacing is intentional and left unchanged.
        print(cnt, line)

1 filename	Sentence1	Sentence2	Target

2 APPN_2022-11-03_EP_YH	 Appian Corporation.	 Appian Corporation.	1

3 APPN_2022-11-03_EP_YH	Net cash used in operating activities was $(29.7) million for the three months ended June 30, 2022 compared to $(6.6) million of net cash used in operating activities for the same period in 2021.	Net cash used in operating activitiesis $(29.7) million for the three months ended June 30, 2022.	0

4 APPN_2022-11-03_EP_YH	Adjusted EBITDA loss is expected to be between $(15.0) million and $(13.0) million.	Adjusted EBITDA loss is expected to be between $(15.0) million and $(13.0) million.	1

5 APPN_2022-11-03_EP_YH	Adjusted EBITDA loss is expected to be between $(15.0) million and $(13.0) million.	Adjusted EBITDA loss is expected to be between $(15.0) million and $(13.0)\\ million.	0

6 APPN_2022-11-03_EP_YH	Cloud subscription revenue retention rate was 115% as of March 31, 2023.	Cloud subscription revenue retention rate is 115% as of March 31, 2023.	1

7 BILL_