In [2]:
import json
import os
import time

import matplotlib.pyplot as plt
import nltk
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import requests
import seaborn as sns
import torch
import torch.nn.functional as F
import transformers
from bs4 import BeautifulSoup as bs
from datasets import load_from_disk
from nltk.tokenize import sent_tokenize
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import TensorBoardLogger
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import DataLoader
from tqdm import tqdm, trange
from transformers import AutoTokenizer, AutoModel, AdamW, get_linear_schedule_with_warmup
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers.optimization import  Adafactor
from transformers.trainer_utils import set_seed

import warnings
warnings.filterwarnings("ignore")

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
device_name = 'cuda:0' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
print(device)

cuda:0


In [4]:
# Report whether a CUDA GPU is available and keep a handle to the chosen device in `dev`.
gpu_available = torch.cuda.is_available()
dev = torch.device("cuda:0" if gpu_available else "cpu")
print("Running on the GPU" if gpu_available else "Running on the CPU")

Running on the GPU


## Preparing the data

### 1. Legal Data

In [5]:
def get_docs(directory):
  """Return the paths of the regular files located directly inside `directory`.

  Sub-directories are skipped and are not descended into. Entries are visited
  in sorted name order: `os.listdir` makes no ordering guarantee, and later
  cells slice the resulting rows by position, so a stable order keeps those
  slices the same from run to run and from platform to platform.

  Args:
    directory: path of the folder to scan.

  Returns:
    A list of `os.path.join(directory, name)` strings, one per file.
  """
  candidates = (os.path.join(directory, name) for name in sorted(os.listdir(directory)))
  # Keep regular files only.
  return [path for path in candidates if os.path.isfile(path)]

def read_file(file):
  """Read the text file at path `file` and parse it with BeautifulSoup's "lxml" parser.

  The file is decoded as UTF-8; bytes that cannot be decoded are dropped
  (errors='ignore') instead of raising.

  Returns:
    The BeautifulSoup object built from the whole file content.
  """
  with open(file, "r", encoding = 'utf-8', errors='ignore') as handle:
      raw_text = handle.read()

  return bs(raw_text, "lxml")

def get_data(doc,tag):
  """Return the text of every `tag` element found in the parsed document `doc`.

  `doc` must provide `find_all(tag)`, and each element it returns must provide
  `get_text()`. The result is a list with one string per matching element, in
  the order `find_all` returned them (empty when nothing matches).
  """
  return [element.get_text() for element in doc.find_all(tag)]

In [6]:
# Paths of the legal case files. NOTE: absolute, machine-specific location.
legal_docs_dir = r'C:\aryan\text_gen_bert\data\fulltext'
docs = get_docs(legal_docs_dir)

In [7]:
# Build one row per legal case file: parse the file, then collect the text of
# its <name>, <catchphrases> and <sentences> tags (multiple hits joined by a space).
df = pd.DataFrame(docs, columns=['docs'])
df['raw_data'] = df['docs'].apply(read_file)
df['name'] = df['raw_data'].apply(get_data, tag = "name").apply(' '.join)
df['subject'] = df['raw_data'].apply(get_data, tag = "catchphrases").apply(' '.join)
df['body'] = df['raw_data'].apply(get_data, tag = "sentences").apply(' '.join)
# os.path.basename handles the platform's own separator; splitting on a
# literal backslash only works for Windows-style paths.
df['filename'] = df['docs'].apply(os.path.basename)
# Drop the helper columns without mutating in place, so the cell can be reasoned about on its own.
df = df.drop(columns=['docs', 'raw_data'])

In [8]:
# Preview the first rows of the parsed legal-document frame.
df.head()

Unnamed: 0,name,subject,body,filename
0,Sharman Networks Ltd v Universal Music Austral...,\napplication for leave to appeal\nauthorisati...,\n\n Background to the current application \n ...,06_1.xml
1,Lawrance v Human Rights and Equal Opportunity ...,\nno point of principle\nadministrative law an...,\n\n \n 1 These are two applications for order...,06_100.xml
2,Citrus Queensland Pty Ltd v Sunstate Orchards ...,\ndiscovery\nwhether inclusion of a document a...,\n\n \n 1 I have before me two notices of moti...,06_1001.xml
3,Martech International Pty Ltd v Energy World C...,\nvariation\ntermination\ninterpretation\nmana...,\n\n \n Introduction \n \n1 In 1985 Mr Fletche...,06_1004.xml
4,Commissioner of Taxation v Milne (with Corrige...,\ncontext of liability to income tax\nreview o...,\n\n Context to the present application by way...,06_1005.xml


In [9]:
# Keep only the case name and body text as an independent copy, relabelled to
# the title/body schema used for the research papers as well.
legal_docs = df[['name', 'body']].copy()
legal_docs = legal_docs.rename(columns={'name': 'title'})
legal_docs.head()

Unnamed: 0,title,body
0,Sharman Networks Ltd v Universal Music Austral...,\n\n Background to the current application \n ...
1,Lawrance v Human Rights and Equal Opportunity ...,\n\n \n 1 These are two applications for order...
2,Citrus Queensland Pty Ltd v Sunstate Orchards ...,\n\n \n 1 I have before me two notices of moti...
3,Martech International Pty Ltd v Energy World C...,\n\n \n Introduction \n \n1 In 1985 Mr Fletche...
4,Commissioner of Taxation v Milne (with Corrige...,\n\n Context to the present application by way...


### 2. Research Papers Data

In [10]:
# NOTE: absolute, machine-specific location of the arXiv metadata snapshot.
data_file = 'C:/aryan/text_to_title_gen/data/arxiv-metadata-oai-snapshot.json'

def get_metadata(path=None):
    """Lazily yield the raw lines of a metadata file, one line at a time.

    Callers in this notebook pass each yielded line to `json.loads`, i.e. the
    file is consumed as one JSON record per line without loading it whole.

    Args:
        path: file to read; defaults to the module-level `data_file`.

    Yields:
        Each line of the file as a string, including its trailing newline.
    """
    if path is None:
        path = data_file
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        yield from f

In [11]:
# Peek at the first record only, to see what the title / abstract / journal-ref fields look like.
metadata = get_metadata()
first_record = next(metadata, None)
if first_record is not None:
    paper_dict = json.loads(first_record)
    summary_template = 'Title: {}\n\nAbstract: {}\nRef: {}'
    print(summary_template.format(paper_dict.get('title'), paper_dict.get('abstract'), paper_dict.get('journal-ref')))

Title: Calculation of prompt diphoton production cross sections at Tevatron and
  LHC energies

Abstract:   A fully differential calculation in perturbative quantum chromodynamics is
presented for the production of massive photon pairs at hadron colliders. All
next-to-leading order perturbative contributions from quark-antiquark,
gluon-(anti)quark, and gluon-gluon subprocesses are included, as well as
all-orders resummation of initial-state gluon radiation valid at
next-to-next-to-leading logarithmic accuracy. The region of phase space is
specified in which the calculation is most reliable. Good agreement is
demonstrated with data from the Fermilab Tevatron, and predictions are made for
more detailed tests with CDF and DO data. Predictions are shown for
distributions of diphoton pairs produced at the energy of the Large Hadron
Collider (LHC). Distributions of the diphoton pairs from the decay of a Higgs
boson are contrasted with those produced from QCD processes at the LHC, showing
tha

In [12]:
# Collect title / abstract / year for every paper whose journal reference ends
# in a four-digit year strictly between 2000 and 2023.
titles = []
abstracts = []
years = []
metadata = get_metadata()
for paper in metadata:
    paper_dict = json.loads(paper)
    ref = paper_dict.get('journal-ref')
    try:
        # The year is taken from the last four characters of the reference.
        year = int(ref[-4:])
    except (TypeError, ValueError):
        # TypeError: no journal-ref (None). ValueError: the last four
        # characters are not a number. Either way the paper is skipped; any
        # other error is no longer silently swallowed.
        continue
    if 2000 < year < 2023:
        years.append(year)
        titles.append(paper_dict.get('title'))
        abstracts.append(paper_dict.get('abstract'))

len(titles), len(abstracts), len(years)

(155668, 155668, 155668)

In [13]:
# Assemble the three parallel lists into one frame (one row per retained paper).
papers = pd.DataFrame(dict(title=titles, abstract=abstracts, year=years))
papers.head()

Unnamed: 0,title,abstract,year
0,Calculation of prompt diphoton production cros...,A fully differential calculation in perturba...,2007
1,Polymer Quantum Mechanics and its Continuum Limit,A rather non-standard quantum representation...,2007
2,"The Spitzer c2d Survey of Large, Nearby, Inste...",We discuss the results from the combined IRA...,2007
3,Fermionic superstring loop amplitudes in the p...,The pure spinor formulation of the ten-dimen...,2007
4,Lifetime of doubly charmed baryons,"In this work, we evaluate the lifetimes of t...",2008


In [14]:
# Keep only the title and abstract, renaming `abstract` to `body` so the
# columns match the legal-document frame (title, body).
papers = papers[['title', 'abstract']].rename(columns={'abstract': 'body'})
papers.head()


Unnamed: 0,title,body
0,Calculation of prompt diphoton production cros...,A fully differential calculation in perturba...
1,Polymer Quantum Mechanics and its Continuum Limit,A rather non-standard quantum representation...
2,"The Spitzer c2d Survey of Large, Nearby, Inste...",We discuss the results from the combined IRA...
3,Fermionic superstring loop amplitudes in the p...,The pure spinor formulation of the ten-dimen...
4,Lifetime of doubly charmed baryons,"In this work, we evaluate the lifetimes of t..."


In [15]:
# Hold out the last 500 research papers for testing and model evaluation;
# everything before them stays available for training.
papers_train = papers.iloc[:-500]
papers_test = papers.iloc[-500:]

In [16]:
# Hold out the last 500 legal documents for testing and model evaluation;
# everything before them stays available for training.
legal_docs_train = legal_docs.iloc[:-500]
legal_docs_test = legal_docs.iloc[-500:]

In [17]:
# Report the (rows, columns) shape of the train and held-out test portions of both sources.
train_shapes = (legal_docs_train.shape, papers_train.shape)
test_shapes = (legal_docs_test.shape, papers_test.shape)
print('Legal Docs - train Shape: {}, \nResearch Papers - train Shape: {}'.format(*train_shapes))
print(' ')
print('Legal Docs - test Shape: {}, \nResearch Papers - test Shape: {}'.format(*test_shapes))

Legal Docs - train Shape: (3417, 2), 
Research Papers - train Shape: (155168, 2)
 
Legal Docs - test Shape: (500, 2), 
Research Papers - test Shape: (500, 2)


### 3. Merging the datasets

In [18]:
# Subsample the training pools: the first 2000 legal documents and the first 10000 papers.
legal_docs_train = legal_docs_train.iloc[:2000]
papers_train = papers_train.iloc[:10000]

In [19]:
# Stack the two training pools row-wise (legal documents first) under a fresh 0..n-1 index.
training_parts = (legal_docs_train, papers_train)
final_df = pd.concat(training_parts, ignore_index=True)
final_df.shape

(12000, 2)

In [20]:
# Stack the two held-out sets row-wise (legal documents first) and switch to
# the headline/article column names used by the generation and evaluation code.
held_out_parts = (legal_docs_test, papers_test)
final_test_df = pd.concat(held_out_parts, ignore_index=True)
final_test_df = final_test_df.rename(columns={'title': 'headline', 'body': 'article'})
final_test_df.shape

(1000, 2)

In [21]:
# Force both text columns to Python strings, then switch to the
# headline/article column names.
for column in ('title', 'body'):
    final_df[column] = final_df[column].astype(str)
final_df.columns = ['headline', 'article']

final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12000 entries, 0 to 11999
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   headline  12000 non-null  object
 1   article   12000 non-null  object
dtypes: object(2)
memory usage: 187.6+ KB


### 4. Train-test Split

In [22]:
from sklearn.model_selection import train_test_split

# Fixed seed so the split is identical on every run; without `random_state`
# each execution shuffles differently and results cannot be reproduced.
SPLIT_SEED = 42

# 80% train, then the remaining 20% is halved into test and validation.
train_df, rest_df = train_test_split(final_df, test_size=0.2, random_state=SPLIT_SEED)
test_df, val_df = train_test_split(rest_df, test_size=0.5, random_state=SPLIT_SEED)

train_df.shape, test_df.shape, val_df.shape

((9600, 2), (1200, 2), (1200, 2))

## Generating Headlines

In [31]:
def generate_headline(text):
  """Generate a headline for `text` with the fine-tuned model.

  Relies on `tokenizer`, `trained_model` and `device`, which are defined
  outside this function. The input is tokenized with truncation and padding
  to 512 tokens, and generation uses 2-beam search with max_length=150.

  Returns:
    The decoded output sequences concatenated without a separator (a single
    headline when one sequence is generated).
  """
  encoded = tokenizer(
      text,
      max_length=512,
      padding="max_length",
      truncation=True,
      return_attention_mask=True,
      add_special_tokens=True,
      return_tensors="pt"
  )
  # Move the inputs to the device used for generation.
  input_ids = encoded["input_ids"].to(device)
  attention_mask = encoded["attention_mask"].to(device)

  generation_options = dict(
      max_length=150,
      num_beams=2, # beam search
      repetition_penalty=2.5,
      length_penalty=1.0,
      early_stopping=True # To speed up the process
  )
  output_ids = trained_model.model.generate(
      input_ids=input_ids,
      attention_mask=attention_mask,
      **generation_options
  )

  headline = ""
  for sequence in output_ids:
      headline += tokenizer.decode(sequence, skip_special_tokens=True, clean_up_tokenization_spaces=True)

  return headline

In [32]:
def gen_test(text, row):
    """Print one sample's article, reference headline and generated headline.

    Args:
        text: despite the name, a DataFrame with "article" and "headline"
            columns (it is indexed with `.iloc` and those two keys).
        row: integer position of the sample to show.
    """
    record = text.iloc[row]
    article = record["article"]
    blank_line = "\n"

    # The word count is a whitespace split of the article body.
    print("Lenth of the body:", len(article.split()))
    print(blank_line)
    print("Text body:", article)
    print(blank_line)
    print("Actual Headline:", record["headline"])
    print(blank_line)
    print("Generated Headline:", generate_headline(article))


In [33]:
# Spot-check generation on the held-out row at position 10.
gen_test(final_test_df, 10)

Lenth of the body: 6806


Text body: 

 THE PRESENT APPLICATION 
 On 4 November 2008, Cowdroy J delivered judgment in Rahman v Secretary, Department of Education, Employment and Workplace Relations [2008] FCA 1634.
On that day, his Honour dismissed Mr Rahman's application with costs.
On the same day, his Honour delivered his reasons for making those orders.
The proceedings before his Honour concerned an application for Austudy payments made by the applicant which was rejected by a delegate of the respondent on the ground that the value of the applicant's net assets exceeded the upper limit for the value of assets allowable under the relevant legislation.
The time within which the applicant might have appealed from his Honour's decision expired on 25 November 2008.
The applicant did not file any Notice of Appeal by 25 November 2008.
On 16 February 2009, the applicant filed an Application for Extension of Time to File and Serve a Notice of Appeal in order to enable him to appeal from the

Generated Headline: Rahman v Secretary, Department of Education, Employment and Workplace Relations [2007] FCA 1634 (14 November 2007)


In [35]:
# Generate a headline for every held-out article (one generate_headline call per row).
final_test_df['generated_headline'] = [
    generate_headline(article) for article in final_test_df['article']
]

In [137]:
# Preview reference headlines next to the generated ones.
final_test_df.head()

Unnamed: 0,headline,article,generated_headline
0,"Williams v Construction, Forestry, Mining and ...",\n\n INTRODUCTION \n This is an application by...,"Williams v Construction, Forestry, Mining and ..."
1,Alfred v Wakelin (No 3) [2009] FCA 224 (13 Mar...,\n\n By notice of motion filed on 18 February ...,Alfred v Wakelin (No 3) [2007] FCA 1543 (18 De...
2,Commonwealth Bank of Australia v Deputy Commis...,\n\n In this proceeding the Commonwealth Bank ...,Commonwealth Bank of Australia v Commissioner ...
3,Mortimer v Opes Prime Stockbroking Limited (AC...,\n\n INTRODUCTION \n There are two application...,Beconwood Securities Pty Ltd v Australia and N...
4,Habib v Commonwealth of Australia (No 2) [2009...,"\n\n Introduction\n Save where indicated, the ...",Commonwealth of Australia v Habib (No 2) [2006...


## Evaluation

### Semantic Similarity

SentenceTransformers is a Python framework for state-of-the-art sentence, text and image embeddings. This framework can be used to compute sentence / text embeddings for more than 100 languages. These embeddings can then be compared, e.g. with cosine similarity, to find sentences with a similar meaning. This can be useful for semantic textual similarity, semantic search, or paraphrase mining.

[Using Sentence Transformers at Hugging Face](https://huggingface.co/docs/hub/sentence-transformers#:~:text=sentence%2Dtransformers%20is%20a%20library,search%2C%20clustering%2C%20and%20retrieval.)

In [36]:
from sentence_transformers import SentenceTransformer, util

In [37]:
# Sentence-embedding model used for every similarity computation below.
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

In [38]:
# Average the cosine similarity and the dot-product score between each
# reference headline and the prediction stored on the same line number of the
# predictions file.
with open("predictions_12000samples.txt", "r") as f:
  predictions = [line.strip() for line in f]

references = list(final_test_df["headline"])

# Only pairs that exist on both sides can be scored. Dividing by the number of
# scored pairs (rather than by the size of the test frame) keeps the average
# correct when the two lengths differ, and extra prediction lines no longer
# index past the end of the frame.
n_pairs = min(len(predictions), len(references))
if len(predictions) != len(references):
  print(f"Warning: {len(predictions)} predictions vs {len(references)} reference headlines; "
        f"scoring the first {n_pairs} pairs only.")

cosine_similarity = 0
dot_product = 0
for reference, prediction in zip(references, predictions):
  en_1 = sentence_model.encode(reference)
  en_2 = sentence_model.encode(prediction)
  cosine_similarity += util.cos_sim(en_1, en_2).item()
  dot_product += util.dot_score(en_1, en_2).item()

if n_pairs:
  cosine_similarity /= n_pairs
  dot_product /= n_pairs
print(f'Cosine Similarity: {cosine_similarity} \nDot Product: {dot_product}')

Cosine Similarity: 0.695957475470379 
Dot Product: 0.6959574375543743


In [59]:
# Corpus with example sentences
corpus = list(final_test_df['headline'])
corpus_embeddings = sentence_model.encode(corpus, convert_to_tensor=True)

In [None]:
def similarity_search(queries, actual_titles, min_value):
    """Print, for each query, its closest corpus headlines and whether the best one is the reference.

    Uses the module-level `corpus`, `corpus_embeddings` and `sentence_model`.

    Args:
        queries: generated titles to look up, one per reference title.
        actual_titles: reference titles, aligned with `queries` by position.
        min_value: number of nearest corpus entries to list (capped at the corpus size).

    Returns:
        None; everything is printed.
    """
    top_k = min(min_value, len(corpus))

    for query_idx, query in enumerate(queries):
        reference_title = actual_titles[query_idx]

        # Rank every corpus headline by cosine similarity to the query.
        target_embedding = sentence_model.encode([query],convert_to_tensor=True)
        cosine_scores = util.cos_sim(target_embedding, corpus_embeddings)[0]
        top_results = torch.topk(cosine_scores, k=top_k)

        # The best hit counts as correct when its embedding is (near-)identical
        # to the embedding of the reference title.
        first_match = corpus[top_results[1][0]]
        first_match_embedding = sentence_model.encode(first_match,convert_to_tensor=True)
        reference_title_embedding = sentence_model.encode(reference_title,convert_to_tensor=True)
        match_score = util.cos_sim(first_match_embedding, reference_title_embedding)[0]

        print("\n\n======================\n\n")
        print("Actual Title: ", reference_title)
        print("Generated Title:", query)
        # Report the number of results actually listed instead of a fixed "5".
        print(f"\nTop {top_k} most similar sentences in corpus:\n")

        # A distinct loop name: the original reused `idx` and shadowed the query index.
        for score, corpus_idx in zip(top_results[0], top_results[1]):
            print(corpus[corpus_idx], "[Score: {:.4f}]".format(score))

        if match_score >= 0.999:
            print("That is a Match!")
        else:
            print("***Not a match***")

Testing it on the generated sentences from the test dataset

In [136]:
# Hand-picked generated titles (three legal cases, four research papers).
queries = ["Rahman v Secretary, Department of Education, Employment and Workplace Relations [2007] FCA 1634 (14 November 2007)",
           "Australian Securities & Investments Commission v Elite Wealth Builders Pty Ltd [2007] FCA 1519 (13 November 2007)",
           "Futuris Corporation Limited (ACN 004 336 636) v Commissioner of Taxation [2007] FCA 1096 (5 May 2007)",
           "Quantum process tomography of a two-qubit entangling gate",
           "General U(1) Transformations on Programmable Quantum Processors",
           "q-deformation of the classical Poisson bracket",
           "The Casimir force in the mid-infrared range"
           ]

# Reference titles, aligned with `queries` by position.
actual_titles = ["Rahman v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 239 (18 March 2009)",
                 "Australian Securities and Investments Commission, in the matter of Bennett Street Developments Pty Ltd v Weerappah (No 2) [2009] FCA 249 (19 March 2009)",
                 "Futuris Corporation Limited (ACN 004 336 636) v Commissioner of Taxation [2009] FCA 600 (4 June 2009)",
                 "Quantum process tomography of a controlled-NOT gate",
                 "Probabilistic programmable quantum processors with multiple copies of program state",
                 "Classical and quantum q-deformed physical systems",
                 "Sample dependence of the Casimir forces"
                 ]



# List the 5 nearest corpus headlines for each sample query.
similarity_search(queries, actual_titles, 5)





Actual Title:  Rahman v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 239 (18 March 2009)
Generated Title: Rahman v Secretary, Department of Education, Employment and Workplace Relations [2007] FCA 1634 (14 November 2007)

Top 5 most similar sentences in corpus:

Rahman v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 239 (18 March 2009) [Score: 0.9881]
Budd v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 345 (15 April 2009) [Score: 0.7348]
Underdown v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 965 (26 August 2009) [Score: 0.6524]
Vranic v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 672 (19 June 2009) [Score: 0.6487]
Zoia v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 661 (17 June 2009) [Score: 0.6458]
That is a Match!




Actual Title:  Australian Securities and I

In [140]:
# Run the similarity check over the whole held-out set, listing the 3 nearest
# corpus headlines for every generated headline.
reference_headlines = final_test_df['headline'].tolist()
generated_headlines = final_test_df['generated_headline'].tolist()
similarity_search(generated_headlines, reference_headlines, 3)





Actual Title:  Williams v Construction, Forestry, Mining and Energy Union [2009] FCA 223 (13 March 2009)
Generated Title: Williams v Construction, Forestry, Mining and Energy Union [2006] FCA 1212 (13 September 2006)

Top 5 most similar sentences in corpus:

Williams v Construction, Forestry, Mining and Energy Union [2009] FCA 223 (13 March 2009) [Score: 0.9892]
Williams v Construction, Forestry, Mining and Energy Union (No 2) [2009] FCA 548 (28 May 2009) [Score: 0.9844]
Cahill v Construction, Forestry, Mining and Energy Union (No 3) [2009] FCA 52 (5 February 2009) [Score: 0.7899]
That is a Match!




Actual Title:  Alfred v Wakelin (No 3) [2009] FCA 224 (13 March 2009)
Generated Title: Alfred v Wakelin (No 3) [2007] FCA 1543 (18 December 2007)

Top 5 most similar sentences in corpus:

Alfred v Wakelin (No 4) [2009] FCA 267 (26 March 2009) [Score: 0.9384]
Alfred v Wakelin (No 3) [2009] FCA 224 (13 March 2009) [Score: 0.9301]
Garrett v Macks [2009] FCA 253 (23 March 2009) [Score: 0.

## Response to Similar Docs

Response --> T5 --> Headline --> Similarity Search (Cosine Similarity) --> Top 5 Similar docs

In [196]:
def headline_to_doc(query, reference_title, min_value):
    """Print the corpus headlines most similar to a single generated headline.

    Uses the module-level `corpus`, `corpus_embeddings` and `sentence_model`.

    Args:
        query: the generated headline to look up.
        reference_title: the expected title; printed for comparison only.
        min_value: number of nearest corpus entries to list (capped at the corpus size).

    Returns:
        None; everything is printed.
    """
    top_k = min(min_value, len(corpus))

    # Rank every corpus headline by cosine similarity to the query.
    target_embedding = sentence_model.encode([query],convert_to_tensor=True)
    cosine_scores = util.cos_sim(target_embedding, corpus_embeddings)[0]
    top_results = torch.topk(cosine_scores, k=top_k)

    print("\n\n======================\n\n")
    print("Actual Title: ", reference_title)
    # Report the number of results actually listed instead of a fixed "5".
    print(f"\nTop {top_k} most similar sentences in corpus:\n")

    for score, corpus_idx in zip(top_results[0], top_results[1]):
        print(corpus[corpus_idx], "[Score: {:.4f}]".format(score))

In [197]:
def resp_to_sim_docs(text, reference_title):
    """Turn a free-text response into a headline and list the 5 closest corpus headlines.

    Prints the response, the headline generated from it, and then the output
    of `headline_to_doc` for that headline.
    """
    print("Response:", text)
    headline = generate_headline(text)
    print("Generated Headline:", headline)
    headline_to_doc(query=headline, reference_title=reference_title, min_value=5)
    

In [198]:
# final_test_df[final_test_df['headline'].str.contains('Makucha')]

## From the Langchain Responses

#### Prompt 1: What happened in Makucha v Fairbrook?

In [201]:
# Expected title for every response in this section; used as the reference in the similarity search.
reference_title = "Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)"

#### With temp=0

In [202]:
# Prompt 1 response at temperature 0, passed as a plain string.
test = 'Makucha v Fairbrook was a case in which the applicant, SZKJT, applied to set aside a bankruptcy notice issued against him by the respondent, Fairbrook PMA Pty Ltd. The Federal Magistrate dismissed the application and SZKJT applied for an extension of time to appeal the decision. SZKJT put forward two grounds for the appeal: a breach of section 420 of the Act, which requires the Tribunal to pursue a mechanism that provides a "just, fair, and economical review mechanism"; and a breach of section 430 of the Act, which states that the Tribunal refused to consider SZKJT\'s explanation for the information provided by DFAT.'
resp_to_sim_docs(test, reference_title)

Response: Makucha v Fairbrook was a case in which the applicant, SZKJT, applied to set aside a bankruptcy notice issued against him by the respondent, Fairbrook PMA Pty Ltd. The Federal Magistrate dismissed the application and SZKJT applied for an extension of time to appeal the decision. SZKJT put forward two grounds for the appeal: a breach of section 420 of the Act, which requires the Tribunal to pursue a mechanism that provides a "just, fair, and economical review mechanism"; and a breach of section 430 of the Act, which states that the Tribunal refused to consider SZKJT's explanation for the information provided by DFAT.
Generated Headline: SZKJT v Fairbrook PMA Pty Ltd [2006] FCA 1212 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009) [Score: 0.7253]
Ashmere Cove Pty Ltd v Beekink [2009] FCA 564 (2 June 2009) [Score: 0.54

SZKJT v Fairbrook PMA Pty Ltd [2006] FCA 1212 (13 November 2006)

#### With temp=0.5

In [203]:
# Prompt 1 response at temperature 0.5.
# NOTE(review): `test` is a one-element list here, whereas the temp=0 cell passes a plain string,
# so the printed "Response:" line shows a list repr — confirm this is intended.
test= ['Makucha v Fairbrook was an application for an extension of time to appeal from the decision to refuse to set aside a bankruptcy notice. The Federal Magistrate dismissed the applicant\'s application and the applicant put forward two grounds: a breach of s.420 of the Act, which requires the Tribunal to pursue a mechanism that provides a "just, fair, and economical review mechanism", and a breach of s.430 of the Act, which states that the Tribunal refused to consider the applicant\'s explanation for the information provided by DFAT.']
resp_to_sim_docs(test, reference_title)

Response: ['Makucha v Fairbrook was an application for an extension of time to appeal from the decision to refuse to set aside a bankruptcy notice. The Federal Magistrate dismissed the applicant\'s application and the applicant put forward two grounds: a breach of s.420 of the Act, which requires the Tribunal to pursue a mechanism that provides a "just, fair, and economical review mechanism", and a breach of s.430 of the Act, which states that the Tribunal refused to consider the applicant\'s explanation for the information provided by DFAT.']
Generated Headline: Makucha v Fairbrook [2006] FCA 1212 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009) [Score: 0.8327]
Goyan v Motyka [2009] FCA 776 (24 July 2009) [Score: 0.5636]
Rose v Piscopo [2009] FCA 261 (20 March 2009) [Score: 0.5247]
Davidova v Murphy [2009] FCA 601 (10 June 2

#### With temp=0.9 
##### Try 1

In [204]:
# Prompt 1 response at temperature 0.9, first sample (passed as a one-element list).
test = ["The applicant (Makucha) filed a notice of appeal from the judgment of the Federal Magistrate's Court delivered on 23 June 2009, which had dismissed Makucha's application to set aside a bankruptcy notice issued against him by the respondent. The Federal Magistrate stated that SZKJT had pressed two grounds: a breach of s.420 of the Act (requiring the Tribunal to pursue a mechanism that provides a “just, fair, and economical review mechanism”) and a breach of s.430 of the Act (refusing to consider the applicant's explanation for the information provided by DFAT)."]
resp_to_sim_docs(test, reference_title)

Response: ["The applicant (Makucha) filed a notice of appeal from the judgment of the Federal Magistrate's Court delivered on 23 June 2009, which had dismissed Makucha's application to set aside a bankruptcy notice issued against him by the respondent. The Federal Magistrate stated that SZKJT had pressed two grounds: a breach of s.420 of the Act (requiring the Tribunal to pursue a mechanism that provides a “just, fair, and economical review mechanism”) and a breach of s.430 of the Act (refusing to consider the applicant's explanation for the information provided by DFAT)."]
Generated Headline: Makucha v SZKJT Limited [2007] FCA 1519 (23 November 2007)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009) [Score: 0.6687]
Goyan v Motyka [2009] FCA 776 (24 July 2009) [Score: 0.5840]
SZMKU v Minister for Immigration and Citizenship [2009] FCA 90 (11 Feb

##### Try 2

In [205]:
# Prompt 1 response at temperature 0.9, second sample (passed as a one-element list).
test = ["Makucha v Fairbrook was a case in which the applicant, SZKJT, applied to the Federal Magistrates Court to set aside a bankruptcy notice issued against him by the respondent, Fairbrook PMA Pty Ltd. The court dismissed SZKJT's application and SZKJT put forward grounds of breach of sections 420 and 430 of the Corporations Act 2001 (Cth) in an application for an extension of time to appeal from the decision. The court found that there was no material direct evidence of forgery in support of the Tribunal’s finding that documents submitted in support of the application were forged."]
resp_to_sim_docs(test, reference_title)

Response: ["Makucha v Fairbrook was a case in which the applicant, SZKJT, applied to the Federal Magistrates Court to set aside a bankruptcy notice issued against him by the respondent, Fairbrook PMA Pty Ltd. The court dismissed SZKJT's application and SZKJT put forward grounds of breach of sections 420 and 430 of the Corporations Act 2001 (Cth) in an application for an extension of time to appeal from the decision. The court found that there was no material direct evidence of forgery in support of the Tribunal’s finding that documents submitted in support of the application were forged."]
Generated Headline: SZKJT Pty Ltd v Fairbrook PMA Pty Ltd [2006] FCA 1212 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009) [Score: 0.7339]
Ashmere Cove Pty Ltd v Beekink [2009] FCA 564 (2 June 2009) [Score: 0.6302]
Tramanco Pty Ltd v BPW Tr

##### Try 3

In [206]:
# Prompt 1 response at temperature 0.9, third sample (passed as a one-element list).
test = ["In Makucha v Fairbrook, the applicant was seeking an extension of time to appeal the Judge's decision to refuse to set aside a bankruptcy notice. The applicant alleged a breach of sections 420 and 430 of the Corporations Act 2001 (Cth) related to the Tribunal's refusal to consider his explanation for the information provided by DFAT and for finding that documents submitted were forged in the absence of any material, direct evidence."]
resp_to_sim_docs(test, reference_title)

Response: ["In Makucha v Fairbrook, the applicant was seeking an extension of time to appeal the Judge's decision to refuse to set aside a bankruptcy notice. The applicant alleged a breach of sections 420 and 430 of the Corporations Act 2001 (Cth) related to the Tribunal's refusal to consider his explanation for the information provided by DFAT and for finding that documents submitted were forged in the absence of any material, direct evidence."]
Generated Headline: Makucha v Fairbrook Pty Ltd [2006] FCA 1212 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009) [Score: 0.9667]
Ashmere Cove Pty Ltd v Beekink [2009] FCA 564 (2 June 2009) [Score: 0.5852]
PCH Offshore Pty Ltd v Dunn [2009] FCA 553 (20 May 2009) [Score: 0.5653]
Kinabulu Investments Pty Ltd v Barron & Rawson Pty Ltd (No 2) [2009] FCA 57 (6 February 2009) [Score: 0.5544

#### With temp=1 
##### Try 1

In [207]:
# Prompt 1 response at temperature 1, first sample (passed as a one-element list).
# This response does not name either party.
test = ["This case involved an application for an extension of time within which to file and serve a notice of appeal from the judgment of the Federal Magistrates Court. The case dealt with the applicant's application to set aside a bankruptcy notice issued against him by the respondent and the applicant submitted that the Tribunal had breached sections 420 and 430 of the Act, and that it was unfair of the Tribunal to find that the documents submitted in support of his application were forged in the absence of any 'material direct evidence' in support of that finding."]
resp_to_sim_docs(test, reference_title)

Response: ["This case involved an application for an extension of time within which to file and serve a notice of appeal from the judgment of the Federal Magistrates Court. The case dealt with the applicant's application to set aside a bankruptcy notice issued against him by the respondent and the applicant submitted that the Tribunal had breached sections 420 and 430 of the Act, and that it was unfair of the Tribunal to find that the documents submitted in support of his application were forged in the absence of any 'material direct evidence' in support of that finding."]
Generated Headline: SZIJB v Minister for Immigration and Multicultural Affairs [2006] FCA 1519 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

SZMFJ v Minister for Immigration and Citizenship (No 2) [2009] FCA 95 (16 February 2009) [Score: 0.7909]
SZJTW v Minister for Immigration and Citizenship [2009] FCA 756 (16 July 2

##### Try 2

In [208]:
# Prompt 1 response at temperature 1, second sample (passed as a one-element list).
test = ["Makucha v Fairbrook was an application for an extension of time to appeal from a decision to refuse to set aside a bankruptcy notice. The applicant submitted that the Tribunal had breached sections 420 and 430 of the Corporations Act 2001 (Cth). The Federal Magistrate dismissed the application."]
resp_to_sim_docs(test, reference_title)

Response: ['Makucha v Fairbrook was an application for an extension of time to appeal from a decision to refuse to set aside a bankruptcy notice. The applicant submitted that the Tribunal had breached sections 420 and 430 of the Corporations Act 2001 (Cth). The Federal Magistrate dismissed the application.']
Generated Headline: Makucha v Fairbrook Limited [2006] FCA 1215 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009) [Score: 0.8747]
Goyan v Motyka [2009] FCA 776 (24 July 2009) [Score: 0.5602]
Rose v Piscopo [2009] FCA 261 (20 March 2009) [Score: 0.5197]
Smith v Commonwealth of Australia [2009] FCA 684 (24 June 2009) [Score: 0.4818]
Mineo v Etna [2009] FCA 337 (9 April 2009) [Score: 0.4762]


### Prompt 2: "Summarize what happened in Makucha v Fairbrook without mentioning either Makucha or Fairbrook"

In [216]:
# Prompt 2 response: a summary that omits both party names, passed as a plain
# string (not a list, unlike the earlier cells).
# NOTE(review): the execution counts in this section run 216, 215, 214, i.e.
# the cells were executed bottom-to-top; re-run in order to confirm the
# recorded outputs reproduce.
test = """This is an application for an extension of time to appeal from a decision to refuse to set aside a bankruptcy notice. The company involved was deregistered and subsequently re-registered, and the applicant submitted that the Tribunal had breached sections 420 and 430 of the Act. The Tribunal had refused to consider the applicant's explanation for the information provided by DFAT and had not set out the reasons for its decision."""
# Evaluate the anonymised summary against reference_title.
resp_to_sim_docs(test, reference_title)

Response: This is an application for an extension of time to appeal from a decision to refuse to set aside a bankruptcy notice. The company involved was deregistered and subsequently re-registered, and the applicant submitted that the Tribunal had breached sections 420 and 430 of the Act. The Tribunal had refused to consider the applicant's explanation for the information provided by DFAT and had not set out the reasons for its decision.
Generated Headline: DFAT Pty Ltd v Minister for Employment and Workplace Relations [2007] FCA 1526 (13 November 2007)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Rahman v Secretary, Department of Education, Employment and Workplace Relations [2009] FCA 239 (18 March 2009) [Score: 0.6137]
TT-Line Company Pty Ltd v Commissioner of Taxation [2009] FCA 658 (18 June 2009) [Score: 0.5665]
Strategic Financial and Project Services Pty Ltd v Bank of China Limited [2009] FCA 604 (

In [215]:
# Another Prompt 2 response (party names omitted), passed as a plain string.
# The \' escape near the end of the literal is redundant inside a
# triple-quoted string; it evaluates to an ordinary apostrophe.
test = """This is an application for an extension of time to appeal a decision to refuse to set aside a bankruptcy notice. The bankruptcy notice was issued by a company that had been deregistered and subsequently re-registered. The company argued that interest on the debt accrues during the period of deregistration and the applicant submitted that the tribunal failed to provide a "just, fair, and economical review mechanism" as well as refusing to consider the applicant\'s explanation."""
# Evaluate the anonymised summary against reference_title.
resp_to_sim_docs(test, reference_title)

Response: This is an application for an extension of time to appeal a decision to refuse to set aside a bankruptcy notice. The bankruptcy notice was issued by a company that had been deregistered and subsequently re-registered. The company argued that interest on the debt accrues during the period of deregistration and the applicant submitted that the tribunal failed to provide a "just, fair, and economical review mechanism" as well as refusing to consider the applicant's explanation.


Generated Headline: Defractor Services Pty Ltd v The Bank of Canada [2006] FCA 1212 (13 November 2006)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Australian Securities & Investment Commission v Lanepoint Enterprises Pty Ltd [2009] FCA 258 (21 April 2009) [Score: 0.6507]
Strategic Financial and Project Services Pty Ltd v Bank of China Limited [2009] FCA 604 (5 June 2009) [Score: 0.6492]
TT-Line Company Pty Ltd v Commissioner of Taxation [2009] FCA 658 (18 June 2009) [Score: 0.6173]
Eric Preston Pty Ltd v Euroz Securities Limited [2009] FCA 240 (17 March 2009) [Score: 0.5971]
PCH Offshore Pty Ltd v Dunn [2009] FCA 553 (20 May 2009) [Score: 0.5960]


In [214]:
# Longer Prompt 2 response: a dated chronology of the proceedings that
# refers to the parties only generically ("one party", "the company",
# "the applicant", "the respondent"). Passed as a plain string.
test = """Proceedings were commenced in a local court on 22 August 2005 which resulted in a default judgment in favor of one party. The company involved subsequently changed its name but was then deregistered in May 2006. The party then sought reinstatement of the company in the Supreme Court of NSW on 30 March 2007 and orders were made by a registrar on 3 May 2007 to allow the reinstatement. Later, a Federal Magistrates court delivered a judgment on 23 June 2009 which dismissed the applicant's application to set aside a bankruptcy notice issued against them. The applicant argued that sections 420 and 430 of the Corporations Act had been breached yet the court found in favor of the respondent."""
# Evaluate the anonymised summary against reference_title.
resp_to_sim_docs(test, reference_title)

Response: Proceedings were commenced in a local court on 22 August 2005 which resulted in a default judgment in favor of one party. The company involved subsequently changed its name but was then deregistered in May 2006. The party then sought reinstatement of the company in the Supreme Court of NSW on 30 March 2007 and orders were made by a registrar on 3 May 2007 to allow the reinstatement. Later, a Federal Magistrates court delivered a judgment on 23 June 2009 which dismissed the applicant's application to set aside a bankruptcy notice issued against them. The applicant argued that sections 420 and 430 of the Corporations Act had been breached yet the court found in favor of the respondent.
Generated Headline: Defractors Pty Ltd v The Australian Securities and Investments Commission [2007] FCA 1619 (13 November 2007)




Actual Title:  Makucha v Fairbrook PMA Pty Ltd [2009] FCA 985 (3 September 2009)

Top 5 most similar sentences in corpus:

Australian Securities & Investment Commis