### Script to generate summaries using the chunking-based BART method

Set the `dataset` and `output_path` variables in the next cell according to your requirements.  


In [3]:
# Dataset code. Options: "IN" -> IN-Abs, "UK" -> UK-Abs, "N2" -> IN-Ext
dataset = 'UK'
# Directory into which the generated summaries are written (one file per document)
output_path = './output/'

In [4]:
import sys
from BART_utilities import *
sys.path.insert(0, '../')
from utilities import *

import transformers
import pandas as pd
import numpy as np
import glob
import nltk
import torch
import math
import random
import re
import argparse
import os


In [5]:
# Load the test split: file names, source documents and reference summaries
names, data_source, data_summary = get_summary_data(dataset, "test")

# The three collections are expected to have one entry per test document
for loaded in (names, data_source, data_summary):
    print(len(loaded))
print(names)

# Lookup keyed by file name; the main loop reads the required summary length from it
dict_names = get_req_len_dict(dataset, "test")
print(dict_names)

100
100
100
['uksc-2009-0022.txt', 'uksc-2009-0073.txt', 'uksc-2009-0105.txt', 'uksc-2009-0118.txt', 'uksc-2009-0125.txt', 'uksc-2009-0127.txt', 'uksc-2009-0129.txt', 'uksc-2009-0152.txt', 'uksc-2009-0167.txt', 'uksc-2009-0180.txt', 'uksc-2010-0039.txt', 'uksc-2010-0106.txt', 'uksc-2010-0127.txt', 'uksc-2010-0128.txt', 'uksc-2010-0154.txt', 'uksc-2010-0189.txt', 'uksc-2010-0201.txt', 'uksc-2010-0231.txt', 'uksc-2010-0236.txt', 'uksc-2010-0244.txt', 'uksc-2011-0011.txt', 'uksc-2011-0024.txt', 'uksc-2011-0046.txt', 'uksc-2011-0089.txt', 'uksc-2011-0115.txt', 'uksc-2011-0117.txt', 'uksc-2011-0196.txt', 'uksc-2011-0233.txt', 'uksc-2011-0244.txt', 'uksc-2011-0260.txt', 'uksc-2012-0007.txt', 'uksc-2012-0025.txt', 'uksc-2012-0072.txt', 'uksc-2012-0109.txt', 'uksc-2012-0124.txt', 'uksc-2012-0143.txt', 'uksc-2012-0162.txt', 'uksc-2012-0179.txt', 'uksc-2012-0181.txt', 'uksc-2012-0247.txt', 'uksc-2012-0249.txt', 'uksc-2012-0250.txt', 'uksc-2013-0006.txt', 'uksc-2013-0023.txt', 'uksc-2013-0036.txt

In [6]:
# Load the pretrained BART-large weights together with the matching tokenizer
from transformers import BartTokenizer, BartForConditionalGeneration, AdamW, BartConfig

model = BartForConditionalGeneration.from_pretrained("facebook/bart-large")

tokenizer = BartTokenizer.from_pretrained("facebook/bart-large", add_prefix_space=True)

### To use a fine-tuned model
1. Uncomment the `LitModel.load_from_checkpoint(...)` statement in the following cell.
2. Replace the checkpoint path in that statement with the path to your fine-tuned model.

In [7]:
# Wrap the pretrained model and tokenizer in the project's LitModel
bart_model = LitModel(model=model, tokenizer=tokenizer, learning_rate=2e-5)

# Alternative: restore a fine-tuned checkpoint instead (uncomment and set the path)
# bart_model = LitModel.load_from_checkpoint(
#     "/home/pahelibhattacharya/HULK/Abhay/models/BART_large_IN_MCS.ckpt",
#     learning_rate=2e-5, tokenizer=tokenizer, model=model).to("cuda")

In [8]:
def generate_summary_gpu(nested_sentences,p=0.2):
  '''
    Generate a summary for each chunk of a document with the global BART model.

    input:  nested_sentences - iterable of text chunks (strings) of one document
            p - number of words wanted in the summary per word in the chunk
    output: flat list of decoded summary strings, in chunk order
            (the caller joins them to obtain the document summary)

    Relies on the module-level `tokenizer` and `bart_model`.
  '''
  # Use the GPU when one is present, otherwise fall back to the CPU
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
  # Moving the model is loop-invariant, so do it once up front
  seq2seq = bart_model.model.to(device)

  summaries = []
  for nested in nested_sentences:
    # Length budget for this chunk, derived from its whitespace word count.
    # NOTE(review): generate() counts tokens, not words, so this is only an
    # approximation of the requested ratio.
    l = int(p * len(nested.split(" ")))
    input_tokenized = tokenizer.encode(nested, truncation=True, return_tensors='pt')
    input_tokenized = input_tokenized.to(device)
    summary_ids = seq2seq.generate(input_tokenized,
                                   length_penalty=0.01,
                                   # l - 5 is negative for short chunks / small p;
                                   # a minimum length cannot be below zero
                                   min_length=max(l-5, 0),
                                   max_length=l+5)
    # One decoded string per returned sequence, appended directly to the flat list
    summaries.extend(
        tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False)
        for g in summary_ids
    )
  return summaries

In [9]:
import os

# Create the output directory (and any missing parents); exist_ok makes the
# cell safe to re-run and avoids the check-then-create race.
os.makedirs(output_path, exist_ok=True)

In [10]:
# Main loop: generate and save the summary of each document in the test dataset
for i, (name, doc) in enumerate(zip(names, data_source)):
    input_len = len(doc.split(" "))      # whitespace word count of the document
    req_len = dict_names[name]           # required summary length (in words) for this file
    print(str(i) + ": " + name +  " - " + str(input_len) + " : " + str(req_len), end = " ,")

    # Split the document into chunks (1024 is the chunk-size argument of nest_sentences)
    nested = nest_sentences(doc,1024)
    # Compression ratio: summary words wanted per document word
    p = float(req_len/input_len)
    print(p)

    # Summarise every chunk and stitch the pieces into one document summary
    abs_summ = " ".join(generate_summary_gpu(nested,p))

    # Trim to the required length if the combined summary overshoots it
    summ_words = abs_summ.split(" ")
    if len(summ_words) > req_len:
        abs_summ = " ".join(summ_words[:req_len])
    print(len((abs_summ.split(" "))))

    # os.path.join works whether or not output_path ends with a separator;
    # the with-block closes the file even if the write fails.
    path = os.path.join(output_path, name)
    with open(path,'w') as file:
        file.write(abs_summ)
    


0: uksc-2009-0022.txt - 21712 : 1191 ,0.05485445836403832
1087
1: uksc-2009-0073.txt - 30202 : 715 ,0.023673928878882193
661
2: uksc-2009-0105.txt - 50261 : 2326 ,0.04627842661307972
2072
3: uksc-2009-0118.txt - 10241 : 956 ,0.0933502587637926
831
4: uksc-2009-0125.txt - 25874 : 1195 ,0.046185359820669396
1100
5: uksc-2009-0127.txt - 25874 : 1195 ,0.046185359820669396
1100
6: uksc-2009-0129.txt - 3833 : 801 ,0.2089746934516045
682
7: uksc-2009-0152.txt - 6232 : 1254 ,0.20121951219512196
1115
8: uksc-2009-0167.txt - 19690 : 338 ,0.017166074149314373
338
9: uksc-2009-0180.txt - 17086 : 1139 ,0.06666276483670841
1023
10: uksc-2010-0039.txt - 7359 : 864 ,0.11740725642070933
786
11: uksc-2010-0106.txt - 32660 : 1153 ,0.03530312308634415
1072
12: uksc-2010-0127.txt - 7798 : 1201 ,0.15401384970505258
1053
13: uksc-2010-0128.txt - 11766 : 1074 ,0.0912799592044875
980
14: uksc-2010-0154.txt - 6371 : 1135 ,0.17815099670381415
1062
15: uksc-2010-0189.txt - 6958 : 890 ,0.12791031905720035
803
16: 

In [None]:
The issue in this appeal is whether a failure by the Respondent to comply with a procedural requirement in its policy relating to the detention of foreign national prisoners results in their detention being unlawful, so as to allow the detainee to advance a claim in tort for false imprisonment.
Shepherd Masimba Kambadzi is a Zimbabwean national.
He entered the UK lawfully, but remained here after his leave to remain expired.
In 2005, he was convicted of assault and sexual assault, sentenced to one year's imprisonment and ordered to be registered as a sex offender for five years.
Prior to his being released from prison, the Respondent decided to make a deportation order against the Appellant.
Paragraph 2(2) of Schedule 3 to the Immigration Act 1971 (the 1971 Act) gives the Secretary of State the power to detain foreign nationals pending the making of a deportation order and the Appellant was detained under that power on 7 March 2006.
On 24 August 2007, a deportation order was made against the Appellant, after which he was detained under paragraph 2(3) of Schedule 3 to the 1971 Act.
In all, he was detained for 27 months until 13 June 2008, when he was granted bail.
He has not yet been returned to Zimbabwe because of conditions in that country.
The common law has recognised limits on the extent of the power to detain under paragraphs 2(2) and (3) of Schedule 3 to the 1971 Act (the Hardial Singh principles, set down in R v Governor of Durham Prison Ex p Hardial Singh [1984] 1 WLR 904).
The Respondent had a policy which detailed how the power to detain was to be exercised.
It provided, for example, that all reasonable alternatives to detention must be considered before detention was authorised.
The policy also provided for detention to be subject to review at regular intervals.
It specified the frequency of review and the grade of official which was to carry them out.
The policy required the Appellant's detention to be reviewed on five occasions during the first month and then monthly thereafter.
The Appellant's detention was not reviewed in accordance with that policy.
By the date of the first instance hearing of this claim, he had been entitled to 22 monthly reviews of the lawfulness of his detention, in addition to the five reviews which should have taken place in the first month.
His detention had been reviewed only ten times.
Only six of those reviews had been conducted by officials of the required seniority and, of those six, two were flawed by material errors of fact.
The substantive requirements for detaining the Appellant were, however, met throughout the period of his detention: had the Respondent carried out the reviews, it could justifiably have decided to continue to detain him.
While still detained, the Appellant raised a judicial review, seeking a declaration that he was unlawfully detained and damages.
At first instance, Munby J granted a declaration that the Appellant had been unlawfully detained for various periods amounting to about 19 months and gave directions for the assessment of damages, but he declined to order his release. (The Appellant was subsequently granted bail in other proceedings.) The Court of Appeal allowed the Secretary of State's appeal and held that the Appellant's detention had been lawful throughout.
Although the Supreme Court heard the appeal in February 2010, it delayed handing down its judgment so as to enable a court of nine Justices to consider the case of R (Lumba) v Secretary of State for the Home Department [2011] UKSC 12.
That case also considered the legality of the detention of foreign national prisoners where the Secretary of State has not complied with the terms of the published policy relating to detention.
The Supreme Court, by a majority, allows the appeal.
Lord Hope gives the leading judgment; Lady Hale and Lord Kerr issue separate concurring judgments.
The majority holds that the Appellant's detention was unlawful for the periods in respect of which no review was carried out and that he does have a claim in tort for false imprisonment in respect of those periods.
The amount of damages is yet to be ascertained, but will be nominal if it is found that the Appellant would have been detained even if his detention had been reviewed as the policy required.
Lord Brown gives a dissenting judgment, with which Lord Rodger agrees.
The Secretary of State was under a public law duty to adhere to the terms of the policy relating to reviews unless there were good reasons not to: [36], [39], [66].
The majority of the Court holds that the Respondent's unlawful failure to review the Appellant's detention, as required by the policy, resulted in his detention being unlawful.
The court rejects the argument that because the breach of public law related to a procedural requirement, it did not affect the legality of the detention: [69] [73], [85].
Some procedural requirements go to the legality of the detention and some do not: [71].
The policy was sufficiently closely related to the authority to detain to qualify the Secretary of State's discretion under the 1971 Act: [51].
The very point of the review was to ensure that the detention was lawful: [73], [86].
The public law error bore directly on the decision to detain the Appellant and therefore satisfied the test adopted by the majority in Lumba for determining when a public law error will result in detention being unlawful: [42], [88].
The Appellant's detention was not unlawful, however, where the only defect in the decision to continue detention was that the review had been carried out by an official of the wrong grade: [60].
As the Court also held in Lumba, it was no defence to the claim that there were grounds which justified the Appellant's detention: [54], [73], [88].
False imprisonment is a trespass to the person and actionable in itself, without proof of loss or damage: [74].
The result of the reviews which should have taken place is, however, relevant to assessing damages.
A defendant is liable only for the loss which his wrongful act has caused.
The full facts of this case are yet to be established, but if it is found that the claimant would not have been released had proper reviews been carried out, he will only be entitled to nominal damages: [55] [56], [74], [89].
Lord Brown (with whom Lord Rodger agrees) would have held that the failure to review the Appellant's detention did not result in the Appellant's detention being unlawful.
They hold that the policy did not confer upon the Appellant an entitlement to be released, but only an entitlement to be reviewed for release: [107].
Once properly detained, a detainee remains lawfully imprisoned unless and until released on bail or by the Secretary of State's direction, or he establishes a substantive entitlement to release: [111].
Lumba does not compel the majority's result, because it was concerned with a substantive entitlement under the policy and not a procedural one, and because it also held that not every breach of public law resulted in detention being unlawful: [116] [118].
