In [1]:
# display_architecture=True

In [2]:
import torch
import json
import time
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config

model = T5ForConditionalGeneration.from_pretrained('t5-large')
tokenizer = T5Tokenizer.from_pretrained('t5-large')
device = torch.device('cpu')

Downloading:   0%|          | 0.00/792k [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.
- Be aware that you SHOULD NOT rely on t5-large automatically truncating your input to 512 when padding/encoding.
- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.


In [3]:
start_time = time.time()

In [4]:
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using device: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("No GPU available, using CPU instead.")

No GPU available, using CPU instead.


In [5]:
# if display_architecture==True:
#  print(model.config)

In [6]:
# if(display_architecture==True):
#   print(model)

In [7]:
# if display_architecture==True:
#   print(model.encoder)

In [8]:
# if display_architecture==True:
#   print(model.decoder)

In [9]:
# if display_architecture==True:
#   print(model.forward)

In [10]:
def summarize(input_text,min_len):
    
    clean_text = input_text.strip().replace("\n","")
    t5_cleaned_text = "summarize: "+clean_text
    # calculate the number of character for input rule/clause
    print('\033[1m' + 'Input rule/caluse for text summarization:' + '\033[0m')
    print (clean_text)
    print ("\n")
    print('\033[1m' + 'Number of characters in the input rule/clause:' + '\033[0m')
    print (len(input_text)) 
  
    tokenized_input_text = tokenizer.encode(t5_cleaned_text, return_tensors="pt").to(device)

    # summmarize 
    summary_ids = model.generate(tokenized_input_text,
                                    num_beams=4,
                                    no_repeat_ngram_size=2,
                                    min_length=30,
                                    max_length=min_len,
                                    early_stopping=True)

    output_text = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return output_text

In [11]:
# input the text for summarization
input_text="""
2.1.3.2 the rules must provide for: a) the full range of foreseeable
failure conditions, from simple to complex, and their consequences for
the functionality of the system as a whole and its ability to maintain
a safe interval between trains b) establishing a clear understanding
between signallers and train drivers about movements to be made and
precautions to be taken during the movement to reduce the likelihood
of collision c) protocols for spoken communications between signaller
and train drivers and any intermediaries involved in the passing of
information.
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.1.3.2 the rules must provide for: a) the full range of foreseeablefailure conditions, from simple to complex, and their consequences forthe functionality of the system as a whole and its ability to maintaina safe interval between trains b) establishing a clear understandingbetween signallers and train drivers about movements to be made andprecautions to be taken during the movement to reduce the likelihoodof collision c) protocols for spoken communications between signallerand train drivers and any intermediaries involved in the passing ofinformation.


[1mNumber of characters in the input rule/clause:[0m
569
[1m

Summarized rules/clauses: 
[0m
2.1.3.2 the rules must provide for: a) the full range of foreseeablefailure conditions, from simple to complex, and their consequences. c) protocols for spoken communications between signallers and train drivers
[1m
 Number of characters in the summarized rules/clauses:[0m
211


In [12]:
end_time = time.time()

# calculate the total time taken to execute the code on the CPU
total_time = end_time - start_time

print(f"Total time taken to execute the code on CPU: {total_time:.6f} seconds")

Total time taken to execute the code on CPU: 10.015003 seconds


In [13]:
# input the text for summarization
input_text="""2.4.2.4 Operating rules must specify temporary controls 
to be applied to train movements when: a) a visual examination of the 
site has not revealed a hazard, following a report of an infrastructure
defect b) a hazard has been assessed as presenting a sufficiently low 
risk to the safety of trains to permit movements to resume, but work 
is required to eliminate the hazard  c) the hazard has been found to 
be a malfunction of signalling equipment (requirements for authorising
train movements when the signalling system cannot be used because of 
defects are described in section 2.2 of this operational concept document)
d) a line is affected by floodwater or snow e) there are exceptional 
rail head conditions.
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.4.2.4 Operating rules must specify temporary controls to be applied to train movements when: a) a visual examination of the site has not revealed a hazard, following a report of an infrastructuredefect b) a hazard has been assessed as presenting a sufficiently low risk to the safety of trains to permit movements to resume, but work is required to eliminate the hazard  c) the hazard has been found to be a malfunction of signalling equipment (requirements for authorisingtrain movements when the signalling system cannot be used because of defects are described in section 2.2 of this operational concept document)d) a line is affected by floodwater or snow e) there are exceptional rail head conditions.


[1mNumber of characters in the input rule/clause:[0m
719
[1m

Summarized rules/clauses: 
[0m
temporary controls must be applied to train movements when: a visual examination of the site has not revealed an infrastructuredefect. hazard 

In [14]:
# input the text for summarization
input_text=""" 2.5.3.1 General requirements for the loading and preparation
of freight trains must be defined in operating rules.  Operating instructions,
appropriate to the type of traffic and wagons they deal with, must be provided
to people responsible for the loading and preparation of freight trains 
covering: a) securing a load to prevent it from moving around or falling 
from a wagon during transit b) distributing the load evenly on each wagon 
c) accessing or identifying and recording information about the wagon and 
its load required for safe train formation and movement   d) ensuring the 
train has sufficient braking capacity e) identifying and labelling dangerous
goods f) the separation of incompatible dangerous goods g) obtaining confirmation
that wagons containing dangerous goods have been securely closed  h) safety
checks and documentation prior to departure i) the dispatch of freight trains.
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.5.3.1 General requirements for the loading and preparationof freight trains must be defined in operating rules.  Operating instructions,appropriate to the type of traffic and wagons they deal with, must be providedto people responsible for the loading and preparation of freight trains covering: a) securing a load to prevent it from moving around or falling from a wagon during transit b) distributing the load evenly on each wagon c) accessing or identifying and recording information about the wagon and its load required for safe train formation and movement   d) ensuring the train has sufficient braking capacity e) identifying and labelling dangerousgoods f) the separation of incompatible dangerous goods g) obtaining confirmationthat wagons containing dangerous goods have been securely closed  h) safetychecks and documentation prior to departure i) the dispatch of freight trains.


[1mNumber of characters in the input rule/clause:[0m

In [15]:
# input the text for summarization
input_text=""" 2.5.3.6 Freight train drivers must be provided with the 
following information before starting a journey. a) The formation of their 
train, including its brake force, weight and length. b) The lowest maximum 
permitted speed of any vehicle in the train. c) The route availability of 
the vehicles in the train. d) Details of any dangerous goods being conveyed.
e) Special conditions applicable to the movement of any load or vehicle 
being conveyed. f) Vehicles in the train which have defects requiring a 
reduction in speed of the train. g) Vehicles in the train without operative
automatic brakes.  The information must be updated when the formation is 
changed or the status of vehicles is changed (for example, loaded to empty).
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.5.3.6 Freight train drivers must be provided with the following information before starting a journey. a) The formation of their train, including its brake force, weight and length. b) The lowest maximum permitted speed of any vehicle in the train. c) The route availability of the vehicles in the train. d) Details of any dangerous goods being conveyed.e) Special conditions applicable to the movement of any load or vehicle being conveyed. f) Vehicles in the train which have defects requiring a reduction in speed of the train. g) Vehicles in the train without operativeautomatic brakes.  The information must be updated when the formation is changed or the status of vehicles is changed (for example, loaded to empty).


[1mNumber of characters in the input rule/clause:[0m
735
[1m

Summarized rules/clauses: 
[0m
2.5.3.6 Freight train drivers must be provided with the following information before starting a journey. formation of their tr

In [16]:
# input the text for summarization
input_text=""" 2.9.2.3 Operating rules for work on the infrastructure of 
electrified lines, including the electrification equipment, must: a) 
define the responsibilities for planning, establishing and maintaining 
a safe system of work, including control of the electric current supply 
and obtaining an isolation b) define the procedure for obtaining an 
isolation of electrical equipment and securing the isolated section 
against accidental charging with electricity c) specify any distance 
from electrical equipment required to assess whether work can be carried
out safely without an isolation d) identify and describe the safety 
equipment required to protect workers from electric shock, the type 
of work which they can do safely and limitations on the tools, plant 
and equipment which they can use without obtaining an isolation. 
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.9.2.3 Operating rules for work on the infrastructure of electrified lines, including the electrification equipment, must: a) define the responsibilities for planning, establishing and maintaining a safe system of work, including control of the electric current supply and obtaining an isolation b) define the procedure for obtaining an isolation of electrical equipment and securing the isolated section against accidental charging with electricity c) specify any distance from electrical equipment required to assess whether work can be carriedout safely without an isolation d) identify and describe the safety equipment required to protect workers from electric shock, the type of work which they can do safely and limitations on the tools, plant and equipment which they can use without obtaining an isolation.


[1mNumber of characters in the input rule/clause:[0m
830
[1m

Summarized rules/clauses: 
[0m
2.9.2.3 Operating rules for work o

In [17]:
# input the text for summarization
input_text=""" 2.9.2.1 Differences between electrification 
systems, their technical and physical characteristics and 
impact on workforce safety, must be addressed in system-specific instructions.
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.9.2.1 Differences between electrification systems, their technical and physical characteristics and impact on workforce safety, must be addressed in system-specific instructions.


[1mNumber of characters in the input rule/clause:[0m
184
[1m

Summarized rules/clauses: 
[0m
differences between electrification systems, their technical and physical characteristics, and impact on workforce safety, must be addressed in system-specific instructions.
[1m
 Number of characters in the summarized rules/clauses:[0m
173


In [18]:
# input the text for summarization
input_text=""" 2.8.10.1 Operating rules must outline the 
actions to be taken by station and train operating staff, 
to prevent people using stations from being injured by 
moving trains or during boarding and alighting.  
"""

# summarize the output
summarized_output=summarize(input_text,50)

# printing the summarize the output
print('\033[1m' + '\n\nSummarized rules/clauses: \n' + '\033[0m')
print (summarized_output)

# calculate the number of character for input rule/clause
print('\033[1m' + '\n Number of characters in the summarized rules/clauses:' + '\033[0m')
print(len(summarized_output))

[1mInput rule/caluse for text summarization:[0m
2.8.10.1 Operating rules must outline the actions to be taken by station and train operating staff, to prevent people using stations from being injured by moving trains or during boarding and alighting.


[1mNumber of characters in the input rule/clause:[0m
209
[1m

Summarized rules/clauses: 
[0m
2.8.10.1 operating rules must outline the actions taken by station and train operating staff, to prevent people using stations from being injured by moving trains or during boarding and alighting.
[1m
 Number of characters in the summarized rules/clauses:[0m
196
