In [4]:
!pip install transformers==4.30
!pip install accelerate -U
!pip install sentencepiece
!pip install rouge
!pip install wandb onnx -Uq

Collecting transformers==4.30
  Downloading transformers-4.30.0-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m42.1 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers==4.30)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m85.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.15.0
    Uninstalling tokenizers-0.15.0:
      Successfully uninstalled tokenizers-0.15.0
  Attempting uninstall: transformers
    Found existing installation: transformers 4.35.2
    Uninstalling transformers-4.35.2:
      Successfully uninstalled transformers-4.35.2
Successfully installed tokenizers-0.13.3 transformers-4.30.0
Collecting accelerate
  Downloadin

In [5]:
!git clone https://github.com/Arjavjain100/TOS-Summarization.git

Cloning into 'TOS-Summarization'...
remote: Enumerating objects: 228, done.[K
remote: Counting objects: 100% (27/27), done.[K
remote: Compressing objects: 100% (25/25), done.[K
remote: Total 228 (delta 4), reused 8 (delta 2), pack-reused 201[K
Receiving objects: 100% (228/228), 772.27 KiB | 20.87 MiB/s, done.
Resolving deltas: 100% (104/104), done.


In [6]:
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, Trainer, TrainingArguments,pipeline,PretrainedConfig
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from sklearn.model_selection import train_test_split
from rouge import Rouge
import pandas as pd
import os
import wandb
import random
import numpy as np
import accelerate

os.environ["WANDB_PROJECT"]="major-one"
os.environ["WANDB_LOG_MODEL"]="checkpoint"
os.environ["WANDB_WATCH"]="all"



# Ensure deterministic behavior
torch.backends.cudnn.deterministic = True
random.seed(hash("setting random seeds") % 2**32 - 1)
np.random.seed(hash("improves reproducibility") % 2**32 - 1)
torch.manual_seed(hash("by removing stochasticity") % 2**32 - 1)
torch.cuda.manual_seed_all(hash("so runs are repeatable") % 2**32 - 1)

# Device configuration
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Dataset location
filename = "./TOS-Summarization/Dataset/all_v1_transpose.csv"

In [7]:
!pip install wandb -U



In [8]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [9]:
df = pd.read_csv(filename)
df = df[['original_text','reference_summary']]
df.rename(columns = {'original_text':'source', 'reference_summary':'target'}, inplace = True)
len(df)

446

In [10]:
X = df['source']
y = df['target']

In [11]:
df.head()

Unnamed: 0,source,target
0,welcome to the pokémon go video game services ...,hi.
1,by using our services you are agreeing to thes...,by playing this game you agree to these terms....
2,if you want to use certain features of the ser...,you have to use google pokemon trainer club or...
3,during game play please be aware of your surro...,don t die or hurt others and if you do it s no...
4,subject to your compliance with these terms ni...,don t copy modify resell distribute or reverse...


In [12]:
class PegasusDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels['input_ids'][idx])  # torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels['input_ids'])

In [13]:
def prepare_data(model_name,
                 train_texts, train_labels,
                 test_texts, test_labels):
  """
  Prepare input data for model fine-tuning
  """

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  prepare_test = False if test_texts is None or test_labels is None else True

  def tokenize_data(texts, labels):

    encodings = tokenizer(texts, truncation=True, padding=True, max_length = 600)
    decodings = tokenizer(labels, truncation=True, padding=True, max_length = 256)
    dataset_tokenized = PegasusDataset(encodings, decodings)
    return dataset_tokenized

  train_dataset = tokenize_data(train_texts, train_labels)
  test_dataset = tokenize_data(test_texts, test_labels) if prepare_test else None

  return train_dataset, test_dataset, tokenizer

In [14]:
def prepare_fine_tuning(model_name, tokenizer, train_dataset, test_dataset, freeze_encoder=False, output_dir='./results'):
  """
  Prepare configurations and base model for fine-tuning
  """
  torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
  model = PegasusForConditionalGeneration.from_pretrained(model_name).to(torch_device)

  if test_dataset is not None:
    training_args = TrainingArguments(
      output_dir=output_dir,           # output directory
      num_train_epochs=2,              # total number of training epochs
      per_device_train_batch_size=1,   # batch size per device during training, can increase if memory allows
      per_device_eval_batch_size=1,    # batch size for evaluation, can increase if memory allows
      save_steps=500,                  # number of updates steps before checkpoint saves
      save_total_limit=5,              # limit the total amount of checkpoints and deletes the older checkpoints
      evaluation_strategy='steps',     # evaluation strategy to adopt during training
      eval_steps=100,                  # number of update steps before evaluation
      warmup_steps=500,                # number of warmup steps for learning rate scheduler
      weight_decay=0.01,               # strength of weight decay
      logging_dir='./logs',            # directory for storing logs
      logging_steps=100,
      report_to="wandb"
    )

    trainer = Trainer(
      model=model,                         # the instantiated 🤗 Transformers model to be trained
      args=training_args,                  # training arguments, defined above
      train_dataset=train_dataset,         # training dataset
      eval_dataset=test_dataset,           # evaluation dataset
      tokenizer=tokenizer
    )

  else:
    training_args = TrainingArguments(
      output_dir=output_dir,           # output directory
      num_train_epochs=2,              # total number of training epochs
      per_device_train_batch_size=1,   # batch size per device during training, can increase if memory allows
      save_steps=500,                  # number of updates steps before checkpoint saves
      save_total_limit=5,              # limit the total amount of checkpoints and deletes the older checkpoints
      warmup_steps=500,                # number of warmup steps for learning rate scheduler
      weight_decay=0.01,               # strength of weight decay
      logging_dir='./logs',            # directory for storing logs
      logging_steps=100,
    )

    trainer = Trainer(
      model=model,                         # the instantiated 🤗 Transformers model to be trained
      args=training_args,                  # training arguments, defined above
      train_dataset=train_dataset,         # training dataset
      tokenizer=tokenizer
    )

  return trainer

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20, random_state = 0)

train_texts, train_labels = list(X_train), list(y_train)
test_texts, test_labels = list(X_test), list(y_test)

In [16]:
model_name = 'nsi319/legal-pegasus'

train_dataset,test_dataset, tokenizer = prepare_data(model_name, train_texts, train_labels,test_texts,test_labels)
trainer = prepare_fine_tuning(model_name, tokenizer, train_dataset,test_dataset)

trainer.train()

trainer.evaluate(test_dataset)

wandb.finish()

tokenizer_config.json:   0%|          | 0.00/1.51k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.33k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/1.34k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

[34m[1mwandb[0m: Currently logged in as: [33marnavkundalia[0m ([33mfaltu-team[0m). Use [1m`wandb login --relogin`[0m to force relogin


You're using a PegasusTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss,Validation Loss
100,10.5242,10.290859
200,9.1068,9.14348
300,8.4446,8.352876
400,6.9447,4.548946
500,2.8832,0.629687
600,0.8703,0.52146
700,0.7572,0.498111


[34m[1mwandb[0m: Adding directory to artifact (./results/checkpoint-500)... Done. 106.1s


VBox(children=(Label(value='8711.512 MB of 8711.512 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▇▇▄▁▁▁▁
eval/runtime,▁▇▇▇▇█▇█
eval/samples_per_second,█▂▂▂▂▁▁▁
eval/steps_per_second,█▂▂▂▂▁▁▁
train/epoch,▁▁▂▂▃▃▄▄▆▆▇▇████
train/global_step,▁▁▂▂▃▃▄▄▆▆▇▇████
train/learning_rate,▂▄▅▇█▅▁
train/loss,█▇▇▅▃▁▁
train/total_flos,▁
train/train_loss,▁

0,1
eval/loss,0.49778
eval/runtime,25.9096
eval/samples_per_second,3.474
eval/steps_per_second,3.474
train/epoch,2.0
train/global_step,712.0
train/learning_rate,0.0
train/loss,0.7572
train/total_flos,1205448435302400.0
train/train_loss,5.56618


In [17]:
import os
if not os.path.exists('./ouput_model/'):
    os.makedirs('./ouput_model/')
trainer.model.save_pretrained("./ouput_model/")

Inference

In [18]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [19]:
config = PretrainedConfig.from_json_file('./ouput_model/config.json')

In [20]:
model = PegasusForConditionalGeneration.from_pretrained("./ouput_model/", config = config).to(device)

In [39]:
def summarize(text):
  input_tokenized = tokenizer.encode(text, return_tensors='pt',max_length=1024,truncation=True).to(device)
  summary_ids = model.generate(input_tokenized,
                                  num_beams=9,
                                  no_repeat_ngram_size=3,
                                  length_penalty=2.0,
                                  min_length=50,
                                  max_length=150,
                                  early_stopping=True)
  summary = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0]

  return summary

In [43]:
def summarize(text):
    summary = ""
    # summary token lenght condition
    d_len = len(tokenizer.encode(text))
    print("text len: ",d_len)
    if(d_len > 1024):
      #print('inside if block')
    # Summaries of long documents
      start = 0
      window_size = 1024
      total_len = d_len
      loop = True

      while loop:
        #print('inside while loop')
        end = start + window_size
        if end >= total_len:
          loop = False
          end = total_len

        # (1) extract window from sample and tokenize it
        input_chuck_tokenizer = tokenizer.encode(text[start:end+1], return_tensors='pt',max_length=1024,truncation=True).to(device)
        # (2) summarize chunk
        with torch.no_grad():
          #print('torch no grad')
          summary_chunck = model.generate(input_chuck_tokenizer,
                                          num_beams=9,
                                          no_repeat_ngram_size=3,
                                          length_penalty=2.0,
                                          min_length=120,
                                          max_length=250,
                                          early_stopping=True)

        # (3) concatenate summaries
        summary = summary + "\n" + [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_chunck][0]
        start = end
    else:
    # single summary
        input_tokenized = tokenizer.encode(text, return_tensors='pt',max_length=1024,truncation=True).to(device)
        with torch.no_grad():
            summary_ids = model.generate(input_tokenized,
                                          num_beams=9,
                                          no_repeat_ngram_size=3,
                                          length_penalty=2.0,
                                          min_length=50,
                                          max_length=150,
                                          early_stopping=True)

            summary = [tokenizer.decode(g, skip_special_tokens=True, clean_up_tokenization_spaces=False) for g in summary_ids][0]

    return summary

In [33]:
test = ''' Introduction
Thank you for using the YouTube products, services and features provided to you from the platform (collectively, the “Service”).
Our Service
The Service allows you to discover, watch and share videos and other content, provides a forum for people to connect, inform, and inspire others across the globe, and acts as a distribution platform for original content creators and advertisers large and small. We provide lots of information about our products and how to use them in our Help Center. Among other things, you can find out about YouTube Kids, the YouTube Partner Program and YouTube Paid Memberships and Purchases (where available).You can also read all about enjoying content on other devices like your television, your games console, or Google Home.
Your Service Provider
The entity providing the Service is Google LLC, a company operating under the laws of Delaware, located at 1600 Amphitheatre Parkway, Mountain View, CA 94043 (referred to as “YouTube”, “we”, “us”, or “our”). References to YouTube’s “Affiliates” in these terms means the other companies within the Alphabet Inc. corporate group (now or in the future).
Applicable Terms
Your use of the Service is subject to these terms, the YouTube Community Guidelines and the Policy, Safety and Copyright Policies which may be updated from time to time (together, this "Agreement"). Your Agreement with us will also include the Advertising on YouTube Policies if you provide advertising or sponsorships to the Service or incorporate paid promotions in your content. Any other links or references provided in these terms are for informational use only and are not part of the Agreement.
Please read this Agreement carefully and make sure you understand it. If you do not understand the Agreement, or do not accept any part of it, then you may not use the Service.
By accepting this Agreement, you affirm that you are 18 years of age or above and are fully competent to enter into this Agreement, and to abide by and comply with this Agreement. In case you are below 18 years of age, the restrictions and requirements below apply.
Who may use the Service?
Age Requirements
You must be at least 13 years old to use the Service; however, children of all ages may use the Service and YouTube Kids (where available) if enabled by a parent or legal guardian.
Permission by Parent or Guardian
If you are considered a minor in your country, you represent that you have your parent or guardian’s permission to use the Service. Please have them read this Agreement with you.
If you are a parent or legal guardian of a minor in your country, by allowing your child to use the Service, you are subject to the terms of this Agreement and responsible for your child’s activity on the Service. You can find tools and resources to help you manage your family’s experience on YouTube (including how to enable a child under the age of 13 to use the Service and YouTube Kids) in our Help Center and through Google’s Family Link.
Businesses
If you are using the Service on behalf of a company or organisation, you represent that you have authority to act on behalf of that entity, and that such entity accepts this Agreement.
Your Use of the Service
Content on the Service
The content on the Service includes videos, audio (for example music and other sounds), graphics, photos, text (such as comments and scripts), branding (including trade names, trademarks, service marks, or logos), interactive features, software, metrics, and other materials (collectively, "Content”).
Content is the responsibility of the person or entity that provides it to the Service. YouTube is under no obligation to host or serve Content. If you see any Content you believe does not comply with this Agreement, including by violating the Community Guidelines or the law, you can report it to us.
Google Accounts and YouTube Channels
You can use parts of the Service, such as browsing and searching for Content, without having a Google account. However, you do need a Google account to use some features. With a Google account, you may be able to like videos, subscribe to channels, create your own YouTube channel, and more. You can follow these instructions to create a Google account.
Creating a YouTube channel will give you access to additional features and functions, such as uploading videos, making comments or creating playlists (where available). Here are some details about how to create your own YouTube channel.
To protect your Google account, keep your password confidential. You should not reuse your Google account password on third-party applications. Learn more about keeping your Google account secure, including what to do if you learn of any unauthorised use of your password or Google account.
Your Information
Our Privacy Policy explains how we treat your personal data and protect your privacy when you use the Service. The YouTube Kids Privacy Notice provides additional information about our privacy practices that are specific to YouTube Kids.
We will process any audio or audiovisual content uploaded by you to the Service in accordance with the YouTube Data Processing  Terms, except in cases where you uploaded such content for personal purposes or household activities. Learn More.
Permissions and Restrictions
You may access and use the Service as made available to you, as long as you comply with this Agreement and applicable law. You may view or listen to Content for your personal, non-commercial use. You may also show YouTube videos through the embeddable YouTube player.
The following restrictions apply to your use of the Service. You are not allowed to:
access, reproduce, download, distribute, transmit, broadcast, display, sell, license, alter, modify or otherwise use any part of the Service or any Content except: (a) as expressly authorized by the Service; or (b) with prior written permission from YouTube and, if applicable, the respective rights holders;
circumvent, disable, fraudulently engage with, or otherwise interfere with any part of the Service (or attempt to do any of these things), including security-related features or features that (a) prevent or restrict the copying or other use of Content or (b) limit the use of the Service or Content;
access the Service using any automated means (such as robots, botnets or scrapers) except (a) in the case of public search engines, in accordance with YouTube’s robots.txt file; or (b) with YouTube’s prior written permission;
collect or harvest any information that might identify a person (for example, usernames or faces), unless permitted by that person or allowed under section (3) above;
use the Service to distribute unsolicited promotional or commercial content or other unwanted or mass solicitations;
cause or encourage any inaccurate measurements of genuine user engagement with the Service, including by paying people or providing them with incentives to increase a video’s views, likes, or dislikes, or to increase a channel’s subscribers, or otherwise manipulate metrics in any manner;
misuse any reporting, flagging, complaint, dispute, or appeals process, including by making groundless, vexatious, or frivolous submissions;
run contests on or through the Service that do not comply with YouTube’s contest policies and guidelines;
use the Service to view or listen to Content other than for personal, non-commercial use
(for example, you may not publicly screen videos or stream music from the Service); or
use the Service to (a) sell any advertising, sponsorships, or promotions placed on, around, or within the Service or Content, other than those allowed in the Advertising on YouTube policies (such as compliant product placements); or (b) sell advertising, sponsorships, or promotions on any page of any website or application that only contains Content from the Service or where Content from the Service is the primary basis for such sales (for example, selling ads on a webpage where YouTube videos are the main draw for users visiting the webpage).
Reservation
Using the Service does not give you ownership of or rights to any aspect of the Service, including user names or any other Content posted by others or YouTube).
Develop, Improve and Update the Service
YouTube is constantly changing and improving the Service. As part of this continual evolution, we may make modifications or changes (to all or part of the Service) such as adding or removing features and functionalities, offering new digital content or services or discontinuing old ones. We may also need to alter or discontinue the Service, or any part of it, in order to make performance or security improvements, make changes to comply with law, or prevent illegal activities on or abuse of our systems. These changes may affect all users, some users or even an individual user. When the Service requires or includes downloadable software (such as the YouTube Studio application), that software may update automatically on your device once a new version or feature is available, subject to your device settings. If we make material changes that negatively impact your use of the Service, we’ll provide you with reasonable advance notice, except in urgent situations such as preventing abuse, responding to legal requirements, or addressing security and operability issues. We’ll also provide you with an opportunity to export your content from your Google Account using Google Takeout, subject to applicable law and policies.
Your Content and Conduct
Uploading Content
If you have a YouTube channel, you may be able to upload Content to the Service. You may use your Content to promote your business or artistic enterprise. If you choose to upload Content, you must not submit to the Service any Content that does not comply with this Agreement (including the YouTube Community Guidelines) or the applicable law(s).
In particular, the Content must:
a. respect the rights of others, including privacy;
b. not include third-party intellectual property (such as copyrighted material) unless you have permission from that party or are otherwise legally entitled to do so;
c. not abuse or harm others or yourself (or threaten or encourage such abuse or harm), including against children;
d. not mislead, be patently false, or defrauding;
e. not illegally impersonate, defame, bully, harass, be obscene or stalk others;
f. not incite violation of applicable laws;
g. not abuse, harm, interfere with, or disrupt the services — for example, by accessing or using them in fraudulent or deceptive ways, introducing malware, or spamming, hacking, or bypassing our systems or protective measures.
You are legally responsible for the Content you submit to the Service. We may use automated systems that analyze your Content to help detect infringement and abuse, including spam and malware.
Rights you Grant
You retain ownership rights in your Content. However, we do require you to grant certain rights to YouTube and other users of the Service, as described below.
License to YouTube
By providing Content to the Service, you grant to YouTube a worldwide, non-exclusive, royalty-free, transferable, sublicensable license to use that Content (including to reproduce, distribute, prepare derivative works, display and perform it). YouTube may only use that Content in connection with the Service and YouTube’s (and its successors’ and Affiliates) business, including for the purpose of promoting and redistributing part or all of the Service.
License to Other Users
You also grant each other user of the Service a worldwide, non-exclusive, royalty-free license to access your Content through the Service, and to use that Content, including to reproduce, distribute, prepare derivative works, display and perform it, only as enabled by a feature of the Service (such as video playback or embeds). For clarity, this license does not grant any rights or permissions for a user to make use of your Content independent of the Service.
Duration of License
The licenses granted by you continue for a commercially reasonable period of time after you remove or delete your Content from the Service. You understand and agree, however, that YouTube may retain, but not display, distribute, or perform, server copies of your videos that have been removed or deleted.
Right to Monetize
You grant to YouTube the right to monetize your Content on the Service (and such monetization may include displaying ads on or within Content or charging users a fee for access). This Agreement does not entitle you to any payments. Starting June 1, 2021, any payments you may be entitled to receive from YouTube under any other agreement between you and YouTube (including for example payments under the YouTube Partner Program, Channel memberships or Super Chat) will be treated as royalties.  If required by law, Google will withhold taxes from such payments.
Removing Your Content
You may remove your Content from the Service at any time. You also have the option to make a copy of your Content before removing it. You must remove your Content if you no longer have the rights required by these terms.
Removal of Content By YouTube
If we reasonably believe that any of your Content (1) is in breach of this Agreement or (2) may cause harm to YouTube, our users, or third parties, we reserve the right to remove or take down that Content in accordance with applicable law. We will notify you with the reason for our action unless we reasonably believe that to do so: (a) would breach the law or the direction of a legal enforcement authority or would otherwise risk legal liability for YouTube or our Affiliates; (b) would compromise an investigation or the integrity or operation of the Service; or (c) would cause harm to any user, other third party, YouTube or our Affiliates. You can learn more about reporting and enforcement, including how to appeal on the Troubleshooting page of our Help Center.
Community Guidelines Strikes
YouTube operates a system of “strikes” in respect of Content that violates the YouTube Community Guidelines. Each strike comes with varying restrictions and may result in the permanent removal of your channel from YouTube.  A full description of how a strike affects your channel is available on the Community Guidelines Strikes Basics page. If you believe that a strike has been issued in error, you may appeal here.
If your channel has been restricted due to a strike, you must not use another channel to circumvent these restrictions. Violation of this prohibition is a material breach of this Agreement and Google reserves the right to terminate your Google account or your access to all or part of the Service.
Copyright Protection
We provide information to help copyright holders manage their intellectual property online in our YouTube Copyright Center. If you believe your copyright has been infringed on the Service, please send us a notice.
We respond to notices of alleged copyright infringement according to the process in our YouTube Copyright Center, where you can also find information about how to resolve a copyright strike. YouTube's policies provide for the termination, in appropriate circumstances, of repeat infringers’ access to the Service.
Account Suspension & Termination
Terminations by You
You may stop using the Service at any time. Follow these instructions to delete the Service from your Google Account, which involves closing your YouTube channel and removing your data. You also have the option to download a copy of your data first.
Terminations and Suspensions by YouTube
YouTube reserves the right to suspend or terminate  your Google account or your access to all or part of the Service if: (a) you materially or repeatedly breach this Agreement; (b) we are required to do so to comply with a legal requirement or a court order; or (c) we believe there has been conduct that creates (or could create) liability or harm to any user, other third party, YouTube or our Affiliates.
Notice for Termination or Suspension
We will notify you with the reason for termination or suspension by YouTube unless we reasonably believe that to do so: (a) would violate the law or the direction of a legal enforcement authority; (b) would compromise an investigation; (c) would compromise the integrity, operation or security of the Service; or (d) would cause harm to any user, other third party, YouTube or our Affiliates. Where YouTube is terminating your use for Service changes, where reasonably possible, you will be provided with sufficient time to export your Content from the Service.
Effect of Account Suspension or Termination
If your Google account is terminated or your access to the Service is restricted, you may continue using certain aspects of the Service (such as viewing only) without an account, and this Agreement will continue to apply to such use. If you believe that the termination or suspension of your Google account has been made in error, you can appeal using this form.
About Software in the Service
Downloadable Software
When the Service requires or includes downloadable software (such as the YouTube Studio application), unless that software is governed by additional terms which provide a license, YouTube gives you a personal, worldwide, royalty-free, non-assignable and non-exclusive license to use the software provided to you by YouTube as part of the Service. This license is for the sole purpose of enabling you to use and enjoy the benefit of the Service as provided by YouTube, in the manner permitted by this Agreement. You are not allowed to copy, modify, distribute, sell, sublicense or lease any part of the software, unless you have YouTube’s written permission.
Open Source
Some software used in our Service may be offered under an open source, royalty-free license that we provide to you. There may be provisions in an open source license that expressly override some of these terms, so please be sure to read those licenses.
Other Legal Terms
Warranty Disclaimer
OTHER THAN AS EXPRESSLY STATED IN THIS AGREEMENT OR AS REQUIRED BY LAW, THE SERVICE IS PROVIDED “AS IS” AND YOUTUBE DOES NOT MAKE ANY SPECIFIC COMMITMENTS OR WARRANTIES ABOUT THE SERVICE. FOR EXAMPLE, WE DON’T MAKE ANY WARRANTIES ABOUT: (A) THE CONTENT PROVIDED THROUGH THE SERVICE; (B) THE SPECIFIC FEATURES OF THE SERVICE, OR ITS ACCURACY, RELIABILITY, AVAILABILITY, OR ABILITY TO MEET YOUR NEEDS; OR (C) THAT ANY CONTENT YOU SUBMIT WILL BE ACCESSIBLE ON THE SERVICE.
Limitation of Liability
EXCEPT AS REQUIRED BY APPLICABLE LAW, YOUTUBE, ITS AFFILIATES, OFFICERS, DIRECTORS, EMPLOYEES AND AGENTS WILL NOT BE RESPONSIBLE FOR ANY LOSS OF PROFITS, REVENUES, BUSINESS OPPORTUNITIES, GOODWILL, OR ANTICIPATED SAVINGS; LOSS OR CORRUPTION OF DATA; INDIRECT OR CONSEQUENTIAL LOSS; PUNITIVE DAMAGES CAUSED BY:
ERRORS, MISTAKES, OR INACCURACIES ON THE SERVICE;
PERSONAL INJURY OR PROPERTY DAMAGE RESULTING FROM YOUR USE OF THE SERVICE;
ANY UNAUTHORIZED USE OF THE SERVICE;
ANY INTERRUPTION OR CESSATION OF THE SERVICE;
ANY VIRUSES OR MALICIOUS CODE TRANSMITTED TO OR THROUGH THE SERVICE BY ANY THIRD PARTY;
ANY CONTENT WHETHER SUBMITTED BY A USER OR YOUTUBE, INCLUDING YOUR USE OF CONTENT; AND/OR
THE REMOVAL OR UNAVAILABILITY OF ANY CONTENT.
THIS PROVISION APPLIES TO ANY CLAIM, REGARDLESS OF WHETHER THE CLAIM ASSERTED IS BASED ON WARRANTY, CONTRACT, TORT, OR ANY OTHER LEGAL THEORY.
TO THE EXTENT PERMITTED BY APPLICABLE LAW, YOUTUBE AND ITS AFFILIATES’ TOTAL LIABILITY FOR ANY CLAIMS ARISING FROM OR RELATING TO THE SERVICE IS LIMITED TO THE GREATER OF: (A) THE AMOUNT OF REVENUE THAT YOUTUBE HAS PAID TO YOU FROM YOUR USE OF THE SERVICE IN THE 12 MONTHS BEFORE THE DATE OF YOUR NOTICE, IN WRITING TO YOUTUBE, OF THE CLAIM AND (B) USD $500, WHICHEVER IS HIGHER.
Indemnity
To the extent permitted by applicable law, you agree to defend, indemnify and hold harmless YouTube, its Affiliates, officers, directors, employees and agents, from and against any and all claims, damages, obligations, losses, liabilities, costs or debt, and expenses (including but not limited to attorney's fees) arising from: (i) your use of he Service; (ii) your violation of any term of these Terms of Service; (iii) your violation of any third party right, including without limitation any copyright, property, or privacy right; or (iv) any claim that your Content caused damage to a third party. This defense and indemnification obligation will survive this Agreement and your use of the Service.
Third-Party Links
The Service may contain links to third-party websites and online services that are not owned or controlled by YouTube. YouTube has no control over, and assumes no responsibility for, such websites and online services. Be aware when you leave the Service; we suggest you read the terms and privacy policy of each third-party website and online service that you visit.
About this Agreement
Changing this Agreement
We may change this Agreement, (1) to reflect changes to our Service or how we do business - for example, when we add new products or features or remove old ones, (2) for legal, regulatory, or security reasons, or (3) to prevent abuse or harm .
If we materially change this Agreement, we’ll provide you with reasonable advance notice and the opportunity to review the changes, except (1) when we launch a new product or feature, or (2) in urgent situations, such as preventing ongoing abuse or responding to legal requirements. If you don’t agree to the new terms, you should remove any Content you uploaded and stop using the Service.
Continuation of this Agreement
If your use of the Service ends, the following terms of this Agreement will continue to apply to you: “Other Legal Terms”, “About This Agreement”, and the licenses granted by you will continue as described under “Duration of License”.
Severance
If it turns out that a particular term of this Agreement is not enforceable for any reason, this will not affect any other terms.
No Waiver
If you fail to comply with this Agreement and we do not take immediate action, this does not mean that we are giving up any rights that we may have (such as the right to take action in the future).
Interpretation
In these terms, “include” or “including” means “including but not limited to,” and any examples we give are for illustrative purposes.
Governing Law
California law will govern all disputes arising out of or relating to these terms or the Service, regardless of conflict of laws rules. These disputes will be resolved exclusively in the federal or state courts of Santa Clara County, California, USA, and you and YouTube consent to personal jurisdiction in those courts.
If applicable local law prevents these disputes from being resolved in a California court, then you can file these disputes in your local courts. Likewise, if applicable local law prevents your local court from applying California law to resolve these disputes, then these disputes will be governed by the laws of your country of residence.




An "Account" represents your legal relationship with GitHub. A “Personal Account” represents an individual User’s authorization to log in to and use the Service and serves as a User’s identity on GitHub. “Organizations” are shared workspaces that may be associated with a single entity or with one or more Users where multiple Users can collaborate across many projects at once. A Personal Account can be a member of any number of Organizations.
The “Agreement” refers, collectively, to all the terms, conditions, notices contained or referenced in this document (the “Terms of Service” or the "Terms") and all other operating rules, policies (including the GitHub Privacy Statement, available at github.com/site/privacy) and procedures that we may publish from time to time on the Website. Most of our site policies are available at docs.github.com/categories/site-policy.
"Beta Previews" mean software, services, or features identified as alpha, beta, preview, early access, or evaluation, or words or phrases with similar meanings.
“Content” refers to content featured or displayed through the Website, including without limitation code, text, data, articles, images, photographs, graphics, software, applications, packages, designs, features, and other materials that are available on the Website or otherwise available through the Service. "Content" also includes Services. “User-Generated Content” is Content, written or otherwise, created or uploaded by our Users. "Your Content" is Content that you create or own.
“GitHub,” “We,” and “Us” refer to GitHub, Inc., as well as our affiliates, directors, subsidiaries, contractors, licensors, officers, agents, and employees.
The “Service” refers to the applications, software, products, and services provided by GitHub, including any Beta Previews.
“The User,” “You,” and “Your” refer to the individual person, company, or organization that has visited or is using the Website or Service; that accesses or uses any part of the Account; or that directs the use of the Account in the performance of its functions. A User must be at least 13 years of age. Special terms may apply for business or government Accounts (See Section B(5): Additional Terms).
The “Website” refers to GitHub’s website located at github.com, and all content, services, and products provided by GitHub at or through the Website. It also refers to GitHub-owned subdomains of github.com, such as education.github.com and pages.github.com. These Terms also govern GitHub’s conference websites, such as githubuniverse.com, and product websites, such as electronjs.org. Occasionally, websites owned by GitHub may provide different or additional terms of service. If those additional terms conflict with this Agreement, the more specific terms apply to the relevant page or service.


1. Account Controls
Users. Subject to these Terms, you retain ultimate administrative control over your Personal Account and the Content within it.

Organizations. The "owner" of an Organization that was created under these Terms has ultimate administrative control over that Organization and the Content within it. Within the Service, an owner can manage User access to the Organization’s data and projects. An Organization may have multiple owners, but there must be at least one Personal Account designated as an owner of an Organization. If you are the owner of an Organization under these Terms, we consider you responsible for the actions that are performed on or through that Organization.

2. Required Information
You must provide a valid email address in order to complete the signup process. Any other information requested, such as your real name, is optional, unless you are accepting these terms on behalf of a legal entity (in which case we need more information about the legal entity) or if you opt for a paid Account, in which case additional information will be necessary for billing purposes.

3. Account Requirements
We have a few simple rules for Personal Accounts on GitHub's Service.

You must be a human to create an Account. Accounts registered by "bots" or other automated methods are not permitted. We do permit machine accounts:
A machine account is an Account set up by an individual human who accepts the Terms on behalf of the Account, provides a valid email address, and is responsible for its actions. A machine account is used exclusively for performing automated tasks. Multiple users may direct the actions of a machine account, but the owner of the Account is ultimately responsible for the machine's actions. You may maintain no more than one free machine account in addition to your free Personal Account.
One person or legal entity may maintain no more than one free Account (if you choose to control a machine account as well, that's fine, but it can only be used for running a machine).
You must be age 13 or older. While we are thrilled to see brilliant young coders get excited by learning to program, we must comply with United States law. GitHub does not target our Service to children under 13, and we do not permit any Users under 13 on our Service. If we learn of any User under the age of 13, we will terminate that User’s Account immediately. If you are a resident of a country outside the United States, your country’s minimum age may be older; in such a case, you are responsible for complying with your country’s laws.
Your login may only be used by one person — i.e., a single login may not be shared by multiple people. A paid Organization may only provide access to as many Personal Accounts as your subscription allows.
You may not use GitHub in violation of export control or sanctions laws of the United States or any other applicable jurisdiction. You may not use GitHub if you are or are working on behalf of a Specially Designated National (SDN) or a person subject to similar blocking or denied party prohibitions administered by a U.S. government agency. GitHub may allow persons in certain sanctioned countries or territories to access certain GitHub services pursuant to U.S. government authorizations. For more information, please see our Export Controls policy.
4. Account Security
You are responsible for keeping your Account secure while you use our Service. We offer tools such as two-factor authentication to help you maintain your Account's security, but the content of your Account and its security are up to you.

You are responsible for all content posted and activity that occurs under your Account (even when content is posted by others who have Accounts under your Account).
You are responsible for maintaining the security of your Account and password. GitHub cannot and will not be liable for any loss or damage from your failure to comply with this security obligation.
You will promptly notify GitHub by contacting us through the GitHub Support portal if you become aware of any unauthorized use of, or access to, our Service through your Account, including any unauthorized use of your password or Account.
5. Additional Terms
In some situations, third parties' terms may apply to your use of GitHub. For example, you may be a member of an organization on GitHub with its own terms or license agreements; you may download an application that integrates with GitHub; or you may use GitHub to authenticate to another service. Please be aware that while these Terms are our full agreement with you, other parties' terms govern their relationships with you.

If you are a government User or otherwise accessing or using any GitHub Service in a government capacity, this Government Amendment to GitHub Terms of Service applies to you, and you agree to its provisions.

If you have signed up for GitHub Enterprise Cloud, the Enterprise Cloud Addendum applies to you, and you agree to its provisions.

Your use of the Website and Service must not violate any applicable laws, including copyright or trademark laws, export control or sanctions laws, or other laws in your jurisdiction. You are responsible for making sure that your use of the Service is in compliance with laws and any applicable regulations.

You agree that you will not under any circumstances violate our Acceptable Use Policies or Community Guidelines.

'''

In [44]:
import time
# Record the start time
start_time = time.time()
prediction = summarize(test)
# Record the end time
end_time = time.time()

# Calculate the execution time
execution_time = (end_time - start_time)

print("Execution Time: ", execution_time, "seconds")

text len:  6114
Execution Time:  20.810827255249023 seconds


In [45]:
prediction

'\nThe service provides a forum for people to connect, inform, and inspire others across the globe and acts as a distribution platform for original content creators and advertisers large and small. We provide lots of information about our products and how to use them in our Help Center. Among other things, you can find out about YouTube Kids, the YouTube Partner Program and YouTube Paid Memberships and Purchases (where available). You can also read all about enjoying content on other devices like your television, your games console, or Google Home. The service is provided by Google LLC, a company operating under the laws of Delaware, located at 1600 Amphitheatre Parkway, Mountain View, CA 94043.\nyour use of the service is subject to the terms of the YouTube Community Guidelines and the Policy, Safety and Copyright Policies which may be updated from time to time (together, this "Agreement"). Your Agreement with us will also include the Advertising on YouTube Policies if you provide adv

In [22]:
y_pred = X_test.apply(lambda x: summarize(x))

In [23]:
summary = pd.concat([y_test.to_frame(name="reference_summary"), y_pred.to_frame(name="generated_summary")], axis=1)

In [24]:
rouge = Rouge()

In [46]:
rouge.get_scores(test, prediction,avg=True)

{'rouge-1': {'r': 0.9375, 'p': 0.18, 'f': 0.302013420116211},
 'rouge-2': {'r': 0.7656529516994633,
  'p': 0.11428571428571428,
  'f': 0.19888475610410067},
 'rouge-l': {'r': 0.9201388888888888,
  'p': 0.17666666666666667,
  'f': 0.2964205789529}}