In [None]:
!pip install openai



In [None]:
!pip install transformers
!pip install datasets
!pip install torch
!pip install peft


In [None]:
from datasets import load_dataset
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch.nn as nn
import torch


# XSum is read from CSV files, one file per split
data_files = dict(
    train="XsumTrain.csv",
    test="XsumTest.csv",
    validation="XsumValidation.csv",
)
dataset = load_dataset("csv", data_files=data_files)

# Pretrained GPT-2 tokenizer and language-model head
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
model = GPT2LMHeadModel.from_pretrained("gpt2")

def preprocess_function(examples):
    """Turn a batch of (document, summary) pairs into causal-LM training rows.

    Each row is laid out as ``document <eos> summary <eos> <padding>`` and is
    exactly 512 tokens long. The loss is restricted to the summary (and its
    closing end-of-text token): document, separator and padding positions are
    labelled ``-100``.

    Two problems of the previous version are avoided:

    * the GPT-2 tokenizer defines no padding token, so asking it to pad raises;
      padding is done here by hand with the end-of-text id and masked out;
    * labels used to be 128 tokens long while inputs were 512 tokens long, but a
      decoder-only model needs ``labels`` aligned position-by-position with
      ``input_ids``.

    Args:
        examples: batch mapping with ``'document'`` and ``'summary'`` lists.

    Returns:
        dict with ``input_ids``, ``attention_mask`` and ``labels``, each a list
        of equal-length integer lists.
    """
    max_length = 512
    max_summary_length = 128
    # Leave room for the summary plus the separator and the closing token.
    max_document_length = max_length - max_summary_length - 2
    eos_id = tokenizer.eos_token_id

    # Non-string cells (e.g. missing values) are tokenized as empty text.
    documents = [doc if isinstance(doc, str) else "" for doc in examples['document']]
    summaries = [summ if isinstance(summ, str) else "" for summ in examples['summary']]

    document_ids = tokenizer(documents, max_length=max_document_length, truncation=True)["input_ids"]
    summary_ids = tokenizer(summaries, max_length=max_summary_length, truncation=True)["input_ids"]

    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for doc_ids, summ_ids in zip(document_ids, summary_ids):
        prompt_ids = doc_ids + [eos_id]    # document followed by a separator
        target_ids = summ_ids + [eos_id]   # summary followed by end-of-text
        used = len(prompt_ids) + len(target_ids)
        pad_len = max_length - used

        model_inputs["input_ids"].append(prompt_ids + target_ids + [eos_id] * pad_len)
        model_inputs["attention_mask"].append([1] * used + [0] * pad_len)
        # -100 is ignored by the loss, so only summary tokens are learned.
        model_inputs["labels"].append([-100] * len(prompt_ids) + target_ids + [-100] * pad_len)

    return model_inputs

# Run preprocess_function over every split of `dataset`, one batch of rows at a time.
tokenized_datasets = dataset.map(preprocess_function, batched=True)


In [None]:
def apply_lora_and_peft(model, rank=32, lora_alpha=32, lora_dropout=0.1, adapter_size=64):
    """Attach a LoRA branch and a bottleneck adapter to every ``nn.Linear`` in ``model``.

    Each targeted layer keeps its original forward pass and adds two parallel
    branches computed from the same input ``x``:

    * LoRA:    ``dropout(x) @ lora_A.T @ lora_B.T * (lora_alpha / rank)``
    * adapter: ``adapter_up(relu(adapter_down(x)))``

    ``lora_B`` and ``adapter_up`` start at zero, so right after patching the
    model produces the same outputs as before. All new tensors are registered
    on the patched layer (as parameters / sub-modules), so they show up in
    ``model.parameters()`` and ``state_dict()`` and follow ``model.to(...)``.

    Calling the function twice on the same model is a no-op the second time.

    Args:
        model: module tree to patch in place.
        rank: inner dimension of the LoRA factors.
        lora_alpha: LoRA scaling numerator; the branch is scaled by ``lora_alpha / rank``.
        lora_dropout: dropout probability applied to the LoRA branch input.
        adapter_size: hidden width of the bottleneck adapter.

    Returns:
        None. ``model`` is modified in place.

    Raises:
        ValueError: if ``rank`` or ``adapter_size`` is not positive.

    NOTE(review): only ``nn.Linear`` instances are matched. GPT-2 in
    ``transformers`` presumably builds its attention/MLP projections from a
    separate ``Conv1D`` class, in which case only the LM head is patched here
    - confirm. Base weights are not frozen by this function.
    """
    if rank <= 0:
        raise ValueError(f"rank must be positive, got {rank}")
    if adapter_size <= 0:
        raise ValueError(f"adapter_size must be positive, got {adapter_size}")

    scaling = lora_alpha / rank
    marker = "_lora_adapter_patched"

    def _patch(linear):
        # A helper call gives every layer its own closure variables; defining
        # the forward inside the loop made all layers share the last ones.
        factory = {"device": linear.weight.device, "dtype": linear.weight.dtype}
        in_features, out_features = linear.in_features, linear.out_features

        # LoRA factors: A random, B zero, so the branch contributes nothing at first.
        linear.lora_A = nn.Parameter(torch.empty(rank, in_features, **factory).uniform_(-0.1, 0.1))
        linear.lora_B = nn.Parameter(torch.zeros(out_features, rank, **factory))
        linear.lora_dropout = nn.Dropout(lora_dropout)

        # Bottleneck adapter; zero "up" projection keeps the initial output unchanged.
        down = nn.Linear(in_features, adapter_size, **factory)
        up = nn.Linear(adapter_size, out_features, **factory)
        nn.init.zeros_(up.weight)
        nn.init.zeros_(up.bias)
        # Flag the helper layers so a later call does not wrap them too.
        setattr(down, marker, True)
        setattr(up, marker, True)
        linear.adapter_down = down
        linear.adapter_up = up

        # New sub-modules default to training mode; follow the host layer instead.
        for extra in (linear.lora_dropout, down, up):
            extra.train(linear.training)

        base_forward = linear.forward

        def patched_forward(input):
            # input is (..., in_features): project down through A, then up through B.
            lora_out = (linear.lora_dropout(input) @ linear.lora_A.t() @ linear.lora_B.t()) * scaling
            adapter_out = linear.adapter_up(torch.relu(linear.adapter_down(input)))
            return base_forward(input) + lora_out + adapter_out

        linear.forward = patched_forward
        setattr(linear, marker, True)

    # Snapshot the targets first: patching registers new sub-modules, which must
    # not be visited (and wrapped) by the same traversal.
    targets = [
        module
        for _, module in model.named_modules()
        if isinstance(module, nn.Linear) and not getattr(module, marker, False)
    ]
    for module in targets:
        _patch(module)

# Patch the GPT-2 model in place, using the function's default hyperparameters.
apply_lora_and_peft(model)


In [None]:
from transformers import Trainer, TrainingArguments

# Mixed-precision (fp16) training needs a CUDA device; requesting it on a
# CPU-only runtime makes TrainingArguments raise, so enable it only when available.
use_fp16 = torch.cuda.is_available()

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    save_steps=10_000,
    fp16=use_fp16,
    # 8 accumulated steps of batch size 1 -> effective batch size of 8 per device
    gradient_accumulation_steps=8,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"]
)

# Train the model
trainer.train()


In [None]:
import time

def generate_summary_with_openai(article_text, model="gpt-3.5-turbo-instruct"):
    """Summarize one article through the OpenAI (legacy) Completion endpoint.

    Args:
        article_text: full text of the article to summarize.
        model: completion-style model name. The default replaces
            ``text-davinci-003``, which has been retired. Chat models such as
            ``gpt-3.5-turbo`` do not work with this endpoint.

    Returns:
        The generated summary with surrounding whitespace stripped.

    The API key must already be configured on the ``openai`` module.
    """
    # Imported here because the notebook's `import openai` cell comes after this one.
    import openai

    prompt = f"Summarize the following article:\n\n{article_text}\n\nSummary:"

    response = openai.Completion.create(
        model=model,
        prompt=prompt,
        max_tokens=100,  # upper bound on the summary length, in tokens
        temperature=0.1,  # low temperature keeps the output close to deterministic
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0
    )

    return response.choices[0].text.strip()

# Start from a blank 'model_generated' column that the loop fills in
# NOTE(review): `df` is not defined in any cell visible in this notebook - confirm its origin.
df['model_generated'] = ""

# One API call per article, written back to the row it came from
for idx, article_text in df['document'].items():
    df.at[idx, 'model_generated'] = generate_summary_with_openai(article_text)
    time.sleep(1)  # pause between calls to stay under the API rate limit

# Show the first 25 articles next to their generated summaries
preview = df[['document', 'model_generated']].head(25)
print(preview)


In [None]:
import openai

import os
from getpass import getpass

# Never store the key in the notebook. Read it from the OPENAI_API_KEY environment
# variable, or prompt for it (the prompt hides the typed value).
# A key was previously hardcoded in this cell; treat it as leaked and revoke it.
api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")
openai.api_key = api_key

In [None]:
!pip install datasets
from datasets import load_dataset

# Load XSum by name; the result is indexed by split
dataset = load_dataset("xsum")

# Keep a handle on the test split ...
test_dataset = dataset["test"]

# ... and a pandas copy of it for the evaluation cells below
test_df = test_dataset.to_pandas()



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [None]:
# Preview the first rows of the test split.
test_df.head()

Unnamed: 0,document,summary,id
0,"Prison Link Cymru had 1,099 referrals in 2015-...","There is a ""chronic"" need for more housing for...",38264402
1,Officers searched properties in the Waterfront...,"A man has appeared in court after firearms, am...",34227252
2,"Jordan Hill, Brittany Covington and Tesfaye Co...",Four people accused of kidnapping and torturin...,38537698
3,The 48-year-old former Arsenal goalkeeper play...,West Brom have appointed Nicky Hammond as tech...,36175342
4,Restoring the function of the organ - which he...,The pancreas can be triggered to regenerate it...,39070183


In [None]:
# Reassign instead of mutating in place, and tolerate an already-missing column,
# so re-running this cell does not raise KeyError.
test_df = test_df.drop(columns=['id'], errors='ignore')

In [None]:
# Preview again to confirm the 'id' column is gone.
test_df.head()

Unnamed: 0,document,summary
0,"Prison Link Cymru had 1,099 referrals in 2015-...","There is a ""chronic"" need for more housing for..."
1,Officers searched properties in the Waterfront...,"A man has appeared in court after firearms, am..."
2,"Jordan Hill, Brittany Covington and Tesfaye Co...",Four people accused of kidnapping and torturin...
3,The 48-year-old former Arsenal goalkeeper play...,West Brom have appointed Nicky Hammond as tech...
4,Restoring the function of the organ - which he...,The pancreas can be triggered to regenerate it...


In [None]:
# Full text of the article in row 0
test_df.loc[0, 'document']

'Prison Link Cymru had 1,099 referrals in 2015-16 and said some ex-offenders were living rough for up to a year before finding suitable accommodation.\nWorkers at the charity claim investment in housing would be cheaper than jailing homeless repeat offenders.\nThe Welsh Government said more people than ever were getting help to address housing problems.\nChanges to the Housing Act in Wales, introduced in 2015, removed the right for prison leavers to be given priority for accommodation.\nPrison Link Cymru, which helps people find accommodation after their release, said things were generally good for women because issues such as children or domestic violence were now considered.\nHowever, the same could not be said for men, the charity said, because issues which often affect them, such as post traumatic stress disorder or drug dependency, were often viewed as less of a priority.\nAndrew Stevens, who works in Welsh prisons trying to secure housing for prison leavers, said the need for acc

In [None]:
# Reference summary of the article in row 0
test_df.loc[0, 'summary']

'There is a "chronic" need for more housing for prison leavers in Wales, according to a charity.'

In [None]:
!pip install openai==0.28





In [None]:
import time

import openai

def generate_summary_with_openai(article_text):
    """Summarize one article with 'gpt-3.5-turbo' through the chat completions API.

    Sends a fixed system message plus a user message containing the article,
    and returns the first choice's reply with surrounding whitespace stripped.
    """
    chat_request = dict(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Summarize this article:\n{article_text}"},
        ],
        max_tokens=100,
        temperature=0.1,
    )
    completion = openai.ChatCompletion.create(**chat_request)

    # Only the first returned choice is used
    first_choice = completion['choices'][0]
    return first_choice['message']['content'].strip()

# How many test articles to summarize; capped so a shorter frame cannot cause
# a length mismatch when the summaries are written back.
num_samples = min(25, len(test_df))
batch_size = 3
delay_seconds = 60

# Column that receives the generated summaries (reset on every run of this cell)
test_df['model_generated'] = ""

for batch_start in range(0, num_samples, batch_size):
    batch_end = min(batch_start + batch_size, num_samples)
    # Positional slice for reading ...
    articles_to_process = test_df['document'].iloc[batch_start:batch_end]

    generated_summaries = [
        generate_summary_with_openai(article_text)
        for article_text in articles_to_process
    ]

    # ... and the matching index labels for writing, so both sides address the same rows
    batch_labels = test_df.index[batch_start:batch_end]
    test_df.loc[batch_labels, 'model_generated'] = generated_summaries

    # Pause between batches (not after the last one) to stay under the API rate limit
    if batch_end < num_samples:
        print(f"Generated summaries for articles {batch_start+1}-{batch_end}. Waiting for {delay_seconds} seconds before the next batch...")
        time.sleep(delay_seconds)

print(test_df[['document', 'model_generated']])


Generated summaries for articles 1-3. Waiting for 60 seconds before the next batch...
Generated summaries for articles 4-6. Waiting for 60 seconds before the next batch...
Generated summaries for articles 7-9. Waiting for 60 seconds before the next batch...
Generated summaries for articles 10-12. Waiting for 60 seconds before the next batch...
Generated summaries for articles 13-15. Waiting for 60 seconds before the next batch...
Generated summaries for articles 16-18. Waiting for 60 seconds before the next batch...
Generated summaries for articles 22-24. Waiting for 60 seconds before the next batch...
                                                document  \
0      Prison Link Cymru had 1,099 referrals in 2015-...   
1      Officers searched properties in the Waterfront...   
2      Jordan Hill, Brittany Covington and Tesfaye Co...   
3      The 48-year-old former Arsenal goalkeeper play...   
4      Restoring the function of the organ - which he...   
...                           

In [None]:
# Show the 25 rows that were sent to the model, with the new 'model_generated' column.
test_df.head(25)

Unnamed: 0,document,summary,model_generated
0,"Prison Link Cymru had 1,099 referrals in 2015-...","There is a ""chronic"" need for more housing for...",The article discusses the challenges faced by ...
1,Officers searched properties in the Waterfront...,"A man has appeared in court after firearms, am...",The article reports that police conducted sear...
2,"Jordan Hill, Brittany Covington and Tesfaye Co...",Four people accused of kidnapping and torturin...,"Four individuals in Chicago, Jordan Hill, Brit..."
3,The 48-year-old former Arsenal goalkeeper play...,West Brom have appointed Nicky Hammond as tech...,The article discusses the career of a 48-year-...
4,Restoring the function of the organ - which he...,The pancreas can be triggered to regenerate it...,A study published in the journal Cell found th...
5,But there certainly should be.\nThese are two ...,Since their impending merger was announced in ...,The article discusses the proposed merger betw...
6,Media playback is not supported on this device...,"A ""medal at any cost"" approach created a ""cult...",The article discusses the accusations of ageis...
7,It's no joke. But Kareem Badr says people did ...,Have you heard the one about the computer prog...,"The article tells the story of Kareem Badr, wh..."
8,Relieved that the giant telecoms company would...,The reaction from BT's investors told us much ...,The article discusses how the giant telecoms c...
9,"""I'm really looking forward to it - the home o...",Manager Brendan Rodgers is sure Celtic can exp...,The article discusses Celtic manager Brendan R...


In [None]:
# Generated summary for row 11, shown in full
test_df.loc[11, 'model_generated']

"The East Sussex NHS Trust recently experienced a mix-up where wrong patient information leaflets were added to hospital appointment letters sent out in March, causing unnecessary anxiety for some patients. The trust attributed the error to an external company that distributes its printed material. Approximately 850 letters were affected by the mistake, but the trust has since contacted everyone impacted to apologize and explain the situation. The trust's assistant director of operations acknowledged the administrative error and expressed regret for any distress caused."

In [None]:
# Generated summary for row 24 (the last of the 25 processed rows), shown in full
test_df.loc[24, 'model_generated']

'Bridget and Jonathan Reid, snowsports enthusiasts from Scotland, got married at a ski resort in their wedding attire before skiing down a run. Their wedding, held at Nevis Range, was captured by adventure photographer Hamish Frost. The couple, who share a love for skiing, decided to marry on skis due to their shared passion for the sport. The ceremony took place in full Highland dress amidst a snow-covered mountain landscape. The couple expressed gratitude for the recent snowfalls that made their special'

In [None]:
# Print each reference summary next to the model's summary, numbered from 1
for row_label in range(25):
    reference_summary = test_df.loc[row_label, 'summary']
    model_summary = test_df.loc[row_label, 'model_generated']
    print(f"{row_label + 1} - Reference Summary: {reference_summary}\nModel Summary: {model_summary}\n")

1 - Reference Summary: There is a "chronic" need for more housing for prison leavers in Wales, according to a charity.
Model Summary: The article discusses the challenges faced by ex-offenders in Wales in finding suitable accommodation upon their release from prison. Prison Link Cymru, a charity that helps ex-offenders find housing, highlights the difficulties faced by individuals, particularly men, in securing accommodation due to issues like post-traumatic stress disorder and drug dependency. The charity suggests that investing in housing would be more cost-effective than incarcerating homeless repeat offenders. The article also mentions changes to the Housing Act in Wales and the need for more one

2 - Reference Summary: A man has appeared in court after firearms, ammunition and cash were seized by police in Edinburgh.
Model Summary: The article reports that police conducted searches in two areas of the city and recovered three firearms, ammunition, and a large sum of money. A 26-ye

In [None]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:
from rouge import Rouge

# Scorer from the `rouge` package
rouge = Rouge()

# Evaluation covers only the leading rows of the frame
num_samples = 25
sampled_df = test_df.head(num_samples)

# Hypotheses and references as plain Python lists, in matching row order
generated_summaries, reference_summaries = (
    sampled_df[column].tolist() for column in ('model_generated', 'summary')
)

# avg=True returns a single averaged result instead of one entry per pair
rouge_scores = rouge.get_scores(generated_summaries, reference_summaries, avg=True)

print("ROUGE Scores:", rouge_scores)

ROUGE Scores: {'rouge-1': {'r': 0.3788798315442952, 'p': 0.12265496815612652, 'f': 0.18393233351386157}, 'rouge-2': {'r': 0.07687084874414259, 'p': 0.020343791579278358, 'f': 0.03184915682713308}, 'rouge-l': {'r': 0.33040313929052656, 'p': 0.10713566400407648, 'f': 0.1605959240258277}}


In [None]:
from nltk.translate.bleu_score import corpus_bleu

from nltk.translate.bleu_score import SmoothingFunction

# Select the first 25 rows of the DataFrame for evaluation
num_samples = 25
sampled_df = test_df.head(num_samples)

# Generated and reference summaries for the selected rows, as strings
generated_summaries = sampled_df['model_generated'].tolist()
reference_summaries = sampled_df['summary'].tolist()

# corpus_bleu works on tokens: every hypothesis is a list of tokens, and every
# entry of the reference list is a list of token lists (one per reference).
# Passing raw strings makes it compare characters, which produced the
# near-zero score previously seen here.
tokenized_references = [[reference.split()] for reference in reference_summaries]
tokenized_hypotheses = [generated.split() for generated in generated_summaries]

# Smoothing keeps the score from collapsing when a higher-order n-gram has no matches.
bleu_score = corpus_bleu(
    tokenized_references,
    tokenized_hypotheses,
    smoothing_function=SmoothingFunction().method1,
)
print("BLEU Score for 25 Summaries:", bleu_score)

BLEU Score for 25 Summaries: 8.665409753482044e-232


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [None]:
!pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.0.0->bert_score)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.0.0->bert_score)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.0.0->bert_score)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.0.0->bert_score)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3

In [None]:
from bert_score import score
# Evaluation covers only the leading rows of the frame
num_samples = 25
sampled_df = test_df.head(num_samples)

# Candidates and references as plain Python lists, in matching row order
generated_summaries, reference_summaries = (
    sampled_df[column].tolist() for column in ('model_generated', 'summary')
)

# Three result tensors: precision, recall and F1
P, R, F1 = score(generated_summaries, reference_summaries, lang="en", verbose=True)

# Report the mean of each result tensor
for label, values in (
    ("BERT Precision:", P),
    ("BERT Recall:", R),
    ("BERT F1 Score:", F1),
):
    print(label, values.mean().item())



tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 45.66 seconds, 0.55 sentences/sec
BERT Precision: 0.8456292152404785
BERT Recall: 0.8846249580383301
BERT F1 Score: 0.8646414279937744
