### **Install & Load Dependencies**

In [15]:
# Install transformers
%pip install transformers

# Import os
import os

# Import GPT model and tokenizer
os.environ['HF_HOME'] = r'C:\Users\josha\AppData\Local\Temp'        # Set cache directory for HuggingFace
#from transformers import GPT2LMHeadModel, GPT2Tokenizer            # Using GPT2
#from transformers import AutoModelForCausalLM, AutoTokenizer       # Using microsoft phi
from transformers import T5Tokenizer, T5ForConditionalGeneration    # Using Google FLAN-T5


Note: you may need to restart the kernel to use updated packages.


### **Load the Model**

In [16]:
# Create an instance of the tokenizer
#tokenizer = GPT2Tokenizer.from_pretrained('gpt2-large')
#tokenizer = AutoTokenizer.from_pretrained('microsoft/Phi-3-mini-128k-instruct')
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")


# Create an instance of the model
#model = GPT2LMHeadModel.from_pretrained('gpt2-large', pad_token_id=tokenizer.eos_token_id)
#model = AutoModelForCausalLM.from_pretrained('microsoft/Phi-3-mini-128k-instruct', pad_token_id=tokenizer.eos_token_id)
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base", pad_token_id=tokenizer.eos_token_id)


### **Define Summarizer Function**

In [26]:
def section_summarizer(text):
    # Define prompt
    prompt = f'''You are an AI assistant capable of summarizing academic content on AI and Machine Learning for non-academic readers. Your summaries are given in the third-person, and are about 2-3 sentences in length.
    Summarize the following abstract:
    {text}'''

    # Encode prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Generate response
    output = model.generate(
        input_ids,                  # Input ids
        max_length = 500,           # Maximum number of tokens to generate
        num_beams = 5,              # Needed as next token is found using beam search
        no_repeat_ngram_size = 2,   # Stops model from repeating word sequences repeatedly
        early_stopping = False,      # If output becomes not very good, stop generating
    )

    # Return the decoded the output
    return tokenizer.decode(output[0], skip_special_tokens=True)

### **Test Function**

In [27]:
AIAYN_abstract = '''The dominant sequence transduction models are based on complex recurrent or
 convolutional neural networks that include an encoder and a decoder. The best
 performing models also connect the encoder and decoder through an attention
 mechanism. We propose a new simple network architecture, the Transformer,
 based solely on attention mechanisms, dispensing with recurrence and convolutions
 entirely. Experiments on two machine translation tasks show these models to
 be superior in quality while being more parallelizable and requiring significantly
 less time to train. Our model achieves 28.4 BLEU on the WMT 2014 English
to-German translation task, improving over the existing best results, including
 ensembles, by over 2 BLEU. On the WMT 2014 English-to-French translation task,
 our model establishes a new single-model state-of-the-art BLEU score of 41.8 after
 training for 3.5 days on eight GPUs, a small fraction of the training costs of the
 best models from the literature. We show that the Transformer generalizes well to
 other tasks by applying it successfully to English constituency parsing both with
 large and limited training data.'''

summary = section_summarizer(AIAYN_abstract)

In [28]:
summary

'We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Our model achieves 28.4 BLEU on the WMT 2014 English to-German translation task, improving over the existing best results, including ensembles, by over 2.'