# A notebook to test the summariser function using a Hugging Face model

In [2]:
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

In [3]:
# Build the summarisation pipeline once at module level so every call to the
# summariser reuses the same loaded model instead of reloading it.
hf_summarizer = pipeline(
    task='summarization',
    model='sshleifer/distilbart-cnn-12-6',
)

In [42]:
def summariser(url, max_words=700, timeout=10):
    """Download a web page and summarise its paragraph text.

    The page is fetched with ``requests``, its ``<p>`` elements are extracted
    with BeautifulSoup, short paragraphs are discarded, and the remaining text
    is passed to the module-level ``hf_summarizer`` pipeline.

    Parameters
    ----------
    url : str
        Address of the page to summarise.
    max_words : int, optional
        Maximum number of whitespace-separated words sent to the model.
        Defaults to 700, the cap used to stay within the model's input limit.
    timeout : float, optional
        Seconds to wait for the web server before giving up. Defaults to 10.

    Returns
    -------
    tuple of (str, str)
        On success, ``(summary_text, extracted_text)`` where ``extracted_text``
        is the full (untrimmed) concatenation of the kept paragraphs.
        On failure, ``(error_message, "a")``; the ``"a"`` placeholder is kept
        for compatibility with existing callers.
    """
    # Guard against None / non-string / blank input, which previously raised
    # a TypeError (len(None)) instead of returning the error tuple.
    if not isinstance(url, str) or len(url.strip()) <= 1:
        return "ERROR! Please pass in a valid url", "a"

    crawl_error = (
        "ERROR! Unable to crawl website. Please check if the link is valid or if the website allows automated web crawling",
        "a",
    )

    # Send a browser-like User-Agent because some sites reject the default
    # python-requests agent.
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    try:
        # A timeout prevents the call from hanging forever on an unresponsive host.
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException:
        # Malformed URLs, DNS failures, timeouts, etc. are reported the same
        # way as a non-200 response rather than crashing the caller.
        return crawl_error

    if response.status_code != 200:
        return crawl_error

    soup = BeautifulSoup(response.text, 'lxml')

    # Keep only paragraphs with more than 10 words; shorter ones are assumed
    # to be navigation/caption noise. split() (no argument) counts real words
    # even when they are separated by newlines, tabs or repeated spaces.
    paragraphs = [p.text for p in soup.find_all('p') if len(p.text.split()) > 10]
    if not paragraphs:
        # Nothing worth summarising; avoid calling the model on an empty string.
        return "ERROR! Unable to find any article text to summarise on this page", "a"

    # Same layout as before: each kept paragraph followed by a single space.
    relevant_p = ''.join(text + ' ' for text in paragraphs)

    # Cap the number of words because the Hugging Face model has an input limit.
    relevant_p_trimmed = ' '.join(relevant_p.split()[:max_words])

    # truncation=True is a safety net: a word cap does not guarantee the
    # tokenised input fits within the model's maximum sequence length.
    summary = hf_summarizer(relevant_p_trimmed, truncation=True)[0]['summary_text']
    return summary, relevant_p

In [44]:
# Smoke test: summarise a sample article and show the returned
# (summary, extracted text) tuple.
url = (
    "https://philosophybreak.com/articles/"
    "epicurus-principal-doctrines-40-aphorisms-for-living-well/"
    "?utm_source=reddit&utm_medium=social"
)
print(summariser(url))

 Only fragments of ancient Greek philosopher Epicurus’s writings remain . Among them are his Principal Doctrines: 40 brilliant, authoritative aphorisms that summarize the Epicurean approach to living a good life . Epicureans have things to say not just about ethics, but also share rich views on metaphysics, epistemology, and — more famously — why mortality is nothing to fear .


## Note
The code in this notebook is ultimately copied into `project_3.4_deployment.ipynb` so that it can be run and deployed on Google Cloud.