# Imports and other declarations

In [2]:
import requests # used to call oneai api for summary
import json # used to get link - text content mapping stored into a .json file
from datetime import datetime # used to calculate processing time for some tasks

In [10]:
# with ALL categories
final_link_content_dictionary_json = "final_link_content_dictionary.json"
final_link_content_without_bl_dictionary_json = "final_link_content_without_bl_dictionary.json"
final_link_summary_dictionary_json = "final_link_summary_dictionary.json"
final_link_processingtime_dictionary_json = "final_link_processingtime_summary_dictionary.json"

# Getting the data

In [4]:
# getting the JSON with ALL article links mapped to their text content from the .json file
with open(final_link_content_dictionary_json, 'r') as f:
    db = json.loads(f.read())

In [7]:
len(db)

7626

In [42]:
# getting the JSON with ALL article links mapped to their text content (without the links with empty content = broken links) from the .json file
with open(final_link_content_without_bl_dictionary_json, 'r') as f:
    db_without_bl = json.loads(f.read())

In [238]:
len(db_without_bl)

7588

# Applying the summarize task

In [452]:
# Helper function to get summary from text content - using oneai api

def extract_summary_from_text(text):
    # OneAI API Config for Article Summarization
    api_key = "10fb649b-a5a0-4f04-b6fb-88fe9a474f83"
    url = "https://api.oneai.com/api/v0/pipeline"
    headers = {
      "api-key": api_key, 
      "content-type": "application/json"
    }
    payload = {
      "input": text,
      "input_type": "article",
      "steps": [
          {
            "skill": "summarize"
          }
        ]
    }
    r = requests.post(url, json=payload, headers=headers)
    data = r.json()
    
    return data['output'][0]['text']

# call fcn
text = 'By HH Patel, M.Pharm. Reviewed by Kate Anderton, B.Sc. (Editor) Aagenaes syndrome is a rare disorder characterized by impaired bile flow from the liver resulting in cholestasis. Image Credit: Magic Mine/Shutterstock.com Blockage of bile secretion (hepatic cholestasis) results in swelling and fluid retention ( lymphedema ) in the lower extremities. Neonatal cholestasis in patients with Aagenaes syndrome usually diminishes during early childhood, but remains intermittent in nature. Despite this, \u00a0Aagenaes syndrome often gradually develops into cirrhosis of liver and giant-cell hepatitis accompanied by scarring of the portal tract tissues. The disorder is also referred to as lymphedema cholestasis syndrome (LSC1) or cholestasis-lymphedema syndrome (CLS). Symptoms and diagnosis of Aagenaes syndrome According to the Human Phenotype Ontology (HPO) database, the most frequent symptoms of Aagenaes syndrome are abdominal pain , swollen legs, obstructive liver disease, abnormality of urine homeostasis, clay-colored (acholic) stools, and fatigue. Other symptoms include enlarged liver, liver scarring, abnormal lipid metabolism, and biliary tract abnormalities. Unfortunately, there are currently no validated diagnostic tests for the condition. Aagenaes syndrome is diagnosed by evaluation of the symptoms and co-morbidities such as lymphedema. Prognosis of Aagenaes syndrome Related Stories How have SARS-CoV-2 variants of concern impacted national case fatality rates? A correlation study between time-from-vaccine and SARS-CoV-2 breakthrough infections A quantitative assessment of SARS-CoV-2 bioaerosol production The disorder is idiopathic and familial in nature. The exact genetic cause remains inconclusive. However, it is considered to be recessively inherited. The gene is located on chromosome 15q. The condition is mainly observed in infants from Norwegian origin, with more than 50% cases being observed in southern Norway. However, it is also prevalent in individuals from other European regions and the United States. Research on Aagenaes syndrome In a whole-genome screen analysis by Bull et al. (2000), the regions considered similar in the patients with Aagenaes syndrome were identified. The most promising regions were further mapped in larger sets of Norwegian individuals, DNA samples from 8 patients and their 7 non-symptomatic relatives from the same ancestral lineage were evaluated. The results revealed an extensive similarity in the alleles and haploid genotype of the patients over a region on the chromosome 15q between D15S979 and D15S652 markers, the characteristic which was absent in the unaffected relatives. This finding suggested that all the Norwegian patients with Aagenaes syndrome could be homozygous with similar gene mutation inherited in dormant form via a common ancestor. Managing Aagenaes syndrome There are no established therapeutic options available for curing this disorder. Treatments mainly target specific symptoms especially concerning lymphedema. Living with a genetic or rare disease can significantly affect the daily lives of patients and their families. As there is a huge scope for further research towards finding a definitive cure for this ailment, community care and support groups are of pivotal importance. Support and advocacy groups can help patients connect with others with similar condition. These groups provide patient-centred information and are the driving force behind research for better treatments and possible cures. Advisors in these groups help direct patients and their family members to research, resources, and services pertaining to this disorder. Sources: https://rarediseases.info.nih.gov/diseases/370/aagenaes-syndrome http://www.checkorphan.org/diseases/aagenaes-syndrome https://www.sciencedirect.com/science/article/pii/S0002929707632935 http://www.lymphedemapeople.com/thesite/Aagenaes_syndrome.html Further Reading All Aagenaes Syndrome Content Symptoms of Aagenaes Syndrome How is Aagenaes Syndrome Diagnosed? Last Updated: Jan 13, 2021'
# my_summary = extract_summary_from_text(text)

In [286]:
# my_summary

In [465]:
# total number of articles
len(db_without_bl)

7588

In [453]:
# Helper function to extract ALL summaries for ALL articles

summary_json = {} # json, key = link of the article, value = summary of article text content
link_processingtime_mapping = {}  # json, key = link of article, value = processing time for summary extraction task
index = 1 # index used for counting the article range from db_without_bl

starttime = datetime.now() # START processing time for extracting keywords from ALL articles
print('Script #4 starts at: ', starttime)

# go through ALL the articles found in db
for link, content in db_without_bl.items(): # where key = article_url and value = text content
    if index >= 1 and index <= 7588:
        print('working on article: ', link)
        print(index)

        before_processing_datetime = datetime.now() # START processing time for an article

        # getting the json response of the ONEAI API for Summarize request
        this_summary = extract_summary_from_text(content)
        summary_json[link] = this_summary # where key = article_url and value = text summary extracted from text content

        after_processing_datetime = datetime.now() # STOP processing time for an article
        diff_datetime = after_processing_datetime - before_processing_datetime
        link_processingtime_mapping[link] = diff_datetime

    index = index + 1 # increment for the next article text content

endtime = datetime.now() # STOP processing time for extracting summaries from ALL articles
print('Script #4 ends at: ', endtime)

Script #4 starts at:  2022-06-13 22:10:42.016644
working on article:  https://www.news-medical.net/health/What-is-Aagenaes-Syndrome.aspx
1
working on article:  https://www.news-medical.net/health/Milk-Allergy.aspx
2
working on article:  https://www.news-medical.net/health/Asthma-Symptoms.aspx
3
working on article:  https://www.news-medical.net/health/Alcohol-Withdrawal.aspx
4
working on article:  https://www.news-medical.net/health/LDL-Cholesterol-and-Heart-Disease.aspx
5
working on article:  https://www.news-medical.net/health/Antibiotic-Resistance-Influence-on-Wound-Care.aspx
6
working on article:  https://www.news-medical.net/health/What-are-the-Complications-of-Acromegaly.aspx
7
working on article:  https://www.news-medical.net/health/Alport-Syndrome-Signs-and-Symptoms.aspx
8
working on article:  https://www.news-medical.net/health/Pathogenesis-of-Autoimmunity.aspx
9
working on article:  https://www.news-medical.net/health/Acupuncture-Theories.aspx
10
working on article:  https://w

KeyError: 'output'

In [454]:
summary_json

{'https://www.news-medical.net/health/What-is-Aagenaes-Syndrome.aspx': 'Aagenaes syndrome is a rare disorder characterized by impaired bile flow from the liver resulting in cholestasis. The condition is mainly observed in infants from Norwegian origin, with more than 50% cases being observed in southern Norway.',
 'https://www.news-medical.net/health/Milk-Allergy.aspx': 'Milk allergy is one of the most common food allergies among children. 3% to 6% of infants and young children developing the allergy by the time they are a few months old. Milk allergy should be distinguished from lactose intolerance, which is not an allergic condition and does not involve the immune system.',
 'https://www.news-medical.net/health/Asthma-Symptoms.aspx': 'The symptoms of asthma may vary in different people and from one time to another. In some people, symptoms occur every few months, while others show symptoms every other week or even every day. Asthma symptoms can last for a few minutes to several days.

In [455]:
len(summary_json)

248

In [456]:
len(final_summary_json)

7588

In [457]:
final_summary_json.update(summary_json)

In [458]:
len(final_summary_json)

7588

In [459]:
len(final_processingtime)

7340

In [460]:
final_processingtime.update(link_processingtime_mapping)

In [461]:
len(final_processingtime)

7588

In [464]:
start_script_4 = datetime.now()
print(start_script_4)

end_script_4 = start_script_4

# check what date it'll be after all timedeltas
for delta in final_processingtime.values():
    end_script_4 = end_script_4 + delta
print(end_script_4)

2022-06-13 22:22:55.128634
2022-06-14 03:28:01.831331


In [462]:
final_time_spent_mapping = {}
string = ''
for link, time in final_processingtime.items():
    string = str(time) + ' time spent to extract summary from link: ' + link
    print(string)
    final_time_spent_mapping[link] = str(time)

this_max = max(final_processingtime.values())
print(this_max)

0:00:03.996744 time spent to extract summary from link: https://www.news-medical.net/health/What-is-Eco-Anxiety.aspx
0:00:03.412264 time spent to extract summary from link: https://www.news-medical.net/health/Pollution-and-Alzheimers.aspx
0:00:03.276909 time spent to extract summary from link: https://www.news-medical.net/health/Neuropathy-Ataxia-and-Retinitis-Pigmentosa-(NARP-Syndrome).aspx
0:00:04.299695 time spent to extract summary from link: https://www.news-medical.net/health/Acute-Myeloid-Leukemia-Causes.aspx
0:00:03.072601 time spent to extract summary from link: https://www.news-medical.net/health/Reducing-Chest-Pain-Caused-by-Anxiety.aspx
0:00:02.763694 time spent to extract summary from link: https://www.news-medical.net/health/ADHD-Symptoms.aspx
0:00:04.608901 time spent to extract summary from link: https://www.news-medical.net/health/Is-Red-Wine-Good-for-the-Body.aspx
0:00:01.637931 time spent to extract summary from link: https://www.news-medical.net/health/Why-do-Humans

# Writting the data to disk

In [450]:
# write JSON with ALL article links mapped to their summary into a single .json file
with open(final_link_summary_dictionary_json, 'w') as f:
    jsonString = json.dumps(final_summary_json)
    f.write(jsonString)

In [463]:
# write JSON with ALL article links mapped to their processing time for summary extraction into a single .json file
with open(final_link_processingtime_dictionary_json, 'w') as f:
    jsonString = json.dumps(final_time_spent_mapping)
    f.write(jsonString)