# How to web-scrape drug information from the NHS website

In [61]:
import pandas as pd 
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [62]:
# Load the synthetic drug chart and discard the index column that was
# written out when the CSV was saved
df = pd.read_csv('drug_chart.csv').drop(columns=['Unnamed: 0'])

In [63]:
# Build the list of prescribed drugs for one patient row
def prescription_list(row):
    """Split a row's comma-separated ``Prescriptions`` string into a list.

    Parameters
    ----------
    row : mapping or pandas.Series
        Anything that supports ``row['Prescriptions']``.

    Returns
    -------
    list of str or float
        The drug names obtained by splitting the string on ``", "``, or
        ``np.nan`` when the value is missing.
    """
    prescriptions = row['Prescriptions']

    # pd.isna recognises None and any NaN object. The previous identity test
    # (`is np.nan`) only matched the np.nan singleton itself, so other missing
    # values fell through to .split() and raised AttributeError.
    if pd.isna(prescriptions):
        return np.nan

    # str.split already returns a fresh list; no element-by-element copy needed.
    return prescriptions.split(", ")
    
# Attach the parsed drug list to every patient row as a new column
df = df.assign(prescription_list=df.apply(prescription_list, axis=1))

In [64]:
# Preview the first five rows to check the new prescription_list column
df.head(n=5)

Unnamed: 0,Prescriptions,prescription_count,prescription_list
0,"AMLODIPINE, LOSARTAN, SIMVASTATIN",3.0,"[AMLODIPINE, LOSARTAN, SIMVASTATIN]"
1,"AMLODIPINE, NAPROXEN, SERTRALINE, TAMSULOSIN",4.0,"[AMLODIPINE, NAPROXEN, SERTRALINE, TAMSULOSIN]"
2,"INDAPAMIDE, LEVOTHYROXINE, POTASSIUM CHLORIDE,...",4.0,"[INDAPAMIDE, LEVOTHYROXINE, POTASSIUM CHLORIDE..."
3,"DIGOXIN, LOSARTAN, METFORMIN, METOPROLOL, PRAV...",7.0,"[DIGOXIN, LOSARTAN, METFORMIN, METOPROLOL, PRA..."
4,"AMLODIPINE, ASPIRIN, ATORVASTATIN, GLIPIZIDE, ...",10.0,"[AMLODIPINE, ASPIRIN, ATORVASTATIN, GLIPIZIDE,..."


In [65]:
from selenium import webdriver

In [68]:
# Path to the ChromeDriver binary used to start the browser session
chromedriver_path = "/usr/local/bin/chromedriver"
driver = webdriver.Chrome(chromedriver_path)

In [69]:
#Find out how the website works for one page 

In [70]:
# Open the metformin page as a worked example of how the site is laid out
metformin_url = "https://www.nhs.uk/medicines/metformin/"
driver.get(metformin_url)

In [71]:
# Scrape the "about-metformin" section of the page.
# find_element(by, value) is used rather than find_element_by_xpath because the
# find_element_by_* helpers are deprecated in Selenium 4 and removed in later
# releases; "xpath" is the value of By.XPATH.
metformin = driver.find_element("xpath", """//*[@id="about-metformin"]/div""")
metformin.text

"Metformin is a medicine used to treat type 2 diabetes, and to help prevent type 2 diabetes if you're at high risk of developing it.\nMetformin is used when treating polycystic ovary syndrome (PCOS), although it's not officially approved for PCOS.\nType 2 diabetes is an illness where the body does not make enough insulin, or the insulin that it makes does not work properly. This can cause high blood sugar levels (hyperglycaemia).\nPCOS is a condition that affects how the ovaries work.\nMetformin lowers your blood sugar levels by improving the way your body handles insulin.\nIt's usually prescribed for diabetes when diet and exercise alone have not been enough to control your blood sugar levels.\nFor women with PCOS, metformin lowers insulin and blood sugar levels, and can also stimulate ovulation.\nMetformin is available on prescription as tablets and as a liquid that you drink."

In [72]:
# Put the scraped text on a single line by swapping each newline for a space
" ".join(metformin.text.split("\n"))

"Metformin is a medicine used to treat type 2 diabetes, and to help prevent type 2 diabetes if you're at high risk of developing it. Metformin is used when treating polycystic ovary syndrome (PCOS), although it's not officially approved for PCOS. Type 2 diabetes is an illness where the body does not make enough insulin, or the insulin that it makes does not work properly. This can cause high blood sugar levels (hyperglycaemia). PCOS is a condition that affects how the ovaries work. Metformin lowers your blood sugar levels by improving the way your body handles insulin. It's usually prescribed for diabetes when diet and exercise alone have not been enough to control your blood sugar levels. For women with PCOS, metformin lowers insulin and blood sugar levels, and can also stimulate ovulation. Metformin is available on prescription as tablets and as a liquid that you drink."

In [73]:
#build a function to scrape text for any drug 

In [74]:
def _first_matching_text(page, xpaths):
    """Return the text of the first XPath in ``xpaths`` found on ``page``, newlines replaced by spaces."""
    *alternatives, last = xpaths
    for xpath in alternatives:
        try:
            return page.find_element("xpath", xpath).text.replace("\n", " ")
        except Exception:
            # This spelling of the section id is not on the page; try the next.
            continue
    # The final candidate is looked up unguarded so a missing section still raises.
    return page.find_element("xpath", last).text.replace("\n", " ")


def nhs_details(drug, browser=None):
    """Scrape the information sections for one drug from the NHS medicines pages.

    The page ``https://www.nhs.uk/medicines/<drug>/`` is tried first. If
    anything on that page cannot be retrieved, the ``<drug>-for-adults`` page
    is tried instead.

    Parameters
    ----------
    drug : str
        Drug name in any case, e.g. ``'SITAGLIPTIN'``.
    browser : optional
        Object exposing ``get(url)`` and ``find_element(by, value)``.
        Defaults to the notebook-level ``driver``.

    Returns
    -------
    tuple of str
        ``(about, key_facts, who_can_take, how_and_when)`` from the main page,
        or ``(about, key_facts, who_can_take)`` from the ``-for-adults`` page
        (that branch has never scraped the fourth section; the shape is kept
        for backward compatibility).

    Raises
    ------
    Exception
        Whatever the browser raises when the fallback page or one of its
        sections cannot be found.
    """
    page = driver if browser is None else browser

    # Multi-word names (e.g. "POTASSIUM CHLORIDE") are joined with hyphens:
    # a space can never match a URL path or an element id.
    # NOTE(review): assumes the NHS site hyphenates multi-word slugs - confirm.
    slug = drug.strip().lower().replace(" ", "-")

    # `except Exception` rather than a bare `except:` so that Ctrl-C
    # (KeyboardInterrupt) and SystemExit are not swallowed mid-scrape.
    try:
        page.get(f"https://www.nhs.uk/medicines/{slug}/")
        return (
            _first_matching_text(page, [f'//*[@id="about-{slug}"]/div']),
            _first_matching_text(page, ['//*[@id="key-facts"]/div']),
            # The site uses both "cannot" and "cant" in this section id.
            _first_matching_text(page, [
                f'//*[@id="who-can-and-cannot-take-{slug}"]/div',
                f'//*[@id="who-can-and-cant-take-{slug}"]/div',
            ]),
            # This section id ends in either "it" or the drug name.
            _first_matching_text(page, [
                '//*[@id="how-and-when-to-take-it"]',
                f'//*[@id="how-and-when-to-take-{slug}"]',
            ]),
        )
    except Exception:
        page.get(f"https://www.nhs.uk/medicines/{slug}-for-adults/")
        return (
            _first_matching_text(page, [f'//*[@id="about-{slug}-for-adults"]/div']),
            _first_matching_text(page, ['//*[@id="key-facts"]/div']),
            _first_matching_text(page, [f'//*[@id="who-can-and-cannot-take-{slug}"]/div']),
        )

In [75]:
# Try the scraper on a single drug
nhs_details(drug='SITAGLIPTIN')

('Sitagliptin is a medicine used to treat type 2 diabetes. Type 2 diabetes is an illness where the body does not make enough insulin, or the insulin that it makes does not work properly. This can cause high blood sugar levels (hyperglycaemia). Sitagliptin is prescribed for people who still have high blood sugar, even though they have a sensible diet and exercise regularly. Sitagliptin is only available on prescription. It comes as tablets that you swallow. It also comes as tablets containing a mixture of sitagliptin and metformin. Metformin is another drug used to treat diabetes.',
 "Sitagliptin works by increasing the amount of insulin that your body makes. Insulin is the hormone that controls sugar levels in your blood. You take sitagliptin once a day. The most common side effect of sitagliptin is headaches. This medicine does not usually make you put on weight. Sitagliptin is also called by the brand name Januvia. When combined with metformin it's called Janumet.",
 "Sitagliptin can

In [76]:
# Print NHS information for every medication prescribed to a patient
def drug_information(patient_number):
    """Look up each of a patient's prescribed drugs on the NHS website and print the details.

    Parameters
    ----------
    patient_number : hashable
        Index label of the patient's row in ``df``.

    Returns
    -------
    None
        Everything is reported through ``print``.
    """
    # Single .loc lookup instead of chained indexing.
    drugs = df.loc[patient_number, 'prescription_list']
    print(drugs)

    # A patient with no prescriptions holds NaN here rather than a list;
    # iterating over it would raise TypeError.
    if not isinstance(drugs, list):
        print('No prescriptions recorded')
        return

    for drug in drugs:
        print('\nPrescription medication:', drug)
        print('\nAccessing NHS drug information')

        # Best-effort lookup: a drug without an NHS page should not stop the
        # loop, but `except Exception` still lets Ctrl-C interrupt the run.
        try:
            print(nhs_details(drug))
        except Exception:
            print('No NHS details available')
                
         


In [78]:
# Print NHS details for every drug prescribed to the patient in row 1
drug_information(patient_number=1)

['AMLODIPINE', 'NAPROXEN', 'SERTRALINE', 'TAMSULOSIN']

Prescription medication: AMLODIPINE

Accessing NHS drug information
('Amlodipine is a medicine used to treat high blood pressure (hypertension). If you have high blood pressure, taking amlodipine helps prevent future heart disease, heart attacks and strokes. Amlodipine is also used to prevent chest pain caused by heart disease (angina). This medicine is only available on prescription. It comes as tablets or as a liquid to swallow.', "Amlodipine lowers your blood pressure and makes it easier for your heart to pump blood around your body. It's usual to take amlodipine once a day. You can take it at any time of day, but try to make sure it's around the same time each day. The most common side effects include headache, flushing, feeling tired and swollen ankles. These usually improve after a few days. Amlodipine can be called amlodipine besilate, amlodipine maleate or amlodipine mesilate. This is because the medicine contains another 

("Sertraline is a type of antidepressant known as a selective serotonin reuptake inhibitor (SSRI). It's often used to treat depression, and also sometimes panic attacks, obsessive compulsive disorder (OCD) and post-traumatic stress disorder (PTSD). Sertraline helps many people recover from depression, and has fewer unwanted side effects than older antidepressants. Sertraline comes as tablets, which are available only on prescription.", "It usually takes 4 to 6 weeks for sertraline to work. Side effects such as feeling sick, headaches and trouble sleeping are common. They're usually mild and go away after a couple of weeks. If you and your doctor decide to take you off sertraline, your doctor will probably recommend reducing your dose gradually to help prevent extra side effects.", 'Sertraline can be taken by adults for depression or obsessive compulsive disorder. Sertraline can be taken by children aged 6 to 17, but only for obsessive compulsive disorder. Check with your doctor before 