In [1]:
import os.path
import urllib.request
from bs4 import BeautifulSoup

In [2]:
ARTICLES = set()
PAGES = set()

def parse(quote_page):
    """ Parse the Page """

    if quote_page not in PAGES:
        print("Currently processing " + quote_page)
        PAGES.add(quote_page)

        page = urllib.request.urlopen(quote_page)
        soup = BeautifulSoup(page, 'html.parser')

        for link in soup.find_all('a'):
            ref = link.get('href')
            if 'faceted_query' not in ref:
                ARTICLES.add(ref)
            else:
                parse(ref)

```
<h6 class="sidebar-heading">Topic</h6><ul class="sidebar-navmenu">
    <li class="open">
        <a href="https://www.enisa.europa.eu/topics/critical-information-infrastructures-and-services">Critical Infrastructures and Services</a>
        <ul>
            <li class="active"><a href="https://www.enisa.europa.eu/topics/critical-information-infrastructures-and-services/cii">Critical Information Infrastructures</a></li>
        </ul>
    </li>
</ul>

<h6 class="sidebar-heading">Keywords</h6><ul class="list-unstyled">
    <li>
        <a href="https://www.enisa.europa.eu/@@search?Subject%3Alist=Critical%20Information%20Infrastructure%20Protection%20%28CIIP%29">Critical Information Infrastructure Protection (CIIP)</a>
    </li>
</ul>
```

In [3]:
TOPICS = set()
KEYWORDS = set()

def get_tags(link):
    """Get topic and keywords"""

    name = link.split('/')[-1]
    
    topics = set()
    keywords = set()
    
    page = urllib.request.urlopen(link)
    soup = BeautifulSoup(page, 'html.parser')

    for link in soup.find_all('a'):
        ref = link.get('href')
        if "https://www.enisa.europa.eu/topics/" in ref:
            topics.add(link.string)
            TOPICS.add(link.string)
        if "https://www.enisa.europa.eu/@@search?Subject" in ref:
            keywords.add(link.string)
            KEYWORDS.add(link.string)
    
    return name, list(topics), list(keywords)

In [4]:
start_page = 'https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=0'
parse(start_page)

Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=0
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=20
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=40
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=60
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=80
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=100
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=120
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=140
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=160
Currently processing https://www.enisa.europa.eu/publications/@@faceted_query?b_start:int=180
Currently processing https://www.enisa.europa.eu/publications/@@fa

In [5]:
import pandas as pd

tags = pd.DataFrame(columns=['name', 'topics', 'keywords'])

idx = 0
for ref in ARTICLES:
    name, topics, keywords = get_tags(ref)
    
    print('-' * 80)
    print(name)
    print('-' * 80)
    print(topics)
    print(keywords)
    
    tags.loc[idx] = [name, topics, keywords]
    idx += 1

--------------------------------------------------------------------------------
csirt-setting-up-guide-in-estonian
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
csirt-setting-up-guide-in-russian
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRTs in Europe']
[]
--------------------------------------------------------------------------------
standards-and-tools-for-exchange-and-processing-of-actionable-information
--------------------------------------------------------------------------------
['CSIRT Services', 'Reactive Services']
['CSIRTs']
--------------------------------------------------------------------------------
csirt-setting-up-guide-in-irish
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRT

--------------------------------------------------------------------------------
definition-of-cybersecurity
--------------------------------------------------------------------------------
['Standards and certification', 'Standards']
['Cyber Security']
--------------------------------------------------------------------------------
smartphone-secure-development-guidelines
--------------------------------------------------------------------------------
[]
['Internet of things']
--------------------------------------------------------------------------------
leading-the-way-enisa-s-impact-in-operational-security
--------------------------------------------------------------------------------
['CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
stock-taking-of-information-security-training-needs-in-critical-sectors
--------------------------------------------------------------------------------
['Trainings for Cyber Security Spe

--------------------------------------------------------------------------------
annual-incident-reports-2011
--------------------------------------------------------------------------------
['For Telcos', 'Incident Reporting', 'Annual Reports']
['Incident Reporting']
--------------------------------------------------------------------------------
critical-cloud-computing
--------------------------------------------------------------------------------
['Cloud and Big Data', 'Cloud Security']
['Critical Information Infrastructure Protection (CIIP)', 'Cloud Computing Security']
--------------------------------------------------------------------------------
csirt-setting-up-guide-in-danish
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
secure-communication
-------------------------------------------------------------------

--------------------------------------------------------------------------------
electronic-evidence-a-basic-guide-for-first-responders
--------------------------------------------------------------------------------
['CSIRTs and communities', 'Law Enforcement']
['CSIRTs']
--------------------------------------------------------------------------------
emergency-communications-stocktaking
--------------------------------------------------------------------------------
['Critical Infrastructures and Services', 'Critical Information Infrastructures']
['Resilience']
--------------------------------------------------------------------------------
national-exercise-good-practice-guide
--------------------------------------------------------------------------------
['Cyber Exercises']
['Cyber Security']
--------------------------------------------------------------------------------
public-private-partnerships-in-network-and-information-security-education
------------------------------------

--------------------------------------------------------------------------------
qualified-website-authentication-certificates
--------------------------------------------------------------------------------
['Trust Services', 'Qualified trust services']
['Trust service providers']
--------------------------------------------------------------------------------
security-guidelines-on-the-appropriate-use-of-qualified-website-authentication-certificates
--------------------------------------------------------------------------------
['Trust Services', 'Qualified trust services']
['Identity & Trust', 'Trust service providers', 'eID']
--------------------------------------------------------------------------------
eisas-enhanced-roadmap-2012
--------------------------------------------------------------------------------
['CSIRT Services']
['SMEs', 'Network and Information Security Awareness', 'Good Practice', 'CSIRTs', 'Trainings']
---------------------------------------------------------

--------------------------------------------------------------------------------
ncss-good-practice-guide
--------------------------------------------------------------------------------
['National Cyber Security Strategies Guidelines & tools', 'National Cyber Security Strategies']
['National Cyber Security Strategies']
--------------------------------------------------------------------------------
cybersecurity-cooperation-defending-the-digital-frontline
--------------------------------------------------------------------------------
[]
['Trainings']
--------------------------------------------------------------------------------
csirt-setting-up-guide
--------------------------------------------------------------------------------
['Setting up guide project plan in XML', 'CSIRT Cooperation', 'CSIRTs in Europe', 'Setting up guide project plan in MPP']
['CSIRTs']
--------------------------------------------------------------------------------
copy_of_procent
--------------------------

--------------------------------------------------------------------------------
business-and-it-continuity-overview-and-implementation-principles
--------------------------------------------------------------------------------
['Threat and Risk Management', 'Risk Management']
[]
--------------------------------------------------------------------------------
monetising-privacy
--------------------------------------------------------------------------------
['Online and mobile data protection', 'Data Protection']
['Privacy']
--------------------------------------------------------------------------------
baseline-capabilities-for-national-governmental-certs
--------------------------------------------------------------------------------
['CSIRT Capabilities', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
emerging-and-future-risks-framework-introductory-manual
--------------------------------------------------------------

--------------------------------------------------------------------------------
who-is-who-directory-nis-2010
--------------------------------------------------------------------------------
['Cyber Security Education', 'NIS in Education']
[]
--------------------------------------------------------------------------------
bigdata-threat-landscape
--------------------------------------------------------------------------------
['Threat Landscape', 'ENISA Thematic Landscapes', 'Big data', 'Threat and Risk Management', 'Cloud and Big Data']
['Threat Intelligence', 'Big Data']
--------------------------------------------------------------------------------
study-on-csirt-maturity-evaluation-process
--------------------------------------------------------------------------------
['CSIRT Capabilities', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
challenges-of-security-certification-in-emerging-ict-environments
-------------

--------------------------------------------------------------------------------
csirt-setting-up-guide-in-lithuanian
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
csirt-setting-up-guide-in-hungarian
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
report-on-7th-enisa-cert-workshop
--------------------------------------------------------------------------------
[]
['CSIRTs']
--------------------------------------------------------------------------------
flying-2.0-enabling-automated-air-travel-by-identifying-and-addressing-the-challenges-of-iot-rfid-technology-annex-i
--------------------------------------------------------------------------------
['Threat

--------------------------------------------------------------------------------
involving-intermediaries-in-cyber-security-awareness-raising
--------------------------------------------------------------------------------
['Cyber Security Education', 'NIS in Education']
['Network and Information Security Awareness']
--------------------------------------------------------------------------------
cybersecurity-information-sharing
--------------------------------------------------------------------------------
['CSIRTs and communities']
['Cyber Security']
--------------------------------------------------------------------------------
privacy-tools-for-the-general-public
--------------------------------------------------------------------------------
['Privacy enhancing technologies', 'Data Protection']
['Privacy']
--------------------------------------------------------------------------------
baseline-capabilities-of-national-governmental-certs-policy-recommendations
-----------------

--------------------------------------------------------------------------------
inventory-of-risk-assessment-and-risk-management-methods
--------------------------------------------------------------------------------
['Threat and Risk Management', 'Risk Management']
[]
--------------------------------------------------------------------------------
ecsm-deployment-report
--------------------------------------------------------------------------------
['Cyber Security Education', 'European Cyber Security Month']
['Network and Information Security Awareness']
--------------------------------------------------------------------------------
standards-eidas
--------------------------------------------------------------------------------
['Standards and certification', 'Standards']
['Standards', 'Trust service providers']
--------------------------------------------------------------------------------
smartphone-secure-development-guidelines-2016
-------------------------------------------

--------------------------------------------------------------------------------
csirt-setting-up-guide-in-hindi
--------------------------------------------------------------------------------
['CSIRT Cooperation', 'CSIRTs in Europe']
['CSIRTs']
--------------------------------------------------------------------------------
exploring-the-opportunities-and-limitations-of-current-threat-intelligence-platforms
--------------------------------------------------------------------------------
['Threat and Risk Management']
['Cyber Threat Intelligence']
--------------------------------------------------------------------------------
study-on-cryptographic-protocols
--------------------------------------------------------------------------------
['Security of personal data', 'Cryptographic protocols and tools']
['Privacy']
--------------------------------------------------------------------------------
good-practice-guide-on-cooperatve-models-for-effective-ppps
------------------------------

In [6]:
tags.head()

Unnamed: 0,name,topics,keywords
0,csirt-setting-up-guide-in-estonian,"[CSIRT Cooperation, CSIRTs in Europe]",[CSIRTs]
1,csirt-setting-up-guide-in-russian,"[CSIRT Cooperation, CSIRTs in Europe]",[]
2,standards-and-tools-for-exchange-and-processin...,"[CSIRT Services, Reactive Services]",[CSIRTs]
3,csirt-setting-up-guide-in-irish,"[CSIRT Cooperation, CSIRTs in Europe]",[CSIRTs]
4,cyber-europe-2012-key-findings-report,"[Cyber Exercises, Cyber Europe]",[Crisis Management]


In [7]:
print(len(TOPICS))
sorted(TOPICS)

94


['Annual Reports',
 'Big data',
 'CIIP Governance in the EU (Annex)',
 'CSIRT Capabilities',
 'CSIRT Cooperation',
 'CSIRT Services',
 'CSIRTs and communities',
 'CSIRTs in Europe',
 'Certification',
 'Cloud Computing Risk Assessment - Spanish',
 'Cloud Security',
 'Cloud and Big Data',
 'Communication network interdependencies in smart grids ...',
 'Community Projects',
 'Critical Information Infrastructures',
 'Critical Infrastructures and Services',
 'Cryptographic protocols and tools',
 'Cyber 7 - Seven messages to the Edge of Cyber-Space',
 'Cyber 7: Seven Messages to the Edge of Cyber-Space',
 'Cyber Crisis Management',
 'Cyber Europe',
 'Cyber Exercises',
 'Cyber Security Education',
 'Data Protection',
 'Detailed Mind Map for Internet Infrastructure Assets',
 'ECSM 2016 Planning',
 'ECSM countries profiles',
 'ENISA Thematic Landscapes',
 'ENISA Threat Landscape',
 'ENISA Threat Taxonomy - A tool for structuring threat...',
 'ENISA threat landscape - Top 15 Cyber Threats 2015',

In [8]:
print(len(KEYWORDS))
sorted(KEYWORDS)

51


['Big Data',
 'CSIRTs',
 'Certification',
 'Cloud Computing Security',
 'Crisis Management',
 'Critical Information Infrastructure Protection (CIIP)',
 'Cryptography',
 'Cyber Security',
 'Cyber Threat Intelligence',
 'Cyber Threats',
 'Cyber crisis cooperation',
 'Cyber crisis procedures',
 'Data protection',
 'Digital Skills',
 'ENISA events',
 'European Union Institutions',
 'Exercises',
 'Finance',
 'Good Practice',
 'Health',
 'Identity & Trust',
 'Incident Reporting',
 'Incident Response',
 'Internet of things',
 'Managed Services: Metrics',
 'Mobile Applications',
 'Mobile Security',
 'National Cyber Security Strategies',
 'Network and Information Security Awareness',
 'Online Safety',
 'Personal Data',
 'Privacy',
 'Privacy Tools',
 'Procurement',
 'Public Private Partnership',
 'Resilience',
 'Risk Management',
 'SCADA',
 'SMEs',
 'Smart Cars',
 'Smart Cities',
 'Smart Grids',
 'Standards',
 'Threat Intelligence',
 'Threat landscape',
 'Training',
 'Trainings',
 'Trust Service

In [9]:
tags.to_csv('tags.csv', index=False)