# Last update: Nov 10 2024 by portilla@gmail.com

In [2]:
!pip install py2neo

Collecting py2neo
  Downloading py2neo-2021.2.4-py2.py3-none-any.whl.metadata (9.9 kB)
Collecting interchange~=2021.0.4 (from py2neo)
  Downloading interchange-2021.0.4-py2.py3-none-any.whl.metadata (1.9 kB)
Collecting monotonic (from py2neo)
  Downloading monotonic-1.6-py2.py3-none-any.whl.metadata (1.5 kB)
Collecting pansi>=2020.7.3 (from py2neo)
  Downloading pansi-2024.11.0-py2.py3-none-any.whl.metadata (3.1 kB)
Downloading py2neo-2021.2.4-py2.py3-none-any.whl (177 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.2/177.2 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading interchange-2021.0.4-py2.py3-none-any.whl (28 kB)
Downloading pansi-2024.11.0-py2.py3-none-any.whl (26 kB)
Downloading monotonic-1.6-py2.py3-none-any.whl (8.2 kB)
Installing collected packages: monotonic, pansi, interchange, py2neo
Successfully installed interchange-2021.0.4 monotonic-1.6 pansi-2024.11.0 py2neo-2021.2.4


In [1]:
#pip3 install pandas
import pandas as pd

from py2neo import Node
from py2neo import Relationship
from py2neo import Transaction
from py2neo.database import Graph

In [2]:
#Load ibm-blogs.csv

blogs_df = pd.read_csv("ibm-blogs.csv")
print("Number of rows in ibm-blogs file: ", blogs_df.shape[0])
blogs_df.head()

Number of rows in ibm-blogs file:  21


Unnamed: 0,id,title,link,date
0,1001,Cloud at the Edge,https://www.ibm.com/cloud/blog/cloud-at-the-edge,2/26/19
1,1002,Rounding out the Edges,https://www.ibm.com/cloud/blog/rounding-out-th...,5/7/19
2,1003,Architecting at the Edge,https://www.ibm.com/cloud/blog/architecting-at...,10/21/19
3,1004,DevOps at the Edge,https://www.ibm.com/cloud/blog/devops-at-the-edge,12/3/19
4,1005,Policies at the Edge,https://www.ibm.com/cloud/blog/policies-at-the...,1/22/20


In [3]:
id_df = blogs_df.set_index(blogs_df["id"])
id_df.drop(['id'], axis=1, inplace=True)
id_df.head()

Unnamed: 0_level_0,title,link,date
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1001,Cloud at the Edge,https://www.ibm.com/cloud/blog/cloud-at-the-edge,2/26/19
1002,Rounding out the Edges,https://www.ibm.com/cloud/blog/rounding-out-th...,5/7/19
1003,Architecting at the Edge,https://www.ibm.com/cloud/blog/architecting-at...,10/21/19
1004,DevOps at the Edge,https://www.ibm.com/cloud/blog/devops-at-the-edge,12/3/19
1005,Policies at the Edge,https://www.ibm.com/cloud/blog/policies-at-the...,1/22/20


In [4]:
nodes = sorted(list(blogs_df.id.unique()) )
len(nodes)
print(nodes)

[1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021]


In [5]:
def _node(an_id:str) -> Node:
    return Node(":Article" , title=id_df.at[an_id,'title'],  article_id=str(an_id),
                link=id_df.at[an_id,'link'], date=id_df.at[an_id,'date']
                )

d_nodes = dict((key, _node(key)) for (key) in nodes)
print(d_nodes)

{1001: Node(':Article', article_id='1001', date='2/26/19', link='https://www.ibm.com/cloud/blog/cloud-at-the-edge', title='Cloud at the Edge'), 1002: Node(':Article', article_id='1002', date='5/7/19', link='https://www.ibm.com/cloud/blog/rounding-out-the-edges', title='Rounding out the Edges'), 1003: Node(':Article', article_id='1003', date='10/21/19', link='https://www.ibm.com/cloud/blog/architecting-at-the-edge', title='Architecting at the Edge'), 1004: Node(':Article', article_id='1004', date='12/3/19', link='https://www.ibm.com/cloud/blog/devops-at-the-edge', title='DevOps at the Edge'), 1005: Node(':Article', article_id='1005', date='1/22/20', link='https://www.ibm.com/cloud/blog/policies-at-the-edge', title='Policies at the Edge'), 1006: Node(':Article', article_id='1006', date='5/12/20', link='https://www.ibm.com/cloud/blog/security-at-the-edge', title='Security at the Edge'), 1007: Node(':Article', article_id='1007', date='6/8/20', link='https://www.ibm.com/cloud/blog/analytics

In [None]:
NEO4J_USERNAME = "neo4j"
# NEO4J_PASSWORD = {password}
NEO4J_URI = "bolt://localhost:7687"
NEO4J_DB = "neo4j"

neog = Graph(NEO4J_URI, NEO4J_DB, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

In [11]:
tx = neog.begin()
neog.delete_all()
neog.commit(tx) 

In [12]:
def countNodes():
    query = "MATCH (n) RETURN COUNT(n)"
    result = neog.query(query)
    print(result)

In [13]:
def createNodes(d_nodes:dict):
    for node in d_nodes.values():
        tx.create(node)
    print ("Neo4j nodes created")

In [14]:
# Creates a triple series with item1 - item2 - item3 as input

def create_tpl(itemOne, itemTwo, itemThree):
    tpl = []
    tpl.append(itemOne)
    tpl.append(itemTwo)
    tpl.append(itemThree)
    return tpl

In [15]:
def get_unique(column):
    """
    Get unique elements from a Pandas DF column
    Arguments:
        column: a DF column
    Returns:
        A node list with sorted, 
        unique elements of the DF column
    """
    nodes = ()
    nodes = sorted( list(column.unique() ) )
    return nodes 

In [16]:
countNodes()

 COUNT(n) 
----------
        0 



In [17]:
#Create article nodes
tx = neog.begin()
createNodes(d_nodes)
neog.commit(tx) 

Neo4j nodes created


In [18]:
countNodes()

 COUNT(n) 
----------
       21 



In [19]:
c_df = pd.read_csv ('final-categories.csv')
print(c_df)

                                                  URL  \
0    https://www.ibm.com/cloud/blog/cloud-at-the-edge   
1    https://www.ibm.com/cloud/blog/cloud-at-the-edge   
2    https://www.ibm.com/cloud/blog/cloud-at-the-edge   
3   https://www.ibm.com/cloud/blog/rounding-out-th...   
4   https://www.ibm.com/cloud/blog/rounding-out-th...   
..                                                ...   
56  https://www.ibm.com/cloud/blog/truck-tracker-a...   
57  https://www.ibm.com/cloud/blog/truck-tracker-a...   
58  https://www.ibm.com/cloud/blog/optical-charact...   
59  https://www.ibm.com/cloud/blog/optical-charact...   
60  https://www.ibm.com/cloud/blog/optical-charact...   

                                                Label     Score  
0                           /technology and computing  0.869217  
1       /technology and computing/internet technology  0.823215  
2   /technology and computing/networking/network m...  0.808410  
3                  /technology and computing/hardwa

In [20]:
rows = []
for _, row in c_df.iterrows():
    #print(row['Label'])
    cats = row['Label'].split('/')
    cats = list(filter(None, cats))
    
    for c in cats:
        tpl = create_tpl(row['URL'], c, row['Score'])
        rows.append(tpl)

#print(rows)
cat_df = pd.DataFrame(rows)
print(cat_df.shape)
print(cat_df.head())

(155, 3)
                                                  0                         1  \
0  https://www.ibm.com/cloud/blog/cloud-at-the-edge  technology and computing   
1  https://www.ibm.com/cloud/blog/cloud-at-the-edge  technology and computing   
2  https://www.ibm.com/cloud/blog/cloud-at-the-edge       internet technology   
3  https://www.ibm.com/cloud/blog/cloud-at-the-edge  technology and computing   
4  https://www.ibm.com/cloud/blog/cloud-at-the-edge                networking   

          2  
0  0.869217  
1  0.823215  
2  0.823215  
3  0.808410  
4  0.808410  


In [21]:
#create node list from a tuple
#tuple:ordered collection of objects

#category node list
c_nodes = get_unique(cat_df[1])
print(c_nodes)
#type(c_nodes)

['automation', 'business and industrial', 'business operations', 'business process', 'business software', 'computer', 'computer components', 'computer networking', 'computer peripherals', 'computer security', 'databases', 'email', 'hardware', 'internet technology', 'management', 'network monitoring and management', 'network security', 'networking', 'operating systems', 'servers', 'software', 'technology and computing', 'unix', 'wireless technology']


In [22]:
#Create a dictionary of categorry ID nodes with p_nodes list
d_nodes =()
def _node(category:str) -> Node:
    return Node(":Category",  name=category , label = category
    )

d_nodes = dict((key, _node(key)) for (key) in c_nodes) 
print("Dictionary size: " + str(len(d_nodes)) )
print(d_nodes)

Dictionary size: 24
{'automation': Node(':Category', label='automation', name='automation'), 'business and industrial': Node(':Category', label='business and industrial', name='business and industrial'), 'business operations': Node(':Category', label='business operations', name='business operations'), 'business process': Node(':Category', label='business process', name='business process'), 'business software': Node(':Category', label='business software', name='business software'), 'computer': Node(':Category', label='computer', name='computer'), 'computer components': Node(':Category', label='computer components', name='computer components'), 'computer networking': Node(':Category', label='computer networking', name='computer networking'), 'computer peripherals': Node(':Category', label='computer peripherals', name='computer peripherals'), 'computer security': Node(':Category', label='computer security', name='computer security'), 'databases': Node(':Category', label='databases', name=

In [23]:
#Create category nodes
tx = neog.begin()
createNodes(d_nodes)
neog.commit(tx) 

Neo4j nodes created


In [24]:
countNodes()

 COUNT(n) 
----------
       45 



# Create URL -has-a-> Category relationships


In [26]:
rows=[]

for _, row in c_df.iterrows():
    #print(row['URL'])
    #print(row['Label'])
    cats = row['Label'].split('/')
    cats = list(filter(None, cats))
    #print(cats)
    #print(cats[-1])
    tpl = create_tpl(row['URL'], 'HAS-A', cats[-1])
    rows.append(tpl)


print(rows)
has_df = pd.DataFrame(rows)
print(has_df.shape)
print(has_df.head())

[['https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'HAS-A', 'technology and computing'], ['https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'HAS-A', 'internet technology'], ['https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'HAS-A', 'network monitoring and management'], ['https://www.ibm.com/cloud/blog/rounding-out-the-edges', 'HAS-A', 'hardware'], ['https://www.ibm.com/cloud/blog/rounding-out-the-edges', 'HAS-A', 'operating systems'], ['https://www.ibm.com/cloud/blog/rounding-out-the-edges', 'HAS-A', 'network monitoring and management'], ['https://www.ibm.com/cloud/blog/architecting-at-the-edge', 'HAS-A', 'hardware'], ['https://www.ibm.com/cloud/blog/architecting-at-the-edge', 'HAS-A', 'operating systems'], ['https://www.ibm.com/cloud/blog/architecting-at-the-edge', 'HAS-A', 'computer'], ['https://www.ibm.com/cloud/blog/devops-at-the-edge', 'HAS-A', 'hardware'], ['https://www.ibm.com/cloud/blog/devops-at-the-edge', 'HAS-A', 'operating systems'], ['https://www.ibm.com/cloud/blog

In [27]:
has_df.rename(columns={0: 'URL', 1: 'Relationship', 2: 'Category'}, inplace=True )
print(has_df.head())
print(has_df.shape)

                                                 URL Relationship  \
0   https://www.ibm.com/cloud/blog/cloud-at-the-edge        HAS-A   
1   https://www.ibm.com/cloud/blog/cloud-at-the-edge        HAS-A   
2   https://www.ibm.com/cloud/blog/cloud-at-the-edge        HAS-A   
3  https://www.ibm.com/cloud/blog/rounding-out-th...        HAS-A   
4  https://www.ibm.com/cloud/blog/rounding-out-th...        HAS-A   

                            Category  
0           technology and computing  
1                internet technology  
2  network monitoring and management  
3                           hardware  
4                  operating systems  
(61, 3)


In [28]:
#If we need to remove duplicates


#category1_df = has_df.drop_duplicates(
# subset = ['URL', 'Relationship', 'Category'],
#  keep = 'last').reset_index(drop = True)
  
# print latest dataframe
#print(category1_df.head())
#print(category1_df.shape)

In [29]:
tx = neog.begin()

for _, row in has_df.iterrows():
    node = "MATCH(a:`:Article`) WHERE a.link='" + row[0] + "' return a"
    a_node  =  neog.evaluate(node)
    node = "MATCH(c:`:Category`) WHERE c.label='" + row[2] + "' return c"
    c_node  =  neog.evaluate(node)
    tx.create(Relationship(a_node, row[1], c_node))

print("URL-relationship-Category completed")

neog.commit(tx)

  node = "MATCH(a:`:Article`) WHERE a.link='" + row[0] + "' return a"
  node = "MATCH(c:`:Category`) WHERE c.label='" + row[2] + "' return c"
  tx.create(Relationship(a_node, row[1], c_node))


URL-relationship-Category completed


## Create category-has-a->category relationships

In [31]:
rows=[]

for _, row in c_df.iterrows():
    #print(row['URL'])
    #print(row['Label'])
    cats = row['Label'].split('/')
    cats = list(filter(None, cats))
    #print('\nrow: ', cats)
    #print(type(cats))
    for c in cats:
        if (len(cats) >= 2):
            tpl = create_tpl(cats[0], 'HAS-A', cats[1])
            rows.append(tpl)
            cats.pop(0)

ontology_df = pd.DataFrame(rows)
print(ontology_df.shape)
print(ontology_df.head())

(88, 3)
                          0      1                                  2
0  technology and computing  HAS-A                internet technology
1  technology and computing  HAS-A                         networking
2                networking  HAS-A  network monitoring and management
3  technology and computing  HAS-A                           hardware
4  technology and computing  HAS-A                  operating systems


In [32]:

ontology_df.rename(columns={0: 'Category', 1: 'Relationship', 2: 'Category'}, inplace=True )
print(ontology_df.head())
print(ontology_df.shape)

                   Category Relationship                           Category
0  technology and computing        HAS-A                internet technology
1  technology and computing        HAS-A                         networking
2                networking        HAS-A  network monitoring and management
3  technology and computing        HAS-A                           hardware
4  technology and computing        HAS-A                  operating systems
(88, 3)


In [33]:
new_df = ontology_df.drop_duplicates(
  subset = ['Category', 'Relationship', 'Category'],
  keep = 'last').reset_index(drop = True)
  
# print latest dataframe
print(new_df.head())
print(new_df.shape)

                   Category Relationship              Category
0                  hardware        HAS-A  computer peripherals
1  technology and computing        HAS-A     computer security
2         computer security        HAS-A      network security
3   business and industrial        HAS-A            automation
4  technology and computing        HAS-A            networking
(19, 3)


In [34]:
tx = neog.begin()

for _, row in new_df.iterrows():
    node = "MATCH(c:`:Category`) WHERE c.label='" + row[0] + "' return c"
    a_node  =  neog.evaluate(node)
    node = "MATCH(c:`:Category`) WHERE c.label='" + row[2] + "' return c"
    b_node  =  neog.evaluate(node)
    tx.create(Relationship(a_node, row[1], b_node))

print("Category-relationship-Category completed")

neog.commit(tx)

Category-relationship-Category completed


  node = "MATCH(c:`:Category`) WHERE c.label='" + row[0] + "' return c"
  node = "MATCH(c:`:Category`) WHERE c.label='" + row[2] + "' return c"
  tx.create(Relationship(a_node, row[1], b_node))


In [35]:
countNodes()

 COUNT(n) 
----------
       45 



# open GDSL playground
# Show the graphs with
# pagerank
# lovain

## Sample cypher queries

## Retrieve all Articles in a specific Category:

In [36]:
from neo4j import GraphDatabase

def get_articles_in_category(driver, category_name):
    query = '''
    MATCH (a:`:Article`)-[:`HAS-A`]->(c:`:Category` {name: $category_name})
    RETURN a
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [record['a'] for record in result]

# Usage
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", NEO4J_PASSWORD))
articles = get_articles_in_category(driver, "technology and computing")


print(articles)
print(len(articles))

[<Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:0' labels=frozenset({':Article'}) properties={'article_id': '1001', 'date': '2/26/19', 'link': 'https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'title': 'Cloud at the Edge'}>, <Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:6' labels=frozenset({':Article'}) properties={'article_id': '1007', 'date': '6/8/20', 'link': 'https://www.ibm.com/cloud/blog/analytics-at-the-edge', 'title': 'Analytics at the Edge'}>]
2


## Find all Categories linked to a specific Category:

In [38]:
def get_related_categories(driver, category_name):
    query = '''
    MATCH (c:`:Category` {name: $category_name})-[:`HAS-A`]-(related:`:Category`)
    RETURN related
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [record['related'] for record in result]

# Usage
related_categories = get_related_categories(driver, "technology and computing")
for category in related_categories:
    print(category['name'])

computer security
hardware
internet technology
networking
operating systems
software


## Count Articles in each Category:

In [39]:
def count_articles_per_category(driver):
    query = '''
    MATCH (c:`:Category`)<-[:`HAS-A`]-(a:`:Article`)
    RETURN c.name AS Category, COUNT(a) AS ArticleCount
    '''
    with driver.session() as session:
        result = session.run(query)
        return [{"Category": record["Category"], "ArticleCount": record["ArticleCount"]} for record in result]

# Usage
article_counts = count_articles_per_category(driver)
for count in article_counts:
    print(f"Category: {count['Category']}, Article Count: {count['ArticleCount']}")

Category: internet technology, Article Count: 2
Category: network monitoring and management, Article Count: 8
Category: technology and computing, Article Count: 2
Category: hardware, Article Count: 8
Category: operating systems, Article Count: 13
Category: computer, Article Count: 5
Category: computer peripherals, Article Count: 1
Category: network security, Article Count: 1
Category: computer components, Article Count: 4
Category: wireless technology, Article Count: 2
Category: automation, Article Count: 1
Category: databases, Article Count: 2
Category: networking, Article Count: 1
Category: business software, Article Count: 2
Category: software, Article Count: 3
Category: servers, Article Count: 3
Category: unix, Article Count: 1
Category: email, Article Count: 1
Category: business process, Article Count: 1


## 	Find Categories with fewer than three connections:

In [44]:
def categories_with_few_connections(driver):
    query = '''
    MATCH (c:`:Category`)
    WITH c, SIZE([(c)-[:`HAS-A`]-(connectedCategory) | connectedCategory]) AS connections
    WHERE connections < 3
    RETURN c
    '''
    with driver.session() as session:
        result = session.run(query)
        return [record['c'] for record in result]

# Usage
few_connection_categories = categories_with_few_connections(driver)

# Print the categories with few connections
for category in few_connection_categories:
    print(category.get('name'))

automation
business operations
business process
computer networking
computer peripherals
computer security
email
management
network security
unix
wireless technology


## List all Articles under a Category and its connected Categories:

In [46]:
def get_articles_in_related_categories(driver, category_name):
    query = '''
    MATCH (c:`:Category` {name: $category_name})-[:`HAS-A`*1..2]-(related:`:Category`)<-[:`HAS-A`]-(a:`:Article`)
    RETURN a, related
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [{"Article": record["a"], "RelatedCategory": record["related"]} for record in result]

# Usage
articles_related = get_articles_in_related_categories(driver, "technology and computing")
for article in articles_related:
    print(f"Article: {article['Article']['title']}, Related Category: {article['RelatedCategory']['name']}")

Article: Security at the Edge, Related Category: network security
Article: Rounding out the Edges, Related Category: hardware
Article: Architecting at the Edge, Related Category: hardware
Article: DevOps at the Edge, Related Category: hardware
Article: Policies at the Edge, Related Category: hardware
Article: Clusters at the Edge, Related Category: hardware
Article: Network Slicing at the Edge, Related Category: hardware
Article: Cloud Services at the Edge, Related Category: hardware
Article: Bandwidth Allocation in Virtual Server Instances, Related Category: hardware
Article: Architecting at the Edge, Related Category: computer
Article: DevOps at the Edge, Related Category: computer
Article: Analytics at the Edge, Related Category: computer
Article: Cloud Services at the Edge, Related Category: computer
Article: What Is Optical Character Recognition (OCR)?, Related Category: computer
Article: Analytics at the Edge, Related Category: computer components
Article: Automation at the Edge,

## Get all Articles linked to a Category and Categories connected up to two degrees:

In [47]:
def get_articles_in_category_with_degrees(driver, category_name):
    query = '''
    MATCH (a:`:Article`)-[:`HAS-A`]->(c:`:Category`)-[:`HAS-A`*0..2]-(related:`:Category`)
    WHERE c.name = $category_name
    RETURN a, c, related
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [{"Article": record["a"], "Category": record["c"], "RelatedCategory": record["related"]} for record in result]

# Usage
articles_with_degrees = get_articles_in_category_with_degrees(driver, "technology and computing")

# Display the results
for article in articles_with_degrees:
    print(f"Article: {article['Article']['title']}")
    print(f"Category: {article['Category']['name']}")
    print(f"Related Category: {article['RelatedCategory']['name']}")
    print("---")

Article: Cloud at the Edge
Category: technology and computing
Related Category: technology and computing
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer security
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: network security
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: hardware
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer components
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer networking
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer peripherals
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: internet technology
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: email

## More examples

## 1. Retrieve all Articles in a specific Category


In [80]:
from neo4j import GraphDatabase

def get_articles_in_category(driver, category_name):
    query = '''
    MATCH (a:`:Article`{ article_id: "1001"})-[:`HAS-A`]->(c:`:Category` {label: $category_name})
    RETURN a
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [record['a'] for record in result]

# Usage
driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", NEO4J_PASSWORD))
articles = get_articles_in_category(driver, "internet technology")

# Print the articles
for article in articles:
    print(article)

<Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:0' labels=frozenset({':Article'}) properties={'article_id': '1001', 'date': '2/26/19', 'link': 'https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'title': 'Cloud at the Edge'}>


## 2. Find all Categories linked to a specific Category

In [70]:
def get_related_categories(driver, category_name):
    query = '''
    MATCH (c:`:Category` {name: $category_name})-[:`HAS-A`]-(related:`:Category`)
    RETURN related
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [record['related'] for record in result]

# Usage
related_categories = get_related_categories(driver, "network monitoring and management")

# Print the related categories
for category in related_categories:
    print(category['name'])

networking


## 3. Count Articles in each Category

In [81]:
def count_articles_per_category(driver):
    query = '''
    MATCH (c:`:Category`)<-[:`HAS-A`]-(a:`:Article` {article_id: "1001"})
    RETURN c.name AS Category, COUNT(a) AS ArticleCount
    '''
    with driver.session() as session:
        result = session.run(query)
        return [{"Category": record["Category"], "ArticleCount": record["ArticleCount"]} for record in result]

# MATCH (a:`:Article`{ article_id: "1001"})-[:`HAS-A`]->(c:`:Category` {label: $category_name})

# Usage
# print(c ategory)
article_counts = count_articles_per_category(driver)

# Print the results
for count in article_counts:
    print(count)

{'Category': 'internet technology', 'ArticleCount': 1}
{'Category': 'network monitoring and management', 'ArticleCount': 1}
{'Category': 'technology and computing', 'ArticleCount': 1}


## 4. Find Categories with fewer than three connection

In [95]:
def categories_with_few_connections(driver):
    query = '''
    MATCH (c:`:Category`)
    WITH c, SIZE([(c)-[:`HAS-A`]-(other) | other]) AS connections
    WHERE connections < 3
    RETURN c.name AS category_name
    '''
    with driver.session() as session:
        result = session.run(query)
        # Extract category names from the results
        return [record['category_name'] for record in result]

# Usage
few_connection_categories = categories_with_few_connections(driver)
# print(few_connection_categories)

#display the categories
for category in few_connection_categories:
    print(category)

automation
business operations
business process
computer networking
computer peripherals
computer security
email
management
network security
unix
wireless technology


## 5. List all Articles under a Category and its connected Categories

In [84]:
def get_articles_in_related_categories(driver, category_name):
    query = '''
    MATCH (c:`:Category` {name: $category_name})-[:`HAS-A`*1..2]-(related:`:Category`)<-[:`HAS-A`]-(a:`:Article` {article_id: "1001"})
    RETURN a, related
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [{"Article": record["a"], "RelatedCategory": record["related"]} for record in result]

# Usage
articles_related = get_articles_in_related_categories(driver, "technology and computing")

# Print the results
for article_related in articles_related:
    print(f"Article: {article_related['Article']}, Related Category: {article_related['RelatedCategory']}")

Article: <Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:0' labels=frozenset({':Article'}) properties={'article_id': '1001', 'date': '2/26/19', 'link': 'https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'title': 'Cloud at the Edge'}>, Related Category: <Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:34' labels=frozenset({':Category'}) properties={'name': 'internet technology', 'label': 'internet technology'}>
Article: <Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:0' labels=frozenset({':Article'}) properties={'article_id': '1001', 'date': '2/26/19', 'link': 'https://www.ibm.com/cloud/blog/cloud-at-the-edge', 'title': 'Cloud at the Edge'}>, Related Category: <Node element_id='4:95b1055c-400b-4422-a9fc-dccddc909413:36' labels=frozenset({':Category'}) properties={'name': 'network monitoring and management', 'label': 'network monitoring and management'}>


## 6. Get all Articles linked to a Category and Categories connected up to two degrees

In [85]:
def get_articles_in_category_with_degrees(driver, category_name):
    query = '''
    MATCH (a:`:Article` {article_id: "1001", date: "2/26/19", link: "https://www.ibm.com/cloud/blog/cloud-at-the-edge", title: "Cloud at the Edge"})-[:`HAS-A`]->(c:`:Category`)-[:`HAS-A`*0..2]-(related:`:Category`)
    WHERE c.name = $category_name
    RETURN a, c, related
    '''
    with driver.session() as session:
        result = session.run(query, category_name=category_name)
        return [{"Article": record["a"], "Category": record["c"], "RelatedCategory": record["related"]} for record in result]

# Usage
articles_with_degrees = get_articles_in_category_with_degrees(driver, "technology and computing")

# Print the results
for article in articles_with_degrees:
    print(f"Article: {article['Article']['title']}")
    print(f"Category: {article['Category']['name']}")
    print(f"Related Category: {article['RelatedCategory']['name']}")
    print("---")

Article: Cloud at the Edge
Category: technology and computing
Related Category: technology and computing
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer security
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: network security
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: hardware
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer components
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer networking
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: computer peripherals
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: internet technology
---
Article: Cloud at the Edge
Category: technology and computing
Related Category: email