In [69]:
import csv

def find_sinks(edges, *, output_path='sinks.csv'):
    """Find the sink nodes of a directed graph and write them to a CSV file.

    A sink is a node that appears in at least one edge but never as the
    source of an edge, i.e. it has no outgoing edges.

    Args:
        edges: Iterable of ``(from_node, to_node)`` pairs. Node IDs must be
            hashable and mutually orderable, because the result is sorted.
        output_path: Path of the CSV file to write. The file is overwritten
            and contains a ``Sink Nodes`` header followed by one sink per row.

    Returns:
        The sink nodes as a list sorted in ascending order (empty when the
        graph has no sinks or no edges).
    """
    all_nodes = set()
    nodes_with_outgoing = set()

    # Only membership matters here, so two sets are enough; no adjacency
    # list needs to be kept in memory.
    for from_node, to_node in edges:
        nodes_with_outgoing.add(from_node)
        all_nodes.update((from_node, to_node))

    sinks = sorted(all_nodes - nodes_with_outgoing)

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Sink Nodes'])
        writer.writerows([sink] for sink in sinks)

    return sinks

def page_rank(edges, num_iterations=10, damping_factor=0.85, *,
              output_path='PR_results.csv', redistribute_sinks=False):
    """Compute PageRank by fixed-count power iteration and write it to CSV.

    Every node starts with rank ``1 / N`` (N = number of distinct nodes).
    Each iteration sets

        rank(v) = (1 - d) / N + d * sum(rank(u) / out_degree(u))

    over all edges ``u -> v``. Repeated edges are counted once per
    occurrence, both in the sum and in the out-degree.

    Note:
        With the default ``redistribute_sinks=False`` the rank held by sink
        nodes (nodes without outgoing edges) is not passed on to anyone, so
        on graphs that contain sinks the scores do not sum to 1. Pass
        ``redistribute_sinks=True`` to spread that rank evenly over all
        nodes each iteration, which keeps the total at 1.

    Args:
        edges: Iterable of ``(from_node, to_node)`` pairs with hashable IDs.
        num_iterations: Number of update rounds to perform.
        damping_factor: The damping factor ``d`` in the formula above.
        output_path: Path of the CSV file to write. The file is overwritten
            with a ``Node ID,PageRank`` header and one row per node.
        redistribute_sinks: Whether to share the rank of sink nodes among
            all nodes instead of dropping it.

    Returns:
        List of ``(node, rank)`` tuples sorted by rank, highest first; the
        same rows that are written to ``output_path``.
    """
    out_degree = {}
    predecessors = {}
    all_nodes = set()

    for from_node, to_node in edges:
        out_degree[from_node] = out_degree.get(from_node, 0) + 1
        predecessors.setdefault(to_node, []).append(from_node)
        all_nodes.update((from_node, to_node))

    num_nodes = len(all_nodes)
    ranks = {node: 1 / num_nodes for node in all_nodes}
    sink_nodes = [node for node in all_nodes if node not in out_degree]

    # Loop-invariant teleport term; guarded so an empty graph does not
    # divide by zero.
    teleport = (1 - damping_factor) / num_nodes if num_nodes else 0.0

    for _ in range(num_iterations):
        sink_share = 0.0
        if redistribute_sinks and num_nodes:
            sink_mass = sum(ranks[node] for node in sink_nodes)
            sink_share = damping_factor * sink_mass / num_nodes
        # When redistribution is off, sink_share is 0.0 and base equals
        # the plain teleport term exactly.
        base = teleport + sink_share

        new_ranks = {}
        for node in all_nodes:
            rank_sum = sum(
                ranks[in_node] / out_degree[in_node]
                for in_node in predecessors.get(node, ())
            )
            new_ranks[node] = base + damping_factor * rank_sum
        ranks = new_ranks

    sorted_page_rank = sorted(ranks.items(), key=lambda item: item[1], reverse=True)

    # newline='' lets the csv module control line endings itself.
    with open(output_path, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Node ID', 'PageRank'])
        writer.writerows(sorted_page_rank)

    return sorted_page_rank

def test():
    """Run both analyses on a small six-node graph and print their CSV output.

    The graph is described as an adjacency table and expanded into the edge
    list passed to ``find_sinks`` and ``page_rank``. Each function writes its
    own result file, which is then read back and echoed to stdout.
    """
    adjacency = {
        0: (1, 2, 3),
        1: (0, 3, 4),
        2: (0, 4),
        3: (1, 2, 4, 5),
        4: (5,),
    }
    # Expand source -> targets into (source, target) pairs, keeping the
    # table's order.
    edges = [
        (source, target)
        for source, targets in adjacency.items()
        for target in targets
    ]

    find_sinks(edges)
    page_rank(edges)

    # Echo each result file under its heading.
    reports = (
        ("Sinks:", 'sinks.csv'),
        ("PageRank results:", 'PR_results.csv'),
    )
    for heading, csv_path in reports:
        print(heading)
        with open(csv_path, 'r') as f:
            print(f.read())

# Run the test
test()

# Run the functions on the provided dataset.
# Each data line is expected to hold two whitespace-separated integer node
# IDs (source, then target); lines beginning with '#' are comments.
edges_from_file = []
with open('p2p-Gnutella04.txt', 'r') as file:
    for line in file:
        fields = line.split()
        # Skip blank lines (e.g. a trailing empty line) and comment lines,
        # including comments preceded by whitespace; both would otherwise
        # fail in the unpacking / int() conversion below.
        if not fields or fields[0].startswith('#'):
            continue
        # Strict unpacking: a row without exactly two fields still raises.
        from_node, to_node = map(int, fields)
        edges_from_file.append((from_node, to_node))

find_sinks(edges_from_file)
page_rank(edges_from_file)


Sinks:
Sink Nodes
5

PageRank results:
Node ID,PageRank
5,0.10427838004680354
4,0.07779669133083934
0,0.06506880165154409
3,0.05957025568510249
1,0.05627062470378012
2,0.05627062470378012

