In [1]:
import numpy as np

def simrank(graph, C, max_iterations=10, epsilon=1e-4):
    # Initialize the similarity matrix with zeros
    num_nodes = len(graph)
    S = np.zeros((num_nodes, num_nodes))

    for i in range(num_nodes):
        for j in range(num_nodes):
            if i == j:
                S[i][j] = 1.0  # Similarity of a node to itself is 1.0

    for iteration in range(max_iterations):
        S_prev = S.copy()
        for i in range(num_nodes):
            for j in range(num_nodes):
                if i != j:
                    # Calculate the similarity between nodes i and j
                    sim = 0.0
                    for u in range(num_nodes):
                        for v in range(num_nodes):
                            if u != v:
                                in_neighbors_i = set(graph[i])  # In-neighbors of node i
                                in_neighbors_j = set(graph[j])  # In-neighbors of node j
                                common_neighbors = len(in_neighbors_i & in_neighbors_j)  # Common in-neighbors
                                sim += (C / (len(in_neighbors_i) * len(in_neighbors_j))) * common_neighbors * S_prev[u][v]
                    
                    S[i][j] = sim

        # Check for convergence
        if np.allclose(S, S_prev, atol=epsilon):
            break

    return S

# Example usage
graph = {
    0: [1, 2, 3],
    1: [0, 4],
    2: [0, 4],
    3: [0, 5],
    4: [1, 2, 6],
    5: [3, 6],
    6: [4, 5]
}

C = 0.8  # Decay factor
similarity_matrix = simrank(graph, C)

# Print the similarity matrix
print("Similarity Matrix:")
print(similarity_matrix)

Similarity Matrix:
[[1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]


$$
\text{Cosine Similarity} = \frac{\sum_{i=1}^{n} (A_i \cdot B_i)}{\sqrt{\sum_{i=1}^{n} (A_i^2)} \cdot \sqrt{\sum_{i=1}^{n} (B_i^2)}}
$$


In [13]:
# Similarity by string pairs
print("Similarity between nodes 0 and 1:")
text1 = "I like to eat broccoli and bananas."
text2 = "I ate a banana and spinach smoothie for breakfast."
# algorithm cosine similarity without library
import math

def dot_product(v1, v2):
    return sum(x * y for x, y in zip(v1, v2))

def magnitude(vector):
    return math.sqrt(sum(x ** 2 for x in vector))

def cosine_similarity(vec1, vec2):
    if len(vec1) != len(vec2):
        raise ValueError("Vector dimensions must be the same")

    dot_product_value = dot_product(vec1, vec2)
    magnitude_vec1 = magnitude(vec1)
    magnitude_vec2 = magnitude(vec2)

    if magnitude_vec1 == 0 or magnitude_vec2 == 0:
        return 0  # To avoid division by zero

    return dot_product_value / (magnitude_vec1 * magnitude_vec2)

# Example usage:
vector1 = [1, 2, 3]
vector2 = [4, 5, 6]

similarity = cosine_similarity(vector1, vector2)
print(f"Cosine Similarity: {similarity}")
   


Similarity between nodes 0 and 1:
Cosine Similarity: 0.9746318461970762
