# Imports

In [None]:
from google.colab import drive
drive.mount('/content/drive')
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
!pip install scikit-surprise
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split
from surprise import SVD
from surprise import accuracy
import pandas as pd

# 1ο Ερώτημα

In [None]:
def h(Zi, Zj):
    """Return the dot product of ``Zi`` and ``Zj`` as computed by ``np.dot``."""
    similarity = np.dot(Zi, Zj)
    return similarity

In [None]:
def Ζ_algorithm(G, r, epsilon=1e-5, lambda_val=0.1, max_iter=1000):
    """Learn an ``r``-dimensional embedding for every node of ``G`` with SGD.

    Every edge ``(i, j)`` is treated as a target similarity of 1. The row of
    ``i`` is moved along ``(1 - h(Z_i, Z_j)) * Z_j`` and shrunk by the L2
    penalty ``lambda_val * Z_i``. Only the first endpoint yielded by
    ``G.edges()`` is updated for each edge; edge weights are not used.

    Args:
        G: Graph exposing ``nodes()`` and ``edges()`` (a networkx graph in
            this notebook).
        r: Number of embedding dimensions.
        epsilon: Stop once the Frobenius norm of the change in ``Z`` over one
            full sweep of the edges is at most this value.
        lambda_val: Strength of the L2 regularisation term.
        max_iter: Budget on the number of single-edge updates. It is checked
            after each complete sweep, so the last sweep may run past it.

    Returns:
        A tuple ``(Z, convergence)``. ``Z`` is an array with one row per node
        (rows follow the iteration order of ``G.nodes()``) and ``r`` columns.
        ``convergence`` lists the Frobenius norm of the change in ``Z`` for
        each sweep.
    """
    node_indices = {node: idx for idx, node in enumerate(G.nodes())}

    # Random start in [0, 1); results differ between runs unless the caller
    # seeds NumPy's global random state.
    Z = np.random.rand(len(node_indices), r)
    t = 1
    convergence = []

    while True:
        Z0 = Z.copy()

        # Endpoints of an edge are always nodes of G, so no membership check
        # is needed before the index lookup.
        for i, j in G.edges():
            i_idx, j_idx = node_indices[i], node_indices[j]

            # Step size decays as 1 / sqrt(t) over the individual updates.
            eta = np.sqrt(1 / t)
            t += 1

            # The residual has to be formed first and then scale Z_j; the
            # penalty is subtracted so that it shrinks Z_i instead of
            # inflating it.
            residual = 1 - h(Z[i_idx], Z[j_idx])
            Z[i_idx] += eta * (residual * Z[j_idx] - lambda_val * Z[i_idx])

        convergence.append(np.linalg.norm(Z - Z0, 'fro'))

        if convergence[-1] <= epsilon or t > max_iter:
            break

    return Z, convergence

In [185]:
# Uncomment exactly one `G = nx.read_adjlist(...)` line below to choose the input graph; keep all the others commented out.

# # example for p2p-Gnutella08.txt
# G = nx.read_adjlist('/content/drive/MyDrive/graphs/p2p-Gnutella08.txt')

# # example for email-Eu-core.txt
# G = nx.read_adjlist('/content/drive/MyDrive/graphs/email-Eu-core.txt')

# # # example for CA-HepTh.txt
# G = nx.read_adjlist('/content/drive/MyDrive/graphs/CA-HepTh.txt')

# # example for foo.txt
# G = nx.read_adjlist('/content/drive/MyDrive/graphs/foo.txt')

In [None]:
# Show the graph that was loaded in the previous cell.
plt.title('Starting Graph')
layout = nx.spring_layout(G)
nx.draw(G, pos=layout, with_labels=True)
plt.show()

# Learn 3-dimensional node embeddings and keep the per-sweep convergence values.
result, convergence = Ζ_algorithm(G, r=3)

# Each call prints the heading and the value on separate lines.
print("Node Embeddings:", result, sep="\n")
print("\n\nConvergence: ", convergence, sep="\n")

# 2ο Ερώτημα

In [None]:
# Toy bipartite graph: 15 users and 5 movies, one random rating per user.
G = nx.Graph()

# Node labels are "User1".."User15" and "Movie1".."Movie5"; users are added first.
users = [f"User{k}" for k in range(1, 16)]
movies = [f"Movie{k}" for k in range(1, 6)]
G.add_nodes_from(users)
G.add_nodes_from(movies)

# Give every user exactly one edge to a randomly chosen movie; the edge
# weight is a random integer rating from 1 to 5 (randint excludes the upper bound).
for user in users:
    movie = f"Movie{np.random.randint(1, 6)}"
    rating = np.random.randint(1, 6)
    G.add_edge(user, movie, weight=rating)

# Learn 2-dimensional embeddings for the user/movie graph and keep the
# convergence values returned alongside them.
result, convergence = Ζ_algorithm(G, r=2)

print("\n\nNode Embeddings: ", result, sep="\n")

# Convergence curve: one point per entry of the returned list.
plt.plot(convergence)
plt.title('Convergence Over Time')
plt.xlabel('Iteration')
plt.ylabel('Convergence')
plt.show()

# Draw the graph itself; the title is set after drawing, as before.
pos_start = nx.spring_layout(G)
nx.draw(G, pos=pos_start, with_labels=True)
plt.title('Starting Graph')
plt.show()

# Ratings are integers from 1 to 5.
reader = Reader(rating_scale=(1, 5))

# Turn every weighted edge of the graph into a (user, item, rating) row.
edges = [(user, item, attrs['weight']) for user, item, attrs in G.edges(data=True)]

# Build the Surprise dataset from those rows.
data = Dataset.load_from_df(pd.DataFrame(edges, columns=['user', 'item', 'rating']), reader)

# Hold out 20% of the ratings; the fixed seed keeps the split reproducible.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Fit the SVD model on the training part only.
model = SVD()
model.fit(trainset)

# Predicted rating for every known (user, movie) pair, training pairs included.
predictions = model.test(data.build_full_trainset().build_testset())
print("Predictions:")
for prediction in predictions:
    print(prediction)

# Score the model on the held-out ratings only: an RMSE that includes the
# training ratings is optimistically biased. accuracy.rmse prints the score
# itself (verbose defaults to True), so wrapping it in print() would output
# it twice; the value is kept for later use instead.
held_out_rmse = accuracy.rmse(model.test(testset))