In [52]:
import pandas as pd
from matricesRalas import *

In [53]:
# Load the raw paper and citation tables; the first CSV row is the header.
# Note: the visible cells below do not reference these two frames.
papers = pd.read_csv('papers/papers.csv', header=0)
citas = pd.read_csv('papers/citas.csv', header=0)

In [54]:
def genW(lista_citas, lista_papers):
    """Build the square citation matrix W from a list of citation pairs.

    Args:
        lista_citas: iterable of 2-element items ``(citador, citado)``; both
            elements are used directly as matrix indices.
        lista_papers: sized collection of papers; only its length is used,
            to fix the matrix dimension.

    Returns:
        A ``MatrizRala`` of shape ``(len(lista_papers), len(lista_papers))``
        with ``W[citador, citado] = 1`` for every pair in ``lista_citas``.

    NOTE(review): this stores the entry at [citing, cited]. The textbook
    PageRank formulation uses W[i, j] = 1 when j cites i (i.e. [cited,
    citing]). Confirm the intended orientation together with genD and P_it
    before changing anything here.
    """
    total_papers = len(lista_papers)
    W = MatrizRala(total_papers, total_papers)
    for par in lista_citas:
        origen, destino = par
        W[origen, destino] = 1
    return W

In [55]:
def genD(W):
    """Build the diagonal matrix D holding the reciprocal of each row sum of W.

    For every row key present in ``W.filas`` the stored entries of that row
    are walked as a linked list (``raiz`` -> ``siguiente``) and their values
    are added up. Rows whose sum is positive get ``D[i, i] = 1 / sum``; all
    other diagonal entries are left unset.

    Args:
        W: a ``MatrizRala`` exposing ``shape`` and ``filas``.

    Returns:
        A ``MatrizRala`` with the same shape as ``W``.
    """
    D = MatrizRala(W.shape[0], W.shape[1])
    for fila in W.filas:
        suma_fila = 0
        nodo = W.filas[fila].raiz
        while nodo:
            # Each node's valor is assumed to be a (column_index, value) pair.
            suma_fila += nodo.valor[1]
            nodo = nodo.siguiente
        if not suma_fila > 0:
            # Nothing to normalise for this row; leave the diagonal entry unset.
            continue
        D[fila, fila] = 1 / suma_fila
    return D


In [56]:
def matriz_de_unos(n, m):
    """Return an ``n`` x ``m`` ``MatrizRala`` with every entry set to 1.

    Every position is written explicitly, so the result stores
    ``shape[0] * shape[1]`` entries.
    """
    unos = MatrizRala(n, m)
    for fila in range(unos.shape[0]):
        for columna in range(unos.shape[1]):
            unos[fila, columna] = 1
    return unos

In [57]:
def P_it(d, N, W, D, tolerance=1e-6, max_iter=None):
    """Iteratively compute the PageRank vector by fixed-point iteration.

    Starting from the uniform distribution, repeats

        p_{t+1} = ((1 - d) / N) * ones + (d * W @ D) @ p_t

    until the largest absolute change between two consecutive iterates is
    no greater than ``tolerance`` (or ``max_iter`` iterations have run).

    Args:
        d: damping factor.
        N: number of nodes; W and D are expected to be N x N.
        W: citation matrix (``MatrizRala``).
        D: diagonal normalisation matrix (``MatrizRala``).
        tolerance: stop once the max absolute per-entry change is <= this
            value. Defaults to 1e-6, the value previously hard-coded.
        max_iter: optional upper bound on the number of iterations. ``None``
            (the default) keeps the original unbounded behaviour.

    Returns:
        A tuple ``(p, errores)`` where ``p`` is the final N x 1 ``MatrizRala``
        and ``errores`` is the list of per-iteration max absolute changes.

    Raises:
        ZeroDivisionError: if ``N`` is 0.
    """
    # Initial equiprobable distribution.
    p_t = MatrizRala(N, 1)
    for i in range(N):
        p_t[i, 0] = 1 / N

    # Loop-invariant terms: the constant teleport vector and the scaled
    # transition matrix d * W * D are built once, outside the iteration.
    unoMenosDeSobreEne = ((1 - d) / N) * matriz_de_unos(N, 1)
    d_WD = (d * W) @ D

    errores = []
    # Start at infinity so at least one iteration runs for any finite tolerance.
    error = float("inf")

    while error > tolerance:
        if max_iter is not None and len(errores) >= max_iter:
            # Safety cap reached; return the best iterate found so far.
            break

        # Apply the scaled transition matrix and add the teleport term.
        p_t_plus_1 = unoMenosDeSobreEne + (d_WD @ p_t)

        # Largest per-entry change between the new and the previous iterate.
        error = max(abs(p_t_plus_1[i, 0] - p_t[i, 0]) for i in range(N))
        errores.append(error)

        # The new iterate becomes the input of the next round.
        p_t = p_t_plus_1
    return p_t, errores

In [58]:
def main():
    """Run PageRank over the citation data and print the ten best-ranked papers.

    NOTE(review): cargar_citas_csv and cargar_papers are not defined in the
    visible cells; presumably they come from the star import or from an
    earlier cell - confirm they exist on a fresh kernel.
    """
    # Load the citation pairs and the paper records.
    lista_citas = cargar_citas_csv()
    lista_papers = cargar_papers()

    # Build the citation matrix and its diagonal normalisation matrix.
    W = genW(lista_citas, lista_papers)
    D = genD(W)

    N = len(lista_papers)
    d = 0.85

    # P_it returns (rank vector, per-iteration errors); only the vector is used.
    ranks, _ = P_it(d, N, W, D)

    # Pair every score with its paper index, then order from highest to lowest.
    puntajes = []
    for i in range(N):
        puntajes.append((ranks[i, 0], i))
    puntajes.sort(key=lambda par: par[0], reverse=True)

    # Print the top 10 papers with their podium position.
    print("Top 10 Papers by PageRank:")
    mejores = puntajes[:10]
    for rank, (score, index) in enumerate(mejores, 1):
        print(f"{rank}. Paper ID: {lista_papers[index][0]}, Title: \"{lista_papers[index][1]}\", Score: {score:.6f}")


# Run the PageRank pipeline only when this code executes as the top-level module.
if __name__ == "__main__":
    main()

Top 10 Papers by PageRank:
1. Paper ID: 513215, Title: "A compendium of key search references", Score: 0.000026
2. Paper ID: 216749, Title: "Query evaluation techniques for large databases", Score: 0.000020
3. Paper ID: 326101, Title: "Query Optimization in Database Systems", Score: 0.000015
4. Paper ID: 517708, Title: "Principles and realization strategies of multilevel transaction management", Score: 0.000015
5. Paper ID: 553986, Title: "A survey of extensions to APL", Score: 0.000015
6. Paper ID: 304647, Title: "Hundreds of impossibility results for distributed computing", Score: 0.000012
7. Paper ID: 465154, Title: "What have we learnt from using real parallel machines to solve real problems?", Score: 0.000012
8. Paper ID: 168601, Title: "Survey on special purpose computer architectures for AI", Score: 0.000012
9. Paper ID: 315296, Title: "A survey of the literature in computer science education since curriculum '68", Score: 0.000011
10. Paper ID: 509822, Title: "An updated cross-i