# Citation Page Rank

This code calculates the PageRank of each author based on the citations they have received.
First, a directed citation graph is created using networkx; then the PageRank is calculated with and without considering weights on the edges. (We added weights to the edges of the graph, measuring how many times one author cites another.
For example, if author A cites author B 3 times, the weight on the edge from A to B will be 3.)


In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import csv

from pathlib import Path
# Locate the citation edge list relative to the current working directory:
# two levels up, then data/generated/.
d = str(Path().resolve().parent.parent) + "/data/generated/citations-author-author.csv"

# Column names are supplied explicitly, so the first line of the file is
# read as data rather than as a header.
# Presumably each row means "author_id cited author_cited_id" - confirm
# against the script that generates the file.
authors = pd.read_csv(d, sep=',', names=["author_id", "author_cited_id"])
print(authors.head(n=7))


# Build the directed citation graph. A DiGraph keeps a single edge per
# (author_id, author_cited_id) pair, so repeated rows collapse into one edge.
g = nx.DiGraph()

# Every edge starts with weight 1, so this first PageRank run treats all
# citation links equally.
list1 = [1] * len(authors)

# Register the citing authors as nodes first, then add the edges (which also
# creates any cited author that never appears as a citing author).
# No node attributes are attached: the previous `attr_dict=dict` keyword
# stored the builtin `dict` type as a meaningless attribute on every node.
g.add_nodes_from(authors.author_id)
g.add_weighted_edges_from(zip(authors.author_id, authors.author_cited_id, list1))

print("\nCitation graph without considering weights")

# Draw the graph without labels and display it.
nx.draw_networkx(g, with_labels=False)
plt.show()

print("pageRank results:")
pr = nx.pagerank(g, weight='weight')

# Save the results as (AuthorId, PageRank) rows.
# newline='' lets the csv module control line endings; without it an extra
# blank line is written after every row on Windows.
with open('citationPageRankNoWeights.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['AuthorId', 'PageRank'])
    writer.writerows(pr.items())


    
## Now take the weights into consideration: the weight of an edge becomes the
## number of rows in `authors` that contain that (author_id, author_cited_id) pair.
authorsCouples = list(zip(authors.author_id, authors.author_cited_id))

# First pass: reset every edge weight to 0 so the count starts clean.
for citing, cited in authorsCouples:
    if g.has_edge(citing, cited):
        g[citing][cited]['weight'] = 0

# Second pass: add 1 for every occurrence of the pair in the input rows.
for citing, cited in authorsCouples:
    if g.has_edge(citing, cited):
        g[citing][cited]['weight'] += 1

print("\nCitation graph considering weights")

# Draw the graph without labels and display it.
nx.draw_networkx(g, with_labels=False)
plt.show()

# PageRank now uses the per-edge counts as weights.
pr = nx.pagerank(g, weight='weight')

# Save the results as (AuthorId, PageRank) rows.
# newline='' lets the csv module control line endings; without it an extra
# blank line is written after every row on Windows.
with open('citationPageRankWithWeights.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['AuthorId', 'PageRank'])
    writer.writerows(pr.items())

    

## Add a column with the absolute ordering of the PageRank to make the results clearer to the final user

In [None]:
# Load the weighted PageRank scores and sort them from highest to lowest.
pagerank = pd.read_csv('citationPageRankWithWeights.csv')
print(pagerank.head(5))
pagerank = pagerank.sort_values('PageRank', ascending=False)

print(pagerank.head(5))

# 1-based position of each author in the sorted table (1 = highest PageRank).
lista = list(range(1, len(pagerank) + 1))

pagerank['Order'] = lista
print(pagerank.head(5))

# Write with a single ".csv" extension so the file name matches the one that
# is read back afterwards ('pageRankCitationWithOrder.csv'); the previous
# "….csv.csv" name made that read fail with FileNotFoundError.
pagerank.to_csv("pageRankCitationWithOrder.csv", index=False)

In [None]:
# Reload the ordered PageRank table from disk and preview its first five rows.
pagerankwithOrder = pd.read_csv('pageRankCitationWithOrder.csv')
pagerankwithOrder.head(n=5)