# Libraries

In [1]:
import numpy as np
import time

from utils import readText, printMat, rmRed, convertStrNum, randSelect, randEdge

from tpVector import tpVector
from pagerank import pagerank

# Setting Up the Data Directory

Set up the directory for the data here. String should end with /

In [2]:
directory = 'D:/Coding Stuffs/PageRank Project (final)/Data/'

# Graph 1: wb-cs-stanford

In [5]:
# Set Up the Graph Name here

graphName = "wb-cs-stanford.txt"

## Data Processing

In [6]:
# Data Processing

lines, rows = readText(directory+graphName)
lines, vertices,total_edges = rmRed(lines)

lines = convertStrNum(lines)
# print(lines)

print("Number of Nodes: ", vertices)
print("Number of Edges: ", total_edges)

Number of Nodes:  9914
Number of Edges:  36854


## Creating the Edge List

In [7]:
#Creating the edge
tstart = time.time()
edge_list = []

for line in lines:
  edge = line
  edge.pop(0)
  edge.pop(0)
  
  for number in edge:
    if not isinstance(number, int):
      edge.remove(number)
  
  edge_list.append(edge)

tend = time.time()
print(edge_list)
print("Time Needed: ", tend - tstart)

[[], [], [], [5, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [6], [5, 8, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [5, 8, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [5, 8, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [10], [5, 9, 11, 12, 13, 14, 16, 24, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [36, 2238, 6517], [36, 2238, 6517], [36, 2238, 6517], [36, 2238, 6517], [5, 9, 11, 12, 13, 14, 16, 24, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [17], [5, 9, 16, 19, 24, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [5, 9, 16, 27, 36, 38, 47, 52, 2238, 6517], [5, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [], [22], [5, 9, 16, 23, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [23], [25], [5, 9, 16, 26, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [5, 9, 16, 26, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517], [28], [5, 9, 16, 24, 27, 29, 30, 31, 32, 34, 36, 38, 47, 52, 2238, 6517], [5, 9, 16, 27, 29, 30, 31, 32

## Removing 10% of edges

In [8]:
#Removing random edges
tstart = time.time()

threshold = 0.1
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()
for i in range(10):
  print(new_edges[i])

print("Time Neede: ", tend-tstart)

[]
[]
[]
[5, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517]
[6]
[5, 8, 9, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517]
[5, 8, 16, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517]
[5, 9, 27, 29, 30, 32, 34, 36, 38, 47, 52, 2238, 6517]
[10]
[5, 9, 11, 13, 14, 24, 27, 29, 30, 32, 34, 38, 47, 52, 2238]
Time Neede:  0.0608365535736084


In [9]:
len(new_edges)

9914

In [10]:
len(lines)

9914

### Creating the Teleport Vector and Running PageRank

In [11]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend=time.time()
print("Time Needed: ", tend-tstart)

Time Needed:  0.002992391586303711


In [12]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend=time.time()
print("Time Needed: ", tend-tstart)

Time Needed:  0.06482696533203125


In [14]:
for i in range(10):
    print(ranking[i])

print("Iterations: ", iterations)

1.0174242512737567e-08
1.0174242512737567e-08
1.0174242512737567e-08
2.784050213273728e-10
1.98860729519552e-11
2.9829109427932823e-10
2.784050213273728e-10
2.585189483754176e-10
1.98860729519552e-11
2.9829109427932823e-10
Iterations:  95


## Removing 20% of edges

In [19]:
#Removing random edges
tstart = time.time()
threshold = 0.2
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()

for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[]
[]
[]
[29, 38]
[]
[16, 38, 52, 2238, 6517]
[8, 34, 52, 2238, 6517]
[27, 30, 47, 6517]
[10]
[5, 11, 24, 29, 32, 47, 52, 2238]
Time Needed:  0.1246793270111084


In [17]:
len(new_edges)

9914

In [18]:
len(lines)

9914

### Creating the Teleport Vector and Running PageRank

In [20]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend=time.time()

print("Time Needed: ", tend-tstart)
#print(teleport)

Time Needed:  0.004987001419067383


In [21]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()
print("Time Needed: ", tend - tstart)

Time Needed:  0.02692866325378418


In [22]:
for i in range(10):
    print(ranking[i])
print("Iterations: ",iterations)

1.0173880691959264e-08
1.0173880691959264e-08
1.0173880691959264e-08
7.213954538126053e-09
1.0173880691959264e-08
1.8034886345315125e-08
1.8034886345315125e-08
1.4427909076252106e-08
3.6069772690630264e-09
2.5248840883441168e-08
Iterations:  63


# Graph 2: Stanford

In [3]:
# Set Up the Graph Name here

graphName = "Stanford.txt"

## Data Processing

In [7]:
# Data Processing

lines, rows = readText(directory+graphName)
lines, vertices,total_edges = rmRed(lines)

lines = convertStrNum(lines)
# print(lines)

print("Number of Nodes: ", vertices)
print("Number of Edges: ", total_edges)

Number of Nodes:  281903
Number of Edges:  2312497


## Creating the Edge List

In [8]:
#Creating the edge list
tstart = time.time()
edge_list = []

for line in lines:
  edge = line
  edge.pop(0)
  edge.pop(0)
  
  for number in edge:
    if not isinstance(number, int):
      edge.remove(number)
  
  edge_list.append(edge)

tend = time.time()
for i in range(10):
  print(edge_list[i])

print("Time Needed: ", tend-tstart)

[]
[872, 1281, 12094, 17093, 17794, 19147, 20923, 25202, 31702, 35877, 52411, 53625, 54582, 59348, 64930, 72940, 73764, 78295, 84477, 98008, 98628, 100193, 102355, 105318, 105730, 115926, 120097, 140864, 151832, 152086, 155610, 158672, 161354, 161689, 163550, 163990, 164599, 172329, 172765, 175133, 175799, 178642, 180755, 181235, 181714, 181865, 185567, 190453, 192783, 200263, 204189, 204604, 210870, 213966, 224289, 225119, 228300, 234603, 241596, 243294, 246897, 251658, 252915, 253992, 257281, 259633, 266555, 276060, 278619, 280935]
[96493]
[]
[6545, 14395, 59745]
[6540, 59742]
[45366]
[]
[174822]
[6536, 118572]
Time Needed:  257.2230079174042


## Removing 10% of edges

In [9]:
#Removing random edges
tstart = time.time()

threshold = 0.1
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()

for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[]
[872, 1281, 12094, 17093, 17794, 19147, 20923, 25202, 31702, 35877, 52411, 53625, 54582, 59348, 64930, 72940, 78295, 84477, 98008, 98628, 100193, 102355, 105318, 105730, 115926, 120097, 140864, 151832, 152086, 155610, 158672, 161354, 161689, 163550, 163990, 164599, 172329, 172765, 175133, 175799, 178642, 180755, 181235, 181714, 181865, 185567, 190453, 192783, 204189, 204604, 210870, 213966, 224289, 225119, 228300, 234603, 241596, 243294, 251658, 252915, 253992, 257281, 259633, 276060, 278619, 280935]
[96493]
[]
[6545, 14395, 59745]
[6540, 59742]
[45366]
[]
[174822]
[6536, 118572]
Time Needed:  12.777822971343994


In [10]:
len(new_edges)

281903

In [11]:
len(lines)

281903

### Creating the Teleport Vector and Running PageRank

In [16]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend - time.time()
print("Time Needed: ", tend-tstart)
#print(teleport)

Time Needed:  -44.381630182266235


In [14]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  3.9663918018341064


In [15]:
for i in range(10):
    print(ranking[i])

print("Iterations: ", iterations)

1.2583475408363767e-11
4.382577776909926e-21
6.640269358954428e-23
1.2583475408363767e-11
1.9920808076863267e-22
1.3280538717908857e-22
6.640269358954428e-23
1.2583475408363767e-11
6.640269358954428e-23
1.3280538717908857e-22
Iterations:  237


## Removing 20% of edges

In [18]:
#Removing random edges
tstart = time.time()
threshold = 0.2
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()
for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[]
[872, 12094, 17093, 17794, 20923, 31702, 35877, 52411, 54582, 59348, 64930, 78295, 84477, 98008, 100193, 102355, 105318, 105730, 120097, 151832, 152086, 155610, 158672, 161689, 163550, 163990, 172329, 172765, 175133, 175799, 180755, 181714, 181865, 192783, 204189, 204604, 213966, 224289, 225119, 228300, 234603, 243294, 251658, 257281, 259633, 276060, 278619]
[]
[]
[6545, 14395, 59745]
[6540]
[45366]
[]
[]
[6536, 118572]
Time Needed:  18.978238821029663


In [19]:
len(new_edges)

281903

In [20]:
len(lines)

281903

### Creating Teleport Vector and Running PageRank

In [21]:
#Creating Teleport vector
tstart = time.time()

teleport = tpVector(new_edges)

tend = time.time()
print("Time Needed: ", tend-tstart)

Time Needed:  0.13065147399902344


In [22]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  2.6578898429870605


In [23]:
for i in range(10):
    print(ranking[i])
print("Iterations: ", iterations)

1.2583475408163831e-11
2.6490721969729023e-15
1.2583475408163831e-11
1.2583475408163831e-11
1.6908971470039776e-16
5.636323823346594e-17
5.636323823346594e-17
1.2583475408163831e-11
1.2583475408163831e-11
1.1272647646693187e-16
Iterations:  153


# Graph 3: India

In [3]:
# Set Up the Graph Name here

graphName = "INDIA.txt"

## Data Processing

In [4]:
# Data Processing

lines, rows = readText(directory+graphName)
lines, vertices,total_edges = rmRed(lines)

lines = convertStrNum(lines)
# print(lines)

print("Number of Nodes: ", vertices)
print("Number of Edges: ", total_edges)

Number of Nodes:  1382908
Number of Edges:  16917053


## Creating the Edge List

In [5]:
#Creating the edge list
tstart = time.time()
edge_list = []

for line in lines:
  edge = line
  edge.pop(0)
  edge.pop(0)
  
  for number in edge:
    if not isinstance(number, int):
      edge.remove(number)
  
  edge_list.append(edge)

tend = time.time()
for i in range(10):
  print(edge_list[i])

print("Time Needed: ", tend-tstart)

[1, 3, 161227, 378785, 378804]
[1, 3, 378804]
[4, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[184, 185, 187, 188, 189, 1347902]
Time Needed:  95.07967686653137


## Removing 10% of edges

In [6]:
#Removing random edges
tstart = time.time()

threshold = 0.1
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()

for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[1, 3, 161227, 378785, 378804]
[1, 3]
[4, 378804]
[5, 6, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 9, 378804]
[4, 5, 6, 7, 8, 9, 10]
[4, 5, 7, 8, 9, 10, 378804]
[4, 5, 6, 7, 8, 9, 10, 378804]
[4, 6, 7, 8, 9, 10, 378804]
[184, 185, 187, 188, 189, 1347902]
Time Needed:  73.88367867469788


In [7]:
len(new_edges)

1382908

In [8]:
len(lines)

1382908

### Creating the Teleport Vector and Running PageRank

In [9]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend = time.time()
print("Time Needed: ", tend-tstart)
#print(teleport)

Time Needed:  -19.509415864944458


In [10]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

  temp_v[idx] += v[i-1]


Time Needed:  44.00921368598938


  while (np.linalg.norm(v-temp_v) > error):


In [11]:
for i in range(10):
    print(ranking[i])

print("Iterations: ", iterations)

3.6155695100469446e-06
4.338683412056334e-06
1.4462278040187777e-06
5.061797314065723e-06
8.677366824112667e-06
1.7354733648225335e-05
1.7354733648225335e-05
5.134108704266661e-05
9.328169335921118e-05
4.338683412056334e-06
Iterations:  0


## Removing 20% of edges

In [12]:
#Removing random edges
tstart = time.time()
threshold = 0.2
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()
for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[3, 378804]
[1, 3]
[4, 378804]
[5, 6, 7, 8, 9, 10, 378804]
[5, 6, 9]
[4, 5, 6, 7, 8, 10]
[4, 5, 8, 9, 10, 378804]
[4, 6, 7, 8, 10, 378804]
[4, 6, 7, 8, 10, 378804]
[184, 185, 187, 188, 189, 1347902]
Time Needed:  65.5746796131134


In [13]:
len(new_edges)

1382908

In [14]:
len(lines)

1382908

### Creating Teleport Vector and Running PageRank

In [15]:
#Creating Teleport vector
tstart = time.time()

teleport = tpVector(new_edges)

tend = time.time()
print("Time Needed: ", tend-tstart)

Time Needed:  1.3414127826690674


In [16]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  412.32553267478943


In [17]:
for i in range(10):
    print(ranking[i])
print("Iterations: ", iterations)

1.6360336300110042e-302
2.4540504450164895e-302
1.6360336300110042e-302
5.726117705038494e-302
2.4540504450164895e-302
1.1452235410076989e-301
1.1452235410076989e-301
3.1084638970209123e-301
6.135126112541241e-301
4.908100890032979e-302
Iterations:  4193


# Graph 4: Stanford Berkeley

In [41]:
# Set Up the Graph Name here

graphName = "Stanford_BerkeleyV2.txt"

## Data Processing

In [42]:
# Data Processing

lines, rows = readText(directory+graphName)
lines, vertices,total_edges = rmRed(lines)

lines = convertStrNum(lines)
# print(lines)

print("Number of Nodes: ", vertices)
print("Number of Edges: ", total_edges)

Number of Nodes:  68344
Number of Edges:  7583376


## Creating the Edge List

In [43]:
#Creating the edge list
tstart = time.time()
edge_list = []

for line in lines:
  edge = line
  edge.pop(0)
  edge.pop(0)
  
  for number in edge:
    if not isinstance(number, int):
      edge.remove(number)
  
  edge_list.append(edge)

tend = time.time()
for i in range(10):
  print(edge_list[i])

print("Time Needed: ", tend-tstart)

[3, 7, 16, 18, 20, 48819, 48824, 48894, 48900, 48912, 48916, 50361, 64694, 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65094, 107258, 108260, 108346, 160844, 160919, 164450, 164543, 172459, 255096, 255111, 255168, 255196, 255205, 255463, 255464, 255655, 255675, 256797, 257160, 257187, 257328, 257406, 257407, 257409, 257410, 257411, 257412, 257414, 257415, 257416, 257417, 257419, 257420, 257421, 257422, 257423, 257426, 257428, 257430, 257431, 257432, 257435, 257437, 257446, 257447, 257448, 257449, 257450, 257453, 257455, 257457, 257458, 257460, 257462, 257464, 257469, 257470, 257471, 257474, 257476, 257479, 257480, 257489, 257491, 257498, 257500, 257502, 257512, 257514, 257516, 257526, 257532, 257536, 257540, 257545, 257547, 257551, 257553, 257557, 257559, 257561, 257563, 257566, 257568, 257569, 257577, 257581, 257591, 257594, 257595, 257596, 257598, 257602, 257605, 257606, 257608, 257609, 257611, 257613, 257615, 257617, 257619, 257620, 257630, 257634, 257638, 257640, 257643, 25764

## Removing 10% of edges

In [44]:
#Removing random edges
tstart = time.time()

threshold = 0.1
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()

for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[3, 7, 16, 18, 20, 48819, 48824, 48894, 48900, 48912, 48916, 50361, 64694, 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65094, 107258, 108260, 108346, 160844, 160919, 164450, 164543, 172459, 255096, 255111, 255168, 255196, 255205, 255463, 255464, 255655, 255675, 256797, 257160, 257187, 257328, 257406, 257407, 257409, 257410, 257411, 257412, 257414, 257415, 257416, 257417, 257419, 257420, 257421, 257422, 257423, 257426, 257428, 257430, 257431, 257432, 257435, 257437, 257446, 257447, 257448, 257449, 257450, 257453, 257455, 257457, 257458, 257460, 257462, 257464, 257469, 257470, 257471, 257474, 257476, 257479, 257480, 257489, 257491, 257498, 257500, 257502, 257512, 257514, 257516, 257526, 257532, 257536, 257540, 257545, 257547, 257551, 257553, 257557, 257559, 257561, 257563, 257566, 257568, 257569, 257577, 257581, 257591, 257594, 257595, 257596, 257598, 257602, 257605, 257606, 257608, 257609, 257611, 257613, 257615, 257617, 257619, 257620, 257630, 257634, 257638, 257640, 257643, 25764

In [45]:
len(new_edges)

683446

In [46]:
len(lines)

683446

### Creating the Teleport Vector and Running PageRank

In [47]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend - time.time()
print("Time Needed: ", tend-tstart)
#print(teleport)

Time Needed:  -0.6095716953277588


In [48]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  73.35342216491699


In [49]:
for i in range(10):
    print(ranking[i])

print("Iterations: ", iterations)

8.470662276981845e-276
8.581852354603357e-276
2.0216377749359835e-278
1.0108188874679918e-278
8.531311410229978e-276
1.0108188874679918e-278
2.5785989819308868e-275
1.7032298253835882e-275
6.847287143708249e-275
2.5785989819308868e-275
Iterations:  3856


## Removing 20% of edges

In [50]:
#Removing random edges
tstart = time.time()
threshold = 0.2
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()
for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[3, 7, 16, 18, 20, 48819, 48824, 48894, 48900, 48912, 48916, 50361, 64694, 65087, 65088, 65089, 65090, 65091, 65092, 65093, 65094, 107258, 108260, 108346, 160844, 160919, 164450, 164543, 172459, 255096, 255111, 255168, 255196, 255205, 255463, 255464, 255655, 255675, 256797, 257160, 257187, 257328, 257406, 257407, 257409, 257410, 257411, 257412, 257414, 257415, 257416, 257417, 257419, 257420, 257421, 257422, 257423, 257426, 257428, 257430, 257431, 257432, 257435, 257437, 257446, 257447, 257448, 257449, 257450, 257453, 257455, 257457, 257458, 257460, 257462, 257464, 257469, 257470, 257471, 257474, 257476, 257479, 257480, 257489, 257491, 257498, 257500, 257502, 257512, 257514, 257516, 257526, 257532, 257536, 257540, 257545, 257547, 257551, 257553, 257557, 257559, 257561, 257563, 257566, 257568, 257569, 257577, 257581, 257591, 257594, 257595, 257596, 257598, 257602, 257605, 257606, 257608, 257609, 257611, 257613, 257615, 257617, 257619, 257620, 257630, 257634, 257638, 257640, 257643, 25764

In [51]:
len(new_edges)

683446

In [52]:
len(lines)

683446

### Creating Teleport Vector and Running PageRank

In [53]:
#Creating Teleport vector
tstart = time.time()

teleport = tpVector(new_edges)

tend = time.time()
print("Time Needed: ", tend-tstart)

Time Needed:  0.2712745666503906


In [54]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  45.12209749221802


In [55]:
for i in range(10):
    print(ranking[i])
print("Iterations: ", iterations)

1.9550666729850834e-145
1.9783968242140182e-145
4.6660302457877384e-148
2.3330151228938692e-148
1.9690647637224465e-145
2.3330151228938692e-148
3.982456814779885e-145
3.931130482076212e-145
7.932251417839254e-145
2.140876366198558e-12
Iterations:  2009


# Graph 5: Wikipedia

In [56]:
# Set Up the Graph Name here

graphName = "wikipedia.txt"

## Data Processing

In [57]:
# Data Processing

lines, rows = readText(directory+graphName)
lines, vertices,total_edges = rmRed(lines)

lines = convertStrNum(lines)
# print(lines)

print("Number of Nodes: ", vertices)
print("Number of Edges: ", total_edges)

Number of Nodes:  1634989
Number of Edges:  19753078


## Creating the Edge List

In [58]:
#Creating the edge list
tstart = time.time()
edge_list = []

for line in lines:
  edge = line
  edge.pop(0)
  edge.pop(0)
  
  for number in edge:
    if not isinstance(number, int):
      edge.remove(number)
  
  edge_list.append(edge)

tend = time.time()
for i in range(10):
  print(edge_list[i])

print("Time Needed: ", tend-tstart)

[87]
[143]
[161]
[521]
[332]
[]
[739]
[463, 549, 1400, 1421, 1574, 2137, 2162, 3169, 3208, 3390, 3971, 4496, 4892, 5015, 5302, 5320, 5354, 5910, 6045, 7178, 7618, 7772, 8275, 8325, 8583, 8687, 8715, 8760, 8772, 8960, 9261, 9281, 9291, 9603, 9868, 9960, 10377, 10595, 10659, 11032, 11452, 11732, 11793, 11916, 12108, 13188, 13217, 13816, 13838, 13883, 14098, 14422, 14609, 14859, 15720, 15773, 16759, 17531, 17660, 18298, 18301, 18717, 18824, 19884, 19896, 20451, 20576, 20722, 20750, 20798, 20984, 21305, 21376, 21378, 21386, 22327, 23180, 23506, 23576, 23851, 24334, 24471, 24536, 24751, 25377, 25378, 25406, 25624, 26102, 26105, 26118, 26192, 26304, 26353, 26468, 26762, 26823, 26873, 27370, 27377, 27378, 27390, 27391, 27476, 27487, 27494, 27519, 27534, 27544, 27586, 27587, 27605, 27664, 28229, 29516, 29590, 29612, 29638, 29703, 29768, 30408, 31469, 33832, 33962, 35393, 35526, 36293, 36739, 37855, 38178, 38188, 38698, 39405, 39409, 39583, 40134, 40140, 40459, 42237, 42486, 42656, 43714, 45311

## Removing 10% of edges

In [59]:
#Removing random edges
tstart = time.time()

threshold = 0.1
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()

for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[87]
[143]
[161]
[521]
[332]
[]
[739]
[463, 549, 1400, 1574, 2137, 2162, 3169, 3208, 3390, 3971, 4496, 4892, 5015, 5302, 5354, 5910, 6045, 7618, 7772, 8275, 8583, 8687, 8715, 8760, 8772, 8960, 9261, 9281, 9291, 9603, 9868, 9960, 10377, 10595, 10659, 11032, 11452, 11732, 11793, 11916, 12108, 13188, 13217, 13816, 13838, 13883, 14098, 14422, 14609, 14859, 15720, 15773, 16759, 17531, 17660, 18298, 18301, 18824, 19884, 19896, 20451, 20722, 20798, 20984, 21305, 21376, 21386, 22327, 23180, 23506, 23576, 23851, 24471, 24536, 24751, 25377, 25378, 25624, 26102, 26105, 26118, 26192, 26304, 26353, 26468, 26762, 26823, 26873, 27370, 27377, 27378, 27390, 27391, 27487, 27494, 27519, 27534, 27544, 27587, 27605, 27664, 29516, 29590, 29638, 29703, 29768, 30408, 31469, 33962, 35393, 35526, 36293, 36739, 37855, 38178, 38188, 38698, 39409, 39583, 40134, 40459, 42237, 42486, 42656, 43714, 45311, 45563, 45641, 45878, 46220, 47158, 47874, 52280, 53274, 55041, 58119, 59029, 59783, 60640, 61308, 61772, 66774, 6

In [60]:
len(new_edges)

1634989

In [61]:
len(lines)

1634989

### Creating the Teleport Vector and Running PageRank

In [62]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend - time.time()
print("Time Needed: ", tend-tstart)
#print(teleport)

Time Needed:  -0.6863946914672852


In [63]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  122.53067326545715


In [64]:
for i in range(10):
    print(ranking[i])

print("Iterations: ", iterations)

4.709922975579043e-164
4.709922975579043e-164
4.709922975579043e-164
4.709922975579043e-164
4.709922975579043e-164
3.740850305504716e-13
4.709922975579043e-164
1.4694959683806618e-161
4.709922975579043e-164
4.709922975579043e-164
Iterations:  2226


## Removing 20% of edges

In [65]:
#Removing random edges
tstart = time.time()
threshold = 0.2
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()
for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

[87]
[143]
[161]
[521]
[332]
[]
[739]
[463, 549, 1574, 2137, 3169, 3208, 3390, 3971, 4496, 4892, 5015, 5302, 5354, 5910, 6045, 7618, 7772, 8275, 8583, 8687, 8715, 8760, 8960, 9281, 9291, 9603, 9868, 9960, 10595, 10659, 11032, 11452, 11793, 11916, 12108, 13188, 13217, 13816, 13838, 13883, 14098, 14422, 14609, 15720, 15773, 16759, 17660, 18298, 18301, 18824, 19884, 19896, 20451, 20722, 20798, 20984, 21305, 21386, 23180, 23506, 23576, 23851, 24471, 24536, 24751, 25377, 25378, 25624, 26102, 26105, 26118, 26192, 26304, 26353, 26468, 26762, 26873, 27377, 27378, 27390, 27391, 27487, 27494, 27519, 27544, 27587, 27605, 29516, 29590, 29703, 29768, 31469, 33962, 35393, 35526, 36293, 36739, 37855, 38178, 38188, 38698, 39409, 39583, 40134, 40459, 42237, 42486, 43714, 45311, 45563, 45641, 45878, 47158, 47874, 52280, 55041, 58119, 59029, 59783, 60640, 61772, 67615, 69121, 69302, 71929, 71932, 74728, 74730, 75462, 75479, 78081, 79588, 80287, 80369, 86440, 100251, 115277, 116988, 116994, 118039, 121688

In [66]:
len(new_edges)

1634989

In [67]:
len(lines)

1634989

### Creating Teleport Vector and Running PageRank

In [68]:
#Creating Teleport vector
tstart = time.time()

teleport = tpVector(new_edges)

tend = time.time()
print("Time Needed: ", tend-tstart)

Time Needed:  0.6502556800842285


In [69]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

Time Needed:  74.85278344154358


In [70]:
for i in range(10):
    print(ranking[i])
print("Iterations: ", iterations)

3.5440082089495185e-106
3.5440082089495185e-106
3.5440082089495185e-106
3.5440082089495185e-106
3.5440082089495185e-106
3.740850305504716e-13
3.5440082089495185e-106
9.356181671626759e-104
3.740850305504716e-13
3.5440082089495185e-106
Iterations:  1406


# Graph 6: EDU

In [72]:
# Set Up the Graph Name here

graphName = "wb-edu.txt"

## Data Processing

In [73]:
# Data Processing

lines, rows = readText(directory+graphName)
lines, vertices,total_edges = rmRed(lines)

lines = convertStrNum(lines)
# print(lines)

print("Number of Nodes: ", vertices)
print("Number of Edges: ", total_edges)

Number of Nodes:  9845725
Number of Edges:  57156537


## Creating the Edge List

In [74]:
#Creating the edge list
tstart = time.time()
edge_list = []

for line in lines:
  edge = line
  edge.pop(0)
  edge.pop(0)
  
  for number in edge:
    if not isinstance(number, int):
      edge.remove(number)
  
  edge_list.append(edge)

tend = time.time()
for i in range(10):
  print(edge_list[i])

print("Time Needed: ", tend-tstart)

[]
[]
[14]
[4, 5, 10, 12, 13, 16, 17, 20, 21, 23, 26, 27, 28, 33, 34, 35, 41, 42, 89, 93, 94, 97, 102]
[]
[4, 6, 9, 39, 40, 102]
[4, 74, 77]
[4, 39]
[4, 39, 61, 99, 105]
[4, 81]
Time Needed:  1607.5138075351715


## Removing 10% of edges

In [None]:
#Removing random edges
tstart = time.time()

threshold = 0.1
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()

for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

In [None]:
len(new_edges)

In [None]:
len(lines)

### Creating the Teleport Vector and Running PageRank

In [None]:
#Creating Teleport vector
tstart = time.time()
teleport = tpVector(new_edges)
tend - time.time()
print("Time Needed: ", tend-tstart)
#print(teleport)

In [None]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

In [None]:
for i in range(10):
    print(ranking[i])

print("Iterations: ", iterations)

## Removing 20% of edges

In [None]:
#Removing random edges
tstart = time.time()
threshold = 0.2
counter = 0
new_edges = edge_list
while counter < total_edges*threshold:
  idx = randSelect(len(new_edges))
  for neighbor in new_edges[idx]:
    x = randEdge(threshold)
    if x:
      new_edges[idx].remove(neighbor)
      counter+=1
  
new_edges_num = 0

for item in edge_list:
  new_edges_num+=len(item)

tend = time.time()
for i in range(10):
  print(new_edges[i])

print("Time Needed: ", tend-tstart)

In [None]:
len(new_edges)

In [None]:
len(lines)

### Creating Teleport Vector and Running PageRank

In [None]:
#Creating Teleport vector
tstart = time.time()

teleport = tpVector(new_edges)

tend = time.time()
print("Time Needed: ", tend-tstart)

In [None]:
tstart = time.time()
ranking,iterations = pagerank(new_edges, 0.85, 10**-6, teleport)
tend = time.time()

print("Time Needed: ", tend-tstart)

In [None]:
for i in range(10):
    print(ranking[i])
print("Iterations: ", iterations)