# ML Challenge

In this Jupyter notebook, all the functions from the ML Challenge are tested and their results are compared with the results of the corresponding networkx functions.

In [83]:
%load_ext autoreload
%autoreload 2

from number_of_vertices import number_of_vertices
from number_of_edges import number_of_edges
from test_graph_connection import test_graph_connection
from subgraphs import subgraphs
from degree_count import degree_count
from get_isolates import get_isolates
from eigenvector_centrality import eigenvector_centrality
from betweenness_centrality import betweenness_centrality
from normalize_centrality import normalize_centrality
from rank_the_nodes import rank_the_nodes
import json
import networkx as nx

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [84]:
# Load the challenge graph from disk. The context manager guarantees the file
# handle is closed once the JSON has been parsed (the previous bare open()
# leaked it for the lifetime of the kernel). JSON is UTF-8 by specification,
# so the encoding is stated explicitly rather than left to the platform default.
with open('challenge_graph.json', encoding='utf-8') as f:
    data = json.load(f)

# Build the networkx graph used as the reference implementation in the
# comparison cells; `data` itself is what the challenge functions receive.
graph = nx.node_link_graph(data)

Calculating the number of vertices and edges:

In [85]:
# Vertex count as reported by the challenge implementation.
vertex_total = number_of_vertices(data)
print(f'The number of vertices in the graph is: {vertex_total}')

The number of vertices in the graph is: 3506


In [86]:
# Reference vertex count taken from the networkx graph.
nx_vertex_total = graph.number_of_nodes()
print(f"networkx's function result is: {nx_vertex_total}")

networkx's function result is: 3506


In [87]:
# Edge count as reported by the challenge implementation.
edge_total = number_of_edges(data)
print(f'The number of edges is: {edge_total}')

The number of edges is: 3756


In [88]:
# Reference edge count taken from the networkx graph.
nx_edge_total = graph.number_of_edges()
print(f"networkx's function result is: {nx_edge_total}")

networkx's function result is: 3756


Calculating the degree of nodes:

In [89]:
# Per-node degrees from the challenge implementation (printed in full).
node_degrees = degree_count(data)
print(f"Degrees of nodes are: {node_degrees}")

Degrees of nodes are: {'48633953191': 45, '1438155667': 50, '37754729913': 38, '45694946584': 50, '8048097398': 50, '246800250': 50, '18866083059': 50, '27449814345': 50, '12762152189': 50, '46105172901': 50, '5693320415': 50, '429008541': 90, '3104137586': 50, '55714809789': 50, '382387161': 50, '1957459102': 50, '35984678': 70, '555363279': 96, '14766909': 93, '854242655': 70, '7176701743': 70, '9245623768': 70, '7644102419': 94, '2859517': 70, '1373705266': 111, '195738888': 50, '1187523512': 50, '1023340677': 51, '40507939065': 70, '45085100035': 70, '1660812944': 112, '447825325': 90, '35094604': 70, '30028715978': 50, '405387573': 117, '3145709864': 50, '356793922': 50, '180647838': 115, '267735244': 100, '1166106601': 71, '819819023': 50, '2839121': 98, '231040053': 70, '20934432266': 71, '3425534353': 72, '13817938': 124, '1281592476': 70, '447997840': 98, '37704595': 125, '5550915984': 71, '1407455621': 50, '254197705': 50, '520781652': 72, '24970812': 100, '56010353': 1, '104

In [90]:
# Reference degrees from networkx, printed as (node, degree) pairs.
nx_node_degrees = nx.degree(graph)
print(f"(networkx) Degrees of nodes are: {nx_node_degrees}")

(networkx) Degrees of nodes are: [('48633953191', 45), ('1438155667', 50), ('37754729913', 38), ('45694946584', 50), ('8048097398', 50), ('246800250', 50), ('18866083059', 50), ('27449814345', 50), ('12762152189', 50), ('46105172901', 50), ('5693320415', 50), ('429008541', 90), ('3104137586', 50), ('55714809789', 50), ('382387161', 50), ('1957459102', 50), ('35984678', 70), ('555363279', 96), ('14766909', 93), ('854242655', 70), ('7176701743', 70), ('9245623768', 70), ('7644102419', 94), ('2859517', 70), ('1373705266', 111), ('195738888', 50), ('1187523512', 50), ('1023340677', 51), ('40507939065', 70), ('45085100035', 70), ('1660812944', 112), ('447825325', 90), ('35094604', 70), ('30028715978', 50), ('405387573', 117), ('3145709864', 50), ('356793922', 50), ('180647838', 115), ('267735244', 100), ('1166106601', 71), ('819819023', 50), ('2839121', 98), ('231040053', 70), ('20934432266', 71), ('3425534353', 72), ('13817938', 124), ('1281592476', 70), ('447997840', 98), ('37704595', 125

Testing if the graph is connected:

In [91]:
# Connectivity verdict from the challenge implementation.
connected_flag = test_graph_connection(data)
print(f"Graph is connected? {connected_flag}")

Graph is connected? False


In [92]:
# Reference connectivity verdict from networkx.
nx_connected_flag = nx.is_connected(graph)
print(f"(networkx) Graph is connected? {nx_connected_flag}")

(networkx) Graph is connected? False


Counting the subgraphs of G:

In [93]:
# Evaluate subgraphs(data) once and reuse the result for both the count and
# the listing; the previous version called it twice inside the same f-string,
# doubling the work for identical output.
found_subgraphs = subgraphs(data)
print(f"The number of subgraphs of graph G is {len(found_subgraphs)} and the subgraphs are: {found_subgraphs}")

The number of subgraphs of graph G is 4 and the subgraphs are: {1: ['48633953191', '296859123', '362979739', '49329608516', '1566833007', '2101373890', '2173792662', '2927872150', '1083360063', '1473766388', '44126985657', '632575834', '4411970070', '3062623212', '1785778110', '6763621476', '1429894784', '16630714902', '7286267339', '25724532', '44576752321', '272374612', '5994980118', '224617992', '314430043', '310864998', '8035435930', '498482296', '224940874', '207966782', '591543501', '3047543213', '367843285', '381018429', '1440691802', '3918232748', '221222696', '1815894041', '5353089060', '2846588', '2935685012', '1695753418', '1782523938', '1461679662', '10446954062', '56010353'], 2: ['1438155667', '300071671', '7041811726', '308166344', '1394249115', '355436416', '2166036729', '964258450', '2176773041', '191710512', '2309488352', '24970812', '693961749', '1828386722', '23247946', '22459209', '4320866722', '1548824729', '5314642756', '464843428', '2109315610', '1410204266', '10

In [94]:
# Reference count: materialise networkx's connected components and count them.
nx_components = list(nx.connected_components(graph))
print(f"(networkx) The number of subgraphs of graph G is {len(nx_components)}")

(networkx) The number of subgraphs of graph G is 4


Calculating the number of isolates: 

In [95]:
# Isolated nodes as reported by the challenge implementation.
isolated_nodes = get_isolates(data)
print(f"Isolates from the graph are: {isolated_nodes}")

Isolates from the graph are: []


In [96]:
# Reference: number of isolated nodes according to networkx.
nx_isolate_total = nx.number_of_isolates(graph)
print(f"(networkx) The number of isolates from the graph is: {nx_isolate_total}")

(networkx) The number of isolates from the graph is: 0


There are no isolates in the given graph.

Eigenvector centrality:

In [97]:
# Eigenvector centrality from the challenge implementation.
# NOTE(review): 150 is presumably the iteration limit, mirroring the networkx
# call in the comparison cell — confirm against eigenvector_centrality's signature.
eigen_scores = eigenvector_centrality(data, 150)
print(f"Eigenvector centrality values for nodes are: {eigen_scores}")

Eigenvector centrality values for nodes are: {'48633953191': 1.7294641939875167e-45, '1438155667': 0.06637113194033184, '37754729913': 0.015242422209333176, '45694946584': 0.05442421641527433, '8048097398': 0.09542698989208023, '246800250': 0.03898543191507461, '18866083059': 0.00704821699094921, '27449814345': 0.0947584379648804, '12762152189': 0.0015764176943924774, '46105172901': 0.0014169305797555932, '5693320415': 0.007595158549937399, '429008541': 0.07407094078469241, '3104137586': 0.050367102701793585, '55714809789': 0.02031633616340929, '382387161': 0.03800604911102736, '1957459102': 0.05092887757943976, '35984678': 0.014250510406427778, '555363279': 0.0007874205039299806, '14766909': 0.08667175658226967, '854242655': 0.07966004324564654, '7176701743': 0.016165745487278626, '9245623768': 0.07453374680882954, '7644102419': 0.05853784578163433, '2859517': 0.00235554164158189, '1373705266': 0.11756576506805627, '195738888': 0.1681040719442207, '1187523512': 6.542777007257086e-06, 

In [98]:
# Reference eigenvector centrality from networkx, computed on a directed
# copy of the graph with the iteration limit raised to 150.
G = nx.DiGraph(graph)
nx_eigen_scores = nx.eigenvector_centrality(G, max_iter=150)
print(f"(networkx) Eigenvector centrality values for nodes are: {nx_eigen_scores}")

(networkx) Eigenvector centrality values for nodes are: {'48633953191': 8.45025891996694e-14, '1438155667': 0.06644079835210721, '37754729913': 0.015249762523853402, '45694946584': 0.054467440227635486, '8048097398': 0.09542346818089233, '246800250': 0.03896403525711137, '18866083059': 0.0070616658481269185, '27449814345': 0.0947545465864163, '12762152189': 0.0015771101860353213, '46105172901': 0.0014190921934593636, '5693320415': 0.007609497817650112, '429008541': 0.07402112029854296, '3104137586': 0.050403207159150455, '55714809789': 0.020330403226872257, '382387161': 0.038032229595251016, '1957459102': 0.050961667340617474, '35984678': 0.014270487678166098, '555363279': 0.000790280857386111, '14766909': 0.08660846209876298, '854242655': 0.07972658845564509, '7176701743': 0.016190991747508387, '9245623768': 0.07460838147973134, '7644102419': 0.05859796292952146, '2859517': 0.0023553914970877183, '1373705266': 0.11759446312170632, '195738888': 0.16807810734763634, '1187523512': 6.5880

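We can notice slight differences between the output values of the two functions. For the values shown, the absolute differences are below $10^{-4}$ (e.g. 0.06637 vs. 0.06644), and the largest relative differences occur where the value itself is extremely small (e.g. $1.7 \times 10^{-45}$ vs. $8.5 \times 10^{-14}$), so we can say that the difference is insignificant.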

Betweenness centrality:

In [99]:
# Betweenness centrality from the challenge implementation.
betweenness_scores = betweenness_centrality(data)
print(f"Betweenness centrality values for nodes are: {betweenness_scores}")

Betweenness centrality values for nodes are: {'48633953191': 0.0001612178297148887, '1438155667': 0.028328765398611525, '37754729913': 0.020444474803646185, '45694946584': 0.028430899154982266, '8048097398': 0.023598488397275984, '246800250': 0.03732077852769208, '18866083059': 0.02658758850696005, '27449814345': 0.038343896866922134, '12762152189': 0.02602395949435709, '46105172901': 0.02658758850696005, '5693320415': 0.026097147267614196, '429008541': 0.04706036040171879, '3104137586': 0.024962726883715595, '55714809789': 0.022620091594703875, '382387161': 0.026317189855768915, '1957459102': 0.03024476058114138, '35984678': 0.033601396607002544, '555363279': 0.0773365186068174, '14766909': 0.05474932168036143, '854242655': 0.03679120015731904, '7176701743': 0.034443804998876455, '9245623768': 0.05541466767711154, '7644102419': 0.12405981692482584, '2859517': 0.03640894533901632, '1373705266': 0.06973811276423432, '195738888': 0.03442244648056488, '1187523512': 0.02658758850696005, '1

In [100]:
# Reference betweenness centrality from networkx.
nx_betweenness_scores = nx.betweenness_centrality(graph)
print(f"(networkx) Betweenness centrality values for nodes are: {nx_betweenness_scores}")

(networkx) Betweenness centrality values for nodes are: {'48633953191': 0.0001612178297148887, '1438155667': 0.028328765398611525, '37754729913': 0.020444474803646185, '45694946584': 0.028430899154982266, '8048097398': 0.023598488397275984, '246800250': 0.037320778527692076, '18866083059': 0.02658758850696005, '27449814345': 0.03834389686692213, '12762152189': 0.02602395949435709, '46105172901': 0.02658758850696005, '5693320415': 0.026097147267614196, '429008541': 0.04706036040171879, '3104137586': 0.024962726883715595, '55714809789': 0.02262009159470387, '382387161': 0.026317189855768912, '1957459102': 0.03024476058114138, '35984678': 0.03360139660700254, '555363279': 0.0773365186068174, '14766909': 0.05474932168036143, '854242655': 0.03679120015731904, '7176701743': 0.034443804998876455, '9245623768': 0.055414667677111534, '7644102419': 0.12405981692482584, '2859517': 0.03640894533901631, '1373705266': 0.06973811276423432, '195738888': 0.03442244648056488, '1187523512': 0.02658758850

Ranking the nodes:
(parameters w1, w2, w3 and w4 are equal to give each metric the same importance)

In [101]:
# Rank the nodes with all four weights set to the same value, so that no
# single metric dominates the weighted sum.
equal_weight = 0.25
node_ranking = rank_the_nodes(data, equal_weight, equal_weight, equal_weight, equal_weight)
print(node_ranking)

[{'id': '447825325', 'q1': 0.005791892864283673, 'q2': 0.05995780178955409, 'weighted_sum': 0.47795072333231536}, {'id': '37704595', 'q1': 0.0022785713063863573, 'q2': 0.009389270078642119, 'weighted_sum': 0.3333505069154924}, {'id': '13817938', 'q1': 0.01838985474518544, 'q2': 0.00358045421965279, 'weighted_sum': 0.30543357415287686}, {'id': '276192188', 'q1': 1.0, 'q2': 0.002493530617258193, 'weighted_sum': 0.2561561419785633}, {'id': '391216093', 'q1': 0.0028785600974083423, 'q2': 1.0, 'weighted_sum': 0.2552371761565435}, {'id': '1084945', 'q1': 0.8178815087636941, 'q2': 0.0011508602848883968, 'weighted_sum': 0.22391253315562742}, {'id': '561009264', 'q1': 0.7276994317753215, 'q2': 0.0008715579350470606, 'weighted_sum': 0.20408690993905737}, {'id': '48804988', 'q1': 0.5623970916267452, 'q2': 0.0013796863064451542, 'weighted_sum': 0.16009863537677937}, {'id': '192815961', 'q1': 0.5650484676129239, 'q2': 0.0016758140990480164, 'weighted_sum': 0.158722131330674}, {'id': '20934432266', 