In [None]:
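# Sketch of a high-level workflow (pseudo-code, kept for reference only; never executed).
# NOTE(review): these names differ from the classes actually imported below.
# s = setup(graphs, blocks, compares)
# mc = MatchClassifier(s)
# model = mc.fit()
# pred_matches = model.predict(node_pairs)
# integrated_graph = integrate_network(pred_matches, graph)
# integrate_networks(setup)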

In [1]:
import os
import sys

# Location of the local networkdiffusionanalyser checkout that provides the
# `netdiffanalyse` package. Set the NETDIFFANALYSE_PATH environment variable to
# point at your own checkout; the fallback is the original developer path.
NETDIFFANALYSE_PATH = os.environ.get(
    "NETDIFFANALYSE_PATH",
    "C:/Users/jnevin/Documents/GitHub/networkdiffusionanalyser",
)

# Only append once so that re-running this cell does not grow sys.path.
if NETDIFFANALYSE_PATH not in sys.path:
    sys.path.append(NETDIFFANALYSE_PATH)

from netdiffanalyse.datahandling.dataintegration import FeatureSetup, MatchClassifierFit, NetworkIntegration
from netdiffanalyse.diffusionmodel.diffusionmodel import CustomDiffusionModel, InitialisedDiffusionModel, RunDiffusionModel
from netdiffanalyse.analyser.networkanalysis import ResultsAnalyser
from bokeh.io import output_notebook, show
from ndlib.viz.bokeh.DiffusionTrend import DiffusionTrend

In [2]:
import pandas as pd
import numpy as np
import networkx as nx

# Record table: the rec_id column is moved into the index.
test_df = pd.read_csv('test_df.csv').set_index('rec_id')

# Record pairs, converted from the stored array of tuples into a MultiIndex.
# NOTE(review): allow_pickle=True can execute arbitrary code while loading;
# only use it with files from a trusted source.
test_matches = pd.MultiIndex.from_tuples(
    np.load('test_matches.npy', allow_pickle = True)
)

# Graph read from GML.
test_graph = nx.read_gml('test_graph.gml')

In [3]:
# Preview the loaded record table (indexed by rec_id; pandas truncates the display).
test_df

Unnamed: 0_level_0,given_name,surname,street_number,address_1,address_2,suburb,postcode,state,date_of_birth,soc_sec_id
rec_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
rec-223-org,,waller,6.0,tullaroop street,willaroo,st james,4011,wa,19081209.0,6988048
rec-122-org,lachlan,berry,69.0,giblin street,killarney,bittern,4814,qld,19990219.0,7364009
rec-373-org,deakin,sondergeld,48.0,goldfinch circuit,kooltuo,canterbury,2776,vic,19600210.0,2635962
rec-10-dup-0,kayla,harrington,,maltby circuit,coaling,coolaroo,3465,nsw,19150612.0,9004242
rec-227-org,luke,purdon,23.0,ramsay place,mirani,garbutt,2260,vic,19831024.0,8099933
...,...,...,...,...,...,...,...,...,...,...
rec-188-dup-0,stephanie,geu,28.0,bainton crescent,masonic memorial village,maryborough,2541,sa,19421008.0,3997529
rec-334-dup-0,nicholas,,289.0,britten-jonues drive,jabaru court,paddington,2000,vic,19970422.0,5062738
rec-469-dup-0,lachlan,katsiavos,29.0,paul coe cdrescent,,casual,2913,nsw,19380406.0,4112327
rec-350-dup-0,monique,gergely,21.0,harwoos court,hyberni a park,sherwood,2207,nsw,19790807.0,7375144


In [3]:
# data integration

In [4]:
# Blocking configuration handed to FeatureSetup.
# Each entry is a pair of column names (presumably left/right column of the
# records being compared — TODO confirm against FeatureSetup).
test_blocks = {
    'Block': [['given_name', 'given_name']],
}

# Comparison configuration handed to FeatureSetup.
#   'Exact'  : [column, column] pairs.
#   'String' : [column, column, method name, 0.85]
#              (0.85 is presumably a similarity threshold — TODO confirm).
test_compares = {
    'Exact': [
        [column, column]
        for column in ('given_name', 'date_of_birth', 'suburb', 'state')
    ],
    'String': [
        ['surname', 'surname', 'jarowinkler', 0.85],
        ['address_1', 'address_1', 'levenshtein', 0.85],
    ],
}

In [5]:
# Build the feature setup from the blocking rules, the comparison rules and the record table.
test_feature_setup = FeatureSetup(test_blocks, test_compares, test_df)

In [6]:
# Number of candidate links exposed by the feature setup.
n_candidate_links = len(test_feature_setup.candidate_links)

# Regression check: the expected value was previously only recorded in a comment.
assert n_candidate_links == 2082, f"expected 2082 candidate links, got {n_candidate_links}"

n_candidate_links

2082

In [7]:
# Compute the comparison features for the candidate links
# (presumably one row per candidate pair and one column per entry in test_compares — TODO confirm).
test_features = test_feature_setup.calculate_features()

In [8]:
# Distribution of per-row feature totals, highest total first.
# (Each row total is presumably the number of agreeing comparisons for a candidate pair.)
score_counts = test_features.sum(axis=1).value_counts().sort_index(ascending=False)

# Regression check: the expected counts were previously only recorded in a comment.
assert score_counts.tolist() == [142, 145, 30, 9, 376, 1380], score_counts.tolist()

score_counts

6.0     142
5.0     145
4.0      30
3.0       9
2.0     376
1.0    1380
dtype: int64

In [9]:
# Fit a match classifier, selected by name, on the comparison features.
# test_matches is the MultiIndex of record pairs loaded from test_matches.npy
# (presumably the known true matches used as training labels — TODO confirm).
test_classifier = MatchClassifierFit('NaiveBayesClassifier', test_features, test_matches)
test_classifier.fit_model()

In [10]:
# Pairs the fitted classifier predicts as matches, for the same features it was fitted on;
# displayed as a MultiIndex of record-id pairs.
test_classifier.pred_matches(test_features)

MultiIndex([('rec-122-dup-0',   'rec-122-org'),
            (  'rec-183-org', 'rec-183-dup-0'),
            ('rec-248-dup-0',   'rec-248-org'),
            ('rec-469-dup-0',   'rec-469-org'),
            ('rec-373-dup-0',   'rec-373-org'),
            (   'rec-10-org',  'rec-10-dup-0'),
            ('rec-342-dup-0',   'rec-342-org'),
            (  'rec-397-org', 'rec-397-dup-0'),
            (  'rec-472-org', 'rec-472-dup-0'),
            (  'rec-330-org', 'rec-330-dup-0'),
            ...
            (  'rec-5-dup-0',     'rec-5-org'),
            ('rec-407-dup-0',   'rec-407-org'),
            ('rec-367-dup-0',   'rec-367-org'),
            ('rec-103-dup-0',   'rec-103-org'),
            ('rec-195-dup-0',   'rec-195-org'),
            ('rec-184-dup-0',   'rec-184-org'),
            (  'rec-252-org', 'rec-252-dup-0'),
            ( 'rec-48-dup-0',    'rec-48-org'),
            ('rec-298-dup-0',   'rec-298-org'),
            (  'rec-282-org', 'rec-282-dup-0')],
           names=['rec_

In [11]:
# Predicted matching record pairs that drive the graph integration.
# NOTE(review): this reads test_feature_setup.features, whereas the preview cell
# above passes test_features — confirm both refer to the same feature table.
predicted_matches = test_classifier.pred_matches(test_feature_setup.features)

# Integrator over the loaded graph, using the 'walktrap_integration' method.
test_network_integrator = NetworkIntegration(
    test_graph,
    predicted_matches,
    'walktrap_integration',
)

In [12]:
# Run the integration; the resulting graphs are read back from the integrator's
# `graph` and `adj_graph` attributes in the next cell.
test_network_integrator.integrate_network()

In [13]:
# Keep handles on both graphs held by the integrator so they can be compared.
# NOTE(review): presumably `graph` is the input graph and `adj_graph` the graph
# after merging predicted matches — confirm against NetworkIntegration.
org_graph = test_network_integrator.graph
adj_graph = test_network_integrator.adj_graph

In [14]:
# Node count of the adjusted graph; the trailing comment records the expected value.
len(adj_graph.nodes()) # 646

646

In [15]:
# Node count of the original graph; the trailing comment records the expected value.
len(org_graph.nodes()) # 922

922

In [17]:
# diffusion model

In [16]:
# Ingredients of the custom diffusion model passed to CustomDiffusionModel.
model_name = 'sir'

# Node statuses used by the model.
statuses = ['Susceptible', 'Infected', 'Removed']

# Transition rules: [status, status, compartment key]; the keys refer to the
# entries of `compartments` below (presumably from-status -> to-status — TODO confirm).
rules = [
    ["Susceptible", "Infected", "c1"],
    ["Infected", "Removed", "c2"],
]

# Compartments grouped by compartment type.
# NOTE(review): the lists look like [rate, triggering status] for 'c1' and
# [rate] for 'c2' — confirm against CustomDiffusionModel.
compartments = {
    'NodeStochastic': dict(
        c1=[0.02, 'Infected'],
        c2=[0.01],
    ),
}

# Model-level parameters as [name, value] pairs.
parameters = [['fraction_infected', 0.1]]

In [17]:
# Bundle the model name, statuses, compartments, rules and parameters defined
# above into a single custom diffusion model description.
custom_diffusion_model = CustomDiffusionModel(model_name, statuses, compartments,
                                             rules, parameters)

In [18]:
# Build a model on a random Erdos-Renyi graph (1000 nodes, edge probability 0.1).
# NOTE(review): the next cell rebinds both `g` and `initialised_diffusion_model`
# to the integrated graph, so when cells run in order this result is never used.
# The random graph is also unseeded, so it differs between runs.
g = nx.erdos_renyi_graph(1000, 0.1)
initialised_diffusion_model = InitialisedDiffusionModel(g, custom_diffusion_model)

In [19]:
# Initialise the diffusion model on the adjusted graph produced by the network integrator.
g = test_network_integrator.adj_graph
initialised_diffusion_model = InitialisedDiffusionModel(g, custom_diffusion_model)

In [20]:
# Configure the model step by step: create it, register statuses, compartments
# and rules, then set the initial status of the nodes.
# NOTE(review): the calls presumably have to run in this order — confirm against
# InitialisedDiffusionModel before reordering.
initialised_diffusion_model.initialise_model()
initialised_diffusion_model.add_statuses()
initialised_diffusion_model.add_compartments()
initialised_diffusion_model.add_rules()
initialised_diffusion_model.set_initial_model_status()

In [21]:
# Wrap the initialised model for simulation.
# NOTE(review): the meaning of [10, 200, None, 4] is not visible here; it resembles
# ndlib's multi_runs arguments (execution_number, iteration_number, infection_sets,
# nprocesses) — confirm against RunDiffusionModel and consider naming these values.
run_diffusion_model = RunDiffusionModel(initialised_diffusion_model, [10, 200, None, 4])

In [22]:
# Execute the simulations; the results are read from run_diffusion_model.trends afterwards.
run_diffusion_model.run_simulations()

In [23]:
# NOTE(review): this mid-notebook import rebinds DiffusionTrend, shadowing the
# bokeh DiffusionTrend imported in the first import cell; consider moving it to
# the top-level imports under a distinct alias.
from ndlib.viz.mpl.DiffusionTrend import DiffusionTrend

# Plot the simulated trends with the matplotlib-based visualiser, writing to "diffusion.pdf".
viz = DiffusionTrend(initialised_diffusion_model.model, run_diffusion_model.trends)
viz.plot("diffusion.pdf", percentile=90)

no display found. Using non-interactive Agg backend


In [24]:
# Create the results analyser from the completed simulation runner.
results_analyser = ResultsAnalyser(run_diffusion_model)

In [47]:
# Compute graph properties via the results analyser (nothing is displayed by this cell).
results_analyser.calculate_graph_properties()

In [25]:
# Show the raw simulation trends: a list of dicts with per-status node counts per iteration.
# NOTE(review): this prints every iteration of every run; consider showing a slice or summary instead.
run_diffusion_model.trends

[{'trends': {'node_count': {0: [582,
     577,
     574,
     573,
     567,
     564,
     561,
     557,
     555,
     549,
     541,
     537,
     535,
     532,
     529,
     528,
     526,
     525,
     521,
     519,
     516,
     511,
     510,
     508,
     506,
     502,
     501,
     498,
     495,
     493,
     489,
     486,
     479,
     478,
     477,
     476,
     470,
     468,
     466,
     463,
     458,
     457,
     454,
     452,
     450,
     449,
     447,
     445,
     443,
     438,
     435,
     428,
     427,
     421,
     417,
     413,
     411,
     408,
     404,
     401,
     396,
     394,
     390,
     385,
     380,
     377,
     376,
     371,
     368,
     367,
     362,
     358,
     356,
     353,
     350,
     350,
     347,
     345,
     344,
     338,
     334,
     334,
     332,
     329,
     328,
     327,
     326,
     325,
     323,
     322,
     321,
     319,
     316,
     313,
     311,
     311,
     310,
   