In [4]:
import os
import sys

# Location of the networkdiffusionanalyser checkout that provides `netdiffanalyse`.
# The default is the original machine-specific path; set the NETDIFFANALYSE_REPO
# environment variable to run this notebook from a different checkout.
NETDIFFANALYSE_REPO = os.environ.get(
    "NETDIFFANALYSE_REPO",
    "C:/Users/jnevin/Documents/GitHub/networkdiffusionanalyser",
)

# Guard against duplicate entries when this cell is re-run in the same kernel.
if NETDIFFANALYSE_REPO not in sys.path:
    sys.path.append(NETDIFFANALYSE_REPO)

from netdiffanalyse.parameter_sweeper import CombinationBuilder, ParameterSweeper, MultiNetworkDiffusion
from netdiffanalyse.diffusionmodel.diffusionmodel import CustomDiffusionModel
%matplotlib inline

In [5]:
import pandas as pd
import numpy as np
import networkx as nx

# Record table: the 'rec_id' column becomes the index and is removed from the columns.
test_df = pd.read_csv('dedup_test_df.csv').set_index('rec_id')

# Match pairs are stored as a pickled numpy array and converted to a MultiIndex.
# allow_pickle=True can execute arbitrary code on load, so only use trusted files.
match_pairs_raw = np.load('dedup_test_matches.npy', allow_pickle=True)
test_matches = pd.MultiIndex.from_tuples(match_pairs_raw)

# Graph structure read from GML.
test_graph = nx.read_gml('dedup_test_graph.gml')

In [6]:
# Work on a copy so the graph loaded from disk stays unmodified.
full_graph_attr = test_graph.copy()

# Column-oriented dict: {column name: {record id: value}}.
test_dict = test_df.to_dict()

# Attach every column of the record table as a node attribute of the same name.
# NOTE(review): assumes the graph's node ids match the rec_id index values — confirm.
for attribute, attribute_values in test_dict.items():
    nx.set_node_attributes(full_graph_attr, attribute_values, name=attribute)

In [7]:
# Value shared by every 'String' comparison entry below.
# NOTE(review): presumably the similarity threshold for the string metric — confirm.
STRING_THRESHOLD = 0.85

# Blocking options: each setup blocks on a single field.
test_block_0 = {'Block': [['given_name', 'given_name']]}
test_block_1 = {'Block': [['surname', 'surname']]}
all_test_blocks = [test_block_0, test_block_1]

# Comparison options: exact matches on some fields, string metrics on others.
test_compare_0 = {
    'Exact': [
        ['given_name', 'given_name'],
        ['date_of_birth', 'date_of_birth'],
        ['suburb', 'suburb'],
        ['state', 'state'],
    ],
    'String': [
        ['surname', 'surname', 'jarowinkler', STRING_THRESHOLD],
        ['address_1', 'address_1', 'levenshtein', STRING_THRESHOLD],
    ],
}

# NOTE(review): unlike test_compare_0, this setup has no 'date_of_birth' entry —
# confirm that is intentional.
test_compare_1 = {
    'Exact': [
        ['surname', 'surname'],
        ['suburb', 'suburb'],
        ['state', 'state'],
    ],
    'String': [
        ['given_name', 'given_name', 'jarowinkler', STRING_THRESHOLD],
        ['address_1', 'address_1', 'levenshtein', STRING_THRESHOLD],
    ],
}

all_test_compares = [test_compare_0, test_compare_1]

# Classifier options, referenced by name.
classifier_name_0 = 'NaiveBayesClassifier'
classifier_name_1 = 'LogisticRegressionClassifier'
all_classifier_names = [classifier_name_0, classifier_name_1]

# Clustering algorithm options, referenced by name.
clustering_algs = ['walktrap_integration']

In [8]:
# Build the list of integration setups from the blocking, comparison,
# classifier and clustering options.
combination_builder = CombinationBuilder(
    all_test_blocks,
    all_test_compares,
    all_classifier_names,
    clustering_algs,
)
integration_setups = combination_builder.get_all_combinations()

# Sweep inputs: one attributed graph and the loaded match pairs.
# NOTE(review): the match set named test_* is passed as training data — confirm
# this is intended.
training_matches = test_matches
graphs = [full_graph_attr]

# Produce the integrated networks for the setups built above.
parameter_sweeper = ParameterSweeper(integration_setups, graphs, training_matches)
different_integrated_networks = parameter_sweeper.get_integrated_networks()

In [9]:
# Display the sweep result. The saved output lists 8 networkx Graph objects,
# which is consistent with 2 blocks x 2 compares x 2 classifiers x 1 clustering
# algorithm applied to the single input graph.
different_integrated_networks

[<networkx.classes.graph.Graph at 0x16624b29280>,
 <networkx.classes.graph.Graph at 0x16623f75700>,
 <networkx.classes.graph.Graph at 0x16623f75ee0>,
 <networkx.classes.graph.Graph at 0x16624b07640>,
 <networkx.classes.graph.Graph at 0x16623fe8820>,
 <networkx.classes.graph.Graph at 0x16624b2f6a0>,
 <networkx.classes.graph.Graph at 0x16624e3eca0>,
 <networkx.classes.graph.Graph at 0x16624e3cac0>]

In [10]:
# Remaining pipeline inputs: the diffusion model definition and the run settings.
model_name = 'sir'
statuses = ['Susceptible', 'Infected', 'Removed']

# Compartments grouped by compartment type; the ids (c1, c2) are the handles
# that transition_rules refers to.
compartments = {
    'NodeStochastic': dict(
        c1=[0.02, 'Infected'],
        c2=[0.01],
    ),
}

# Each rule appears to be [from_status, to_status, compartment_id].
transition_rules = [
    ["Susceptible", "Infected", "c1"],
    ["Infected", "Removed", "c2"],
]

model_parameters = [['fraction_infected', 0.1]]

# NOTE(review): positional values passed to run_diffusion_model; what each
# position means is not visible in this notebook — confirm against the library.
simulation_parameters = [5, 600, None, 5]

# Assemble the diffusion model from the status, compartment, rule and
# parameter definitions.
# NOTE(review): CustomDiffusionModel.SIR(0.02, 0.01, 0.1) was left here as a
# commented-out alternative; presumably an equivalent shortcut — confirm.
custom_diffusion_model = CustomDiffusionModel(
    statuses,
    compartments,
    transition_rules,
    model_parameters,
)

In [11]:
# Pair the integrated networks with the diffusion model, then run the simulation.
# NOTE(review): no random seed is set in the visible cells, so stochastic results
# may differ between runs — confirm whether the library seeds internally.
multi_network_diffusion = MultiNetworkDiffusion(
    different_integrated_networks,
    custom_diffusion_model,
)
graph_assc_results_analysers = multi_network_diffusion.run_diffusion_model(
    simulation_parameters
)

In [12]:
# Tabulate the averaged peak and final fraction of each status for the selected entries.
# NOTE(review): the ids presumably index into different_integrated_networks
# (the saved output has rows 0, 1, 2) — confirm.
comparison_ids = [0, 1, 2]
graph_assc_results_analysers.get_average_stat_comparison(comparison_ids)

Unnamed: 0,Susceptible_peak,Infected_peak,Removed_peak,Susceptible_final,Infected_final,Removed_final
0,0.900302,0.366767,0.798792,0.197885,0.003323,0.798792
1,0.9,0.329459,0.758378,0.236216,0.005405,0.758378
2,0.9,0.348116,0.769565,0.224928,0.005507,0.769565


In [None]:
# Request the graph property comparison from the results analyser.
# NOTE(review): this cell has no execution count or saved output, so it has not
# been run in the saved notebook — confirm it works from a fresh kernel.
graph_assc_results_analysers.get_graph_prop_comparison()