## **Fig.1: Candidates for first plot**

In [1]:
import sys
sys.path.append("..")
from utils import edgelist_c2c, create_network

import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import text, inspect, MetaData

from tqdm import tqdm
import numpy as np
from pathlib import Path
import pandas as pd
import datetime as dt
from collections import defaultdict, Counter

from dateutil.relativedelta import relativedelta
from scipy.stats import linregress

import nxviz as nv
import networkx as nx
from networkx.algorithms import bipartite
from nxviz import layouts, plots, lines
from nxviz import nodes, edges, annotate, highlights
from nxviz.plots import despine, rescale, respine, aspect_equal

from nxviz.utils import edge_table, node_table
from nxviz import encodings as aes

import matplotlib
import matplotlib.pyplot as plt

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Render every figure of this notebook with Tahoma as the sans-serif typeface.
plt.rcParams.update(
    {
        "font.family": "sans-serif",
        "font.sans-serif": ["Tahoma"],
    }
)

# Root folder of the local data, and the edge-list folder beneath it.
basepath = Path.home() / "Documents" / "data"
netpath = basepath / "regulacao" / "edgelists"

# Sub-folder of `netpath` holding the "nordeste_no_service" edge lists.
netpath_ne = netpath / "nordeste_no_service"

nxviz has a new API! Version 0.7.4 onwards, the old class-based API is being
deprecated in favour of a new API focused on advancing a grammar of network
graphics. If your plotting code depends on the old API, please consider
pinning nxviz at version 0.7.4, as the new API will break your old code.

To check out the new API, please head over to the docs at
https://ericmjl.github.io/nxviz/ to learn more. We hope you enjoy using it!

(This deprecation message will go away in version 1.0.)



In [2]:
def summarize_network(G, return_summary=True):
    """Print, and optionally return, a short structural summary of a graph.

    The summary contains the node and edge counts, whether the graph is
    directed and whether it is bipartite, the attribute keys found on the
    first node and on the first edge, and the mean degree (mean in-degree and
    mean out-degree for a directed graph).

    Parameters
    ----------
    G : graph
        Object exposing the networkx-style interface used below:
        ``number_of_nodes``, ``number_of_edges``, ``is_directed``, ``nodes``,
        ``edges`` and ``degree`` (or ``in_degree`` / ``out_degree``).
    return_summary : bool, default True
        When true the summary dictionary is returned; otherwise the function
        only prints it and returns ``None``.

    Returns
    -------
    dict or None
        Mapping from a human-readable label to its value. The mean-degree
        entries are left out when the graph has no nodes, because the mean is
        undefined there.
    """
    summary = {}

    # Basic information
    summary['Number of Nodes'] = G.number_of_nodes()
    summary['Number of Edges'] = G.number_of_edges()
    summary['Is Directed'] = G.is_directed()
    summary['Is Bipartite'] = bipartite.is_bipartite(G)

    # Node and edge metadata keys. Only the FIRST node and the FIRST edge are
    # inspected, so attributes present only on other elements are not listed.
    if len(G.nodes) > 0:
        first_node = next(iter(G.nodes(data=True)))
        summary['Node Metadata Keys'] = list(first_node[1].keys())
    else:
        summary['Node Metadata Keys'] = []

    if len(G.edges) > 0:
        first_edge = next(iter(G.edges(data=True)))
        summary['Edge Metadata Keys'] = list(first_edge[2].keys())
    else:
        summary['Edge Metadata Keys'] = []

    # Degree information
    if G.is_directed():
        degree_views = {
            'Mean In-Degree': G.in_degree(),
            'Mean Out-Degree': G.out_degree(),
        }
    else:
        degree_views = {'Mean Degree': G.degree()}

    for label, view in degree_views.items():
        degrees = dict(view)
        # An empty graph has no mean degree: skip the entry explicitly rather
        # than dividing by zero (or hiding the error behind a bare except).
        if degrees:
            summary[label] = sum(degrees.values()) / len(degrees)

    # Printing summary
    for key, value in summary.items():
        print(f"{key}: {value}")

    if return_summary:
        return summary

## Connection to the database

In [4]:
# Folders and file names of the two SQLite databases used by this notebook.
basepath = Path.home() / "Documents" / "data"
sih_location = basepath / "opendatasus"
cnes_location = sih_location / "cnes"

sih_name = "SIHSUS_NORDESTE_NO_SERVICE.db"
cnes_name = "CNES_NORDESTE.db"

# Connection URLs: the "sqlite:///" prefix followed by the database file path.
sih_engine_url = "sqlite:///" + str(sih_location / sih_name)
cnes_engine_url = "sqlite:///" + str(cnes_location / cnes_name)

# One SQLAlchemy engine per database.
sih_engine = create_engine(sih_engine_url)
cnes_engine = create_engine(cnes_engine_url)

## **Generate edgelist data - C2C**

In [6]:
# Folder the edge-list parquet files are written to and read back from.
output = basepath / "regulacao" / "edgelists" / "nordeste_no_service"

In [4]:
# Year under analysis; `periods` splits it into its four calendar quarters.
selected_year = 2018

# (first month, last month, last day of the last month) of each quarter.
_quarter_bounds = [(1, 3, 31), (4, 6, 30), (7, 9, 30), (10, 12, 31)]

# One (start, end) pair of datetimes per quarter, in chronological order.
periods = [
    (
        dt.datetime(selected_year, first_month, 1),
        dt.datetime(selected_year, last_month, last_day),
    )
    for first_month, last_month, last_day in _quarter_bounds
]

#for interval in tqdm(periods):
#    start_date, final_date = interval[0], interval[1]
#    start_date_str, final_date_str = start_date.strftime("%Y%m"), final_date.strftime("%Y%m")
#
#    c2c_diag_people = edgelist_c2c(sih_engine, start_date, final_date, diag_level=3, mode="people")
#    c2c_diag_money = edgelist_c2c(sih_engine, start_date, final_date, diag_level=3, mode="money")
#    c2c_diag_people.to_parquet(output.joinpath("c2c", f"c2c_diag_people_{start_date_str}_{final_date_str}.parquet"))
#    c2c_diag_money.to_parquet(output.joinpath("c2c", f"c2c_diag_money_{start_date_str}_{final_date_str}.parquet"))

## **Load the networks**

In [12]:
# Two-character state (UF) prefixes of the northeast states kept by the filter
# below. Defined once, outside the loop, because it never changes per period.
ne_ufs = ["21", "22", "23", "24", "25", "26", "27", "28", "29"]

# One network per entry of `periods`, in the same order.
networks = []

for interval in tqdm(periods):
    start_date, final_date = interval
    start_date_str, final_date_str = start_date.strftime("%Y%m"), final_date.strftime("%Y%m")

    # Edge list previously saved for this period.
    edgelist = pd.read_parquet(output.joinpath("c2c", f"c2c_diag_people_{start_date_str}_{final_date_str}.parquet"))
    edgelist = edgelist.reset_index()

    # -- keep only flows whose two endpoints are both in northeast states.
    # The state prefix is the first two characters of each municipality code.
    # The vectorised .str accessor avoids a Python-level lambda per row and
    # leaves a missing code as NaN (which then fails the isin test) instead
    # of raising. The helper columns stay on the frame handed to
    # create_network, exactly as before.
    edgelist["CD_UF_RES"] = edgelist["MUNIC_RES"].str[:2]
    edgelist["CD_UF_MOV"] = edgelist["MUNIC_MOV"].str[:2]
    edgelist = edgelist[edgelist["CD_UF_RES"].isin(ne_ufs) & edgelist["CD_UF_MOV"].isin(ne_ufs)]

    net = create_network(edgelist, net_type="c2c")
    networks.append(net)

100%|██████████| 4/4 [00:52<00:00, 13.08s/it]


In [14]:
# Quick inspection: list every node identifier of the first period's network.
first_network = networks[0]
for node_id in first_network.nodes():
    print(node_id)

210005
210047
210060
210095
210203
210232
210325
210350
210405
210455
210480
210530
210542
210550
210596
210820
210900
210990
211040
211085
211130
221100
230440
210010
210177
210300
210320
210340
210355
211210
210015
210170
210805
211010
211060
211250
220770
210020
210190
210490
210860
210030
210330
210360
210040
210043
210120
210540
210690
210845
210850
211300
210193
210515
210910
211030
211150
210200
210720
210810
211000
211102
211227
291080
210050
210140
211160
220190
210055
210290
210465
210467
210700
211180
210070
210270
210663
210880
211020
210080
210210
210640
210083
210130
210370
211178
210087
210735
211400
210090
220570
210160
210400
210535
210547
210580
210100
210590
210930
211290
231330
210110
210710
210920
210960
210450
210870
211140
210125
210135
211050
210407
210610
210725
210750
210760
210790
210800
210970
211110
211157
211200
220390
210150
211090
210408
210975
211163
211230
221120
210173
211260
210180
210520
211223
210197
210470
220850
260790
261160
210207
210570
211220