In [22]:
#!pip install bezier
#!pip install networkx==2.6.2

import copy
import getopt
import math
import networkx as nx
import operator
import os
import random
import sys
from scipy import stats
import seaborn as sns
import time
import pandas as pd
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt
import scipy.optimize as opt
from scipy.misc import derivative
from matplotlib.collections import LineCollection
from scipy.stats import pearsonr
from networkx.algorithms import tree

try:
    import bezier
except:
    pass
%matplotlib inline

In [23]:
palette = sns.color_palette("flare")
palette

In [24]:
try:
    from google.colab import drive

    drive.mount("/content/gdrive")
except:
    pass

In [25]:
def clean(series):
    series = series.str.upper()
    series = series.str.replace("Á", "A")
    series = series.str.replace("É", "E")
    series = series.str.replace("Ú", "U")
    series = series.str.replace("Ó", "O")
    series = series.str.replace("Í", "I")
    series = series.str.replace("Ã", "A")
    series = series.str.replace("Õ", "O")
    series = series.str.replace("Ô", "O")
    series = series.str.replace("Â", "A")
    series = series.str.replace("Ê", "E")
    series = series.str.replace("Ç", "C")
    series = series.str.replace("-", "")
    series = series.str.replace("'", "")
    series = series.str.upper()
    return series


def clean_str(series):
    series = series.upper()
    series = series.replace("Á", "A")
    series = series.replace("É", "E")
    series = series.replace("Ú", "U")
    series = series.replace("Ó", "O")
    series = series.replace("Í", "I")
    series = series.replace("Ã", "A")
    series = series.replace("Õ", "O")
    series = series.replace("Ô", "O")
    series = series.replace("Â", "A")
    series = series.replace("Ê", "E")
    series = series.replace("Ç", "C")
    series = series.replace("-", "")
    series = series.replace("'", "")
    return series

In [26]:
try:
    popDf = pd.read_excel(
        "/content/gdrive/MyDrive/Graph Theory for Machine Learning/gcavali-repo-main/MUNIC_2019.xlsx"
    )
except:
    popDf = pd.read_excel("MUNIC_2019.xlsx")
popDf["NOME MUNIC"] = clean(popDf["NOME MUNIC"])
popDf.head(2)

Unnamed: 0,CodMun,REGIAO,COD UF,UF,NOME MUNIC,POP EST,CLASSE POP
0,1100015,1 - Norte,11,RO,ALTA FLORESTA DOESTE (RO),22945,4 - 20001 até 50000
1,1100023,1 - Norte,11,RO,ARIQUEMES (RO),107863,6 - 100001 até 500000


In [27]:
try:
    casosDf = pd.read_csv(
        "/content/gdrive/MyDrive/Graph Theory for Machine Learning/gcavali-repo-main/caso_full.csv"
    )
except:
    casosDf = pd.read_csv("caso_full.csv")

casosStDf = casosDf[casosDf.place_type == "state"]
casosDf = casosDf[casosDf.place_type == "city"]
casosDf.city = clean(casosDf.city)
casosDf["city_state"] = casosDf.city + " (" + casosDf.state + ")"
casosDf.head(2)  # casos por cidade

Unnamed: 0,city,city_ibge_code,date,epidemiological_week,estimated_population,estimated_population_2019,is_last,is_repeated,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,last_available_date,last_available_death_rate,last_available_deaths,order_for_place,place_type,state,new_confirmed,new_deaths,city_state
0,RIO BRANCO,1200401.0,2020-03-17,202012,413418.0,407319.0,False,False,3,0.72566,2020-03-17,0.0,0,1,city,AC,3,0,RIO BRANCO (AC)
2,RIO BRANCO,1200401.0,2020-03-18,202012,413418.0,407319.0,False,False,3,0.72566,2020-03-18,0.0,0,2,city,AC,0,0,RIO BRANCO (AC)


In [28]:
casosStDf.head(2)  # casos por estado

Unnamed: 0,city,city_ibge_code,date,epidemiological_week,estimated_population,estimated_population_2019,is_last,is_repeated,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,last_available_date,last_available_death_rate,last_available_deaths,order_for_place,place_type,state,new_confirmed,new_deaths
1,,12.0,2020-03-17,202012,894470.0,881935.0,False,False,3,0.33539,2020-03-17,0.0,0,1,state,AC,3,0
3,,12.0,2020-03-18,202012,894470.0,881935.0,False,False,3,0.33539,2020-03-18,0.0,0,2,state,AC,0,0


In [29]:
epiweekDf = casosDf[["date", "epidemiological_week"]].drop_duplicates()
print(len(epiweekDf), "semanas epidemiológicas")
epiweekDf.tail(2)  # semanas epidemiológicas

692 semanas epidemiológicas


Unnamed: 0,date,epidemiological_week
2970939,2020-03-03,202010
2970941,2020-03-04,202010


In [30]:
casosDf = pd.merge(
    left=casosDf, right=popDf, how="left", left_on="city_state", right_on="NOME MUNIC"
)
print(casosDf["CLASSE POP"].value_counts())

3 - 10001 até 20000      828311
1 - Até 5000             739884
2 - 5001 até 10000       723726
4 - 20001 até 50000      693037
5 - 50001 até 100000     225184
6 - 100001 até 500000    181618
7 - Maior que 500000      32236
Name: CLASSE POP, dtype: int64


In [31]:
def group_data(Df, list):
    GroupDf = (
        Df.groupby(list)
        .agg(
            {
                "last_available_confirmed": "sum",
                "last_available_confirmed_per_100k_inhabitants": "mean",
                "new_confirmed": "sum",
                "last_available_deaths": "sum",
                "new_deaths": "sum",
                "estimated_population": "mean",
            }
        )
        .reset_index()
    )
    return GroupDf


def create_variable(Df, level, x1, x2):
    Df["confirmed_percent"] = Df.last_available_confirmed / Df.estimated_population
    Df["new_deaths_percent"] = Df.groupby(level)["new_deaths"].pct_change()
    Df["new_confirmed_percent"] = Df.groupby(level)["new_confirmed"].pct_change()
    Df["new_deaths_percent"] = Df["new_deaths_percent"].replace(
        [np.inf, -np.inf, np.nan, None], None
    )
    Df["new_confirmed_percent"] = Df["new_confirmed_percent"].replace(
        [np.inf, -np.inf, np.nan, None], None
    )
    Df["decrease_deaths"] = Df["new_deaths_percent"] <= x1
    Df["stability_deaths"] = (Df["new_deaths_percent"] > x1) & (
        Df["new_deaths_percent"] <= x2
    )
    Df["increase_deaths"] = Df["new_deaths_percent"] > x2
    Df["decrease_cases"] = Df["new_confirmed_percent"] < x1
    Df["stability_cases"] = (Df["new_confirmed_percent"] > x1) & (
        Df["new_confirmed_percent"] <= x2
    )
    Df["increase_cases"] = Df["new_confirmed_percent"] > x2
    Df["confirmed_t1"] = [None] + list(Df["last_available_confirmed"][:-1])
    Df["confirmed_t2"] = [None, None] + list(Df["last_available_confirmed"][:-2])
    Df["new_confirmed_t1"] = [None] + list(Df["new_confirmed"][:-1])
    Df["new_confirmed_t2"] = [None, None] + list(Df["new_confirmed"][:-2])
    Df["new_confirmed_t3"] = [None, None, None] + list(Df["new_confirmed"][:-3])
    Df["new_confirmed_t4"] = [None, None, None, None] + list(Df["new_confirmed"][:-4])
    Df["new_confirmed_t5"] = [None, None, None, None, None] + list(
        Df["new_confirmed"][:-5]
    )
    Df["new_confirmed_t6"] = [None, None, None, None, None, None] + list(
        Df["new_confirmed"][:-6]
    )
    return Df


def group_time(Df, level_time, level_city):
    GroupDf = Df.groupby(level_time).agg(
        {
            level_city: "count",
            "decrease_deaths": "sum",
            "stability_deaths": "sum",
            "increase_deaths": "sum",
            "decrease_cases": "sum",
            "stability_cases": "sum",
            "increase_cases": "sum",
        }
    )
    GroupDf["decrease_deaths_percent"] = (
        GroupDf["decrease_deaths"] / GroupDf[level_city]
    )
    GroupDf["stability_deaths_percent"] = (
        GroupDf["stability_deaths"] / GroupDf[level_city]
    )
    GroupDf["increase_deaths_percent"] = (
        GroupDf["increase_deaths"] / GroupDf[level_city]
    )
    GroupDf["decrease_cases_percent"] = GroupDf["decrease_cases"] / GroupDf[level_city]
    GroupDf["stability_cases_percent"] = (
        GroupDf["stability_cases"] / GroupDf[level_city]
    )
    GroupDf["increase_cases_percent"] = GroupDf["increase_cases"] / GroupDf[level_city]
    return GroupDf

In [32]:
casosStGroupDf = group_data(casosStDf, ["state", "epidemiological_week"])
casosStGroupDf = create_variable(casosStGroupDf, "state", -0.01, 0.10)
casosStGroupDf.head(2)

Unnamed: 0,state,epidemiological_week,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,new_confirmed,last_available_deaths,new_deaths,estimated_population,confirmed_percent,new_deaths_percent,...,stability_cases,increase_cases,confirmed_t1,confirmed_t2,new_confirmed_t1,new_confirmed_t2,new_confirmed_t3,new_confirmed_t4,new_confirmed_t5,new_confirmed_t6
0,AC,202012,28,0.626068,11,0,0,894470.0,3.1e-05,,...,False,False,,,,,,,,
1,AC,202013,145,2.315819,14,0,0,894470.0,0.000162,,...,False,True,28.0,,11.0,,,,,


In [33]:
casosGroupDf = group_data(casosDf, ["city_state", "epidemiological_week"])
casosGroupDf = create_variable(casosGroupDf, "city_state", -0.01, 0.10)
casosGroupDf.head(2)

Unnamed: 0,city_state,epidemiological_week,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,new_confirmed,last_available_deaths,new_deaths,estimated_population,confirmed_percent,new_deaths_percent,...,stability_cases,increase_cases,confirmed_t1,confirmed_t2,new_confirmed_t1,new_confirmed_t2,new_confirmed_t3,new_confirmed_t4,new_confirmed_t5,new_confirmed_t6
0,ABADIA DE GOIAS (GO),202021,16,59.5371,5,0,0,8958.0,0.001786,,...,False,False,,,,,,,,
1,ABADIA DE GOIAS (GO),202022,41,65.384496,1,0,0,8958.0,0.004577,,...,False,False,16.0,,5.0,,,,,


In [34]:
casosGroupDf.groupby("epidemiological_week")["new_confirmed"].sum().to_csv(
    "cases_week.csv"
)

In [35]:
casosEpiWeekDf = group_time(casosGroupDf, "epidemiological_week", "city_state")
casosEpiWeekDf.head()

Unnamed: 0_level_0,city_state,decrease_deaths,stability_deaths,increase_deaths,decrease_cases,stability_cases,increase_cases,decrease_deaths_percent,stability_deaths_percent,increase_deaths_percent,decrease_cases_percent,stability_cases_percent,increase_cases_percent
epidemiological_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
202009,1,1,0,0,1,0,0,1.0,0.0,0.0,1.0,0.0,0.0
202010,6,6,0,0,5,0,1,1.0,0.0,0.0,0.833333,0.0,0.166667
202011,31,31,0,0,27,1,3,1.0,0.0,0.0,0.870968,0.032258,0.096774
202012,142,142,0,0,118,5,19,1.0,0.0,0.0,0.830986,0.035211,0.133803
202013,313,312,0,1,230,18,65,0.996805,0.0,0.003195,0.734824,0.057508,0.207668


In [36]:
casosStEpiWeekDf = group_time(casosStGroupDf, "epidemiological_week", "state")
casosStEpiWeekDf.head()

Unnamed: 0_level_0,state,decrease_deaths,stability_deaths,increase_deaths,decrease_cases,stability_cases,increase_cases,decrease_deaths_percent,stability_deaths_percent,increase_deaths_percent,decrease_cases_percent,stability_cases_percent,increase_cases_percent
epidemiological_week,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
202009,1,1,0,0,1,0,0,1.0,0.0,0.0,1.0,0.0,0.0
202010,5,5,0,0,4,0,1,1.0,0.0,0.0,0.8,0.0,0.2
202011,17,17,0,0,12,0,5,1.0,0.0,0.0,0.705882,0.0,0.294118
202012,27,26,0,0,10,0,16,0.962963,0.0,0.0,0.37037,0.0,0.592593
202013,27,24,0,2,1,2,24,0.888889,0.0,0.074074,0.037037,0.074074,0.888889


In [37]:
def create_network(path):
    """
    Create a NetworkX graph object using the airport and route databases.
    Args:
        nodes: The file path to the nodes .csv file.
        edeges: The file path to the edges .csv file.
    Returns:
        G: A NetworkX DiGraph object populated with the nodes and edges assigned
           by the data files from the arguments.
    """
    print("Creating network.")
    G = nx.read_gexf(path)

    # Add pos attribute in nodes
    for node in G.nodes():
        long = G.nodes[node]["long"]
        lat = G.nodes[node]["lat"]
        loc = (long, lat)
        node_dict = {node: loc}
        nx.set_node_attributes(G, node_dict, "pos")

    # Calculate the edge weights
    print("\tCalculating edge weights", end="")
    degree_network = nx.Graph(G)
    ldegree = degree_network.degree
    for i, j in G.edges():
        degree_sum = ldegree[i] + ldegree[j]
        G[i][j]["weight"] = degree_sum

    print("\t\t\t\t[Done]")

    G = nx.relabel_nodes(G, clean_str)

    # Calculate the edge distances
    print("\tCalculating edge distance", end="")
    G = calculate_distance(G)
    print("\t\t\t\t[Done]")

    # Add clustering data
    print("\tCalculating clustering coefficents", end="")
    cluster_network = nx.Graph(G)
    lcluster = nx.clustering(cluster_network)
    for i, j in G.edges():
        cluster_sum = lcluster[i] + lcluster[j]
        G[i][j]["cluster"] = cluster_sum
    print("\t\t\t[Done]")

    return G

In [38]:
from math import cos, asin, sqrt, pi


def distance(lat1, lon1, lat2, lon2):
    p = pi / 180
    a = (
        0.5
        - cos((lat2 - lat1) * p) / 2
        + cos(lat1 * p) * cos(lat2 * p) * (1 - cos((lon2 - lon1) * p)) / 2
    )
    return 12742 * asin(sqrt(a))


def calculate_distance(input_network):
    """
    Add weights to the edges of a network based on the degrees of the connecting
    verticies, and return the network.
    Args:
        input_network: A NetworkX graph object
    Returns:
        G: A weighted NetworkX graph object.
    """

    G = input_network.copy()

    # Add weights to edges
    for node, successor in G.edges():
        dist = distance(
            G.nodes[node]["lat"],
            G.nodes[node]["long"],
            G.nodes[successor]["lat"],
            G.nodes[successor]["long"],
        )
        edge_dict = {(node, successor): dist}
        nx.set_edge_attributes(G, edge_dict, "distance")

    return G

In [39]:
def calculate_weights(input_network):
    """
    Add weights to the edges of a network based on the degrees of the connecting
    verticies, and return the network.
    Args:
        input_network: A NetworkX graph object
    Returns:
        G: A weighted NetworkX graph object.
    """

    G = input_network.copy()

    # Add weights to edges
    for node in G.nodes():
        successors = G.neighbors(node)
        weights = {}

        # Calculate the total out degree of all succs
        total_degree = 0
        for successor in successors:
            weights[successor] = G.degree(successor)

        largest_weight = max(weights.items())[1]
        smallest_weight = min(weights.items())[1]

        successors = G.neighbors(node)
        for successor in successors:
            if largest_weight == smallest_weight:
                relative_weight = 0
            else:
                relative_weight = (weights[successor] - smallest_weight) / (
                    largest_weight - smallest_weight
                )
            G[node][successor]["weight"] = relative_weight

    return G

In [40]:
def curved_edges(G, pos, dist_ratio=0.2, bezier_precision=20, polarity="random"):
    # Get nodes into np array
    edges = np.array(G.edges())
    l = edges.shape[0]

    if polarity == "random":
        # Random polarity of curve
        rnd = np.where(np.random.randint(2, size=l) == 0, -1, 1)
    else:
        # Create a fixed (hashed) polarity column in the case we use fixed polarity
        # This is useful, e.g., for animations
        rnd = np.where(
            np.mod(np.vectorize(hash)(edges[:, 0]) + np.vectorize(hash)(edges[:, 1]), 2)
            == 0,
            -1,
            1,
        )

    # Coordinates (x,y) of both nodes for each edge
    # e.g., https://stackoverflow.com/questions/16992713/translate-every-element-in-numpy-array-according-to-key
    # Note the np.vectorize method doesn't work for all node position dictionaries for some reason
    u, inv = np.unique(edges, return_inverse=True)
    coords = np.array([pos[x] for x in u])[inv].reshape(
        [edges.shape[0], 2, edges.shape[1]]
    )
    coords_node1 = coords[:, 0, :]
    coords_node2 = coords[:, 1, :]

    # Swap node1/node2 allocations to make sure the directionality works correctly
    should_swap = coords_node1[:, 0] > coords_node2[:, 0]
    coords_node1[should_swap], coords_node2[should_swap] = (
        coords_node2[should_swap],
        coords_node1[should_swap],
    )

    # Distance for control points
    dist = dist_ratio * np.sqrt(np.sum((coords_node1 - coords_node2) ** 2, axis=1))

    # Gradients of line connecting node & perpendicular
    m1 = (coords_node2[:, 1] - coords_node1[:, 1]) / (
        coords_node2[:, 0] - coords_node1[:, 0]
    )
    m2 = -1 / m1

    # Temporary points along the line which connects two nodes
    # e.g., https://math.stackexchange.com/questions/656500/given-a-point-slope-and-a-distance-along-that-slope-easily-find-a-second-p
    t1 = dist / np.sqrt(1 + m1**2)
    v1 = np.array([np.ones(l), m1])
    coords_node1_displace = coords_node1 + (v1 * t1).T
    coords_node2_displace = coords_node2 - (v1 * t1).T

    # Control points, same distance but along perpendicular line
    # rnd gives the 'polarity' to determine which side of the line the curve should arc
    t2 = dist / np.sqrt(1 + m2**2)
    v2 = np.array([np.ones(len(edges)), m2])
    coords_node1_ctrl = coords_node1_displace + (rnd * v2 * t2).T
    coords_node2_ctrl = coords_node2_displace + (rnd * v2 * t2).T

    # Combine all these four (x,y) columns into a 'node matrix'
    node_matrix = np.array(
        [coords_node1, coords_node1_ctrl, coords_node2_ctrl, coords_node2]
    )

    # Create the Bezier curves and store them in a list
    curveplots = []
    for i in range(l):
        nodes = node_matrix[:, i, :].T
        curveplots.append(
            bezier.Curve(nodes, degree=2)
            .evaluate_multi(np.linspace(0, 1, bezier_precision))
            .T
        )

    # Return an array of these curves
    curves = np.array(curveplots)

    return curves

In [41]:
try:
    G = create_network(
        "/content/gdrive/MyDrive/Graph Theory for Machine Learning/gcavali-repo-main/grafo_cidades_aeroportos.gexf"
    )
except:
    G = create_network("grafo_cidades_aeroportos.gexf")
G.remove_edges_from(nx.selfloop_edges(G))

Creating network.
Creating network.
	Calculating edge weights				[Done]
	Calculating edge distance				[Done]
	Calculating clustering coefficents			[Done]


In [42]:
epi_week = list(set(casosGroupDf.epidemiological_week.tolist()))
edges = [str(x) for x in list(G.edges())]
# edges = [str((u,v)) for u,v,e in G.edges(data=True) if e['aereo'] == 'sim']
new_confirmed_sum_df = pd.DataFrame(columns=edges, index=epi_week)
new_confirmed_diff_df = pd.DataFrame(columns=edges, index=epi_week)
new_confirmed_percent_avg_df = pd.DataFrame(columns=edges, index=epi_week)
new_confirmed_percent_diff_df = pd.DataFrame(columns=edges, index=epi_week)
new_confirmed_corr_df = pd.DataFrame(columns=edges, index=epi_week)
new_confirmed_avg_df = pd.DataFrame(columns=edges, index=epi_week)
new_confirmed_sum_df.loc[202009, "('CACOAL (RO)', 'VARZEA GRANDE (MT)')"]

nan

In [43]:
aero_edges = [(u, v) for u, v, e in G.edges(data=True) if e["aereo"] == "sim"]

In [44]:
casosGroupDf.head()

Unnamed: 0,city_state,epidemiological_week,last_available_confirmed,last_available_confirmed_per_100k_inhabitants,new_confirmed,last_available_deaths,new_deaths,estimated_population,confirmed_percent,new_deaths_percent,...,stability_cases,increase_cases,confirmed_t1,confirmed_t2,new_confirmed_t1,new_confirmed_t2,new_confirmed_t3,new_confirmed_t4,new_confirmed_t5,new_confirmed_t6
0,ABADIA DE GOIAS (GO),202021,16,59.5371,5,0,0,8958.0,0.001786,,...,False,False,,,,,,,,
1,ABADIA DE GOIAS (GO),202022,41,65.384496,1,0,0,8958.0,0.004577,,...,False,False,16.0,,5.0,,,,,
2,ABADIA DE GOIAS (GO),202023,50,79.737186,1,0,0,8958.0,0.005582,,...,True,False,41.0,16.0,1.0,5.0,,,,
3,ABADIA DE GOIAS (GO),202024,80,127.579499,7,0,0,8958.0,0.008931,,...,False,True,50.0,41.0,1.0,1.0,5.0,,,
4,ABADIA DE GOIAS (GO),202025,142,226.453607,8,0,0,8958.0,0.015852,,...,False,True,80.0,50.0,7.0,1.0,1.0,5.0,,


In [None]:
for edge in G.edges():
    print(edge)
    city_state_0 = edge[0]
    city_state_1 = edge[1]
    temp_0 = casosGroupDf[casosGroupDf.city_state == city_state_0]
    temp_1 = casosGroupDf[casosGroupDf.city_state == city_state_1]

    for week in epi_week:
        temp2_0 = temp_0[temp_0.epidemiological_week == week]
        temp2_1 = temp_1[temp_1.epidemiological_week == week]
        if (temp2_0.empty) | (temp2_1.empty):
            new_confirmed_percent_0 = np.nan
            new_confirmed_percent_1 = np.nan
        elif temp2_0.empty:
            new_confirmed_percent_0 = 0
            new_confirmed_percent_1 = temp2_1.new_confirmed_percent.iloc[0]
        elif temp2_1.empty:
            new_confirmed_percent_1 = 0
            new_confirmed_percent_0 = temp2_0.new_confirmed_percent.iloc[0]

        else:
            new_confirmed_percent_0 = temp2_0.new_confirmed_percent.iloc[0]
            new_confirmed_percent_1 = temp2_1.new_confirmed_percent.iloc[0]

            new_confirmed_percent_avg = (
                new_confirmed_percent_0 + new_confirmed_percent_1
            ) / 2
            new_confirmed_percent_diff = (
                new_confirmed_percent_0 * new_confirmed_percent_1
            )
            new_confirmed_percent_avg_df.loc[
                week, str(edge)
            ] = new_confirmed_percent_avg
            new_confirmed_percent_diff_df.loc[
                week, str(edge)
            ] = new_confirmed_percent_diff

In [None]:
new_confirmed_percent_avg_df.to_csv("new_confirmed_percent_avg_df_all.csv", sep=";")
new_confirmed_percent_diff_df.to_csv("new_confirmed_percent_diff_df_all.csv", sep=";")

In [46]:
size = len(G.edges())

In [None]:
i = 0
for edge in G.edges():
    city_state_a = edge[0]
    city_state_b = edge[1]
    temp_a = casosGroupDf[casosGroupDf.city_state == city_state_a]
    temp_b = casosGroupDf[casosGroupDf.city_state == city_state_b]

    for week in epi_week:
        temp2_a = temp_a[temp_a.epidemiological_week == week]
        temp2_b = temp_b[temp_b.epidemiological_week == week]

        try:
            confirmed_a = temp2_a.new_confirmed.iloc[0]
        except:
            confirmed_a = 0
        try:
            confirmed_b = temp2_b.new_confirmed.iloc[0]
        except:
            confirmed_b = 0

        # except: confirmed_t4_b = np.nan

        avg = (confirmed_a + confirmed_b) / 2
        diff = abs(confirmed_a - confirmed_b)

        new_confirmed_avg_df[week, str(edge)] = avg
        new_confirmed_diff_df[week, str(edge)] = diff
    i = i + 1
    print(edge, size - i)

('ALTA FLORESTA DOESTE (RO)', 'NOVA BRASILANDIA DOESTE (RO)') 17436
('ALTA FLORESTA DOESTE (RO)', 'ROLIM DE MOURA (RO)') 17435
('ALTA FLORESTA DOESTE (RO)', 'SANTA LUZIA DOESTE (RO)') 17434
('ALTA FLORESTA DOESTE (RO)', 'SAO MIGUEL DO GUAPORE (RO)') 17433
('ALTA FLORESTA DOESTE (RO)', 'ALTO ALEGRE DOS PARECIS (RO)') 17432
('ALTA FLORESTA DOESTE (RO)', 'NOVO HORIZONTE DO OESTE (RO)') 17431
('ALTA FLORESTA DOESTE (RO)', 'SAO FRANCISCO DO GUAPORE (RO)') 17430
('NOVA BRASILANDIA DOESTE (RO)', 'PRESIDENTE MEDICI (RO)') 17429
('NOVA BRASILANDIA DOESTE (RO)', 'SAO MIGUEL DO GUAPORE (RO)') 17428
('NOVA BRASILANDIA DOESTE (RO)', 'ALVORADA DOESTE (RO)') 17427
('NOVA BRASILANDIA DOESTE (RO)', 'NOVO HORIZONTE DO OESTE (RO)') 17426
('NOVA BRASILANDIA DOESTE (RO)', 'CASTANHEIRAS (RO)') 17425
('ROLIM DE MOURA (RO)', 'CACOAL (RO)') 17424
('ROLIM DE MOURA (RO)', 'PIMENTA BUENO (RO)') 17423
('ROLIM DE MOURA (RO)', 'SANTA LUZIA DOESTE (RO)') 17422
('ROLIM DE MOURA (RO)', 'NOVO HORIZONTE DO OESTE (RO)') 1

('NOVA UNIAO (RO)', 'TEIXEIROPOLIS (RO)') 17274
('NOVA UNIAO (RO)', 'URUPA (RO)') 17273
('VALE DO PARAISO (RO)', 'JIPARANA (RO)') 17272
('JIPARANA (RO)', 'TEIXEIROPOLIS (RO)') 17271
('JIPARANA (RO)', 'URUPA (RO)') 17270
('JIPARANA (RO)', 'COLNIZA (MT)') 17269
('JIPARANA (RO)', 'CONFINS (MG)') 17268
('JIPARANA (RO)', 'VARZEA GRANDE (MT)') 17267
('JIPARANA (RO)', 'CAMPINAS (SP)') 17266
('TEIXEIROPOLIS (RO)', 'URUPA (RO)') 17265
('URUPA (RO)', 'ALVORADA DOESTE (RO)') 17264
('COLNIZA (MT)', 'APUI (AM)') 17263
('COLNIZA (MT)', 'NOVO ARIPUANA (AM)') 17262
('COLNIZA (MT)', 'ARIPUANA (MT)') 17261
('COLNIZA (MT)', 'COTRIGUACU (MT)') 17260
('PORTO VELHO (RO)', 'CANDEIAS DO JAMARI (RO)') 17259
('PORTO VELHO (RO)', 'CUJUBIM (RO)') 17258
('PORTO VELHO (RO)', 'ITAPUA DO OESTE (RO)') 17257
('PORTO VELHO (RO)', 'ACRELANDIA (AC)') 17256
('PORTO VELHO (RO)', 'CANUTAMA (AM)') 17255
('PORTO VELHO (RO)', 'HUMAITA (AM)') 17254
('PORTO VELHO (RO)', 'LABREA (AM)') 17253
('PORTO VELHO (RO)', 'BELEM (PA)') 1725

('SANTO ANTONIO DO ICA (AM)', 'JAPURA (AM)') 17087
('SANTO ANTONIO DO ICA (AM)', 'SAO PAULO DE OLIVENCA (AM)') 17086
('SANTO ANTONIO DO ICA (AM)', 'TABATINGA (AM)') 17085
('SANTO ANTONIO DO ICA (AM)', 'TONANTINS (AM)') 17084
('SAO PAULO DE OLIVENCA (AM)', 'BENJAMIN CONSTANT (AM)') 17083
('SAO PAULO DE OLIVENCA (AM)', 'TABATINGA (AM)') 17082
('ANAMA (AM)', 'ANORI (AM)') 17081
('ANAMA (AM)', 'BERURI (AM)') 17080
('ANAMA (AM)', 'CAAPIRANGA (AM)') 17079
('ANAMA (AM)', 'CODAJAS (AM)') 17078
('ANAMA (AM)', 'MANACAPURU (AM)') 17077
('ANORI (AM)', 'BERURI (AM)') 17076
('ANORI (AM)', 'COARI (AM)') 17075
('ANORI (AM)', 'CODAJAS (AM)') 17074
('ANORI (AM)', 'TAPAUA (AM)') 17073
('BERURI (AM)', 'BORBA (AM)') 17072
('BERURI (AM)', 'CAREIRO (AM)') 17071
('BERURI (AM)', 'MANACAPURU (AM)') 17070
('BERURI (AM)', 'MANAQUIRI (AM)') 17069
('BERURI (AM)', 'TAPAUA (AM)') 17068
('CAAPIRANGA (AM)', 'CODAJAS (AM)') 17067
('CAAPIRANGA (AM)', 'MANACAPURU (AM)') 17066
('CAAPIRANGA (AM)', 'NOVO AIRAO (AM)') 17065
(

('URUCARA (AM)', 'CAROEBE (RR)') 16900
('URUCARA (AM)', 'SAO JOAO DA BALIZA (RR)') 16899
('ITAITUBA (PA)', 'ALTAMIRA (PA)') 16898
('ITAITUBA (PA)', 'NOVO PROGRESSO (PA)') 16897
('ITAITUBA (PA)', 'RUROPOLIS (PA)') 16896
('ITAITUBA (PA)', 'TRAIRAO (PA)') 16895
('ITAITUBA (PA)', 'OBIDOS (PA)') 16894
('ITAITUBA (PA)', 'ALENQUER (PA)') 16893
('ITAITUBA (PA)', 'BELEM (PA)') 16892
('ITAITUBA (PA)', 'BREJO (MA)') 16891
('ITAITUBA (PA)', 'SANTAREM (PA)') 16890
('ITAITUBA (PA)', 'MONTE ALEGRE (PA)') 16889
('ITAITUBA (PA)', 'ORIXIMINA (PA)') 16888
('NHAMUNDA (AM)', 'CAROEBE (RR)') 16887
('NHAMUNDA (AM)', 'FARO (PA)') 16886
('NHAMUNDA (AM)', 'JURUTI (PA)') 16885
('NHAMUNDA (AM)', 'ORIXIMINA (PA)') 16884
('NHAMUNDA (AM)', 'TERRA SANTA (PA)') 16883
('CAROEBE (RR)', 'SAO JOAO DA BALIZA (RR)') 16882
('CAROEBE (RR)', 'ORIXIMINA (PA)') 16881
('FARO (PA)', 'ORIXIMINA (PA)') 16880
('FARO (PA)', 'TERRA SANTA (PA)') 16879
('JURUTI (PA)', 'OBIDOS (PA)') 16878
('JURUTI (PA)', 'ORIXIMINA (PA)') 16877
('JURUTI 

('TOMEACU (PA)', 'AURORA DO PARA (PA)') 16717
('TOMEACU (PA)', 'IPIXUNA DO PARA (PA)') 16716
('TOMEACU (PA)', 'SAO DOMINGOS DO CAPIM (PA)') 16715
('AFUA (PA)', 'ANAJAS (PA)') 16714
('AFUA (PA)', 'BREVES (PA)') 16713
('AFUA (PA)', 'CHAVES (PA)') 16712
('AFUA (PA)', 'GURUPA (PA)') 16711
('AFUA (PA)', 'ITAUBAL (AP)') 16710
('AFUA (PA)', 'MACAPA (AP)') 16709
('AFUA (PA)', 'MAZAGAO (AP)') 16708
('AFUA (PA)', 'SANTANA (AP)') 16707
('ANAJAS (PA)', 'BREVES (PA)') 16706
('ANAJAS (PA)', 'CHAVES (PA)') 16705
('ANAJAS (PA)', 'SANTA CRUZ DO ARARI (PA)') 16704
('ANAJAS (PA)', 'SAO SEBASTIAO DA BOA VISTA (PA)') 16703
('BREVES (PA)', 'BAGRE (PA)') 16702
('BREVES (PA)', 'CURRALINHO (PA)') 16701
('BREVES (PA)', 'GURUPA (PA)') 16700
('BREVES (PA)', 'MELGACO (PA)') 16699
('BREVES (PA)', 'SAO SEBASTIAO DA BOA VISTA (PA)') 16698
('BREVES (PA)', 'ALMEIRIM (PA)') 16697
('BREVES (PA)', 'ALTAMIRA (PA)') 16696
('BREVES (PA)', 'PORTO DE MOZ (PA)') 16695
('BREVES (PA)', 'SANTAREM (PA)') 16694
('BREVES (PA)', 'MACA

('SAO FELIX DO XINGU (PA)', 'CUMARU DO NORTE (PA)') 16531
('SAO FELIX DO XINGU (PA)', 'MARABA (PA)') 16530
('SAO FELIX DO XINGU (PA)', 'NOVO REPARTIMENTO (PA)') 16529
('SAO FELIX DO XINGU (PA)', 'SANTANA DO ARAGUAIA (PA)') 16528
('SAO FELIX DO XINGU (PA)', 'SENADOR JOSE PORFIRIO (PA)') 16527
('SAO FELIX DO XINGU (PA)', 'TUCUMA (PA)') 16526
('SAO FELIX DO XINGU (PA)', 'PEIXOTO DE AZEVEDO (MT)') 16525
('SAO FELIX DO XINGU (PA)', 'SANTA CRUZ DO XINGU (MT)') 16524
('SAO FELIX DO XINGU (PA)', 'VILA RICA (MT)') 16523
('SENADOR JOSE PORFIRIO (PA)', 'ANAPU (PA)') 16522
('SENADOR JOSE PORFIRIO (PA)', 'PORTEL (PA)') 16521
('SENADOR JOSE PORFIRIO (PA)', 'VITORIA DO XINGU (PA)') 16520
('URUARA (PA)', 'MEDICILANDIA (PA)') 16519
('URUARA (PA)', 'MOJUI DOS CAMPOS (PA)') 16518
('VITORIA DO XINGU (PA)', 'ANAPU (PA)') 16517
('GUARANTA DO NORTE (MT)', 'MATUPA (MT)') 16516
('GUARANTA DO NORTE (MT)', 'NOVO MUNDO (MT)') 16515
('MATUPA (MT)', 'NOVO MUNDO (MT)') 16514
('MATUPA (MT)', 'PEIXOTO DE AZEVEDO (MT)'

('SANTA LUZIA DO PARA (PA)', 'GARRAFAO DO NORTE (PA)') 16366
('SANTA LUZIA DO PARA (PA)', 'NOVA ESPERANCA DO PIRIA (PA)') 16365
('BREJO GRANDE DO ARAGUAIA (PA)', 'PALESTINA DO PARA (PA)') 16364
('BREJO GRANDE DO ARAGUAIA (PA)', 'SAO DOMINGOS DO ARAGUAIA (PA)') 16363
('BREJO GRANDE DO ARAGUAIA (PA)', 'SAO GERALDO DO ARAGUAIA (PA)') 16362
('BREJO GRANDE DO ARAGUAIA (PA)', 'ARAGUATINS (TO)') 16361
('PALESTINA DO PARA (PA)', 'SAO GERALDO DO ARAGUAIA (PA)') 16360
('PALESTINA DO PARA (PA)', 'ANANAS (TO)') 16359
('PALESTINA DO PARA (PA)', 'ARAGUATINS (TO)') 16358
('SAO DOMINGOS DO ARAGUAIA (PA)', 'SAO GERALDO DO ARAGUAIA (PA)') 16357
('SAO GERALDO DO ARAGUAIA (PA)', 'ELDORADO DO CARAJAS (PA)') 16356
('SAO GERALDO DO ARAGUAIA (PA)', 'PICARRA (PA)') 16355
('SAO GERALDO DO ARAGUAIA (PA)', 'ANANAS (TO)') 16354
('SAO GERALDO DO ARAGUAIA (PA)', 'XAMBIOA (TO)') 16353
('ARAGUATINS (TO)', 'ANANAS (TO)') 16352
('ARAGUATINS (TO)', 'AUGUSTINOPOLIS (TO)') 16351
('ARAGUATINS (TO)', 'AXIXA DO TOCANTINS (TO)

In [None]:
new_confirmed_avg_df.to_csv("new_confirmed_avg_df_all.csv", sep=";")

In [None]:
for edge in G.edges(data=True):
    if (temp[temp.city_state == edge[0]].empty) | (
        temp[temp.city_state == edge[1]].empty
    ):
        corr = np.nan
    else:
        confirmed_0 = temp[(temp.city_state == edge[0])].new_confirmed.iloc[0]
        confirmed_1 = temp[(temp.city_state == edge[1])].new_confirmed.iloc[0]
        confirmed_t1_0 = temp[(temp.city_state == edge[0])].new_confirmed_t1.iloc[0]
        confirmed_t1_1 = temp[(temp.city_state == edge[1])].new_confirmed_t1.iloc[0]
        confirmed_t2_0 = temp[(temp.city_state == edge[0])].new_confirmed_t2.iloc[0]
        confirmed_t2_1 = temp[(temp.city_state == edge[1])].new_confirmed_t2.iloc[0]
        confirmed_t3_0 = temp[(temp.city_state == edge[0])].new_confirmed_t3.iloc[0]
        confirmed_t3_1 = temp[(temp.city_state == edge[1])].new_confirmed_t3.iloc[0]
        confirmed_t4_0 = temp[(temp.city_state == edge[0])].new_confirmed_t4.iloc[0]
        confirmed_t4_1 = temp[(temp.city_state == edge[1])].new_confirmed_t5.iloc[0]
        confirmed_t5_0 = temp[(temp.city_state == edge[0])].new_confirmed_t5.iloc[0]
        confirmed_t5_1 = temp[(temp.city_state == edge[1])].new_confirmed_t5.iloc[0]
        confirmed_t6_0 = temp[(temp.city_state == edge[0])].new_confirmed_t6.iloc[0]
        confirmed_t6_1 = temp[(temp.city_state == edge[1])].new_confirmed_t6.iloc[0]

        t0_0 = [
            confirmed_t5_0,
            confirmed_t4_0,
            confirmed_t3_0,
            confirmed_t2_0,
            confirmed_t1_0,
            confirmed_0,
        ]
        # t1_0 = [confirmed_t6_0,confirmed_t5_0,confirmed_t4_0,confirmed_t3_0,confirmed_t2_0,confirmed_t1_0]

        t0_1 = [
            confirmed_t5_1,
            confirmed_t4_1,
            confirmed_t3_1,
            confirmed_t2_1,
            confirmed_t1_1,
            confirmed_1,
        ]
        # t1_1 = [confirmed_t6_1,confirmed_t5_1,confirmed_t4_1,confirmed_t3_1,confirmed_t2_1,confirmed_t1_1]

        try:
            corr_t0, p_t0 = pearsonr(t0_0, t0_1)
        except:
            corr_t0 = np.nan
        # try: corr_t1, p_t1 = pearsonr(t1_0, t1_1)
        # except: corr_t1 = np.nan
        # corr = abs(corr_t0) - abs(corr_t1)
        corr = corr_t0
    print(edge)

In [None]:
import matplotlib as mp
import matplotlib.cm as cm

pearson = [x[2]["pearson"] for x in list(G.edges(data=True))]

minima = min(pearson)
maxima = max(pearson)

norm = mp.colors.Normalize(vmin=minima, vmax=maxima, clip=True)
mapper = cm.ScalarMappable(norm=norm, cmap=plt.cm.BrBG)

color_list = [mapper.to_rgba(v) for v in pearson]
alpha_list = [0.5 if x[2]["aereo"] == "sim" else 0.5 for x in list(G.edges(data=True))]
linestyle_list = [
    "dotted" if x[2]["aereo"] == "sim" else "solid" for x in list(G.edges(data=True))
]

capital = nx.get_node_attributes(G, "capital")
region = nx.get_node_attributes(G, "region")
node_size_list = [15 if x == "sim" else 10 for x in list(capital.values())]
node_alpha_list = [0.6 if x == "sim" else 0.4 for x in list(capital.values())]
node_color_list = "gray"

labels = {}
for node in G.nodes(data=True):
    if node[1]["capital"] == "sim":
        labels[node[0]] = node[0]

positions = nx.get_node_attributes(G, "pos")
# Produce the curves
curves = curved_edges(G, positions)
lc = LineCollection(
    curves, color=color_list, alpha=alpha_list, linestyle=linestyle_list
)

size = nx.get_node_attributes(G, "population")
size_list = [v for v in size.values()]
size_norm = [float(i) / max(size_list) for i in size_list]

plt.figure(figsize=(20, 20))
nx.draw_networkx_nodes(
    G,
    positions,
    node_size=node_size_list,
    alpha=node_alpha_list,
    node_color=node_color_list,
)
plt.gca().add_collection(lc)
plt.tick_params(
    axis="both",
    which="both",
    bottom=False,
    left=False,
    labelbottom=False,
    labelleft=False,
)
# nx.draw_networkx_labels(G, positions, labels, font_size=12, font_color="black")
plt.savefig("grafo_cidades_aeroportos_202040.png", transparent=False)
plt.show()

In [None]:
minimum_spanning_tree = tree.mst.minimum_spanning_tree(
    G, weight="pearson", ignore_nan=True
)
sum_ = sum(list(nx.get_edge_attributes(minimum_spanning_tree, "pearson").values()))
print("Sum:", sum_)
# Sum: -517.7498406040883 202020
# Sum: -806.0413886847555 202022
# Sum: -1056.9493846100775 202025
# Sum: -1322.9335498411547 202031
# Sum: -1362.3329650691155 202033
# Sum: -1275.7802168555108 202035
# Sum: -1332.737400610826 202036
# Sum: -1232.9998790140364 202040
# Sum: -1225.2711675299283 202045
# Sum: -1177.700229849171 202050

In [None]:
1 + np.exp(-1 * sum_)
# 7.17627017190965e+224 202020
# inf 202022
# inf 202025
# inf 202031
# inf 202033
# 1.1934680253072109e+125 202036
# inf 202045

In [None]:
sum_ = -806.0413886847555

In [None]:
def infection_yes_no(input_network, casosDf, vis=False, title=""):
    print("Replicating infection.")

    network = input_network.copy()
    positions = nx.get_node_attributes(network, "pos")
    curves = curved_edges(network, positions)

    # Set the default to susceptable
    sys.stdout.flush()
    for node in network.nodes():
        nx.set_node_attributes(network, values={node: "s"}, name="status")
        nx.set_node_attributes(network, values={node: "#A6A6A6"}, name="color")
        nx.set_node_attributes(network, values={node: 0.4}, name="alpha")
    for edge in network.edges(data=True):
        nx.set_edge_attributes(network, {(edge[0], edge[1]): "#A6A6A6"}, "color")
    if vis:
        pos = nx.get_node_attributes(network, "pos")

    epi_weeks = sorted(list(set(casosDf.epidemiological_week.tolist())))

    for week in epi_weeks:
        inicial, infectado = 0, 0
        weekDf = casosDf[casosDf.epidemiological_week == week]
        cities = list(set(weekDf.city_state.tolist()))
        for city in cities:
            nx.set_node_attributes(network, {city: "i"}, "status")
            nx.set_node_attributes(
                network, {city: (0.888292, 0.40830288, 0.36223756)}, "color"
            )
            nx.set_node_attributes(network, {city: 0.8}, "alpha")
        for edge in network.edges(data=True):
            if (
                network.nodes[edge[0]]["status"] == "i"
                and network.nodes[edge[1]]["status"] == "i"
            ):
                nx.set_edge_attributes(
                    network,
                    {(edge[0], edge[1]): (0.888292, 0.40830288, 0.36223756)},
                    "color",
                )

        for node in network.nodes():
            status = network.nodes[node]["status"]
            color = network.nodes[node]["color"]
            if status == "s":
                inicial += 1
            elif status == "i":
                infectado += 1
        print(
            "{0}, {1}, {2}".format(
                week, inicial / len(network.nodes()), infectado / len(network.nodes())
            )
        )

        if inicial == 0:
            break

        if vis:
            visualize_curved_edges(network, title, pos, curves)

    print(
        "\t----------\n\tS: {0}, I: {1}, {2}".format(
            inicial, infectado, infectado / len(network.nodes())
        )
    )

    return {"Suscceptable": inicial, "Infected": infectado}

In [None]:
def infection_cases(
    input_network,
    casosDf,
    DELAY=0,
    vis=False,
    file_name="sir.csv",
    title="",
    RECALCULATE=True,
):
    print("Replicating infection.")

    network = input_network.copy()
    positions = nx.get_node_attributes(network, "pos")
    curves = curved_edges(network, positions)

    # Set the default to susceptable
    sys.stdout.flush()
    for node in network.nodes():
        nx.set_node_attributes(network, values={node: "s"}, name="status")
        nx.set_node_attributes(network, values={node: "#A6A6A6"}, name="color")
        nx.set_node_attributes(network, values={node: 0.4}, name="alpha")
    for edge in network.edges(data=True):
        nx.set_edge_attributes(network, {(edge[0], edge[1]): "#A6A6A6"}, "color")

    if vis:
        pos = nx.get_node_attributes(network, "pos")

    epi_weeks = sorted(list(set(casosDf.epidemiological_week.tolist())))
    palette = sns.color_palette("flare")
    # Iterate through the evolution of the disease.
    i = 0
    for week in epi_weeks:
        i = i + 1
        print(i)
        # Create variables to hold the outcomes as they happen
        S, queda, estab, aumento = 0, 0, 0, 0

        casosWeekDf = casosDf[casosDf.epidemiological_week == week]
        cities = list(set(casosWeekDf.city_state.tolist()))

        for city in cities:
            new_per = casosWeekDf[
                casosWeekDf.city_state == city
            ].new_deaths_percent.values[0]
            if new_per < -0.01:
                nx.set_node_attributes(network, {city: "em queda"}, "status")
                nx.set_node_attributes(network, {city: palette[4]}, "color")
                nx.set_node_attributes(network, {city: 0.7}, "alpha")

            elif new_per < 0.10:
                nx.set_node_attributes(network, {city: "estabilidade"}, "status")
                nx.set_node_attributes(network, {city: palette[2]}, "color")
                nx.set_node_attributes(network, {city: 0.7}, "alpha")

            else:
                nx.set_node_attributes(network, {city: "aumento"}, "status")
                nx.set_node_attributes(network, {city: palette[0]}, "color")
                nx.set_node_attributes(network, {city: 0.8}, "alpha")

        # Loop twice to prevent bias.
        for edge in network.edges(data=True):
            if (casosWeekDf[(casosWeekDf.city_state == edge[0])].empty) | (
                casosWeekDf[(casosWeekDf.city_state == edge[1])].empty
            ):
                pass
            else:
                confirmed_0 = casosWeekDf[
                    (casosWeekDf.city_state == edge[0])
                ].new_confirmed.iloc[0]
                confirmed_1 = casosWeekDf[
                    (casosWeekDf.city_state == edge[1])
                ].new_confirmed.iloc[0]
                last_week_confirmed_0 = casosWeekDf[
                    (casosWeekDf.city_state == edge[0])
                ].last_week_new_confirmed.iloc[0]
                last_week_confirmed_1 = casosWeekDf[
                    (casosWeekDf.city_state == edge[1])
                ].last_week_new_confirmed.iloc[0]
                corr, p = pearsonr(
                    [last_week_confirmed_0, confirmed_0],
                    [last_week_confirmed_1, confirmed_1],
                )
                nx.set_edge_attributes(network, {(edge[0], edge[1]): corr}, "pearson")

            if (
                network.nodes[edge[0]]["status"] == "i"
                and network.nodes[edge[1]]["status"] == "i"
            ):
                nx.set_edge_attributes(
                    network,
                    {(edge[0], edge[1]): (0.888292, 0.40830288, 0.36223756)},
                    "color",
                )

        minimum_spanning_tree = tree.mst.minimum_spanning_tree(
            network, weight="pearson"
        )
        sum_ = sum(
            list(nx.get_edge_attributes(minimum_spanning_tree, "pearson").values())
        )
        print("sum:", sum_)

        for node in network.nodes():
            status = network.nodes[node]["status"]
            color = network.nodes[node]["color"]

            if status == "s":
                S += 1
            if status == "em queda":
                queda += 1
            if status == "estabilidade":
                estab += 1
            elif status == "aumento":
                aumento += 1

        print(
            "Semana: {0},Suscetivel: {1},Em queda: {2},Estabilidade: {3},Aumento: {4}".format(
                week, S, queda, estab, aumento
            )
        )

        if vis:
            visualize_curved_edges(network, title, pos, curves)

    print("\t----------\n\tS: {0}, I: {1}, R: {2}".format(S, queda, aumento))

    return {"Suscceptable": S, "Infected": queda, "Recovered": aumento}

In [None]:
from scipy.stats import pearsonr
from networkx.algorithms import tree

# minimum_spanning_tree_G = tree.mst.minimum_spanning_tree(G, weight="distance")
# G = create_network('grafo_cidades_aeroportos.gexf')
# for edge in G.edges(data=True):
#    print(edge[0],edge[1])
temp = casosDf.epidemiological_week[0]
temp2 = casosDf[casosDf.epidemiological_week == temp]
len(temp2)

In [None]:
def visualize_curved_edges(network, title, pos, curves):
    """
    Visualize the network given an array of posisitons.
    """
    print("-- Starting to Visualize --")

    colors = []
    alphas = []
    edge_colors = []
    for node in network.nodes():
        colors.append(network.nodes[node]["color"])
        alphas.append(network.nodes[node]["alpha"])
    for edge in network.edges(data=True):
        edge_colors.append(network[edge[0]][edge[1]]["color"])

    positions = nx.get_node_attributes(network, "pos")
    lc = LineCollection(curves, color=edge_colors, alpha=0.1)
    capital = nx.get_node_attributes(network, "capital")
    sizes = [100 if x == "sim" else 15 for x in list(capital.values())]
    # Plot
    fig, ax = plt.subplots(figsize=(8, 8))
    nx.draw_networkx_nodes(
        network, positions, node_size=sizes, alpha=alphas, node_color=colors, ax=ax
    )
    plt.gca().add_collection(lc)
    plt.tick_params(
        axis="both",
        which="both",
        bottom=False,
        left=False,
        labelbottom=False,
        labelleft=False,
    )

    number_files = str(len(os.listdir()))
    while len(number_files) < 3:
        number_files = "0" + number_files

    plt.show()

In [None]:
def visualize(network, title, pos):
    """
    Visualize the network given an array of posisitons.
    """
    print("-- Starting to Visualize --")

    colors = []
    i_edge_colors = []
    d_edge_colors = []
    default = []
    infected = []
    for node in network.nodes():
        colors.append(network.nodes[node]["color"])
    for i, j in network.edges():
        color = network.nodes[i]["color"]
        alpha = 0.75
        if color == (0.42355299, 0.16934709, 0.42581586) or color == (
            0.48942421,
            0.72854938,
            0.56751036,
        ):
            color = "#A6A6A6"
            default.append((i, j))
            d_edge_colors.append(color)
        else:
            color = "#A6A6A6"  # 29A229"
            infected.append((i, j))
            i_edge_colors.append(color)

    fig, ax = plt.subplots(figsize=(10, 10))

    # Fist pass - Gray lines
    nx.draw_networkx_edges(
        network,
        pos,
        connectionstyle="arc3,rad=0.9",
        edgelist=default,
        width=0.5,
        edge_color=d_edge_colors,
        alpha=0.5,
        arrows=False,
        ax=ax,
    )

    # Second Pass - Colored lines
    nx.draw_networkx_edges(
        network,
        pos,
        connectionstyle="arc3,rad=0.9",
        edgelist=infected,
        width=0.5,
        edge_color=i_edge_colors,
        alpha=0.75,
        arrows=False,
        ax=ax,
    )

    positions = nx.get_node_attributes(G, "pos")
    nx.draw_networkx_nodes(
        network,
        pos,
        linewidths=0.5,
        node_size=15,
        alpha=0.5,
        # with_labels=False,
        node_color=colors,
        ax=ax,
    )

    # Adjust the plot limits
    cut = 1.05
    xmax = cut * max(xx for xx, yy in pos.values())
    xmin = min(xx for xx, yy in pos.values())
    xmin = xmin - (cut * xmin)

    ymax = cut * max(yy for xx, yy in pos.values())
    ymin = (cut) * min(yy for xx, yy in pos.values())
    ymin = ymin - (cut * ymin)

    number_files = str(len(os.listdir()))
    while len(number_files) < 3:
        number_files = "0" + number_files

    plt.show()
    plt.close()

In [None]:
try:
    G = create_network(
        "/content/gdrive/MyDrive/Graph Theory for Machine Learning/gcavali-repo-main/grafo_cidades_aeroportos.gexf"
    )
except:
    G = create_network("grafo_cidades_aeroportos.gexf")
infection_yes_no(
    G,
    casosGroupDf,
    vis=True,
    title="title",
)

In [None]:
try:
    G = create_network(
        "/content/gdrive/MyDrive/Graph Theory for Machine Learning/gcavali-repo-main/grafo_cidades_aeroportos.gexf"
    )
except:
    G = create_network("grafo_cidades_aeroportos.gexf")
infection_cases(
    G,
    casosGroupDf,
    vis=True,
    title="title",
)

In [None]:
casosGroupDf["state"] = casosGroupDf.city_state.str.get(
    -3
) + casosGroupDf.city_state.str.get(-2)

In [None]:
try:
    G = create_network(
        "/content/gdrive/MyDrive/Graph Theory for Machine Learning/gcavali-repo-main/grafo_cidades.gexf"
    )
except:
    G = create_network("grafo_cidades.gexf")
G.remove_edges_from(nx.selfloop_edges(G))
selected_nodes = [n for n, v in G.nodes(data=True) if n[-3:-1] == "SP"]
G_sub = G.subgraph(selected_nodes)
infection_yes_no(
    G_sub,
    casosGroupDf[casosGroupDf.state == "SP"],
    vis=True,
    title="title",
)

In [None]:
infection_cases(
    G_sub, casosGroupDf[casosGroupDf.state == "SP"], vis=True, title="title", DELAY=0
)