In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from pathlib import Path
import itertools
import matplotlib.pyplot as plt

In [None]:
# Absolute path of the current working directory, and the directory one level
# above it, which the rest of the notebook treats as the repository root.
here_path = Path.cwd().resolve()
repo_path = here_path.parents[0]
# Disabled: sys.path.append(str(repo_path))
# NOTE(review): `sys` is not among the imports visible at the top of the
# notebook; re-enabling the line above presumably also needs `import sys`.

In [None]:
# Display the two resolved paths as this cell's output so they can be checked
# before any data is loaded.
here_path, repo_path

In [None]:
# File-name suffixes of the per-year samples; each one is inserted into
# "years{suffix}.csv" when the data is loaded. The original list carried a
# commented-out empty suffix (""), which stays excluded here.
YEARS = [f"_{year_number}" for year_number in range(2017, 2021)]

In [None]:
# Directory holding the yearly sample CSV files, relative to the repository root.
DATA_PATH = repo_path.joinpath("outputs/samples/")

### Simple ID

In [None]:
# Column names for the "simple ID" graph: edges run from `from_` to `to_`
# and are weighted by `weight_`. They do not change between years, so they are
# set once instead of on every iteration.
from_ = "OrigemID"
to_ = "DestinoID"
weight_ = "Volume"

for year in YEARS:
    data = pd.read_csv(Path(DATA_PATH) / f"years{year}.csv", sep=",", low_memory=False)

    # Collapse repeated (origin, destination) rows into a single edge with the
    # summed weight. The string "sum" replaces the builtin `sum`, which newer
    # pandas versions warn about when it is passed as an aggregation function.
    data_G = pd.pivot_table(
        data[[from_, to_, weight_]],
        index=[from_, to_],
        values=[weight_],
        aggfunc={weight_: "sum"},
    ).reset_index()

    print("Year", year)

    # Counted on the raw rows (not on the aggregated edges): number of records
    # whose origin equals their destination.
    self_cycles_len = int((data[from_] == data[to_]).sum())

    # Aggregated edges without self-loops.
    data_ = data_G[data_G[from_] != data_G[to_]].copy()

    from_nodes = data_[from_].unique()
    to_nodes = data_[to_].unique()

    # A node is any ID that appears as origin or as destination of a kept edge.
    print("Total nodes simple id", len(set(from_nodes).union(to_nodes)))
    print("Total links simple id", len(data_))
    print("Self-cycles simple id", self_cycles_len)

### Self-Cycles

### MANEJOS

In [None]:
# Accumulator filled by the MANEJOS loop below with, for each year, the origin
# ID that ranks first by number of links.
manejos_max = list()

In [None]:
# Column names of the edge list: edges run from `from_` to `to_`, weighted by
# `weight_`. They are loop-invariant, so they are set once.
from_ = "OrigemID"
to_ = "DestinoID"
weight_ = "Volume"

for year in YEARS:
    data = pd.read_csv(Path(DATA_PATH) / f"years{year}.csv", sep=",", low_memory=False)

    # One row per (origin, destination) pair with the summed weight. "sum" is
    # passed as a string because newer pandas versions warn about the builtin.
    data_G = pd.pivot_table(
        data[[from_, to_, weight_]],
        index=[from_, to_],
        values=[weight_],
        aggfunc={weight_: "sum"},
    ).reset_index()

    print("Year", year)

    # Drop self-loops, then keep the edges whose origin ID contains "manejo"
    # (case-insensitive).
    data_ = data_G[data_G[from_] != data_G[to_]].copy()
    manejos_df = data_[data_[from_].str.lower().str.contains("manejo")]

    print("MANEJOS")
    if manejos_df.empty:
        # The "maximum" look-ups below have nothing to select from and would
        # raise, so a year without matching origins is skipped explicitly.
        print("No MANEJO origins found for", year)
        continue

    # Every edge leaving a "manejo" origin. (Filtering `data_` again by these
    # origins would return exactly the rows of `manejos_df`.)
    G_manejo = nx.DiGraph()
    G_manejo.add_weighted_edges_from(
        manejos_df[[from_, to_, weight_]].itertuples(index=False, name=None)
    )

    # NOTE(review): destination nodes are part of the graph too, and they have
    # out-degree 0 unless they also appear as an origin in `manejos_df`, so they
    # are included in this histogram — confirm that this is intended.
    out_degrees = np.array([degree for _node, degree in G_manejo.out_degree()])

    # In a histogram of degrees the x axis is the degree and the y axis is how
    # many nodes have it (the two labels were swapped before).
    plt.figure()
    plt.hist(out_degrees)
    plt.xlabel("Degree", fontsize=14)
    plt.ylabel("Number of Nodes", fontsize=14)
    plt.show()

    print("MANEJOS by Transactions")
    # Each row of `manejos_df` is one distinct (origin, destination) pair, so the
    # group size is the number of links leaving each origin. `idxmax` returns
    # the first origin (in sorted order) that reaches the maximum.
    max_by_transactions = manejos_df.groupby(from_).size().idxmax()

    study_case = manejos_df[manejos_df[from_] == max_by_transactions]

    print(max_by_transactions, len(study_case), study_case[weight_].sum())

    G_transaction = nx.DiGraph()
    G_transaction.add_weighted_edges_from(
        study_case[[from_, to_, weight_]].itertuples(index=False, name=None)
    )

    plt.figure()
    plt.title(f"{year} - {max_by_transactions} - {len(study_case)} - {study_case[weight_].sum()}")
    nx.draw(
        G_transaction,
        node_size=150,
        node_color=['green' if node == max_by_transactions else 'blue' for node in G_transaction.nodes],
    )
    plt.show()

    print("MANEJOS by Volume")
    # Origin with the largest total outgoing weight (first one on ties).
    max_by_weight = manejos_df.groupby(from_)[weight_].sum().idxmax()

    study_case = manejos_df[manejos_df[from_] == max_by_weight]

    print(max_by_weight, len(study_case), study_case[weight_].sum())

    G_weight = nx.DiGraph()
    G_weight.add_weighted_edges_from(
        study_case[[from_, to_, weight_]].itertuples(index=False, name=None)
    )

    plt.figure()
    plt.title(f"{year} - {max_by_weight} - {len(study_case)} - {study_case[weight_].sum()}")
    nx.draw(
        G_weight,
        node_size=150,
        node_color=['green' if node == max_by_weight else 'blue' for node in G_weight.nodes],
    )
    plt.show()

    # Record the top origin by number of links. Skipping IDs that are already
    # present keeps the list free of duplicates when the same origin wins in
    # several years or when this cell is run more than once.
    if max_by_transactions not in manejos_max:
        manejos_max.append(max_by_transactions)

### Maximos MANEJOS

In [None]:
# Column names of the edge list: edges run from `from_` to `to_`, weighted by
# `weight_`. They are loop-invariant, so they are set once.
from_ = "OrigemID"
to_ = "DestinoID"
weight_ = "Volume"

for year in YEARS:
    data = pd.read_csv(Path(DATA_PATH) / f"years{year}.csv", sep=",", low_memory=False)

    # One row per (origin, destination) pair with the summed weight. "sum" is
    # passed as a string because newer pandas versions warn about the builtin.
    data_G = pd.pivot_table(
        data[[from_, to_, weight_]],
        index=[from_, to_],
        values=[weight_],
        aggfunc={weight_: "sum"},
    ).reset_index()

    print("Year", year)

    # Aggregated edges without self-loops.
    data_ = data_G[data_G[from_] != data_G[to_]].copy()

    # `dict.fromkeys` keeps the first occurrence of each ID in order, so an ID
    # that was collected more than once is drawn only once per year.
    for man_id in dict.fromkeys(manejos_max):
        # Outgoing edges of this origin in the current year; the graph stays
        # empty (and the figure blank) when the ID has no edges this year.
        man_edges = data_[data_[from_] == man_id]

        G_manejo = nx.DiGraph()
        G_manejo.add_weighted_edges_from(
            man_edges[[from_, to_, weight_]].itertuples(index=False, name=None)
        )

        plt.figure()
        plt.title(f"{man_id}-{year}")
        nx.draw(
            G_manejo,
            node_size=150,
            node_color=['green' if node == man_id else 'blue' for node in G_manejo.nodes],
        )
        plt.show()
        

### FINAL

In [None]:
# Accumulator filled by the FINAL loop below with, for each year, the
# destination ID that ranks first by number of links.
final_max = list()

In [None]:
# Column names of the edge list: edges run from `from_` to `to_`, weighted by
# `weight_`. They are loop-invariant, so they are set once.
from_ = "OrigemID"
to_ = "DestinoID"
weight_ = "Volume"

for year in YEARS:
    data = pd.read_csv(Path(DATA_PATH) / f"years{year}.csv", sep=",", low_memory=False)

    # One row per (origin, destination) pair with the summed weight. "sum" is
    # passed as a string because newer pandas versions warn about the builtin.
    data_G = pd.pivot_table(
        data[[from_, to_, weight_]],
        index=[from_, to_],
        values=[weight_],
        aggfunc={weight_: "sum"},
    ).reset_index()

    print("Year", year)

    # Drop self-loops, then keep the edges whose destination ID contains "final"
    # (case-insensitive).
    data_ = data_G[data_G[from_] != data_G[to_]].copy()
    final_df = data_[data_[to_].str.lower().str.contains("final")]

    print("FINAL")
    if final_df.empty:
        # The "maximum" look-ups below have nothing to select from and would
        # raise, so a year without matching destinations is skipped explicitly.
        print("No FINAL destinations found for", year)
        continue

    # Every edge arriving at a "final" destination. (Filtering `data_` again by
    # these destinations would return exactly the rows of `final_df`.)
    G_final = nx.DiGraph()
    G_final.add_weighted_edges_from(
        final_df[[from_, to_, weight_]].itertuples(index=False, name=None)
    )

    # NOTE(review): origin nodes are part of the graph too, and they have
    # in-degree 0 unless they also appear as a destination in `final_df`, so
    # they are included in this histogram — confirm that this is intended.
    in_degrees = np.array([degree for _node, degree in G_final.in_degree()])

    # In a histogram of degrees the x axis is the degree and the y axis is how
    # many nodes have it (the two labels were swapped before).
    plt.figure()
    plt.hist(in_degrees)
    plt.xlabel("Degree", fontsize=14)
    plt.ylabel("Number of Nodes", fontsize=14)
    plt.show()

    print("FINAL by Transactions")
    # Each row of `final_df` is one distinct (origin, destination) pair, so the
    # group size is the number of links arriving at each destination. `idxmax`
    # returns the first destination (in sorted order) that reaches the maximum.
    max_by_transactions = final_df.groupby(to_).size().idxmax()

    study_case = final_df[final_df[to_] == max_by_transactions]

    print(max_by_transactions, len(study_case), study_case[weight_].sum())

    G_transaction = nx.DiGraph()
    G_transaction.add_weighted_edges_from(
        study_case[[from_, to_, weight_]].itertuples(index=False, name=None)
    )

    plt.figure()
    plt.title(f"{year} - {max_by_transactions} - {len(study_case)} - {study_case[weight_].sum()}")
    nx.draw(
        G_transaction,
        node_size=150,
        node_color=['orange' if node == max_by_transactions else 'blue' for node in G_transaction.nodes],
    )
    plt.show()

    print("FINAL by Volume")
    # Destination with the largest total incoming weight (first one on ties).
    max_by_weight = final_df.groupby(to_)[weight_].sum().idxmax()

    study_case = final_df[final_df[to_] == max_by_weight]

    # Same summary line as the other "by ..." sections; this one used to be a
    # bare print() that only emitted an empty line.
    print(max_by_weight, len(study_case), study_case[weight_].sum())

    G_weight = nx.DiGraph()
    G_weight.add_weighted_edges_from(
        study_case[[from_, to_, weight_]].itertuples(index=False, name=None)
    )

    plt.figure()
    plt.title(f"{year} - {max_by_weight} - {len(study_case)} - {study_case[weight_].sum()}")
    nx.draw(
        G_weight,
        node_size=150,
        node_color=['orange' if node == max_by_weight else 'blue' for node in G_weight.nodes],
    )
    plt.show()

    # Record the top destination by number of links. Skipping IDs that are
    # already present keeps the list free of duplicates when the same
    # destination wins in several years or when this cell is run more than once.
    if max_by_transactions not in final_max:
        final_max.append(max_by_transactions)

### Maximos Final

In [None]:
# Column names of the edge list: edges run from `from_` to `to_`, weighted by
# `weight_`. They are loop-invariant, so they are set once.
from_ = "OrigemID"
to_ = "DestinoID"
weight_ = "Volume"

for year in YEARS:
    data = pd.read_csv(Path(DATA_PATH) / f"years{year}.csv", sep=",", low_memory=False)

    # One row per (origin, destination) pair with the summed weight. "sum" is
    # passed as a string because newer pandas versions warn about the builtin.
    data_G = pd.pivot_table(
        data[[from_, to_, weight_]],
        index=[from_, to_],
        values=[weight_],
        aggfunc={weight_: "sum"},
    ).reset_index()

    print("Year", year)

    # Aggregated edges without self-loops.
    data_ = data_G[data_G[from_] != data_G[to_]].copy()

    # `dict.fromkeys` keeps the first occurrence of each ID in order, so an ID
    # that was collected more than once is drawn only once per year.
    for final_id in dict.fromkeys(final_max):
        # Incoming edges of this destination in the current year; the graph
        # stays empty (and the figure blank) when the ID has no edges this year.
        final_edges = data_[data_[to_] == final_id]

        G_final = nx.DiGraph()
        G_final.add_weighted_edges_from(
            final_edges[[from_, to_, weight_]].itertuples(index=False, name=None)
        )

        plt.figure()
        plt.title(f"{final_id}-{year}")
        nx.draw(
            G_final,
            node_size=150,
            node_color=['orange' if node == final_id else 'blue' for node in G_final.nodes],
        )
        plt.show()
        

### Composed ID

In [None]:
# Column names for the "composed ID" graph: edges run from `from_` to `to_`
# and are weighted by `weight_`. Both ID columns are built inside the loop.
from_ = "OrigID"
to_ = "DestID"
weight_ = "Volume"

# Source columns joined, in this order and separated by "|", into each composed ID.
origin_parts = ["OrigemID", "MunOrigem", "LatOrigem", "LongOrigem", "NomeOrigem"]
destination_parts = ["DestinoID", "MunDestino", "LatDestino", "LongDestino", "NomeDestino"]

for year in YEARS:
    data = pd.read_csv(Path(DATA_PATH) / f"years{year}.csv", sep=",", low_memory=False)

    # Build "OrigID" and "DestID" by stringifying every part with `str` (so a
    # missing value contributes its string form rather than blanking the whole
    # ID) and concatenating the parts with "|".
    for composed_col, parts in ((from_, origin_parts), (to_, destination_parts)):
        composed = data[parts[0]].map(str)
        for part in parts[1:]:
            composed = composed + "|" + data[part].map(str)
        data[composed_col] = composed

    # Collapse repeated (origin, destination) rows into a single edge with the
    # summed weight. The string "sum" replaces the builtin `sum`, which newer
    # pandas versions warn about when it is passed as an aggregation function.
    data_G = pd.pivot_table(
        data[[from_, to_, weight_]],
        index=[from_, to_],
        values=[weight_],
        aggfunc={weight_: "sum"},
    ).reset_index()

    print("Year", year)

    # Counted on the raw rows (not on the aggregated edges): number of records
    # whose composed origin equals their composed destination.
    self_cycles_len = int((data[from_] == data[to_]).sum())

    # Aggregated edges without self-loops.
    data_ = data_G[data_G[from_] != data_G[to_]].copy()

    from_nodes = data_[from_].unique()
    to_nodes = data_[to_].unique()

    # These figures describe the composed-ID graph; the labels previously said
    # "simple id", copied over from the simple-ID section.
    print("Total nodes composed id", len(set(from_nodes).union(to_nodes)))
    print("Total links composed id", len(data_))
    print("Self-cycles composed id", self_cycles_len)