In [1]:
import sys, warnings
import numpy as np
import pandas as pd
import networkx as nx
from collections import defaultdict
from scipy.stats import binom
import os

# Relative path to the CSV edge list that is passed to read() in a later cell.
filename = "../data/country_flight_country_w1.csv"

In [25]:
def read(filename, column_of_interest, triangular_input = False, consider_self_loops = True, undirected = False, drop_zeroes = True, sep = "\t"):
    """Reads a field separated input file into the internal backboning format (a Pandas Dataframe).
    The input file should have three or more columns (default separator: tab).
    The input file must have a one line header with the column names.
    There must be two columns called 'src' and 'trg', indicating the origin and destination of the interaction.
    All other columns must contain integer or floats, indicating the edge weight.
    In case of undirected network, the edges have to be present in both directions with the same weights, or set triangular_input to True.

    Args:
    filename (str): The path to the file containing the edges.
    column_of_interest (str): The column name identifying the weight that will be used for the backboning. It is renamed to 'nij' in the returned table.

    KWArgs:
    triangular_input (bool): Is the network undirected and are the edges present only in one direction? If True the table is passed through make_symmetric (not defined in this function; it must be available in the enclosing namespace). default: False
    consider_self_loops (bool): Do you want to consider self loops when calculating the backbone? If False, rows with src == trg are removed. default: True
    undirected (bool): Is the network undirected? If True the reported number of edges is the row count divided by two (a float). default: False
    drop_zeroes (bool): Do you want to drop zero weighted connections from the network? Only rows with a weight strictly greater than zero are kept. Important: it affects methods based on degree, like disparity_filter. default: True
    sep (char): The field separator of the input file. default: tab

    Returns:
    The parsed network data (columns 'src', 'trg', 'nij'), the number of nodes in the network and the number of edges.
    """
    # Honor the caller-supplied separator; it used to be hard-coded to ','
    # which silently ignored the `sep` argument (and its tab default).
    table = pd.read_csv(filename, sep = sep)
    table = table[["src", "trg", column_of_interest]]
    # Reassign instead of renaming in place: the frame above is a column
    # selection, and in-place edits on selections can trigger pandas
    # SettingWithCopy warnings.
    table = table.rename(columns = {column_of_interest: "nij"})
    if drop_zeroes:
        table = table[table["nij"] > 0]
    if not consider_self_loops:
        table = table[table["src"] != table["trg"]]
    if triangular_input:
        table = make_symmetric(table)
    # Nodes are counted after filtering, so a node that only appears in
    # dropped rows is not included.
    original_nodes = len(set(table["src"]) | set(table["trg"]))
    original_edges = table.shape[0]
    if undirected:
        # Each undirected edge is stored once per direction.
        return table, original_nodes, original_edges / 2
    return table, original_nodes, original_edges

In [26]:
# Parse the comma-separated edge list, using the "weights" column as the edge weight.
# `a` holds the tuple returned by read(): (edge table, number of nodes, number of edges).
a = read(filename, "weights", sep = ',')
a

potatoe


(              src           trg  nij
 0     Afghanistan    Azerbaijan    1
 1     Afghanistan         India    1
 2     Afghanistan          Iran    1
 3     Afghanistan        Kuwait    1
 4     Afghanistan      Pakistan    1
 ...           ...           ...  ...
 4553     Zimbabwe        Malawi    1
 4554     Zimbabwe       Namibia    1
 4555     Zimbabwe  South Africa    1
 4556     Zimbabwe      Tanzania    1
 4557     Zimbabwe        Zambia    1
 
 [4558 rows x 3 columns],
 225,
 4558)