In [1]:
import pandas as pd
import tabulate
import numpy as np

In [6]:
def to_fwf(df, fname, cols=None):
    """Write a DataFrame to a plain-text table, one row per line.

    The table is rendered with ``tabulate`` using the ``'plain'`` format.
    The first column is left-aligned, strings in the other columns are
    right-aligned, and the first column name is prefixed with ``'# '`` in
    the header line.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to write. When the function is attached to
        ``pd.DataFrame`` (see below) this is the calling instance.
    fname : str
        The path to the file in which the table will be written. The file
        is opened in ``'w'`` mode, so existing content is overwritten.
    cols : list or array of strings, optional
        A list or an array containing the name of the columns (as strings) to
        be written in the file (if None all columns will be written).

    Raises
    ------
    IndexError
        If there is no column to write (the header cannot be prefixed).
    """

    # Materialise the selection as a list so that any iterable of labels
    # (Index, array, tuple, ...) selects several columns rather than being
    # interpreted as a single key.
    columns = list(df.columns if cols is None else cols)

    header = list(columns)
    # str() keeps the prefixing valid for non-string column labels.
    header[0] = '# ' + str(header[0])
    content = tabulate.tabulate(df[columns].values.tolist(), header,
                                tablefmt='plain', stralign='right',
                                colalign=('left',))

    # Context manager guarantees the handle is flushed and closed, even if
    # the write fails.
    with open(fname, 'w') as out_file:
        out_file.write(content)


# Adds the custom method to Pandas.
pd.DataFrame.to_fwf = to_fwf

In [5]:
def _read_header(path):
    """Return the column names found on the first line of ``path``.

    Every '#' on that line is replaced by a space before the line is split
    on whitespace. The file is closed before returning.
    """
    with open(path, 'r') as handle:
        return handle.readline().replace('#', ' ').split()


# Graph properties, indexed by the 'name' column.
graphPropFilename = '../graphs/graph_data/graph_properties_augmented.txt'
header = _read_header(graphPropFilename)
graphPropDF = pd.read_table(
    graphPropFilename, names=header, comment="#", delimiter=r"\s+"
).set_index('name')

# Effective ranks; 'Name' is renamed to 'name' so both frames share the
# same index label before the merge.
effRanksFilename = '../singular_values/properties/effective_ranks.txt'
header = _read_header(effRanksFilename)
effRanksDF = (
    pd.read_table(effRanksFilename, names=header, comment="#", delimiter=r"\s+")
    .rename(columns={'(un)dir': 'Directed', 'Name': 'name'})
    .set_index('name')
)

# Index-on-index merge (pandas' default join type), then bring the name
# back as an ordinary column.
fullDF = pd.merge(graphPropDF, effRanksDF, left_index=True, right_index=True).reset_index()

# Column names as they were before the positional renaming below.
# NOTE(review): this list has 21 entries while 23 names are assigned; it has
# no counterpart for 'binDensity' and 'url', so it looks out of date -- confirm.
# ['name', '(un)dir', '(un)weighted', 'uni/bi-partite', 'selfloops', 'multiedges', 'nbVertices', 'nbEdges', 'density',
# 'averageDegree', 'tags', 'weightTag', 'Size', 'Rank', 'OptimalThreshold', 'OptimalShrinkage', 'Erank', 'Elbow',
# 'EnergyRatio', 'StableRank', 'NuclearRank']

# Positional renaming: this raises if the merged frame does not have exactly
# as many columns as there are names here.
fullDF.columns = ['Name', 'Directed', 'Weighted', 'Bipartite', 'Selfloops', 'Multiedges', 'nbVertices', 'nbEdges', 'Density', 'binDensity', 'averageDegree',
 'Tags', 'WeightTag', 'url', 'Size', 'rank', 'thrank', 'shrank', 'erank', 'elbow', 'energy', 'srank', 'nrank']

# Edge counts are rounded and stored as nullable integers.
fullDF['nbEdges'] = np.round(fullDF.nbEdges).astype('Int64')
# These columns are rounded to one decimal.
for _col in ('averageDegree', 'srank', 'nrank', 'erank'):
    fullDF[_col] = np.round(fullDF[_col], 1).astype('float')

fullDF = fullDF.drop(columns=['Density', 'WeightTag', 'Size'])

fullDF

Unnamed: 0,Name,Directed,Weighted,Bipartite,Selfloops,Multiedges,nbVertices,nbEdges,binDensity,averageDegree,Tags,url,rank,thrank,shrank,erank,elbow,energy,srank,nrank
0,7th_graders,directed,unweighted,unipartite,noselfloops,multiedges,29,740,0.447087,25.5,"Social,Offline,Multilayer",https://manliodedomenico.com/data.php,29,2,3,18.8,4,10,1.7,4.6
1,AT_2008,undirected,unweighted,,noselfloops,nomultiedges,2271,4648,0.000902,4.1,Economic,http://,842,82,108,687.5,376,410,51.6,170.0
2,CY_2015,undirected,unweighted,,noselfloops,nomultiedges,335,616,0.005505,3.7,Economic,http://,108,6,8,90.1,108,54,14.6,33.3
3,EE_2010,undirected,unweighted,,noselfloops,nomultiedges,1260,2604,0.001642,4.1,Economic,http://,312,20,34,227.5,60,114,6.1,29.9
4,PT_2009,undirected,unweighted,,noselfloops,nomultiedges,1257,2418,0.001532,3.8,Economic,http://,560,16,36,471.7,128,309,19.0,84.4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
675,word_adjacency_spanish,directed,unweighted,unipartite,noselfloops,nomultiedges,11586,45129,0.000336,3.9,"Informational,Language",https://doi.org/10.1126/science.1089167,2794,415,499,1634.0,170,423,10.7,92.9
676,yeast_transcription,directed,unweighted,unipartite,selfloops,nomultiedges,916,1094,0.001305,1.2,"Biological,Generegulation",http://www.weizmann.ac.il/mcb/UriAlon/download...,123,8,10,97.8,18,58,10.9,29.6
677,zebrafish_meso,directed,weighted,,selfloops,nomultiedges,71,1427,0.287123,20.1,"Biological,Connectome",http://,71,6,10,41.5,6,17,1.8,6.5
678,zebrafish_rnn,directed,weighted,,noselfloops,nomultiedges,4589,2106610,0.100034,459.1,Learned,http://,4589,1103,1258,2336.3,262,943,16.5,160.3


In [7]:
# Columns exported to both the LaTeX and the fixed-width table, in order.
cols_to_keep = [
    'Name', 'nbVertices', 'nbEdges', 'binDensity',
    'rank', 'srank', 'nrank', 'elbow', 'energy',
    'thrank', 'shrank', 'erank',
]

# LaTeX version: the column names are passed to tabulate as the first row of
# the data, not through its `headers` argument.
# NOTE(review): tabulate therefore treats the names as an ordinary data row --
# confirm this is intended.
with open('datasets_table.tex', 'w') as tex_file:
    data_to_keep = [cols_to_keep, *fullDF[cols_to_keep].values.tolist()]
    latex_table = tabulate.tabulate(data_to_keep, tablefmt='latex_longtable')
    tex_file.write(latex_table)

# Plain-text version, written through the custom DataFrame.to_fwf method.
fullDF[cols_to_keep].to_fwf('datasets_table.txt')