In [1]:
import pandas as pd
import tabulate
import numpy as np

In [2]:
def to_fwf(df, fname, cols=None):
    """Write a DataFrame to a plain-text, column-aligned table file.

    The table is rendered by ``tabulate`` with the ``plain`` format. The first
    header label is prefixed with ``'# '`` so the header line starts with a
    comment marker.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to write (this is ``self`` when called as ``df.to_fwf(...)``).
    fname : str
        The path of the file to create; an existing file is overwritten.
    cols : list or array of strings, optional
        A list or an array containing the name of the columns (as strings) to
        be written in the file (if None all columns will be written). The
        caller's sequence is not modified.

    Raises
    ------
    IndexError
        If there is no column to write (``cols`` is empty).
    """

    if cols is None:
        cols = df.columns

    # Work on a copy so that prefixing the first label never touches `cols`.
    header = list(cols)
    # An f-string (rather than '+') also accepts non-string column labels.
    header[0] = f'# {header[0]}'
    content = tabulate.tabulate(df[cols].values.tolist(), header,
                                tablefmt='plain', stralign='right',
                                colalign=('left',))
    # Context manager: the handle is flushed and closed deterministically,
    # even if the write fails.
    with open(fname, 'w') as out_file:
        out_file.write(content)


# Adds the custom method to Pandas.
pd.DataFrame.to_fwf = to_fwf

In [3]:
def _read_commented_header_table(path):
    """Load a whitespace-delimited table whose first line holds the column names.

    The first line is split on whitespace after replacing every '#' with a
    space, and the result is used as the column names. The file is then parsed
    with ``comment="#"``, so anything from a '#' to the end of a line is ignored.

    Parameters
    ----------
    path : str
        Path of the text file to read.

    Returns
    -------
    pandas.DataFrame
        The parsed table, with a default integer index.
    """
    # Close the handle deterministically instead of leaving it to the
    # garbage collector.
    with open(path, 'r') as table_file:
        column_names = table_file.readline().replace('#', ' ').split()
    return pd.read_table(path, names=column_names, comment="#", delimiter=r"\s+")


graphPropFilename = '../graphs/graph_data/graph_properties_augmented.txt'
graphPropDF = _read_commented_header_table(graphPropFilename).set_index('name')

effRanksFilename = '../singular_values/properties/effective_ranks.txt'
effRanksDF = (
    _read_commented_header_table(effRanksFilename)
    .rename(columns={'(un)dir': 'Directed', 'Name': 'name'})
    .set_index('name')
)

# Inner join on the dataset name (the index of both frames), then turn the
# name back into an ordinary column.
fullDF = pd.merge(graphPropDF, effRanksDF, left_index=True, right_index=True).reset_index()

# The columns are renamed POSITIONALLY: the 24 labels below must follow the
# exact column order of the merged frame (pandas raises ValueError when the
# count differs, but a reordering in the input files would go unnoticed).
# NOTE(review): the column listing previously kept here named only 21 columns
# and no longer matched the data; confirm the order against the input files.
fullDF.columns = ['Name', 'Directed', 'Weighted', 'Bipartite', 'Selfloops', 'Multiedges', 'nbVertices', 'nbEdges', 'Density', 'binDensity', 'matDensity', 'averageDegree',
 'Tags', 'WeightTag', 'url', 'Size', 'rank', 'thrank', 'shrank', 'erank', 'elbow', 'energy', 'srank', 'nrank']

# Edge counts become nullable integers; the remaining measures keep one decimal.
fullDF['nbEdges'] = np.round(fullDF.nbEdges).astype('Int64')
for one_decimal_col in ('averageDegree', 'srank', 'nrank', 'erank'):
    fullDF[one_decimal_col] = np.round(fullDF[one_decimal_col], 1).astype('float')

fullDF = fullDF.drop(columns=['Density', 'WeightTag', 'Size'])

# LaTeX hyperlink "\href{url}{name}" for each dataset. The backslashes are
# written as '\\' because '\h' is not a valid Python escape sequence
# (SyntaxWarning since Python 3.12); the resulting strings are unchanged.
fullDF['Name\\href'] = '\\href{' + fullDF['url'] + '}{' + fullDF['Name'] + '}'

fullDF

Unnamed: 0,Name,Directed,Weighted,Bipartite,Selfloops,Multiedges,nbVertices,nbEdges,binDensity,matDensity,...,url,rank,thrank,shrank,erank,elbow,energy,srank,nrank,Name\href
0,7th_graders,directed,unweighted,unipartite,noselfloops,multiedges,29,740,0.463054,0.447087,...,https://manliodedomenico.com/data.php,29,2,3,18.8,4,10,1.7,4.6,\href{https://manliodedomenico.com/data.php}{7...
1,AT_2008,undirected,unweighted,,noselfloops,nomultiedges,2271,4648,0.000902,0.000901,...,https://doi.org/10.1007/s41060-019-00204-1,842,82,108,687.5,376,410,51.6,170.0,\href{https://doi.org/10.1007/s41060-019-00204...
2,CY_2015,undirected,unweighted,,noselfloops,nomultiedges,335,616,0.005505,0.005489,...,https://doi.org/10.1007/s41060-019-00204-1,108,6,8,90.1,108,54,14.6,33.3,\href{https://doi.org/10.1007/s41060-019-00204...
3,EE_2010,undirected,unweighted,,noselfloops,nomultiedges,1260,2604,0.001642,0.001640,...,https://doi.org/10.1007/s41060-019-00204-1,312,20,34,227.5,60,114,6.1,29.9,\href{https://doi.org/10.1007/s41060-019-00204...
4,PT_2009,undirected,unweighted,,noselfloops,nomultiedges,1257,2418,0.001532,0.001530,...,https://doi.org/10.1007/s41060-019-00204-1,560,16,36,471.7,128,309,19.0,84.4,\href{https://doi.org/10.1007/s41060-019-00204...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
674,word_adjacency_spanish,directed,unweighted,unipartite,noselfloops,nomultiedges,11586,45129,0.000336,0.000336,...,https://doi.org/10.1126/science.1089167,2794,415,499,1634.0,170,423,10.7,92.9,\href{https://doi.org/10.1126/science.1089167}...
675,yeast_transcription,directed,unweighted,unipartite,selfloops,nomultiedges,916,1094,0.001304,0.001304,...,http://www.weizmann.ac.il/mcb/UriAlon/download...,123,8,10,97.8,18,58,10.9,29.6,\href{http://www.weizmann.ac.il/mcb/UriAlon/do...
676,zebrafish_meso,directed,weighted,,selfloops,nomultiedges,71,1427,0.283079,0.283079,...,http://dx.doi.org/10.1016/j.neuron.2019.04.034,71,6,10,41.5,6,17,1.8,6.5,\href{http://dx.doi.org/10.1016/j.neuron.2019....
677,zebrafish_rnn,directed,weighted,,noselfloops,nomultiedges,4589,2106610,0.100056,0.100034,...,http://dx.doi.org/%2010.1016/j.neuron.2021.06.007,4589,1103,1258,2336.3,262,943,16.5,160.3,\href{http://dx.doi.org/%2010.1016/j.neuron.20...


In [4]:
# URL fragments that contain underscores. tabulate's LaTeX output escapes every
# '_' as '\_'; inside these fragments the escaping is reverted. Underscores
# elsewhere (e.g. in the dataset names shown as link text) stay escaped.
# The order is the order in which the replacements are applied.
URL_FRAGMENTS_WITH_UNDERSCORES = [
    "networks/wikipedia_link_",
    "networks/wiki_talk_",
    "articles/A_Wikipedia_Based_Map_of_Science/11638932",
    "networks/moreno_",
    "dfci.harvard.edu/C_elegans",
    "networks/brunson_",
    "dfci.harvard.edu/S_cerevisiae",
    "networks/radoslaw_",
    "networks/subelj_",
    "transtats.bts.gov/DL_SelectFields.asp?Table_ID=310",
    "iwdb.nceas.ucsb.edu/html/vazquez_",
    "iwdb.nceas.ucsb.edu/html/kato_1",
    "data_malaria_PLOSCompBiology_2013",
    "data/kegg2006_",
    "2015/07/High-School_data_2013",
    "~snijders/siena/Zeggelink_data",
    "html/thomps_towns",
    "American_College_Football_Network_Files",
    "~snijders/siena/tutorial2010_data",
]


def _restore_latex_markup(latex, url_fragments):
    """Undo the LaTeX escaping that breaks the ``\\href{url}{name}`` cells.

    Parameters
    ----------
    latex : str
        LaTeX table source in which braces, backslashes, tildes and
        underscores appear in escaped form.
    url_fragments : iterable of str
        Unescaped URL fragments; every occurrence of a fragment written with
        '\\_' in place of '_' is replaced by the fragment itself.

    Returns
    -------
    str
        The table source with the listed escapes reverted.
    """
    # The order matters and mirrors the original cell: braces first, then the
    # 'textbackslash{}' marker (which leaves the bare backslash of '\href'),
    # then tildes, so that the '~...' URL fragments below can match.
    for escaped, plain in (('\\{', '{'),
                           ('\\}', '}'),
                           ('textbackslash{}', ''),
                           ('\\textasciitilde{}', '~')):
        latex = latex.replace(escaped, plain)
    for fragment in url_fragments:
        latex = latex.replace(fragment.replace('_', '\\_'), fragment)
    return latex


cols_to_keep = ['Name\\href', 'nbVertices', 'rank', 'srank', 'nrank', 'elbow', 'energy', 'thrank', 'shrank', 'erank']
# The header is emitted as the first table row; the hyperlink column is
# labelled plain 'Name'. `cols_to_keep` itself is left untouched.
header_row = ['Name'] + cols_to_keep[1:]
data_to_keep = [header_row] + fullDF[cols_to_keep].values.tolist()
file_content = _restore_latex_markup(
    tabulate.tabulate(data_to_keep, tablefmt='latex_longtable'),
    URL_FRAGMENTS_WITH_UNDERSCORES,
)
# Open the output only once the content is complete, so a failure above cannot
# leave a truncated 'datasets_table.tex' behind.
with open('datasets_table.tex', 'w') as f:
    f.write(file_content)

In [5]:
# Dump every column of the merged table as an aligned plain-text file, using
# the to_fwf method attached to pandas.DataFrame earlier in this notebook.
fullDF.to_fwf(fname='datasets_table.txt')