In [199]:
# %load build_datasets
from typing import Dict, List, Tuple

import os
from pathlib import Path
import pandas as pd
import numpy as np
#this is a change

# This is kind of the main routine, it is used to build a dataset website from a csvfile
def build_pages_for_dataset(
        csv_file: str, #name of the csv_file without extension
        dataset_id: str, #inner id for the dataset, eg. "chiralMaps"
        dataset_title: str, #actual title to be displayed on the website, eg. "Chiral maps up to 6000 edges"
        max_rows :int =10000,
) -> None :
    
    data=pd.read_csv(f"../csv/{csv_file}.csv")#reads csv file
    preprocess_DataSet(data)
    
    if not os.path.exists(f'../{dataset_id}'):
        os.mkdir(f'../{dataset_id}')

    build_tables_from_csv(data,max_rows,dataset_id,dataset_title)
    if not os.path.exists(f'../_datasets/{dataset_id}.md'): #only runs if the file does not exists, to avoid overwriting
        build_dataset_page(dataset_id,dataset_title)


def build_tables_from_csv(
  data, #pandas dataset
  max_rows: int, 
  dataset_id: str, #inner id for the dataset, eg. "chiralMaps"
  dataset_title: str #actual title (on the website) eg "Chiral maps up to 6000 edges"
 ) -> None :
    data_split_list=[]
    for i in range(len(data)//max_rows):
        data_split_list.append(data.iloc[max_rows*i:max_rows*(i+1)])
    data_split_list.append(data.iloc[max_rows*(len(data)//max_rows):])

    for i in range(len(data_split_list)):
        frontmatter=f'---\nlayout: default\ndataset: {dataset_id}\ndataset_title: {dataset_title}\nfirst_entry: {data_split_list[i].iat[0,0]}\nlast_entry: {data_split_list[i].iat[-1,0]}\n---\n\n'
        html=data_split_list[i].to_html(index=False)
        html=html.replace('border="1"', 'id="myTable"')
        html=html.replace('class="dataframe"','class="display compact" style="width=100%"')
        html=html.replace('style="text-align: right;"','')
        with open(f'../_tables/{dataset_id}{i}.md', 'w') as md_file:
            md_file.write(frontmatter)
            md_file.write(f"The following table contains the entries from {{{{ page.first_entry }}}} to {{{{ page.last_entry }}}} of the dataset of [{{{{ page.dataset_title }}}}]( /datasets/{dataset_id} ).\n ")
            md_file.write(html)

def build_dataset_page(
  dataset_id: str, #inner id for the dataset, eg. "chiralMaps"
  dataset_title: str #actual title (on the website) eg "Chiral maps up to 6000 edges"
) -> None:
    with open(f'../_datasets/{dataset_id}.md', 'w') as dataset_md:             
        md_text=f"--- \nlayout: page\ntitle: {dataset_title}\n---\n"
        md_text=md_text+f"### Tables \n<ol>\n{{% for post in site.tables %}}\n  {{% if post.dataset == '{dataset_id}' %}}\n <li> A <a href= \"{{{{ site.url }}}}{{{{ post.url | relative_url }}}}\" > table </a> containing the entries from {{{{ post.first_entry }}}} to {{{{ post.last_entry }}}} </li>\n{{% endif %}}{{% endfor %}} \n </ol>\n\n\n### Resources"
        dataset_md.write(md_text)

def populate_a_dataEntry_page(
    dataEntry, #an entry o pandas
    toHumanDict: Dict, #a dictionary with the columns of the dataset translated to readable language
    dataset_id: str, #the id of the dataset which it belongs
    dataset_title: str #actual title (on the website) eg "Chiral maps up to 6000 edges"
) -> None:
    dataDict=dataEntry.to_dict()
    frontmatter=f'--- \n permalink: /{dataset_id}/{dataEntry.ID_url} \n collection: {dataset_id}\n layout: dataEntry\n title: {dataset_title} : {dataEntry.ID}\n---\n\n'
    with open(f'../_{dataset_id}/{dataEntry.ID_url}.md', 'w') as md_file:
        md_file.write(frontmatter)
        #iterate over the entries of the dictionary and populate
        for k in toHumanDict:
            if len(toHumanDict[k])>1 and not isinstance(toHumanDict[k], str):
                md_file.write("- **"+toHumanDict[k][0]+"**: ["+str(dataDict[k])+"]("+toHumanDict[k][1]+ str(dataDict[k+"_url"])+")\n")
            else:
                md_file.write("- **"+toHumanDict[k]+"**: "+str(dataDict[k])+"\n")

def create_dataPages_for_Dataset(
    dataset_id:str, #dataset_id
    dataSet, #a pandas Dataset
    dataset_title: str, #actual title (on the website) eg "Chiral maps up to 6000 edges"
    toHumanDict: Dict, #a dictionary with the columns of the dataset translated to readable language
) -> None:
  if not os.path.exists(f'../_{dataset_id}'):
        os.mkdir(f'../_{dataset_id}')
  for index,data in dataSet.iterrows():
      populate_a_dataEntry_page(data,toHumanDict,dataset_id,dataset_title)

def preprocess_DataSet(
    dataset, #a pandas dataset
    cols_to_url: List, #columns that need to be url-ised
)->None:
    dataset.replace(np.nan, '', regex=True)
    for col in cols_to_url:
        if col in dataset.columns:
            dataset[col+"_url"]=dataset[col].str.replace("[","_")
            dataset[col+"_url"]=dataset[col+"_url"].str.replace(";","_")
            dataset[col+"_url"]=dataset[col+"_url"].str.replace("]","")




In [180]:
dataset_id="test"
if not os.path.exists(f'../{dataset_id}'):
        os.mkdir(f'../{dataset_id}')

In [173]:
dataSet=pd.read_csv("../csv/test.csv")
preprocess_DataSet(dataSet,["ID","Du","Mir","DuMir","Sk","PlhSk"])
dataSet

Unnamed: 0,ID,genus,p,q,r,solv,V,E,F,vMult,...,plt,plh,Sk,PlhSk,ID_url,Du_url,Mir_url,DuMir_url,Sk_url,PlhSk_url
0,CM[10;1],1,4,4,10,S,5,10,5,1,...,Y,N,Sk(5;2),,CM_10_1,CM_10_1,CM_10_2,CM_10_2,Sk(5_2),
1,CM[10;2],1,4,4,10,S,5,10,5,1,...,Y,N,Sk(5;2),,CM_10_2,CM_10_2,CM_10_1,CM_10_1,Sk(5_2),
2,CM[20;1],1,4,4,10,S,10,20,10,1,...,Y,Y,Sk(10;3),PlhSk(10;2),CM_20_1,CM_20_1,CM_20_2,CM_20_2,Sk(10_3),PlhSk(10_2)
3,CM[20;2],1,4,4,10,S,10,20,10,1,...,Y,Y,Sk(10;3),PlhSk(10;2),CM_20_2,CM_20_2,CM_20_1,CM_20_1,Sk(10_3),PlhSk(10_2)
4,CM[21;1],1,3,6,14,S,7,21,14,1,...,Y,Y,Sk(7;2),PlhSk(7;1),CM_21_1,CM_21_4,CM_21_2,CM_21_3,Sk(7_2),PlhSk(7_1)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114,CM[80;7],11,8,4,40,S,40,80,20,1,...,Y,Y,Sk(40;7),,CM_80_7,CM_80_6,CM_80_8,CM_80_5,Sk(40_7),
115,CM[80;8],11,8,4,40,S,40,80,20,1,...,Y,Y,Sk(40;7),,CM_80_8,CM_80_5,CM_80_7,CM_80_6,Sk(40_7),
116,CM[80;9],11,8,4,40,S,40,80,20,1,...,Y,Y,Sk(40;8),,CM_80_9,CM_80_4,CM_80_10,CM_80_3,Sk(40_8),
117,CM[80;10],11,8,4,40,S,40,80,20,1,...,Y,Y,Sk(40;8),,CM_80_10,CM_80_3,CM_80_9,CM_80_4,Sk(40_8),


In [177]:
testDict={'ID':'ID',
          'genus':'Genus of the underlying surface',
          'p':'Length of the face',
          'q':'Valency',
          'r':'Length of the Petrie polygon',
          'solv':'Is the automorphism group solvable?',
          'V':'Number of vertices',
          'E':'Number of edges',
          'F':'Number of faces',
          'vMult':'Vertex multiplicity',
          'fMult':'Face multiplicity',
          'self':'The map is self-',
          'Du':['Dual map','../test/'],
          'Mir':['Mirror (enantihomorphic) map',"../test/"],
          'DuMir':['Dual of the mirror image',"../test/"],
          'Zq*:Exp':'Z_q-Exponent?',
          'Hj':'No idea',
          'plt':'No idea',
          'plh':'No idea',
          'Sk':'Skeleton',
          'PlhSk':'Some other kind of skeleton'
          }


In [200]:
create_dataPages_for_Dataset("test",dataSet,"Test dataset", testDict)

TypeError: create_dataPages_for_Dataset() missing 1 required positional argument: 'toHumanDict'

In [157]:
def url_from_id(
    id:str #id
)-> str: #the id but in a URL-friendly format
  id=id.replace("[","_")
  id=id.replace("]","")
  id=id.replace(";","_")
  return id

In [158]:
url_from_id("CM[10;1]")

'CM_10_1'