# In [1]:
import pandas
from datetime import date

# Read the tab-separated resource table that the rest of the script renders.
dataset = pandas.read_csv("dataset.tsv", sep="\t")

# Strip leading/trailing whitespace from every object-dtype (text) column,
# one column at a time.
df_obj = dataset.select_dtypes(include=["object"])
for column_name in df_obj.columns:
    dataset[column_name] = df_obj[column_name].str.strip()

# "Name" is the genus and the species joined by a single space.
dataset["Name"] = dataset["Genus"] + " " + dataset["Species"]

# The whole README is accumulated in this one Markdown string; the sections
# below keep appending to it and it is written to disk at the very end.
md = ""

# Static page header. Two misspellings in the published text were corrected
# ("resouces" -> "resource", "Contributers" -> "Contributors"); everything
# else, including the trailing spaces Markdown uses as hard line breaks, is
# unchanged.
Introduction = """# Cnidarian/Ctenophore Resources
This is a collection of links to different datasets of Cnidarian and Ctenophore resources.  
It is unlikely that this is a complete list so if I am missing something please let me know.\n
If this page has helped you to save time or you just like the idea of creating this resource for the cnidarian community let me know at the next conference or Buy me a [virtual coffee](https://www.buymeacoffee.com/Xylo).
*Contributors:*
- B. Gideon Bergheim, Özbek Group, University Heidelberg, 
Contact: email: gideon.bergheim@cos.uni-heidelberg.de Twitter: [@XyloGideon](https://twitter.com/XyloGideon)
\n\n"""


md += Introduction

# Number of distinct "Genus Species" names present in the table.
Species_count = len(dataset.Name.unique())

md += "Current number of species: {}<br>".format(Species_count)
# date.today() renders in ISO format (YYYY-MM-DD) when formatted.
md += "Last updated: {}\n\n".format(date.today())

def clean(str):
    """Return the link slug for a heading: lowercased, each space replaced by a hyphen."""
    # NOTE: the parameter name shadows the builtin ``str``; it is kept so the
    # function signature stays exactly as callers know it.
    lowered = str.lower()
    # Splitting on a single space and re-joining with "-" maps every space
    # (including consecutive ones) to exactly one hyphen.
    return "-".join(lowered.split(" "))

#toc
# Table of contents: one indented link per phylum, subphylum, class and genus
# (in order of first appearance in the table), and below each genus one link
# per species followed by the resource types available for that species.
# Entries are collected in a list and joined once instead of growing ``md``
# with repeated string concatenation inside the nested loops.
toc_entries = []
for phylum in dataset.Phylum.unique():
    toc_entries.append("&nbsp;"*4 + "[{}](#{})<br>".format(phylum, clean(phylum)))
    df_phylum = dataset.loc[dataset.Phylum == phylum]
    for subphylum in df_phylum.Subphylum.unique():
        toc_entries.append("&nbsp;"*8 + "[{}](#{})<br>".format(subphylum, clean(subphylum)))
        df_subphylum = df_phylum.loc[df_phylum.Subphylum == subphylum]
        for _class in df_subphylum.Class.unique():
            toc_entries.append("&nbsp;"*12 + "[{}](#{})<br>".format(_class, clean(_class)))
            # The mask is built from the frame being filtered, so it never
            # depends on implicit index alignment with a parent frame.
            df_class = df_subphylum.loc[df_subphylum.Class == _class]
            for genus in df_class.Genus.unique():
                toc_entries.append("&nbsp;"*16 + "[{}](#{})<br>".format(genus, clean(genus)))
                df_genus = df_class.loc[df_class.Genus == genus]
                for species in df_genus.Species.unique():
                    df_species = df_genus.loc[df_genus.Species == species]
                    # Missing resource types are dropped (str.join would raise
                    # on a NaN); the data section skips them as well.
                    species_resources = df_species.Resource_type.dropna().unique()
                    resource_list = ", ".join(map(str, species_resources))
                    species_name = "{} {}".format(genus, species)
                    toc_entries.append(
                        "&nbsp;"*20
                        + "- *[{}](#{})* ({})<br>".format(
                            species_name, clean(species_name), resource_list
                        )
                    )
toc_entries.append("[References](#references)\n\n")
md += "".join(toc_entries)

#data
# Main body: nested headings for phylum (##), subphylum (###), class (####),
# genus (#####) and species (######). Genera and species are sorted; the
# higher ranks keep their order of first appearance. Under each species every
# matching table row becomes one icon + link line, grouped by resource type.
# Output is collected in a list and joined once instead of growing ``md``
# with repeated string concatenation inside the nested loops.

# Loop-invariant: the table-wide list of resource types fixes the order in
# which the resources of every species are listed.
resource_types = dataset.Resource_type.unique()

body_parts = []
for phylum in dataset.Phylum.unique():
    body_parts.append("\n## {}\n".format(phylum))
    df_phylum = dataset.loc[dataset.Phylum == phylum]
    for subphylum in df_phylum.Subphylum.unique():
        body_parts.append("### {}\n".format(subphylum))
        df_subphylum = df_phylum.loc[df_phylum.Subphylum == subphylum]
        for _class in df_subphylum.Class.unique():
            body_parts.append("#### {}\n".format(_class))
            # The mask is built from the frame being filtered, so it never
            # depends on implicit index alignment with a parent frame.
            df_class = df_subphylum.loc[df_subphylum.Class == _class]
            for genus in sorted(df_class.Genus.unique()):
                body_parts.append("##### {}\n".format(genus))
                df_genus = df_class.loc[df_class.Genus == genus]
                for species in sorted(df_genus.Species.unique()):
                    body_parts.append("###### *{} {}*\n".format(genus, species) + "\n")

                    df_species = df_genus.loc[df_genus.Species == species]
                    for resource in resource_types:
                        # An empty selection simply yields no iterations, so
                        # no explicit emptiness check is needed.
                        rows = df_species.loc[df_species.Resource_type == resource]
                        for _, row in rows.iterrows():
                            link = row["Link"]
                            doi = row["DOI"]

                            # Only render a DOI link when a DOI is present; a
                            # missing value (NaN) used to come out as the
                            # broken Markdown link "[nan]()".
                            if isinstance(doi, str):
                                doi_md = "[{}]({})".format(doi, "https://doi.org/" + doi)
                            else:
                                doi_md = ""

                            # Short citation: first word of the publication
                            # entry followed by "et al.".
                            if pandas.isnull(row.Publication):
                                pub = ""
                            else:
                                pub = row.Publication.split(" ")[0] + " et al."

                            body_parts.append(
                                "<a href='{}'><img src='icons/{}.svg'></a>".format(link, resource)
                            )
                            body_parts.append(
                                "- [{}]({})\t\tPublication: {}\t\t DOI: {}\n\n".format(
                                    resource, link, pub, doi_md
                                )
                            )
                    body_parts.append("<hr class='Species'>\n\n")
md += "".join(body_parts)


md += "# References\n"
# Every distinct, non-missing publication is listed once, in sorted order, as
# a block quote whose first word is set in bold.
p = dataset.Publication.dropna()
reference_entries = []
for publication in sorted(p.unique()):
    # Skip anything that is not text (isinstance instead of an exact
    # type comparison).
    if not isinstance(publication, str):
        continue
    # Split off the first word; the remainder is kept verbatim.
    first_word, _, remainder = publication.partition(" ")
    reference_entries.append("> **{}** {}\n\n".format(first_word, remainder))
md += "".join(reference_entries)


# Write the assembled Markdown to README.md, overwriting any previous
# version. UTF-8 is set explicitly because the text contains non-ASCII
# characters.
with open("README.md", mode="w", encoding="utf-8") as readme_file:
    readme_file.write(md)