In [None]:
import pandas

: 

In [None]:
dataset = pandas.read_csv("dataset.tsv",sep="\t")
dataset = dataset.set_index(["Phylum","Subphylum","Class","Genus"])
dataset

: 

In [964]:
def construct_dataset_table_row(dataset_url, dataset_doi, publication, publication_doi, additional_info):
  # If any of the input values are missing, set them to '-'
  if dataset_url is None:
    dataset_url = '-'
  if dataset_doi is None:
    dataset_doi = '-'
  if publication is None:
    publication = '-'
  if publication_doi is None:
    publication_doi = '-'
  if additional_info is None:
    additional_info = '-'
  
  # Define a template string for a table row, with placeholders for the input values
  row_template = '''                            <tr>
                                <td>{}</td>
                                <td>{}</td>
                                <td>{}</td>
                                <td>{}</td>
                                <td>{}</td>
                              </tr>'''
  
  # Construct the dataset link element
  if dataset_url == '-':
    dataset_link = '-'
  else:
    dataset_link = '<a href="{}">Link to Dataset</a>'.format(dataset_url)
  
  # Construct the dataset DOI element
  if dataset_doi == '-':
    dataset_doi_element = '-'
  else:
    dataset_doi_element = '<a href="https://doi.org/{}">{}</a>'.format(dataset_doi, dataset_doi)
  
  # Construct the publication DOI element
  if publication_doi == '-':
    publication_doi_element = '-'
  else:
    publication_doi_element = '<a href="https://doi.org/{}">{}</a>'.format(publication_doi, publication_doi)
  
  # Return the template string with the input values inserted into the placeholders
  return row_template.format(dataset_link, dataset_doi_element, publication, publication_doi_element, additional_info)

print(construct_dataset_table_row("https://marinegenomics.oist.jp/aacu/viewer/dow", "", "Bergheim et al", "10.1093/molbev/msaa216", ""))

                            <tr>
                                <td><a href="https://marinegenomics.oist.jp/aacu/viewer/dow">Link to Dataset</a></td>
                                <td><a href="https://doi.org/"></a></td>
                                <td>Bergheim et al</td>
                                <td><a href="https://doi.org/10.1093/molbev/msaa216">10.1093/molbev/msaa216</a></td>
                                <td></td>
                              </tr>


In [965]:
table_header = """\n                              <tr>
                                <th>Dataset Link</th>
                                <th>Dataset DOI</th>
                                <th>Publication</th>
                                <th>Publication DOI</th>
                                <th>Additional Information</th>
                              </tr>
  """

def construct_dataset_table(genus, species, dataset_type, dataset_rows):
    """Constructs an HTML table for a given dataset type and rows."""
    # Replace spaces in dataset type with underscores
    clean_resource_type = dataset_type.replace(" ", "_")

    # Initialize empty list of rows
    rows = []
    for index, row in dataset_rows.iterrows():
        # Append HTML for a single table row to the list
        rows.append(construct_dataset_table_row(
            row.dataset_url,
            row.dataset_doi,
            row.publication,
            row.publication_doi,
            row.information
        ))

    # Join rows with newline characters and indent
    rows = "\n".join(rows)

    # Construct the HTML table with the header and rows
    table = (
        f"\n                          <table class='hidden resource_table {clean_resource_type}' id='{genus}_{species}_{clean_resource_type}_table'>"
        + table_header
        + rows
        + "\n                          </table>"
    )

    return table
  
#print(construct_dataset_table("Acropora","acuminata","Genome Assembly",test_row))

In [966]:
def construct_species_dataset_revealer(genus, species, species_df, resource_type):
    """Constructs an HTML element that reveals a table of datasets for a given resource type."""
    # Get rows of the species dataframe for the given resource type
    dataset_rows = species_df.loc[species_df.Resource_type == resource_type]

    # Replace spaces in resource type with underscores
    clean_resource_type = resource_type.replace(" ", "_")

    # Construct the revealer element
    revealer = (
        f"\n                      <div class='dataset_type_group {resource_type}' id='{genus}_{species}_{resource_type}'>"
        f"""\n                        <p onclick="toggleVisibility('{genus}_{species}_{clean_resource_type}_table')" """
        f"class='reveal resource {clean_resource_type}_title'>{resource_type}</p>"
    )

    # Construct the HTML table for the resource type
    html_table = construct_dataset_table(genus, species, resource_type, dataset_rows)

    # Concatenate the revealer and table
    full_html = revealer + html_table + "\n                      </div>"

    return full_html

#print(construct_species_dataset_revealer("Acropora","acuminata",test,"Genome Assembly"))
        

In [967]:
def construct_species_datasets(genus_index, genus, species):
    """Constructs an HTML element that reveals a group of tables of datasets for a given species."""
    # Get dataframe for the genus
    genus_df = dataset.loc[genus_index]

    # Get dataframe for the species
    species_df = genus_df.loc[genus_df.Species == species]

    # Get unique resource types for the species
    resource_types = species_df.Resource_type.unique()

    # Construct the revealer element
    revealer = (
        f"\n                  <!--==================={genus.upper()} {species.upper()}======================-->"
        f"""\n                  <p class='reveal species_name' onclick="toggleVisibility('{genus}_{species}_datasets')">{genus} {species}</p>"""
        f"\n                  <div class='hidden dataset_entry' id='{genus}_{species}_datasets'>"
    )

    # Initialize empty list of dataset tables
    dataset_tables = []
    for resource_type in resource_types:
        # Append HTML for a single dataset table to the list
        dataset_tables.append(construct_species_dataset_revealer(genus, species, species_df, resource_type))

    # Join dataset tables with newline characters and indent
    dataset_tables = "\n".join(dataset_tables)

    # Concatenate the revealer and dataset tables
    full_species_html = revealer + dataset_tables + "\n                  </div>"

    return full_species_html

#print(construct_species_datasets(dataset.index[0], "Acropora", "acuminata"))

In [968]:
def construct_genus(genus_index, genus_df, genus):
    """Constructs an HTML element that reveals a group of tables of datasets for all species in a given genus."""
    # Get list of unique species in the genus
    species_list = genus_df.Species.unique()

    # Construct the outer element for the genus
    constructor = (
        f"\n              <div class='genus_data' id='{genus}'>"
        f"\n                <h5 class='genus_name'>{genus}</h5>"
        f"\n                <!--==================={genus.upper()}======================-->"
        f"\n                <div class='genus_species'>"
    )

    # Initialize empty list of species datasets
    species_datasets = []
    for species in species_list:
        # Append HTML for a single species dataset to the list
        species_datasets.append(construct_species_datasets(genus_index, genus, species))

    # Join species datasets with newline characters and indent
    species_datasets = "\n".join(species_datasets)

    # Concatenate the outer element and species datasets
    constructor += species_datasets + "\n                </div>\n              </div>"

    return constructor

#print(construct_all_species(('Cnidaria', 'Anthozoa', 'Hexacorallia', 'Acropora'),dataset.loc[dataset.index[0]],dataset.index[0][-1]))


In [969]:
def construct_class_genera(given_class):
    """Constructs an HTML element that reveals a group of tables of datasets for all genera."""
    # Initialize empty list of genera tables
    genera_tables = []

    # Get list of unique genus indices
    genus_indices = [index for index in dataset.index.unique() if index[-2] == given_class]
    for index in genus_indices:
        # Append HTML for a single genus table to the list
        genera_tables.append(construct_genus(index, dataset.loc[index], index[-1]))

    # Join genera tables with newline characters and indent
    constructor = (
    f"\n              <!--==================={given_class.upper()}======================-->"
    f"\n              <div class='tax_class_data'>"
    f"\n              <h4 class = 'tax_class_name'>{given_class}</h4>"
    f"\n              <div class='tax_class_genera'>")
    constructor += "\n".join(genera_tables) + "\n              </div>\n              </div>"

    return constructor

#print(construct_class_genera("Hexacorallia"))

In [970]:
def construct_subphylum_classes(phylum_df,given_subphylum):
    """Constructs an HTML element that reveals a group of tables of datasets for all classes."""
    # Initialize empty list of genera tables
    class_tables = []

    # Get list of unique genus indices
    classes = phylum_df.loc[given_subphylum].index.get_level_values(0).unique()
    for given_class in classes:
        # Append HTML for a single genus table to the list
        class_tables.append(construct_class_genera(given_class))

    # Join genera tables with newline characters and indent
    constructor = (
    f"\n              <!--==================={given_subphylum.upper()}======================-->"
    f"\n              <div class='subphylum_data'>"
    f"\n              <h3 class = 'subphylum_name'>{given_subphylum}</h3>"
    f"\n              <div class='subphylum_genera'>")
    constructor += "\n".join(class_tables) + "\n              </div>\n              </div>"

    return constructor

#construct_subphylum_classes(dataset.loc["Cnidaria"],"Medusozoa")

In [971]:
def construct_phylum(dataset_df, given_phylum):
    """Constructs an HTML element that reveals a group of tables of datasets subphyla."""
    # Initialize empty list of genera tables
    subphylum_tables = []

    # Get list of unique genus indices
    subphyla = dataset_df.loc[given_phylum].index.get_level_values(0).unique()
    for given_subpylum in subphyla:
        # Append HTML for a single genus table to the list
        subphylum_tables.append(construct_subphylum_classes(dataset_df.loc[given_phylum],given_subpylum ))

    # Join genera tables with newline characters and indent
    constructor = (
    f"\n              <!--==================={given_phylum.upper()}======================-->"
    f"\n              <div class='subphylum_data'>"
    f"\n              <h3 class = 'subphylum_name'>{given_phylum}</h3>"
    f"\n              <div class='subphylum_genera'>")
    constructor += "\n".join(subphylum_tables) + "\n              </div>\n              </div>"

    return constructor

#construct_phylum(dataset, "Cnidaria")

In [972]:
def construct_buttons():
    resource_types = dataset.Resource_type.unique()
    clean_resource_types = [resource_type.replace(" ","_") for resource_type in resource_types]
    constructor = (
        f"<div class='power_button_frame'>"
    )
    buttons = []
    for resource_type,clean_resource_type in zip(resource_types,clean_resource_types):
        buttons.append(f"""
        <button class="power_button" onclick='resourceVisibility("{clean_resource_type}")'>Show/Hide all {resource_type}</button>""")
    constructor += "".join(buttons) + "\n </div>"

    return constructor

#construct_buttons()

In [973]:
def construct_input_form():
    resource_types = dataset.Resource_type.unique()
    options = "\n".join([f'      <option value="{resource}">{resource}</option>' for resource in resource_types])

    lines = (
        f'<!--SUBMIT DATASET FORM-->'
        f'\n<form class= "" id="submit_dataset" action="https://formspree.io/f/xzbqpgjv" method="POST">'
        f'''\n  <h2 class=" reveal form_title" onclick="toggleVisibility('input_form')">Suggest/Submit a Dataset</h2>'''
        f'\n  <div class = "hidden" id="input_form">'
        f'\n    <label for="email">Your email (in case I have questions):</label><br>'
        f'\n    <input type="text" id="email" name="email"><br>'
        f'\n    <label for="species-input">Enter the species name:</label><br>'
        f'\n    <input type="text" id="species-input" name="species-input"><br>'
        f'\n    <label for="dataset-type-select">Select the dataset type:</label><br>'
        f'\n    <select id="dataset-type-select" name="dataset-type-select">'
        f'{options}'
        f'      <option value="other">other, please specify</option>'
        f'\n    </select><br>'
        f'\n    <label for="dataset-link-input">Enter the link to the dataset:</label><br>'
        f'\n    <input type="text" id="dataset-link-input" name="dataset-link-input"><br>'
        f'\n    <label for="publication-textarea">Enter the publication:</label><br>'
        f'\n    <textarea id="publication-textarea" name="publication-textarea"></textarea><br>'
        f'\n    <label for="info-textarea">Enter any comment or added information:</label><br>'
        f'\n    <textarea id="info-textarea" name="info-textarea"></textarea><br>'
        f'\n    <p>I will review the submitted datasets and make sure that the information is correct. it might take a few days for a dataset to be added.</p>'
        f'\n    <input type="submit" value="Submit">'
        f'\n  </div>'
        f'\n</form>'
    )
    return lines

#print(construct_input_form())

In [974]:
from datetime import datetime

# Get the current date and time
now = datetime.now()

# Format the date and time as a string in the 'yyyy-mm-dd' format
date_string = now.strftime('%Y-%m-%d')

#build webpage
pow_but = construct_buttons()
header= f"""<!DOCTYPE html>
<html>
    <head>
        <link rel="stylesheet" type="text/css" href="styles.css">
        <script src="scripts.js"></script>
    </head>
        <body>
            <header>
                <h1 class='page_title'>Cnidarian Sequence Resource</h1>
                <p>I created this resource for everyone that wants to work with cnidarian datasets and gets frustrated about the time it takes to find all the different data sources. This is not a complete list. It is just the result of me searching for a few days. I am always happy to add new datasets to the list. If this page helped you to save some time let me know on the next conference or gift me a <a href='https://www.buymeacoffee.com/Xylo'>virtual coffee</a></p>
            {pow_but}
            </header>
            <main>"""
main = (
    f'<p id="update">last update: {date_string}</p>'
)
main += construct_input_form()
main += construct_phylum(dataset, "Cnidaria")
main += construct_phylum(dataset, "Ctenophora")
footer = """        </main>
        <footer>
        </footer>
    </body>
</html>"""

with open("index.html","w",encoding = "utf-8") as test:
    test.write(header)
    test.write(main)
    test.write(footer)


