# Display sequencing emo-bon efforts across European sites
- First, show the validated and released data from [emo-bon-data-validation](https://github.com/emo-bon/emo-bon-data-validation/tree/main/validated-data)
- Second, ask Cymon what metadata can be shown about data which are not ready/released yet.
- I use `leafmap` for GIS integration

In [2]:
# system dependent setup
import sys
import os
import io
import logging
from IPython import get_ipython

logger = logging.getLogger(name="Diversity analysis app")

if 'google.colab' in str(get_ipython()):
    # On Colab, clone the momics-demos repository to use the utils module from there
    # TODO: eventually utils from momics will be used for that
    #
    # os.system() reports failure through its return value rather than by
    # raising OSError, so the exit status is checked explicitly. git also
    # exits non-zero when the target directory already exists (e.g. when
    # this cell is re-run), in which case the earlier checkout is reused.
    clone_status = os.system('git clone https://github.com/palec87/momics-demos.git')
    if clone_status == 0:
        logger.info("Repository cloned")
    else:
        logger.warning(
            f"An error occurred while cloning the repository: exit status {clone_status}"
        )

    # Make the cloned checkout take precedence when importing utils.
    sys.path.insert(0,'/content/momics-demos')

else:
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # local utils, to be removed in the future

    # downside of this is that all the deps need to be installed in the current (momics-demos) environment
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../../marine-omics')))  # local momics package, to be removed too

# utils is importable thanks to the sys.path changes made above.
from utils import init_setup, get_notebook_environment
init_setup()

# Initialize the environment variable
# NOTE(review): notebook_environment is not read anywhere in the visible cells;
# `env` below is the value that is actually used later — confirm before removing.
notebook_environment = 'unknown'
# Determine the notebook environment
env = get_notebook_environment()
logger.info(f"Environment: {env}")

Platform: local Linux


## Import

In [3]:
import sys
import os
import io

import requests
import pandas as pd
# import leafmap.leafmap as leafmap
import leafmap

In [4]:
# Resolve the repository root: the cloned checkout on Colab, otherwise the
# parent of the current working directory.
root_folder = os.path.abspath(
    '/content/momics-demos' if 'google.colab' in str(get_ipython()) else '../'
)

# Static assets (e.g. the figures used by the app) live under the repository root.
assets_folder = os.path.join(root_folder, 'assets')

## Data from the validated-data repo, run through pydantic by Cymon

In [28]:
# Validated logsheets published in the emo-bon-data-validation repository.
url_obs = "https://raw.githubusercontent.com/emo-bon/emo-bon-data-validation/refs/heads/main/validated-data/Observatory_combined_logsheets_validated.csv"
url_metadata = "https://raw.githubusercontent.com/emo-bon/emo-bon-data-validation/refs/heads/main/validated-data/Batch1and2_combined_logsheets_2024-11-12.csv"

# Sequencing tracker: a local CSV, read without promoting any column to the index.
df_tracker = pd.read_csv("min_merged.csv", index_col=False)
# Remote logsheets: the first CSV column becomes the index.
df_obs = pd.read_csv(url_obs, index_col=0)
df_metadata = pd.read_csv(url_metadata, index_col=0)

# Display the available columns (df_metadata.columns can be inspected the same way).
df_tracker.columns, df_obs.columns

(Index(['ref_code', 'obs_id', 'batch', 'sample_type', 'reads_name',
        'seq_run_ro_crate_fname', 'forward_read_fname', 'backward_read_fname',
        'run_status', 'version', 'date_started', 'who', 'who.1', 'system_run',
        'output_loc', 'output_size'],
       dtype='object'),
 Index(['project_name', 'latitude', 'longitude', 'geo_loc_name',
        'loc_broad_ocean', 'loc_broad_ocean_mrgid', 'loc_regional',
        'loc_regional_mrgid', 'loc_loc', 'loc_loc_mrgid', 'env_broad_biome',
        'env_local', 'env_package', 'tot_depth_water_col', 'organization',
        'organization_country', 'organization_edmoid', 'wa_id',
        'extra_site_info', 'contact_name', 'contact_email', 'contact_orcid',
        'ENA_accession_number_umbrella', 'ENA_accession_number_project'],
       dtype='object'))

In [29]:
"RFormosa" in df_obs.index

True

In [30]:
# Preview the first rows of the sequencing tracker table.
df_tracker.head()

Unnamed: 0,ref_code,obs_id,batch,sample_type,reads_name,seq_run_ro_crate_fname,forward_read_fname,backward_read_fname,run_status,version,date_started,who,who.1,system_run,output_loc,output_size
0,EMOBON00084,BPNS,1,sediment,DBH_AAANOSDA_1_HMNJKDSX3.UDI248,,DBH_AAANOSDA_1_1_HMNJKDSX3.UDI248_clean.fastq.gz,,COMPLETED,1.0,25/09/2023,CCMAR,,REDI,,
1,EMOBON00085,BPNS,1,sediment,DBH_AAAOOSDA_1_HMNJKDSX3.UDI260,,DBH_AAAOOSDA_1_1_HMNJKDSX3.UDI260_clean.fastq.gz,,COMPLETED,1.0,02/10/2023,CCMAR,,REDI,,7.7
2,EMOBON00087,BPNS,1,sediment,DBH_AAAIOSDA_1_HMNJKDSX3.UDI224,DBH_AAAIOSDA_1_HMNJKDSX3.UDI224.zip,DBH_AAAIOSDA_1_1_HMNJKDSX3.UDI224_clean.fastq.gz,DBH_AAAIOSDA_1_2_HMNJKDSX3.UDI224_clean.fastq.gz,COMPLETED,1.0,01/06/2023,HCMR,,HCMR-HPC,HCMR-HPC,12.0
3,EMOBON00094,NRMCB,1,sediment,DBH_AAACOSDA_1_HWLTKDRXY.UDI211,,DBH_AAACOSDA_1_1_HWLTKDRXY.UDI211_clean.fastq.gz,,COMPLETED,1.0,18/07/2023,CCMAR,,REDI,,19.0
4,EMOBON00095,NRMCB,1,sediment,DBH_AAAFOSDA_1_HMNJKDSX3.UDI283,,DBH_AAAFOSDA_1_1_HMNJKDSX3.UDI283_clean.fastq.gz,,COMPLETED,1.0,11/08/2023,CCMAR,,REDI,,9.3


In [31]:
# TODO: merge the tracker (df_tracker) with the metadata (df_metadata)


# statistics part
def get_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Count the non-null entries of every column per ``obs_id``.

    Args:
        df: Table that contains an ``obs_id`` column.

    Returns:
        One row per distinct ``obs_id`` (restored as a regular column), with
        every other column holding the number of non-null values found in
        that group.
    """
    per_obs_id = df.groupby("obs_id")
    return per_obs_id.count().reset_index()

In [32]:
# Per-obs_id counts of non-null tracker entries; preview the first rows.
df_tracker_stats = get_stats(df_tracker)
df_tracker_stats.head()

Unnamed: 0,obs_id,ref_code,batch,sample_type,reads_name,seq_run_ro_crate_fname,forward_read_fname,backward_read_fname,run_status,version,date_started,who,who.1,system_run,output_loc,output_size
0,AAOT,18,18,18,15,3,15,6,15,15,15,15,0,15,3,8
1,BPNS,26,26,26,23,1,23,2,23,23,23,23,0,23,3,14
2,EMT21,14,14,14,12,1,12,2,12,12,12,12,0,12,0,7
3,ESC68N,14,14,14,6,2,6,2,6,6,6,6,0,6,2,0
4,HCMR-1,10,10,10,5,1,3,1,3,3,3,3,0,3,1,0


## GIS methods

In [8]:
def create_map(df: pd.DataFrame) -> leafmap.Map:
    """Build a leafmap map with one point per row of ``df``.

    Args:
        df: Table providing ``longitude`` and ``latitude`` columns for the
            point positions, plus the columns listed in the popup.

    Returns:
        A map created with center ``(50, 10)`` and zoom 4, carrying the
        points in a layer named "EMO-BON Observatories".
    """
    # Columns shown in the popup of each point.
    popup_columns = [
        'organization',
        "contact_name",
        "contact_email",
        "ENA_accession_number_umbrella",
        'tot_depth_water_col',
    ]

    observatory_map = leafmap.Map(center=(50, 10), zoom=4)
    observatory_map.add_points_from_xy(
        df,
        x="longitude",
        y="latitude",
        popup=popup_columns,
        layer_name="EMO-BON Observatories",
    )
    return observatory_map

## APP

In [None]:
import panel as pn

from momics.utils import memory_load, reconfig_logger
from momics.panel_utils import serve_app, close_server

In [None]:
# Load the Tabulator extension for Panel.
pn.extension("tabulator")
if 'google.colab' in str(get_ipython()):
    # On Colab, additionally configure Panel with the 'colab' comms.
    pn.extension(comms='colab')
# Accent color handed to the template in app().
ACCENT = "teal"

# CSS-like style dictionary.
# NOTE(review): not referenced in the visible cells — confirm whether it is still needed.
styles = {
    "box-shadow": "rgba(50, 50, 93, 0.25) 0px 6px 12px -2px, rgba(0, 0, 0, 0.3) 0px 3px 7px -3px",
    "border-radius": "4px",
    "padding": "10px",
}

# Logo loaded from the assets folder; app() places it in the template sidebar.
image = pn.pane.JPG(os.path.join(assets_folder, "figs/metaGOflow_logo_italics.jpg"),
                    width=200,
                    height=100,
                    )

# Map built from the rows of df_obs; app() places it in the main area.
obs_map = create_map(df_obs)




def app():
    """Assemble the Panel template of the sequencing progress tracker.

    Returns:
        A ``FastListTemplate`` titled "Sequencing Progress Tracking" with the
        module-level ``image`` in the sidebar and ``obs_map`` in the main area.
    """
    # Only the logo so far; no selector widgets are wired into the sidebar yet.
    sidebar_items = [image]
    main_items = [obs_map]

    return pn.template.FastListTemplate(
        title="Sequencing Progress Tracking",
        sidebar=sidebar_items,
        main=main_items,
        main_layout=None,
        accent=ACCENT,
    )


# Build the template and hand it to serve_app together with the environment
# detected during setup.
template = app()

# NOTE(review): `s` is presumably the handle expected by close_server (see the
# commented-out call in the last cell) — confirm.
s = serve_app(template, env=env, name="landing_page")

In [None]:
# close_server(s, env=env)