# Display EMO-BON sequencing efforts across European sites
- First, show the validated and released data from [emo-bon-data-validation](https://github.com/emo-bon/emo-bon-data-validation/tree/main/validated-data)
- Second, ask Cymon what metadata can be shown about data which are not ready/released yet.
- I use `leafmap` for GIS integration

TODO: refactor to import the functions from `pull_seq_efforts.py`

In [None]:
# System-dependent setup: make the shared `utils` module importable, either
# from a fresh clone (Google Colab) or from the local checkout, then
# initialise the notebook and record which environment it runs in.
import sys
import os
import io
import logging
import subprocess
from IPython import get_ipython

logger = logging.getLogger(name="Diversity analysis app")

if 'google.colab' in str(get_ipython()):
    # clone the momics-demos repository to use the utils module from there
    # TODO: eventually utils from momics will be used for that
    try:
        # check=True turns a non-zero git exit status into CalledProcessError;
        # os.system only returned the status, so a failed clone went unnoticed
        # and was still reported as successful. A missing git executable
        # surfaces as OSError.
        subprocess.run(
            ['git', 'clone', 'https://github.com/palec87/momics-demos.git'],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as e:
        # Best effort: report the failure and carry on, so the import below
        # can still succeed when the repository is already present.
        logger.error(f"An error occurred while cloning the repository: {e}")
    else:
        logger.info("Repository cloned")

    sys.path.insert(0,'/content/momics-demos')

else:
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  # local utils, to be removed in the future

    # downside of this is that all the deps need to be installed in the current (momics-demos) environment
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../../../marine-omics')))  # local momics package, to be removed too

# Must come after the sys.path changes above, which make `utils` importable.
from utils import init_setup, get_notebook_environment
init_setup()

# Initialize the environment variable
# NOTE(review): `notebook_environment` is not read anywhere in this cell;
# kept in case another cell relies on it.
notebook_environment = 'unknown'
# Determine the notebook environment
env = get_notebook_environment()
logger.info(f"Environment: {env}")

Platform: local Linux


## Imports

In [None]:
# import requests
import pandas as pd
# import leafmap.leafmap as leafmap
import leafmap

## Data from the validated-data repo, run through pydantic by Cymon

In [3]:
# Raw CSV files from the validated-data folder of emo-bon-data-validation.
url_obs = "https://raw.githubusercontent.com/emo-bon/emo-bon-data-validation/refs/heads/main/validated-data/Observatory_combined_logsheets_validated.csv"
url_metadata = "https://raw.githubusercontent.com/emo-bon/emo-bon-data-validation/refs/heads/main/validated-data/Batch1and2_combined_logsheets_2024-11-12.csv"

# Read both tables, using the first CSV column as the row index.
df_obs = pd.read_csv(filepath_or_buffer=url_obs, index_col=0)
df_metadata = pd.read_csv(filepath_or_buffer=url_metadata, index_col=0)

# Trailing expression: the cell output shows the column index of each table.
(df_obs.columns, df_metadata.columns)

(Index(['project_name', 'latitude', 'longitude', 'geo_loc_name',
        'loc_broad_ocean', 'loc_broad_ocean_mrgid', 'loc_regional',
        'loc_regional_mrgid', 'loc_loc', 'loc_loc_mrgid', 'env_broad_biome',
        'env_local', 'env_package', 'tot_depth_water_col', 'organization',
        'organization_country', 'organization_edmoid', 'wa_id',
        'extra_site_info', 'contact_name', 'contact_email', 'contact_orcid',
        'ENA_accession_number_umbrella', 'ENA_accession_number_project'],
       dtype='object'),
 Index(['source_mat_id_orig', 'samp_description', 'tax_id', 'scientific_name',
        'investigation_type', 'env_material', 'collection_date',
        'sampling_event', 'sampl_person', 'sampl_person_orcid',
        ...
        'silicate_method', 'sulfate', 'sulfate_method', 'sulfide',
        'sulfide_method', 'turbidity', 'turbidity_method', 'water_current',
        'water_current_method', 'env_package'],
       dtype='object', length=113))

In [4]:
# Build the map and add the observatory table as a point layer, positioned by
# the table's longitude/latitude columns; the listed columns go into the popup.
m = leafmap.Map(center=(50, 10), zoom=4)
m.add_points_from_xy(
    df_obs,
    x="longitude",
    y="latitude",
    popup=[
        "organization",
        "contact_name",
        "contact_email",
        "ENA_accession_number_umbrella",
        "tot_depth_water_col",
    ],
    layer_name="EMO-BON Observatories",
)
# Uncomment to show the map as this cell's output:
# m

## Template for the TRACKING table from the `sequencing-crate` repo and Cymon's spreadsheet
- sequence [repo](https://github.com/emo-bon/sequencing-crate/tree/main/shipment)
- the full table will combine emo-bon-data-validation, sequencing-crate and the metagoflow tracker

In [6]:
from pull_seq_efforts import *

In [None]:
# Pull the sequencing-tracking data twice, once per argument value, using
# get_seq_track_data (brought in by the star import from pull_seq_efforts).
# NOTE(review): presumably each call returns a pandas DataFrame, since
# `.columns` is read from df_filters in a later cell; confirm in pull_seq_efforts.
df_sediment = get_seq_track_data("SEDIMENTS")
df_filters = get_seq_track_data("FILTERS")

In [None]:
# Display the columns of the filters tracking table next to ALL_SHIPMENTS
# (ALL_SHIPMENTS is also imported from pull_seq_efforts).
df_filters.columns, ALL_SHIPMENTS

(Index(['Batch Number', 'Folder @ Genoscope', 'ref_code',
        'Seq Run RO-Crate Filename', 'Forward Read Filename',
        'BackwardRead Filename', 'Run Status', 'version', 'Date Started',
        'Notes', 'who', 'System Run', 'Output Location', 'Output size',
        'comments'],
       dtype='object'),
 ['001', '002', '003-0', '003-1', '003-2'])

### Merging tables

In [None]:
# Fetch the two inputs for the merge below; both helpers come from the
# star import of pull_seq_efforts.
# NOTE(review): presumably these cover all shipments and the combined tracking
# data, and may download from remote sources; confirm in pull_seq_efforts.
df_shipments = query_all_shipment_data()
df_tracking = query_track_data()

In [None]:
# Combine the shipment and tracking tables with min_merge (from
# pull_seq_efforts) and preview the result.
# NOTE(review): the join keys and the columns kept are defined by min_merge
# and are not visible here; confirm in pull_seq_efforts.
df_full = min_merge(df_shipments, df_tracking)
df_full.head()