# Alpha diversity on functional information

The **Shannon, or Shannon-Wiener, Diversity Index** measures the diversity of species in a given community.
More details [here](https://www.statology.org/shannon-diversity-index/):

    
What is α (alpha) diversity? Check basic explanation [here](https://eco-intelligent.com/2016/10/14/alpha-beta-gamma-diversity/)

### Installing and importing required modules

In [1]:
# Install
!pip install duckdb
!pip install pandasql
!pip install pingouin
!pip install fastparquet
!pip install nbQA # A linter for Python Jupyter notebooks.

import contextlib
import io
import math
# Import
import os
import warnings

import duckdb
import fastparquet
import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pingouin as pg
import pyarrow
import scipy
import seaborn as sns
from duckdb import BinderException, CatalogException
from IPython.display import display
from pandasql import sqldf
from scipy.special import entr
from scipy.stats import levene, shapiro, ttest_ind



## Local imports

In [27]:
import sys
import os
import duckdb
sys.path.append(os.path.abspath(os.path.join('..')))

import numpy as np
import pandas as pd
import ipywidgets as widgets
import matplotlib.pyplot as plt
import seaborn as sns
import pingouin as pg

from duckdb import BinderException, CatalogException
# from IPython.display import display
# from pandasql import sqldf
# from scipy.special import entr
from scipy.stats import levene, shapiro, ttest_ind

import momics as mo
from momics.loader import load_parquet_files

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
root_folder = os.path.abspath(os.path.join('../parquet_files'))

### Load parquet files

In [7]:
# mgf_parquet_dfs = load_parquet_files("../parquet_files")
mgf_parquet_dfs = load_parquet_files(root_folder)

In [5]:
# display dicts if necessary
# [display(val) for _, val in mgf_parquet_dfs.items()]

### Get the data tables
- find out how to manage decision between creation of the DB vs CPU storage

In [6]:
# duckdb.execute("SHOW TABLES").df()
# duckdb.execute("SHOW ALL TABLES").df()

In [3]:
def createDuckDB(df_tables, sample_metadata, observatory_metadata):
    if isinstance(df_tables, list):
        # TODO: convert to dict
        raise NotImplementedError
    
    # dictionary
    duckdb.sql("CREATE TABLE SAMPLE_METADATA AS SELECT * FROM sample_metadata")
    duckdb.sql("SELECT COUNT(*) FROM SAMPLE_METADATA")
    duckdb.sql("CREATE TABLE OBS_METADATA AS SELECT * FROM observatory_metadata")
    duckdb.sql("SELECT COUNT(*) FROM OBS_METADATA")
    for table_name in df_tables:
        df = df_tables[table_name]
        cmd = f"CREATE TABLE {table_name} AS SELECT * FROM df"
        duckdb.sql(cmd)

    print(duckdb.sql("SHOW TABLES"))


In [12]:
sample_metadata = pd.read_csv(
    os.path.join(root_folder, "Batch1and2_combined_logsheets_2024-09-11.csv")
)

# Observatory metadata - from the GoogleSheets
observatory_metadata = pd.read_csv(
    os.path.join(root_folder, "Observatory_combined_logsheets_validated.csv")
)

# delete tables if they exist, should the user be asked?
try:
    for table_name in duckdb.execute("SHOW TABLES").df()['name']:
        duckdb.sql(f"DROP TABLE {table_name}")
except CatalogException:
    pass

# data to duckDB
createDuckDB(mgf_parquet_dfs, sample_metadata, observatory_metadata)

┌─────────────────┐
│      name       │
│     varchar     │
├─────────────────┤
│ LSU             │
│ OBS_METADATA    │
│ SAMPLE_METADATA │
│ SSU             │
│ go              │
│ go_slim         │
│ ips             │
│ ko              │
│ pfam            │
└─────────────────┘



In [37]:
mgf_parquet_dfs['go']

Unnamed: 0,ref_code,reads_name,id,name,aspect,abundance
0,EMOBON00141,HCFCYDSX5.UDI124,GO:0055085,transmembrane transport,biological_process,401836
1,EMOBON00141,HCFCYDSX5.UDI124,GO:0006355,"regulation of transcription, DNA-templated",biological_process,308520
2,EMOBON00141,HCFCYDSX5.UDI124,GO:0000160,phosphorelay signal transduction system,biological_process,206933
3,EMOBON00141,HCFCYDSX5.UDI124,GO:0006508,proteolysis,biological_process,161569
4,EMOBON00141,HCFCYDSX5.UDI124,GO:0009058,biosynthetic process,biological_process,142412
...,...,...,...,...,...,...
107454,EMOBON00001,HWLTKDRXY.UDI235,GO:0051118,"glucan endo-1,3-alpha-glucosidase activity",molecular_function,1
107455,EMOBON00001,HWLTKDRXY.UDI235,GO:0061579,N-acyl homoserine lactone synthase activity,molecular_function,1
107456,EMOBON00001,HWLTKDRXY.UDI235,GO:0090482,vitamin transmembrane transporter activity,molecular_function,1
107457,EMOBON00001,HWLTKDRXY.UDI235,GO:0101006,protein histidine phosphatase activity,molecular_function,1


In [38]:
mgf_parquet_dfs['go_slim']

Unnamed: 0,ref_code,reads_name,id,name,aspect,abundance
0,EMOBON00141,HCFCYDSX5.UDI124,GO:0008150,biological process,biological_process,49591
1,EMOBON00141,HCFCYDSX5.UDI124,GO:0071973,bacterial-type flagellar cell motility,biological_process,13382
2,EMOBON00141,HCFCYDSX5.UDI124,GO:0071840,cellular component organization or biogenesis,biological_process,78511
3,EMOBON00141,HCFCYDSX5.UDI124,GO:0071103,DNA conformation change,biological_process,53983
4,EMOBON00141,HCFCYDSX5.UDI124,GO:0045454,cell redox homeostasis,biological_process,279
...,...,...,...,...,...,...
6259,EMOBON00001,HWLTKDRXY.UDI235,GO:0016301,kinase activity,molecular_function,155241
6260,EMOBON00001,HWLTKDRXY.UDI235,GO:0016779,nucleotidyltransferase activity,molecular_function,71942
6261,EMOBON00001,HWLTKDRXY.UDI235,GO:0004871,signal transducer activity,molecular_function,32884
6262,EMOBON00001,HWLTKDRXY.UDI235,GO:0004872,receptor activity,molecular_function,31382


In [30]:
duckdb.execute("SHOW TABLES").df()

Unnamed: 0,name
0,LSU
1,OBS_METADATA
2,SAMPLE_METADATA
3,SSU
4,go
5,go_pivoted
6,go_slim
7,go_slim_pivoted
8,ips
9,ips_pivoted


In [39]:
display(duckdb.execute(f"SELECT * FROM go LIMIT 5").df())

Unnamed: 0,ref_code,reads_name,id,name,aspect,abundance
0,EMOBON00141,HCFCYDSX5.UDI124,GO:0055085,transmembrane transport,biological_process,401836
1,EMOBON00141,HCFCYDSX5.UDI124,GO:0006355,"regulation of transcription, DNA-templated",biological_process,308520
2,EMOBON00141,HCFCYDSX5.UDI124,GO:0000160,phosphorelay signal transduction system,biological_process,206933
3,EMOBON00141,HCFCYDSX5.UDI124,GO:0006508,proteolysis,biological_process,161569
4,EMOBON00141,HCFCYDSX5.UDI124,GO:0009058,biosynthetic process,biological_process,142412


In [40]:
display(duckdb.execute(f"SELECT * FROM go_pivoted LIMIT 5").df())

Unnamed: 0,id,EMOBON00225,EMOBON00095,EMOBON00121,EMOBON00136,EMOBON00144,EMOBON00147,EMOBON00155,EMOBON00138,EMOBON00148,...,EMOBON00094,EMOBON00193,EMOBON00195,EMOBON00227,EMOBON00239,EMOBON00099,EMOBON00145,EMOBON00149,EMOBON00123,EMOBON00126
0,GO:0006418,58129,44517,145437,65177,48385,128567,55350,135527,19045,...,82579,37082,101626,77938,71349,37757,49112,32197,8470,10756
1,GO:0006096,28632,19364,53953,25541,20394,46121,22194,45359,8307,...,36335,18310,48755,38573,35035,17000,20506,14874,4503,4119
2,GO:0006313,45423,31887,1419,19298,18339,5339,7975,12821,3508,...,58261,29565,104831,63320,59555,23466,18699,6769,1424,1288
3,GO:0006812,21162,13109,16454,13869,10141,14127,8846,20673,3518,...,24763,12980,39170,28879,25197,9408,10525,6217,1422,1738
4,GO:1902600,24047,10082,39210,21414,15739,33304,18030,39829,8805,...,19459,15554,47415,32834,31377,7349,16058,15752,3292,4528


In [41]:
display(duckdb.execute(f"SELECT COUNT(*) FROM go_pivoted").df())

Unnamed: 0,count_star()
0,2637


In [32]:
# show the first few rows of the tables
for table_name in duckdb.execute("SHOW TABLES").df()['name']:
    print(table_name)
    display(duckdb.execute(f"SELECT * FROM {table_name} LIMIT 5").df())

LSU


Unnamed: 0,ref_code,reads_name,ncbi_tax_id,abundance,superkingdom,kingdom,phylum,class,order,family,genus,species
0,EMOBON00141,HCFCYDSX5.UDI124,2157,11.0,Archaea,,,,,,,
1,EMOBON00141,HCFCYDSX5.UDI124,928852,1.0,Archaea,,Candidatus_Bathyarchaeota,,,,,
2,EMOBON00141,HCFCYDSX5.UDI124,1781242,1.0,Archaea,,Candidatus_Bathyarchaeota,,,,,Candidatus_Bathyarchaeota_archaeon_TCS64
3,EMOBON00141,HCFCYDSX5.UDI124,743725,2.0,Archaea,,Candidatus_Diapherotrites,,,,,
4,EMOBON00141,HCFCYDSX5.UDI124,28889,2.0,Archaea,,Crenarchaeota,,,,,


OBS_METADATA


Unnamed: 0,obs_id,project_name,latitude,longitude,geo_loc_name,loc_broad_ocean,loc_broad_ocean_mrgid,loc_regional,loc_regional_mrgid,loc_loc,...,organization,organization_country,organization_edmoid,wa_id,extra_site_info,contact_name,contact_email,contact_orcid,ENA_accession_number_umbrella,ENA_accession_number_project
0,PiEGetxo,EMOBON,43.338583,-3.014639,Spain,North Atlantic Ocean,1912,Bay of Biscay,2359,Abra de Bilbao,...,Research Centre for Experimental Marine Biolog...,Spain,2167,PiEGetxo Wa,"The selected sampling site is a Marina, close ...",Oihane Diaz De Cerio Arruabarrena,oihane.diazdecerio@ehu.eus,0000-0002-5605-8434,PRJEB51688,PRJEB51660
1,OSD74,EMOBON,41.146528,-8.666639,Portugal,Atlantic Ocean,1902,North Atlantic Ocean,1912,Porto Valley,...,Interdisciplinary Centre of Marine and Environ...,Portugal,1626,OSD74 Wa,"This site participated in OSD2014, OSD2015 and...",Catarina Magalhães,cmagalhaes@ciimar.up.pt,0000-0001-9576-2398,PRJEB51688,PRJEB51659
2,RFormosa,EMOBON,37.005639,-7.96925,Portugal,Atlantic Ocean,1902,North Atlantic Ocean,1912,Ria Formosa,...,Centre of Marine Sciences (CCMAR),Portugal,2516,RFormosa Wa,"Yes, it’s OSD sampling site ID OSD81, annually...",Bruno Louro,blouro@ualg.pt,0000-0001-8164-581X,PRJEB51688,PRJEB51661
3,EMT21,EMOBON,42.201944,-8.7985,Spain,Atlantic Ocean,1902,North Atlantic Ocean,1912,Vigo Seamount,...,Estación de Ciencias Mariñas de Toralla - Cent...,Spain,2163,,This site is located at the marina of the ECIM...,"Jose Gonzalez, Jesús Troncoso","josegonzález@uvigo.es, troncoso@uvigo.es",0000-0002-0202-8256,PRJEB51688,PRJEB51653
4,ROSKOGO,EMOBON,48.771667,-3.968333,France,North Atlantic Ocean,1912,English Channel,2389,French part of the English Channel,...,Station Biologique de Roscoff,France,521,ROSKOGO Wa,The SOMLIT-Astan time-series station is consid...,Fabienne Rigaut-Jalabert,fabienne.jalabert@sb-roscoff.fr,,PRJEB51688,PRJEB51662


SAMPLE_METADATA


Unnamed: 0,source_mat_id,source_mat_id_orig,samp_description,tax_id,scientific_name,investigation_type,env_material,collection_date,sampling_event,sampl_person,...,silicate_method,sulfate,sulfate_method,sulfide,sulfide_method,turbidity,turbidity_method,water_current,water_current_method,env_package
0,EMOBON_ROSKOGO_So_210826_micro_1,EMOBON ROSKOGO So210826 micro 1,EMOBON metagenome sediment sample from station...,412755,marine sediment metagenome,metagenome,sediment [ENVO:00002007],2021-08-26,ROSKOGO_So_210826,Wilfried Thomas; Daguin; Thiebaut; Comtet,...,,,,,,,,,,soft_sediment
1,EMOBON_ROSKOGO_So_210826_micro_2,EMOBON ROSKOGO So210826 micro 2,EMOBON metagenome sediment sample from station...,412755,marine sediment metagenome,metagenome,sediment [ENVO:00002007],2021-08-26,ROSKOGO_So_210826,Wilfried Thomas; Daguin; Thiebaut; Comtet,...,,,,,,,,,,soft_sediment
2,EMOBON_ROSKOGO_So_211119_micro_1,EMOBON ROSKOGO So211119 micro 1,EMOBON metagenome sediment sample from station...,412755,marine sediment metagenome,metagenome,sediment [ENVO:00002007],2021-11-19,ROSKOGO_So_211119,Thomas; Camusat;Daguin; Thiebaut; Comtet,...,,,,,,,,,,soft_sediment
3,EMOBON_ROSKOGO_So_211119_micro_2,EMOBON ROSKOGO So211119 micro 2,EMOBON metagenome sediment sample from station...,412755,marine sediment metagenome,metagenome,sediment [ENVO:00002007],2021-11-19,ROSKOGO_So_211119,Thomas; Camusat;Daguin; Thiebaut; Comtet,...,,,,,,,,,,soft_sediment
4,EMOBON_ROSKOGO_So_211222_micro_1,EMOBON ROSKOGO So211222 micro 1,EMOBON metagenome sediment sample from station...,412755,marine sediment metagenome,metagenome,sediment [ENVO:00002007],2022-12-22,ROSKOGO_So_211222,Comtet & Broudin,...,,,,,,,,,,soft_sediment


SSU


Unnamed: 0,ref_code,reads_name,ncbi_tax_id,abundance,superkingdom,kingdom,phylum,class,order,family,genus,species
0,EMOBON00141,HCFCYDSX5.UDI124,2157,5.0,Archaea,,,,,,,
1,EMOBON00141,HCFCYDSX5.UDI124,1801616,1.0,Archaea,,Candidatus_Woesearchaeota,,,,,
2,EMOBON00141,HCFCYDSX5.UDI124,183967,1.0,Archaea,,Euryarchaeota,Thermoplasmata,,,,
3,EMOBON00141,HCFCYDSX5.UDI124,651137,5.0,Archaea,,Thaumarchaeota,,,,,
4,EMOBON00141,HCFCYDSX5.UDI124,46769,4.0,Archaea,,Thaumarchaeota,,Cenarchaeales,Cenarchaeaceae,Cenarchaeum,


go


Unnamed: 0,ref_code,reads_name,id,name,aspect,abundance
0,EMOBON00141,HCFCYDSX5.UDI124,GO:0055085,transmembrane transport,biological_process,401836
1,EMOBON00141,HCFCYDSX5.UDI124,GO:0006355,"regulation of transcription, DNA-templated",biological_process,308520
2,EMOBON00141,HCFCYDSX5.UDI124,GO:0000160,phosphorelay signal transduction system,biological_process,206933
3,EMOBON00141,HCFCYDSX5.UDI124,GO:0006508,proteolysis,biological_process,161569
4,EMOBON00141,HCFCYDSX5.UDI124,GO:0009058,biosynthetic process,biological_process,142412


go_pivoted


Unnamed: 0,id,EMOBON00225,EMOBON00095,EMOBON00121,EMOBON00136,EMOBON00144,EMOBON00147,EMOBON00155,EMOBON00138,EMOBON00148,...,EMOBON00094,EMOBON00193,EMOBON00195,EMOBON00227,EMOBON00239,EMOBON00099,EMOBON00145,EMOBON00149,EMOBON00123,EMOBON00126
0,GO:0006418,58129,44517,145437,65177,48385,128567,55350,135527,19045,...,82579,37082,101626,77938,71349,37757,49112,32197,8470,10756
1,GO:0006096,28632,19364,53953,25541,20394,46121,22194,45359,8307,...,36335,18310,48755,38573,35035,17000,20506,14874,4503,4119
2,GO:0006313,45423,31887,1419,19298,18339,5339,7975,12821,3508,...,58261,29565,104831,63320,59555,23466,18699,6769,1424,1288
3,GO:0006812,21162,13109,16454,13869,10141,14127,8846,20673,3518,...,24763,12980,39170,28879,25197,9408,10525,6217,1422,1738
4,GO:1902600,24047,10082,39210,21414,15739,33304,18030,39829,8805,...,19459,15554,47415,32834,31377,7349,16058,15752,3292,4528


go_slim


Unnamed: 0,ref_code,reads_name,id,name,aspect,abundance
0,EMOBON00141,HCFCYDSX5.UDI124,GO:0008150,biological process,biological_process,49591
1,EMOBON00141,HCFCYDSX5.UDI124,GO:0071973,bacterial-type flagellar cell motility,biological_process,13382
2,EMOBON00141,HCFCYDSX5.UDI124,GO:0071840,cellular component organization or biogenesis,biological_process,78511
3,EMOBON00141,HCFCYDSX5.UDI124,GO:0071103,DNA conformation change,biological_process,53983
4,EMOBON00141,HCFCYDSX5.UDI124,GO:0045454,cell redox homeostasis,biological_process,279


go_slim_pivoted


Unnamed: 0,id,EMOBON00237,EMOBON00092,EMOBON00120,EMOBON00127,EMOBON00139,EMOBON00125,EMOBON00158,EMOBON00001,EMOBON00224,...,EMOBON00099,EMOBON00145,EMOBON00149,EMOBON00123,EMOBON00126,EMOBON00236,EMOBON00238,EMOBON00098,EMOBON00085,EMOBON00124
0,GO:0065003,2256,2377,9894,1251,5425,9315,3152,1802,2484,...,2428,3047,2945,980,1041,3152,2834,1259,1425,7485
1,GO:0006259,325102,208028,309901,33904,308345,305187,95875,201805,179454,...,163156,174964,98250,36484,39672,455845,425096,180846,150051,259448
2,GO:0006351,112496,64255,156009,14249,134132,167160,48737,69515,48329,...,44637,65324,57158,12447,17309,156333,146823,61152,56262,142282
3,GO:0006412,223002,155967,515014,29883,428634,558414,117245,150890,111957,...,116274,158162,142505,28258,36428,309580,290848,130056,134521,477232
4,GO:0042221,17406,11608,6095,856,10845,6756,3596,12457,10068,...,8776,7936,4222,999,937,23899,22859,9636,9968,5536


ips


Unnamed: 0,ref_code,reads_name,accession,description,abundance
0,EMOBON00141,HCFCYDSX5.UDI124,IPR003439,"ABC transporter-like, ATP-binding domain",164263
1,EMOBON00141,HCFCYDSX5.UDI124,IPR001789,"Signal transduction response regulator, receiv...",154785
2,EMOBON00141,HCFCYDSX5.UDI124,IPR000160,GGDEF domain,113387
3,EMOBON00141,HCFCYDSX5.UDI124,IPR015590,Aldehyde dehydrogenase domain,106142
4,EMOBON00141,HCFCYDSX5.UDI124,IPR001633,EAL domain,98996


ips_pivoted


Unnamed: 0,accession,EMOBON00001,EMOBON00092,EMOBON00120,EMOBON00127,EMOBON00139,EMOBON00125,EMOBON00158,EMOBON00237,EMOBON00242,...,EMOBON00225,EMOBON00145,EMOBON00149,EMOBON00123,EMOBON00126,EMOBON00193,EMOBON00195,EMOBON00227,EMOBON00239,EMOBON00099
0,IPR041384,0,0,0,1,0,0,0,0,0,...,0,3,0,0,0,1,0,0,0,0
1,IPR041343,0,0,0,13,0,0,5,0,0,...,0,1,0,6,5,1,0,0,0,0
2,IPR040719,30,43,1,1,8,3,7,50,59,...,54,5,4,0,2,43,55,51,72,66
3,IPR035173,59,154,0,1,14,0,7,88,148,...,195,27,5,0,1,119,267,224,91,0
4,IPR032288,117,72,32,36,51,87,53,177,298,...,156,71,63,6,48,105,345,135,196,231


ko


Unnamed: 0,ref_code,reads_name,abundance,entry,name
0,EMOBON00141,HCFCYDSX5.UDI124,27518,K00626,acetyl-CoA C-acetyltransferase [EC:2.3.1.9]
1,EMOBON00141,HCFCYDSX5.UDI124,19786,K07497,putative transposase
2,EMOBON00141,HCFCYDSX5.UDI124,19396,K03406,methyl-accepting chemotaxis protein
3,EMOBON00141,HCFCYDSX5.UDI124,15663,K03310,"alanine or glycine:cation symporter, AGCS family"
4,EMOBON00141,HCFCYDSX5.UDI124,13489,K07486,transposase


ko_pivoted


Unnamed: 0,entry,EMOBON00225,EMOBON00095,EMOBON00121,EMOBON00136,EMOBON00144,EMOBON00147,EMOBON00155,EMOBON00138,EMOBON00148,...,EMOBON00099,EMOBON00145,EMOBON00149,EMOBON00123,EMOBON00126,EMOBON00236,EMOBON00238,EMOBON00098,EMOBON00085,EMOBON00124
0,K03408,14,1,0,1,4,0,1,2,0,...,0,1,0,0,0,6,17,2,1,0
1,K01730,11,11,2,4,1,3,6,0,0,...,0,1,4,0,0,7,6,7,0,0
2,K19712,0,4,0,0,0,0,1,0,1,...,0,0,0,0,1,8,1,1,2,12
3,K07816,1,10,0,1,5,3,10,2,0,...,94,2,2,0,0,7,3,5,1,0
4,K03259,33,1,65,63,80,17,130,96,106,...,0,61,171,52,79,14,1,8,0,74


pfam


Unnamed: 0,ref_code,reads_name,abundance,entry,name
0,EMOBON00141,HCFCYDSX5.UDI124,164263,PF00005,ABC transporter
1,EMOBON00141,HCFCYDSX5.UDI124,106142,PF00171,Aldehyde dehydrogenase family
2,EMOBON00141,HCFCYDSX5.UDI124,100414,PF00990,"Diguanylate cyclase, GGDEF domain"
3,EMOBON00141,HCFCYDSX5.UDI124,88250,PF00072,Response regulator receiver domain
4,EMOBON00141,HCFCYDSX5.UDI124,74510,PF00873,AcrB/AcrD/AcrF family


pfam_pivoted


Unnamed: 0,entry,EMOBON00001,EMOBON00092,EMOBON00237,EMOBON00125,EMOBON00158,EMOBON00120,EMOBON00127,EMOBON00139,EMOBON00242,...,EMOBON00141,EMOBON00194,EMOBON00226,EMOBON00130,EMOBON00156,EMOBON00238,EMOBON00098,EMOBON00236,EMOBON00124,EMOBON00085
0,PF00106,48830,37298,82868,82963,23497,64006,6995,80987,97950,...,71386,93765,53560,37551,76755,105933,43963,113249,70165,37287
1,PF00012,15223,17030,24018,48428,15757,47212,5653,37868,28048,...,21567,24208,21500,16279,34678,30903,13410,32257,40661,12940
2,PF02779,8642,8536,13865,26645,4993,24823,1444,21127,17130,...,18325,14194,11462,7975,21631,18102,8055,19582,23008,8306
3,PF00440,8515,7055,13148,2250,2711,2283,684,7462,14617,...,17848,15571,10457,4892,5335,17150,7128,18511,1963,8105
4,PF01571,12418,8203,17504,53908,4440,43562,1109,34129,22976,...,16977,20494,10856,11466,38802,22278,9878,23454,46684,7956


## Do everything in pandas

### Load Metadata/Factors

In [None]:
def dbQuery2df(q):
    return duckdb.sql(q).df()


# rewrite the function to use pandas tables instead of duckdb
def generate_pivot_table(table_name):
    key_column = get_key_column(table_name)
    # # Step 1: Generate the SQL query
    # sql_query = generate_sql_query(table_name)

    # Step 2: Get distinct ref_code (EMOBON IDs)
    ref_codes_db = duckdb.sql(f"SELECT DISTINCT ref_code FROM {table_name};").fetchdf()

    # select distinct ref_codes from the dataframe
    ref_codes = mgf_parquet_dfs[table_name]['ref_code'].unique()

    print('length of the ref_codes_db:', len(ref_codes_db))
    print('length of the ref_codes:', len(ref_codes))

    # Step 3
    # iterate over the ref_codes and create a a pivot table
    # for each ref_code
    for ref_code in ref_codes:
        print(ref_code)
        # filter the data
        df = mgf_parquet_dfs[table_name][mgf_parquet_dfs[table_name]['ref_code'] == ref_code]
        print(df)
        break

    # Step 4: Execute the query and fetch the result as a DataFrame
    pivoted_table = duckdb.sql(sql_query).fetchdf()

    # Step 5: Transpose the pivoted table
    pivoted_table_transposed = pivoted_table.transpose()

    # Step 6: Rename the columns
    pivoted_table_transposed.columns = pivoted_table_transposed.iloc[0]
    pivoted_table_transposed = pivoted_table_transposed.drop
    (pivoted_table_transposed.index[0])

    # Step 7: Sort the columns
    pivoted_table_transposed_sorted = pivoted_table_transposed.sort_index(axis=1)

    return pivoted_table_transposed_sorted


def generate_sql_query(table_name):
    key_column = get_key_column(table_name)

    # Step 2: Get distinct ref_code (EMOBON IDs)
    ref_codes = duckdb.sql(f"SELECT DISTINCT ref_code FROM {table_name};").fetchdf()

    # Step 3: Generate the dynamic pivot query with COALESCE to replace NULL with 0
    sql_query = f"SELECT {key_column}"
    for ref_code in ref_codes["ref_code"]:
        sql_query += f", COALESCE(MAX(CASE WHEN ref_code = '{ref_code}' THEN abundance END), 0) AS {ref_code}"
    sql_query += f" FROM {table_name} GROUP BY {key_column};"

    return sql_query

def merge_data(pivoted_table_transposed_sorted, full_metadata):
    return pd.merge(
        full_metadata, pivoted_table_transposed_sorted, on="ref_code", how="inner"
    )

# Function to get the appropriate column based on the selected table
# Example tables: ['go', 'go_slim', 'ips', 'ko', 'pfam']
def get_key_column(table_name):
    if table_name in ["go", "go_slim"]:
        return "id"
    elif table_name == "ips":
        return "accession"
    elif table_name in ["ko", "pfam"]:
        return "entry"
    else:
        raise ValueError(f"Unknown table: {table_name}")

        
def calculate_alpha_diversity(df, factors):
    # Select columns that start with the appropriate prefix
    numeric_columns = [
        col
        for col in df.columns
        if col.startswith("GO:")
        or col.startswith("IPR")
        or col.startswith("K")
        or col.startswith("PF")
    ]

    # Calculate Shannon index only from the selected columns
    shannon_values = calculate_shannon_index(df[numeric_columns])

    # Create DataFrame with Shannon index and ref_code
    alpha_diversity_df = pd.DataFrame(
        {"ref_code": df["ref_code"], "Shannon": shannon_values}
    )

    # Merge with factors
    alpha_diversity_df = alpha_diversity_df.merge(factors, on="ref_code")

    return alpha_diversity_df

def shannon_index(row):
    row = pd.to_numeric(row, errors="coerce")
    total_abundance = row.sum()
    if total_abundance == 0:
        return np.nan
    relative_abundance = row / total_abundance
    ln_relative_abundance = np.log(relative_abundance)
    ln_relative_abundance[relative_abundance == 0] = 0
    multi = relative_abundance * ln_relative_abundance * -1
    return multi.sum()  # Shannon entropy
    
def calculate_shannon_index(df):
    return df.apply(shannon_index, axis=1)

def plot_shannon_index(alpha_diversity_df, selected_factor):
    alpha_diversity_df = alpha_diversity_df.sort_values(by=selected_factor)
    alpha_diversity_df["ref_code"] = pd.Categorical(
        alpha_diversity_df["ref_code"],
        categories=alpha_diversity_df["ref_code"],
        ordered=True,
    )
    plt.figure(figsize=(12, 6))
    # sns.barplot(x='ref_code', y='Shannon', hue=selected_factor, data=alpha_diversity_df, dodge=False, palette="coolwarm", errorbar=None)
    sns.barplot(
        x="ref_code",
        y="Shannon",
        hue=selected_factor,
        data=alpha_diversity_df,
        dodge=False,
        palette="coolwarm",
    )
    plt.xlabel("Sample")
    plt.ylabel("Shannon Index")
    plt.title(f"Shannon Index Grouped by {selected_factor}")
    plt.xticks(rotation=90)
    plt.tight_layout()
    plt.show()


def plot_average_shannon_per_condition(alpha_diversity_df, selected_factor):
    grouped_data = alpha_diversity_df.groupby(selected_factor)["Shannon"]
    means = grouped_data.mean()
    errors = grouped_data.sem()
    means.plot(
        kind="bar",
        yerr=errors,
        capsize=5,
        figsize=(10, 6),
        color=sns.color_palette("coolwarm", len(means)),
    )
    plt.xlabel(selected_factor)
    plt.ylabel("Average Shannon Index")
    plt.title(f"Average Shannon Index by {selected_factor}")
    plt.xticks(rotation=45, ha="right")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


query_metadata = """
    SELECT OBS_METADATA.*,
           SAMPLE_METADATA.*
    FROM SAMPLE_METADATA
    INNER JOIN OBS_METADATA 
    ON SAMPLE_METADATA.obs_id = OBS_METADATA.obs_id
    AND SAMPLE_METADATA.env_package = OBS_METADATA.env_package
    ORDER BY SAMPLE_METADATA.ref_code ASC
    """

In [44]:
# DP, do not understand, these two queries are the same
# Get the full_metadata DataFrame
full_metadata = dbQuery2df(query_metadata)

factors = duckdb.sql(query_metadata).df()
# Merge full_metadata with the pivoted table data on 'ref_code'

# Step 7: Calculate Shannon index and merge with factors

# Function to calculate Shannon index




<h2> Statistics <h2>

In [15]:
# Function to check normality using the Shapiro-Wilk test
def check_normality(data):
    stat, p_value = shapiro(data)
    return p_value > 0.05  # If p-value > 0.05, the data is normally distributed


# Function to check homogeneity of variances using Levene's test
def check_homogeneity_of_variances(groups):
    stat, p_value = levene(*groups)
    return p_value > 0.05  # If p-value > 0.05, variances are homogeneous


# Function to run t-test if there are only two conditions
def run_ttest(alpha_diversity_df, selected_factor, homogeneity):
    grouped_data = alpha_diversity_df.groupby(selected_factor)["Shannon"]
    groups = [group for _, group in grouped_data]

    # If variances are equal, run a standard t-test; otherwise, Welch's t-test
    equal_var = homogeneity
    t_stat, p_value = ttest_ind(groups[0], groups[1], equal_var=equal_var)

    return t_stat, p_value


# Modified ANOVA and post-hoc tests function with t-test option
def run_anova_and_posthoc(alpha_diversity_df, selected_factor):
    grouped_data = alpha_diversity_df.groupby(selected_factor)["Shannon"]
    groups = [group for _, group in grouped_data]

    if len(groups) == 1:
        return "Only one condition is present. No statistical test can be performed."

    normality = check_normality(alpha_diversity_df["Shannon"])

    # Handle cases with exactly two groups
    if len(groups) == 2:
        homogeneity = check_homogeneity_of_variances(groups)
        t_stat, p_value = run_ttest(alpha_diversity_df, selected_factor, homogeneity)
        result = f"\nT-test Results:\nT-statistic: {t_stat}, P-value: {p_value}\n"
        result += f"Normality: {'Pass' if normality else 'Fail'}\nHomogeneity: {'Pass' if homogeneity else 'Fail'}"
        return result

    # Handle cases with more than two groups
    try:
        homogeneity = check_homogeneity_of_variances(groups)
        anova_results = pg.anova(
            data=alpha_diversity_df, dv="Shannon", between=selected_factor
        )
        # print(anova_results.columns)  # Debugging line
        p_value_col = "p-unc" if "p-unc" in anova_results.columns else "p_val"
        result = f"\nANOVA Results:\n{anova_results}\nNormality: {'Pass' if normality else 'Fail'}\nHomogeneity: {'Pass' if homogeneity else 'Fail'}"

        if anova_results[p_value_col].values[0] < 0.05:
            if normality and homogeneity:
                posthoc = pg.pairwise_ttests(
                    data=alpha_diversity_df,
                    dv="Shannon",
                    between=selected_factor,
                    padjust="holm",
                )
                result += f"\nPost-hoc: Tukey's HSD\n{posthoc}"
            else:
                posthoc = pg.pairwise_gameshowell(
                    data=alpha_diversity_df, dv="Shannon", between=selected_factor
                )
                result += f"\nPost-hoc: Games-Howell\n{posthoc}"
        else:
            result += f"\nNo significant difference in {selected_factor}. No post-hoc test needed."

        return result
    except ValueError as e:
        return f"Error in statistical tests: {str(e)}"

<h2> Widgets <h2>

In [28]:
# Widget Definition for Table Selection (SSU or LSU)
table_selection_dropdown = widgets.Dropdown(
    options=["go", "go_slim", "ips", "ko", "pfam"],  # Options for table selection
    value="go",  # Default selection
    description="Select Table:",
)

# Widget Definition for Color Factor Selection
color_factor_dropdown = widgets.Dropdown(
    options=["Please select"]
    + [
        col for col in factors.columns if factors[col].dtype == "object"
    ],  # Non-numeric columns from factors
    value="Please select",  # Default value
    description="Color by:",
)

# Output widget for displaying plots
output_plot = widgets.Output()


#### Updating plots - Function to update the plot based on user selections
def update_plot(change):
    # Fetch the selected table and factor from the dropdowns
    selected_table = table_selection_dropdown.value
    selected_factor = color_factor_dropdown.value

    # Print debug information (you can remove these after debugging)
    print(f"Selected table: {selected_table}, Selected factor: {selected_factor}")

    if selected_factor != "Please select":
        # Generate the SQL query for the selected table
        query = generate_sql_query(selected_table)
        print(f"Generated SQL query: {query}")

        # Create or replace the pivoted table
        duckdb.sql(f"CREATE OR REPLACE TABLE {selected_table}_pivoted AS {query};")

        # Fetch the pivoted table
        pivoted_table = duckdb.sql(f"SELECT * FROM {selected_table}_pivoted;").fetchdf()
        print(f"Pivoted table: {pivoted_table.head()}")

        # Set key column ('id', 'accession', or 'entry') as index
        key_column = get_key_column(selected_table)
        pivoted_table.set_index(key_column, inplace=True)

        # Transpose the DataFrame
        pivoted_table_transposed = pivoted_table.T

        # Reset the index to get 'ref_code' as a column
        pivoted_table_transposed.reset_index(inplace=True)
        pivoted_table_transposed.rename(columns={"index": "ref_code"}, inplace=True)

        # Sort the DataFrame by 'ref_code'
        pivoted_table_transposed_sorted = pivoted_table_transposed.sort_values(
            by="ref_code"
        )

        # Merge the full metadata with the pivoted table
        merged_data = merge_data(pivoted_table_transposed_sorted, full_metadata)
        print(f"Merged data: {merged_data.head()}")

        # Calculate Shannon index and plot
        alpha_diversity_df = calculate_alpha_diversity(merged_data, full_metadata)
        print(f"Alpha diversity DF: {alpha_diversity_df.head()}")

        # plot_shannon_index(alpha_diversity_df, selected_factor)
        # plot_average_shannon_per_condition(alpha_diversity_df, selected_factor)
        # Clear previous plot and display the new one
        with output_plot:
            output_plot.clear_output(wait=True)
            plot_shannon_index(alpha_diversity_df, selected_factor)
            plot_average_shannon_per_condition(alpha_diversity_df, selected_factor)
            anova_output = run_anova_and_posthoc(alpha_diversity_df, selected_factor)
            print(anova_output)


# Observe changes in dropdowns
color_factor_dropdown.observe(update_plot, names="value")
table_selection_dropdown.observe(update_plot, names="value")

# Display dropdowns and output
display(table_selection_dropdown, color_factor_dropdown, output_plot)

Dropdown(description='Select Table:', options=('go', 'go_slim', 'ips', 'ko', 'pfam'), value='go')

Dropdown(description='Color by:', options=('Please select', 'obs_id', 'project_name', 'geo_loc_name', 'loc_bro…

Output()

Selected table: go, Selected factor: obs_id
Generated SQL query: SELECT id, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00141' THEN abundance END), 0) AS EMOBON00141, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00194' THEN abundance END), 0) AS EMOBON00194, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00226' THEN abundance END), 0) AS EMOBON00226, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00243' THEN abundance END), 0) AS EMOBON00243, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00156' THEN abundance END), 0) AS EMOBON00156, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00130' THEN abundance END), 0) AS EMOBON00130, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00193' THEN abundance END), 0) AS EMOBON00193, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00195' THEN abundance END), 0) AS EMOBON00195, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00227' THEN abundance END), 0) AS EMOBON00227, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00239' THEN abundance END), 0) AS EMOBON00239, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON000

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: go, Selected factor: geo_loc_name
Generated SQL query: SELECT id, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00225' THEN abundance END), 0) AS EMOBON00225, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00095' THEN abundance END), 0) AS EMOBON00095, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00121' THEN abundance END), 0) AS EMOBON00121, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00136' THEN abundance END), 0) AS EMOBON00136, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00144' THEN abundance END), 0) AS EMOBON00144, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00147' THEN abundance END), 0) AS EMOBON00147, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00155' THEN abundance END), 0) AS EMOBON00155, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00138' THEN abundance END), 0) AS EMOBON00138, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00148' THEN abundance END), 0) AS EMOBON00148, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00122' THEN abundance END), 0) AS EMOBON00122, COALESCE(MAX(CASE WHEN ref_code = 'EMO

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: go_slim, Selected factor: geo_loc_name
Generated SQL query: SELECT id, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00236' THEN abundance END), 0) AS EMOBON00236, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00238' THEN abundance END), 0) AS EMOBON00238, COALESCE(MAX(CASE WHEN ref_code =

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: go_slim, Selected factor: loc_regional
Generated SQL query: SELECT id, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00224' THEN abundance END), 0) AS EMOBON00224, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00140' THEN abundance END), 0) AS EMOBON00140, COALESCE(MAX(CASE WHEN ref_code =

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ips, Selected factor: loc_regional
Generated SQL query: SELECT accession, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00085' THEN abundance END), 0) AS EMOBON00085, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00236' THEN abundance END), 0) AS EMOBON00236, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00124' THEN abundance END), 0) AS EMOBON00124, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00238' THEN abundance END), 0) AS EMOBON00238, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00098' THEN abundance END), 0) AS EMOBON00098, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00131' THEN abundance END), 0) AS EMOBON00131, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00151' THEN abundance END), 0) AS EMOBON00151, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00133' THEN abundance END), 0) AS EMOBON00133, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00093' THEN abundance END), 0) AS EMOBON00093, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00088' THEN abundance END), 0) AS EMOBON00088, COALESCE(MAX(CASE WHEN ref_cod

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ips, Selected factor: contact_name
Generated SQL query: SELECT accession, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00094' THEN abundance END), 0) AS EMOBON00094, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00084' THEN abundance END), 0) AS EMOBON00084, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00089' THEN abundance END), 0) AS EMOBON00089, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00137' THEN abundance END), 0) AS EMOBON00137, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00157' THEN abundance END), 0) AS EMOBON00157, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00140' THEN abundance END), 0) AS EMOBON00140, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00142' THEN abundance END), 0) AS EMOBON00142, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00097' THEN abundance END), 0) AS EMOBON00097, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00224' THEN abundance END), 0) AS EMOBON00224, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00156' THEN abundance END), 0) AS EMOBON00156, COALESCE(MAX(CASE WHEN ref_cod

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ips, Selected factor: loc_broad_ocean
Generated SQL query: SELECT accession, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00122' THEN abundance END), 0) AS EMOBON00122, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00095' THEN abundance END), 0) AS EMOBON00095, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00121' THEN abundance END), 0) AS EMOBON00121, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00136' THEN abundance END), 0) AS EMOBON00136, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00144' THEN abundance END), 0) AS EMOBON00144, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00147' THEN abundance END), 0) AS EMOBON00147, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00155' THEN abundance END), 0) AS EMOBON00155, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00138' THEN abundance END), 0) AS EMOBON00138, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00148' THEN abundance END), 0) AS EMOBON00148, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00225' THEN abundance END), 0) AS EMOBON00225, COALESCE(MAX(CASE WHEN ref_

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ips, Selected factor: ENA_accession_number_umbrella
Generated SQL query: SELECT accession, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00242' THEN abundance END), 0) AS EMOBON00242, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00156' THEN abundance END), 0) AS EMOBON00156, COALESCE(MAX(

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ko, Selected factor: ENA_accession_number_umbrella
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00094' THEN abundance END), 0) AS EMOBON00094, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00224' THEN abundance END), 0) AS EMOBON00224, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00140' THEN abundance END), 0) AS EMOBON00140, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00142' THEN abundance END), 0) AS EMOBON00142, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00097' THEN abundance END), 0) AS EMOBON00097, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00084' THEN abundance END), 0) AS EMOBON00084, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00089' THEN abundance END), 0) AS EMOBON00089, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00137' THEN abundance END), 0) AS EMOBON00137, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00157' THEN abundance END), 0) AS EMOBON00157, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00143' THEN abundance END), 0) AS EMOBON00143, COALESCE(MAX(CASE 

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ko, Selected factor: obs_id
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00225' THEN abundance END), 0) AS EMOBON00225, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00095' THEN abundance END), 0) AS EMOBON00095, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ko, Selected factor: env_broad_biome
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00094' THEN abundance END), 0) AS EMOBON00094, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00224' THEN abundance END), 0) AS EMOBON00224, COALESCE(MAX(CASE WHEN ref_code 

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ko, Selected factor: wa_id
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00242' THEN abundance END), 0) AS EMOBON00242, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00193' THEN abundance END), 0) AS EMOBON00193, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON0

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ko, Selected factor: extra_site_info
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00236' THEN abundance END), 0) AS EMOBON00236, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00238' THEN abundance END), 0) AS EMOBON00238, COALESCE(MAX(CASE WHEN ref_code 

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: ko, Selected factor: loc_regional
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00225' THEN abundance END), 0) AS EMOBON00225, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00095' THEN abundance END), 0) AS EMOBON00095, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00121' THEN abundance END), 0) AS EMOBON00121, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00136' THEN abundance END), 0) AS EMOBON00136, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00144' THEN abundance END), 0) AS EMOBON00144, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00147' THEN abundance END), 0) AS EMOBON00147, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00155' THEN abundance END), 0) AS EMOBON00155, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00138' THEN abundance END), 0) AS EMOBON00138, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00148' THEN abundance END), 0) AS EMOBON00148, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00122' THEN abundance END), 0) AS EMOBON00122, COALESCE(MAX(CASE WHEN ref_code = '

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: pfam, Selected factor: loc_regional
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00155' THEN abundance END), 0) AS EMOBON00155, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00138' THEN abundance END), 0) AS EMOBON00138, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00148' THEN abundance END), 0) AS EMOBON00148, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00122' THEN abundance END), 0) AS EMOBON00122, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00095' THEN abundance END), 0) AS EMOBON00095, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00121' THEN abundance END), 0) AS EMOBON00121, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00136' THEN abundance END), 0) AS EMOBON00136, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00225' THEN abundance END), 0) AS EMOBON00225, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00144' THEN abundance END), 0) AS EMOBON00144, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00147' THEN abundance END), 0) AS EMOBON00147, COALESCE(MAX(CASE WHEN ref_code =

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: pfam, Selected factor: project_name
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00124' THEN abundance END), 0) AS EMOBON00124, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00085' THEN abundance END), 0) AS EMOBON00085, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00236' THEN abundance END), 0) AS EMOBON00236, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00238' THEN abundance END), 0) AS EMOBON00238, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00098' THEN abundance END), 0) AS EMOBON00098, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00094' THEN abundance END), 0) AS EMOBON00094, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00089' THEN abundance END), 0) AS EMOBON00089, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00137' THEN abundance END), 0) AS EMOBON00137, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00157' THEN abundance END), 0) AS EMOBON00157, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00224' THEN abundance END), 0) AS EMOBON00224, COALESCE(MAX(CASE WHEN ref_code =

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k

Selected table: pfam, Selected factor: geo_loc_name
Generated SQL query: SELECT entry, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00001' THEN abundance END), 0) AS EMOBON00001, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00092' THEN abundance END), 0) AS EMOBON00092, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00237' THEN abundance END), 0) AS EMOBON00237, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00125' THEN abundance END), 0) AS EMOBON00125, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00158' THEN abundance END), 0) AS EMOBON00158, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00120' THEN abundance END), 0) AS EMOBON00120, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00127' THEN abundance END), 0) AS EMOBON00127, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00139' THEN abundance END), 0) AS EMOBON00139, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00242' THEN abundance END), 0) AS EMOBON00242, COALESCE(MAX(CASE WHEN ref_code = 'EMOBON00133' THEN abundance END), 0) AS EMOBON00133, COALESCE(MAX(CASE WHEN ref_code =

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **k