In [None]:
from platform import python_version

# Record which interpreter executed this notebook.
interpreter_version = python_version()
print(interpreter_version)

## 14 DPC
### Select 2 Cases and Random Pathways (S2CRP)
#### Calc hard-reproducibility
#### Calc soft-reproducibility

## Gemini API

https://ai.google.dev/gemini-api/docs

### API key - Free of charge

https://aistudio.google.com/app/apikey

### Google Enable API

  - You are about to enable 'Generative Language API'.

https://ai.google.dev/gemini-api/docs/oauth

In [None]:
import ast
import os, sys, pickle

import numpy as np
import pandas as pd
# Widen the pandas text output so long values are not cut too early.
pd.set_option('display.width', 100)
pd.set_option('max_colwidth', 80)
import yaml

import seaborn as sns
sns.set_context("notebook", font_scale=1.4)

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
%matplotlib inline

# Make the project sources importable from the notebook directory.
sys.path.insert(1, '../src/')

# NOTE(review): star imports hide where names come from; Config, Biopax and Gemini
# used further down are presumably provided by these modules — consider importing
# them explicitly.
from Basic import *
from entrez_conversion import *
from biopax_lib import *
from gemini_lib import *

# NOTE(review): this silences every warning for the whole session, including
# pandas/numpy deprecation and chained-assignment warnings.
import warnings
warnings.filterwarnings("ignore")

from IPython.display import display, HTML
# display(HTML("<style>.container { width:100% !important; }</style>"))
# Let the notebook use the full browser width.
display(HTML("<style>:root { --jp-notebook-max-width: 100% !important; }</style>"))

# NOTE(review): `email` is not referenced by any cell visible in this notebook — confirm it is still needed.
email = "flalix@gmail.com"

# !pip3 install pyyaml
# All run parameters are read from params.yml, resolved relative to the working directory.
with open('params.yml', 'r') as file:
    dic_yml = yaml.safe_load(file)

# print(dic_yml)

In [None]:
# Unpack the YAML settings into notebook-level variables.
# --- root directories
root_chibe = dic_yml['root_chibe']
root_colab = dic_yml['root_colab']
root0 = dic_yml['root0']

# --- project identification
project = dic_yml['project']
s_project = dic_yml['s_project']

gene_protein = dic_yml['gene_protein']
s_omics = dic_yml['s_omics']

has_age = dic_yml['has_age']
has_gender = dic_yml['has_gender']

want_normalized = dic_yml['want_normalized']

# --- enrichment settings
abs_lfc_cutoff_inf = dic_yml['abs_lfc_cutoff_inf']
s_pathw_enrichm_method = dic_yml['s_pathw_enrichm_method']
num_min_degs_for_ptw_enr = dic_yml['num_min_degs_for_ptw_enr']

tolerance_pathway_index = dic_yml['tolerance_pathway_index']
type_sat_ptw_index = dic_yml['type_sat_ptw_index']
saturation_lfc_index = dic_yml['saturation_lfc_index']
chosen_model_sampling = dic_yml['chosen_model_sampling']

# --- cases
case_list = dic_yml['case_list']
case_sel_list = dic_yml['case_sel_list']
s_len_case = dic_yml['s_len_case']

# --- pathway cutoffs
pval_pathway_cutoff = dic_yml['pval_pathway_cutoff']
fdr_pathway_cutoff = dic_yml['fdr_pathway_cutoff']
num_of_genes_cutoff = dic_yml['num_of_genes_cutoff']

# --- runs and models; note that i_dfp_list is reassigned to [0] in a later cell
run_list = dic_yml['run_list']
chosen_model_list = dic_yml['chosen_model_list']
i_dfp_list = dic_yml['i_dfp_list']

# `exp_normalization` is None when normalization is not wanted;
# `normalization` is the matching non-None label.
exp_normalization='quantile_norm' if want_normalized else None
normalization='not_normalized' if exp_normalization is None else exp_normalization

cfg = Config(project, s_project, case_list, root0)

case = case_list[0]

# Placeholders; n_degs is overwritten by the call on the next line.
n_genes_annot_ptw, n_degs, n_degs_in_ptw, n_degs_not_in_ptw, degs_in_all_ratio = -1,-1,-1,-1,-1
# NOTE(review): the label is hard-coded to 'not_normalized' although `normalization`
# was just computed above — confirm this is intended when want_normalized is True.
abs_lfc_cutoff, fdr_lfc_cutoff, n_degs, n_degs_up, n_degs_dw = cfg.get_best_lfc_cutoff(case, 'not_normalized')

print(f"G/P LFC cutoffs: lfc={abs_lfc_cutoff:.3f}; fdr={fdr_lfc_cutoff:.3f}")
print(f"Pathway cutoffs: pval={pval_pathway_cutoff:.3f}; fdr={fdr_pathway_cutoff:.3f}; num of genes={num_of_genes_cutoff}")

In [None]:
# Build the Biopax helper from the settings loaded from params.yml.
bpx = Biopax(
    gene_protein, s_omics, project, s_project, root0,
    case_list, has_age, has_gender,
    clone_objects=False,
    exp_normalization=exp_normalization,
    geneset_num=0,
    num_min_degs_for_ptw_enr=num_min_degs_for_ptw_enr,
    tolerance_pathway_index=tolerance_pathway_index,
    s_pathw_enrichm_method=s_pathw_enrichm_method,
)

case = case_list[0]

# Register the default cutoffs, then open the first case.
bpx.cfg.set_default_best_lfc_cutoff(
    normalization, abs_lfc_cutoff=1, fdr_lfc_cutoff=0.05
)
ret, degs, degs_ensembl, dfdegs = bpx.open_case(case, verbose=False)

print("\nEcho Parameters:")
bpx.echo_parameters()

### is_seldata = True - 2 cases randomly selected pathways (2CRSP)

In [None]:
# ---- settings for the "selected data" analysis ----
is_seldata = True
i_dfp = 0
i_dfp_list = [0]

# From here on the notebook works on the selected cases only.
case_list = case_sel_list
case_sel0, case_sel1 = case_sel_list[0], case_sel_list[1]

with_gender = bpx.has_gender
if with_gender:
    with_gender_list = [False, True]
else:
    with_gender_list = [False]

print(f"with_gender = {with_gender} because has_gender = {bpx.has_gender}")
# ---------------------------------------------------

In [None]:
is_seldata, case_list, s_len_case, run_list, chosen_model_list, i_dfp_list, chosen_model_sampling

### Instantiating Gemini

In [None]:
# Gemini-specific settings; the API key comes from params.yml, never from the notebook itself.
API_KEY = dic_yml['API_KEY']
disease = dic_yml['disease']
context_disease = dic_yml['context_disease']
n_sentences = dic_yml['n_sentences']
chosen_model_sampling = dic_yml['chosen_model_sampling']

gem = Gemini(
    bpx=bpx,
    is_seldata=is_seldata,
    disease=disease,
    context_disease=context_disease,
    API_KEY=API_KEY,
    n_sentences=n_sentences,
    root0=root0,
    chosen_model_list=chosen_model_list,
    i_dfp_list=i_dfp_list,
    chosen_model_sampling=chosen_model_sampling,
)

# Echo the effective configuration held by the new object.
print("\n")
print(gem.disease, gem.is_seldata, gem.i_dfp_list, gem.chosen_model_list)
print("Context:", context_disease)

In [None]:
# Hand the case currently open in bpx, plus its two enrichment tables, over to gem.
gem.set_case(bpx.case, bpx.df_enr, bpx.df_enr0)

In [None]:
gem.is_seldata, gem.bpx.case_list, gem.chosen_model_list, gem.i_dfp_list, gem.chosen_model_sampling

### Reading Selected Case and Pathways + settings vars

In [None]:
# Flags reused by later cells.
test, force, verbose = False, False, False

chosen_model = 3
gemini_model = gem.gemini_models[chosen_model]

# Sampling parameters.
query_type = 'strong'
N = 30
case = case_sel0

print("")
dfsel = gem.open_yes_no_sampling(
    case=case, N=N, query_type=query_type, verbose=True
)
print("")
dfsel.head(3)

### Read Gemini

In [None]:
# Read the stored answers for one run / case / question / model.
verbose = False
run = 'run01'
case = case_list[0]
iq = 0
chosen_model = 3

df = gem.read_gemini(
    run=run,
    case=case,
    iq=iq,
    i_dfp=i_dfp,
    chosen_model=chosen_model,
    verbose=verbose,
)
print(len(df))
df.head(3)

In [None]:
df.curation.unique()

In [None]:
len(df[df.curation=='Yes']), len(df[df.curation!='Yes'])

In [None]:
run = 'run01'

# Yes/No bar chart for the chosen run.
fig = gem.barplot_yes_no_per_case_run(
    run=run, case_list=case_list,
    chosen_model_list=chosen_model_list, i_dfp_list=i_dfp_list,
    width=800, height=600,
    fontsize=12, fontcolor='black',
    xaxis_title="cases-idfp-models", yaxis_title='n answers',
    minus_y_yes_no=-3, minus_y_i_dfp=-6, minus_y_case=-9,
    annot_fontfamily="Arial, monospace", annot_fontsize=12,
    annot_fontcolor='black',
    savePlot=True, verbose=False,
)

# Only show the figure when one was returned.
if fig:
    fig.show()

## Hard reproducibility (code)

In [None]:
gem.root_gemini_root

In [None]:
verbose = True
force = False

# The first two configured runs are the pair being compared.
run1, run2 = run_list[0], run_list[1]

dfall = gem.rrr_concat_2_runs(run1, run2, force=force, verbose=verbose)

# Mean and spread of the answer similarity.
mu = dfall.answer_sim.mean()
std = dfall.answer_sim.std()

# Split the rows at the similarity cutoff held by gem.
dfsim = dfall[dfall.answer_sim >= gem.answer_min_cutoff]
dfnot = dfall[dfall.answer_sim < gem.answer_min_cutoff]
nsim = len(dfsim)
ndiff = len(dfnot)

mu, std, nsim, ndiff

In [None]:
print(dfall.columns)
dfall.head(3)

In [None]:
dfall = dfall.sort_values(['case', 'iq', 'i_dfp', 'model_name', 'pathway_id'])
dfall.head(3)

In [None]:
verbose = True

# Run-versus-run hard reproducibility; the call returns eight values.
hard_repro_result = gem.calc_run_run_hard_repro(
    run1, run2,
    chosen_model_list=chosen_model_list,
    case_list=case_list,
    verbose=verbose,
)
text, dfall2, df_case, mu_all, std_all, n, nEq_all, nNot_all = hard_repro_result

print(f"\n'{text}'")

In [None]:
# One-line summary of the values unpacked in the previous cell.
# NOTE(review): mu_all is multiplied by 100 before the '%' sign but std_all is not —
# confirm whether std_all is already a percentage or needs the same factor.
f"mu {100*mu_all:.1f}% ({std_all:.1f}%), n={n}, nEq_all={nEq_all}, nNot_all={nNot_all}"

In [None]:
print(len(dfall2))
dfall2.head(3)

In [None]:
df_case

### Inter-model reproducibility: IMR - hard reproducibility

In [None]:
verbose = True
force = False

# Passed straight through as the first positional argument.
one_or_two = 1

imr_result = gem.calc_inter_model_hard_repro_one_or_two(
    one_or_two, run1, run2,
    chosen_model_list=chosen_model_list,
    case_list=case_list,
    force=force,
    verbose=verbose,
)
text, df_imr, dfcase_imr, mu_imr, std_imr, n, nsim_imr, nnot_imr = imr_result

print(f"\n'{text}'")

In [None]:
df_imr.head(3)

In [None]:
chosen_model_list, mu_imr, std_imr, n, nsim_imr, nnot_imr

In [None]:
dfcase_imr

### Hard reproducibility summary for run01 or run02

In [None]:
verbose = True
force = False

# Hard-reproducibility summary with one_or_two=1.
dfhard = gem.summary_hard_repro(
    one_or_two=1,
    run1=run1, run2=run2,
    chosen_model_list=chosen_model_list,
    case_list=case_list,
    force=force, verbose=verbose,
)
dfhard

In [None]:
# Same summary as the previous cell, this time with one_or_two=2.
dfhard = gem.summary_hard_repro(
    one_or_two=2,
    run1=run1, run2=run2,
    chosen_model_list=chosen_model_list,
    case_list=case_list,
    force=force, verbose=verbose,
)
dfhard

### Gemini counts
  - Count Yes and No per model, run versus iq and i_dfp:
    - 2 iq have PubMed inside the search (pubmed=True) and 2 don't
    - i_dfp: 0 to 3, 0=enriched, 1=middle, 2=end of the table, and 3=out of enriched table

In [None]:
force = False
verbose = False

# For every run and model, compute the answer counts and print one progress line per run.
for run in run_list:
    print(">>>", run, end=' ')

    for chosen_model in chosen_model_list:
        print(chosen_model, end='  ')

        # old gemini_create_statistical_analysis
        dfall = gem.gemini_calc_answers_counts(
            run=run, case_list=case_list, chosen_model=chosen_model,
            force=force, verbose=verbose,
        )
        print(len(dfall), end=' ')

    print("")

In [None]:
verbose = True
run, chosen_model = 'run01', 1

# old open_gemini_statistical_analysis
dfall = gem.open_gemini_answers_counts(
    run=run, chosen_model=chosen_model, verbose=verbose
)
print(len(dfall))

dfall.head(6)

## Soft reproducibility (code)

### Calc dfpiv: one run, one model, all cases, all i_dfp

In [None]:
verbose = True
force = False

# Compute the semantic-consensus pivot for each (run, model) pair.
for run in run_list:
    for chosen_model in chosen_model_list:
        print(">>>", run, chosen_model)
        gem.calc_dfpiv_semantic_consensus_run_per_model(
            run=run,
            case_list=case_list,
            chosen_model=chosen_model,
            force=force,
            verbose=verbose,
        )
        print("")


### Save yes/no consensus - run, all models

In [None]:
verbose = False
force = False

# Save the yes/no consensus tables for each run; the return value is discarded.
for run in run_list:
    print(">>>", run)
    _ = gem.save_gemini_yes_no_run_per_model(
        run=run,
        chosen_model_list=chosen_model_list,
        force=force,
        verbose=verbose,
    )
    print("")

### Open yes-no one model

In [None]:
verbose = True
run = 'run01'
# Switch to 'No' to inspect the other consensus table.
consensus = 'Yes'

dfpivc = gem.open_gemini_yes_no_run_per_model(
    run=run, consensus=consensus, verbose=verbose
)
print(len(dfpivc))
print(dfpivc.columns, '\n')
dfpivc.head(3)

In [None]:
dfpivc.tail(4)

### One Model Consensus Reproducibility (OMCR)
  - open_dfpiv_semantic_consensus_run_per_model()
  - return: consensus and 4 questions, n_yes, n_no, unanimous

In [None]:
verbose = True
run, chosen_model = 'run01', 1

# Load the stored consensus pivot for one run and one model.
dfpiv = gem.open_dfpiv_semantic_consensus_run_per_model(
    run=run, chosen_model=chosen_model, verbose=verbose
)
print(len(dfpiv))
dfpiv.head(3)

In [None]:
dfpiv.columns

In [None]:
dfpiv.consensus.unique()

In [None]:
dfpiv.run.unique(), dfpiv.case.unique(), dfpiv.i_dfp.unique(), dfpiv.chosen_model.unique()

### Run-run Consensus Reproducibility (RRCR)

In [None]:
verbose = True
force = False

# Compute the all-models pivot for each run; the return value is discarded.
for run in run_list:
    print(">>>", run)
    # calc_gemini_consensus_counts_run_all_models ->
    _ = gem.calc_gemini_dfpiva_all_models_one_run(
        run=run,
        case_list=case_list,
        chosen_model_list=chosen_model_list,
        force=force,
        verbose=verbose,
    )
    print("")

print("\n------------- end ----------------")

In [None]:
verbose = True
run = 'run01'

# open_gemini_consensus_counts_run_all_models --> open_gemini_dfpiva_all_models_one_run
dfpiva = gem.open_gemini_dfpiva_all_models_one_run(
    run=run, chosen_model_list=chosen_model_list, verbose=verbose
)

print(len(dfpiva))
dfpiva.head(3)

### Compare hard 2 runs (uses dfpiva)

In [None]:
verbose = False
force = False
run1, run2 = 'run01', 'run02'

# Compare the total answers of the two runs; returns a totals table and a statistics table.
dftot, dfstat = gem.compare_hard_2_runs_total_answers(
    run1, run2,
    case_list=case_list,
    chosen_model_list=chosen_model_list,
    pval_cutoff=0.05,
    force=force,
    verbose=verbose,
)
dftot

In [None]:
# Compact view of the statistics table (identifiers, cutoffs, p-value, dof).
cols = [
    'case', 'run1', 'run2',
    'pval_cutoff', 'pval_cutoff_bonf',
    's_pvalue', 's_stat', 'pvalue', 'dof',
]
dfstat[cols]

In [None]:
#dfstat.columns

### Run-run one model one run consensus

In [None]:
run_list

In [None]:
force = False
verbose = False

chosen_model, run = 3, 'run01'

# Soft consensus for a single run and a single model.
dfrep = gem.calc_soft_one_run_one_model_consensus(
    run=run,
    case_list=case_list,
    chosen_model=chosen_model,
    force=force,
    verbose=verbose,
)
print(len(dfrep))
dfrep.head(3)

In [None]:
force = False
verbose = True

pd.options.display.float_format = "{:,.3f}".format

# call many runs, all models, all cases calc_run_model_4DSSQ
# old: calc_all_semantic_unanimous_repro
dfc_stat = gem.calc_soft_consensus_stats_case_i_dfp(
    run_list=run_list,
    case_list=case_list,
    chosen_model_list=chosen_model_list,
    i_dfp_list=i_dfp_list,
    force=force,
    verbose=verbose,
)

# Source column -> shorter display header, in display order.
short_headers = {
    'run': 'run',
    'chosen_model': 'model',
    'case': 'case',
    'i_dfp': 'i_dfp',
    'mu_consensus_yes': 'mu_cons_yes',
    'std_consensus_yes': 'std_cons_yes',
    'mu_unanimous': 'mu_unan',
    'std_unanimous': 'std_unan',
    'n': 'n',
    'n_yes': 'n_yes',
    'n_doubt': 'n_doubt',
    'n_no': 'n_no',
    'n_unan': 'n_unan',
    'n_not_unan': 'n_not_unan',
}
cols = list(short_headers.keys())
cols2 = list(short_headers.values())

dfc_stat2 = dfc_stat[cols]
dfc_stat2.columns = cols2
dfc_stat2

In [None]:
verbose=True

# Columns shown in the report and the shorter headers displayed for them.
cols=['run', 'chosen_model', 'case', 'i_dfp', 'mu_consensus_yes',
       'std_consensus_yes', 'mu_unanimous', 'std_unanimous', 'n', 'n_yes', 'n_doubt', 'n_no',
       'n_unan', 'n_not_unan']
cols2=['run', 'model', 'case', 'i_dfp', 'mu_cons_yes',
       'std_cons_yes', 'mu_unan', 'std_unan', 'n', 'n_yes', 'n_doubt', 'n_no',  'n_unan', 'n_not_unan']

dfrep = gem.open_soft_run_run_one_model_consensus(chosen_model_list=chosen_model_list, verbose=verbose)
if dfrep is None or dfrep.empty:
    # The fallback frame must carry the expected columns: a bare pd.DataFrame()
    # makes the dfrep[cols] selection below raise KeyError.
    dfrep = pd.DataFrame(columns=cols)

print(len(dfrep))

case=case_list[0]

# Select on a copy so renaming the headers never touches dfrep.
dfrep2 = dfrep[cols].copy()
dfrep2.columns = cols2
dfrep2.head(6)

### Run-run many-models consensus

In [None]:
case_list

In [None]:
verbose = True
force = False
run1, run2 = 'run01', 'run02'

# compare_2_runs_unanimous_mean --> calc_soft_RRCR_mean -> calc_soft_run_run_consensus_unanimous
dfc = gem.rrcr_stats_2_runs(
    run1, run2,
    case_list=case_list,
    chosen_model_list=chosen_model_list,
    force=force,
    verbose=verbose,
)

dfc

In [None]:
i_dfp_list, chosen_model_list

In [None]:
verbose = True
force = False
run = 'run01'

# report_gemini
msg, df = gem.calc_analytical_soft_consensus(
    run=run,
    case_list=case_list,
    i_dfp_list=i_dfp_list,
    chosen_model_list=chosen_model_list,
    force=force,
    verbose=verbose,
)
print(len(df))
df

### Run-run many-models consensus reproducibility

In [None]:
verbose = True
force = False
run1, run2 = 'run01', 'run02'

# calc_soft_run_run_one_model_consensus_repro
dfstat = gem.calc_soft_RRCR_stats_per_idfp(
    run1=run1, run2=run2,
    chosen_model_list=chosen_model_list,
    case_list=case_list,
    i_dfp_list=i_dfp_list,
    force=force, verbose=verbose,
)

# Show only the "repro_yes" columns (dfstat.columns lists everything available).
cols = [
    'case', 'i_dfp', 'n', 'repro_yes_pvalue',
    'repro_yes_mu_perc1', 'repro_yes_std_perc1',
    'repro_yes_mu_perc2', 'repro_yes_std_perc2',
    'repro_yes_list1', 'repro_yes_list2',
]

dfstat[dfstat.i_dfp == 0][cols]

In [None]:
# Same rows as the previous cell, showing the "unan" columns instead.
cols = [
    'case', 'i_dfp', 'n', 'unan_pvalue',
    'unan_mu_perc1', 'unan_std_perc1',
    'unan_mu_perc2', 'unan_std_perc2',
    'unan_list1', 'unan_list2',
]
dfstat[dfstat.i_dfp == 0][cols]

### Open dfpiv: one run, one model

In [None]:
case = case_list[0]
run = 'run01'
chosen_model1 = 1

# `verbose` keeps whatever value the last executed cell assigned to it.
dfpiv1 = gem.open_dfpiv_gemini_run_case_model(
    run=run, case=case, chosen_model=chosen_model1, verbose=verbose
)
# print(dfpiv1.columns)
# print("")
print(len(dfpiv1))
dfpiv1.head(3)

### Open consensus one model, one run

In [None]:
# `case` is assigned here but not passed to the call below.
case=case_list[0]
run='run01'
chosen_model1 = 1

# Load the consensus pivot for one run and one model; `verbose` comes from an earlier cell.
dfpiv1 = gem.open_dfpiv_semantic_consensus_run_per_model(run=run, chosen_model=chosen_model1, verbose=verbose)
# print(dfpiv1.columns)
# print("")
print(len(dfpiv1))
dfpiv1.head(3)

### Open consensus all models, one run

In [None]:
case=case_list[0]
run='run01'
chosen_model1 = 1

# Load the all-models consensus pivot for one run; `verbose` comes from an earlier cell.
dfpiva = gem.open_gemini_dfpiva_all_models_one_run(run=run, chosen_model_list=chosen_model_list, verbose=verbose)
# print(dfpiva.columns)
# print("")
# Report the frame that was just loaded (this cell previously printed the
# single-model dfpiv1 left over from the cell above).
print(len(dfpiva))
dfpiva.head(3)

In [None]:
# Source columns of dfpiva, in display order.
cols = ['case', 'i_dfp', 'pathway_id', 'pathway', 
        'simple_model_1', 'simple+pubmed_model_1', 'disease_model_1', 'disease+pubmed_model_1', 
        'simple_model_3', 'simple+pubmed_model_3', 'disease_model_3', 'disease+pubmed_model_3', 
        'run', 'consensus', 'n_yes', 'n_no', 'unanimous']

# Short display header for each entry of `cols`, position by position.
cols1 = ['case', 'i_dfp', 'pathway_id', 'pathway',
         'simp1', 'simpub1', 'dis1', 'dispub1',
         'simp3', 'simpub3', 'dis3', 'dispub3', 
         'run', 'consensus', 'n_yes', 'n_no',
       'unanimous']

# Final selection: the short headers without 'run'.
cols2 = ['case', 'i_dfp', 'pathway_id', 'pathway',
         'simp1', 'simpub1', 'dis1', 'dispub1',
         'simp3', 'simpub3', 'dis3', 'dispub3', 
         'consensus', 'n_yes', 'n_no',  'unanimous']

# Select by name and rename through an explicit mapping. Assigning `.columns`
# positionally would silently mislabel the data if dfpiva's column order or
# count ever differed from `cols`; here a missing column raises KeyError instead.
dfpiva2 = dfpiva[cols].rename(columns=dict(zip(cols, cols1)))
dfpiva2 = dfpiva2[cols2]
print(len(dfpiva2))
dfpiva2.head(4)

### Inter-model consensus reproducibility: flexible or not

  - method: run_all_inter_model_soft_consensus_repro()
    - flexible ~consensus, not flexible ~equal consensus, n_yes, n_no
    - for each run, case, i_dfp
      - run_inter_model_soft_consensus_repro()
        - dfpiv0 = self.open_dfpiv_semantic_consensus_run_per_model(run=run, chosen_model=chosen_model0, verbose=verbose)
        - dfpiv1 = self.open_dfpiv_semantic_consensus_run_per_model(run=run, chosen_model=chosen_model1, verbose=verbose)
          - filter case and i_dfp
          - flexible: equal consensus
          - not flexible: equal consensus, n_yes, n_no

In [None]:
verbose = False
force = False

# The two models being compared.
chosen_model0, chosen_model1 = 1, 3

# run_all_comparing_geminis_by_model
msg, dfc, df_idfp = gem.run_all_inter_model_soft_consensus_repro(
    chosen_model0=chosen_model0,
    chosen_model1=chosen_model1,
    run_list=run_list,
    case_list=case_list,
    force=force,
    verbose=verbose,
)

# print(msg)
# Compact view (df_idfp.columns lists everything available).
cols = [
    'run', 'case', 'i_dfp', 'n',
    'mean_consensus', 'std_consensus',
    'mean_cons_yes', 'std_cons_yes',
]

run = 'run01'
i_dfp = 0
print("Flexible: compare only consensuses")
print(len(df_idfp))
df_idfp[(df_idfp.run == run) & (df_idfp.i_dfp == i_dfp)][cols]

### Inter-model consensus venn - detailed

In [None]:
verbose = False
force = False

# Compare the first two configured models.
model0, model1 = chosen_model_list[0], chosen_model_list[1]

only_common_pathways = False

# After the loop the unpacked names hold the results of the last run.
for run in run_list:
    venn_result = gem.run_inter_model_soft_consensus_venn(
        run=run,
        case_list=case_list,
        i_dfp_list=i_dfp_list,
        model0=model0,
        model1=model1,
        only_common_pathways=only_common_pathways,
        force=force,
        verbose=verbose,
    )
    dfpiv0, model_name0, dfpiv1, model_name1, dff, fname = venn_result

    print(run, len(dff))

In [None]:
# Counts and percentages only (dff.columns lists everything available).
cols = [
    'run', 'model0', 'model1', 'case', 'consensus', 'i_dfp',
    'n0_pathways', 'n1_pathways',
    'n0_consensus', 'n1_consensus', 'n_tot_consensus', 'n_common_consensus',
    'n_only0_consensus', 'n_only1_consensus',
    'perc0_consensus', 'perc1_consensus',
    'perc_commons', 'perc_common_consensus',
    'perc_only0_consensus', 'perc_only1_consensus',
]

dff[cols]

In [None]:
# Counts and percentages plus the list-valued columns; later cells reuse `cols`.
cols = [
    'run', 'model0', 'model1', 'case', 'consensus', 'i_dfp',
    'n0_pathways', 'n1_pathways',
    'n0_consensus', 'n1_consensus', 'n_tot_consensus', 'n_common_consensus',
    'n_only0_consensus', 'n_only1_consensus',
    'perc0_consensus', 'perc1_consensus',
    'perc_commons', 'perc_common_consensus',
    'perc_only0_consensus', 'perc_only1_consensus',
    'vals0', 'vals1',
    'pathw_default_tot', 'pathw_new0', 'pathw_new1',
    'pathw_default0', 'pathw_default1',
]

i = 0
case = case_list[i]

# Rows of the first case with i_dfp 0, transposed for reading.
dff[(dff.case == case) & (dff.i_dfp.isin([0]))][cols].T

In [None]:
# Same transposed view for the second case.
i = 1
case = case_list[i]

dff[(dff.case == case) & (dff.i_dfp.isin([0]))][cols].T

#### New pathways (discover)

In [None]:
case=case_list[0]

# Rows of the first case with i_dfp 0 or 3; the following cells read df2 as well.
df2 = dff[ (dff.case==case) & (dff.i_dfp.isin([0,3]) ) ][cols]

mat_new = df2.iloc[0].pathw_new0

# If the cell holds the list as text, parse it as a Python literal.
# ast.literal_eval accepts only literals, whereas eval() would execute any code in the string.
if isinstance(mat_new, str):
    mat_new = ast.literal_eval(mat_new)

print(len(mat_new))
print("\n".join(mat_new))

In [None]:
# pathw_new1 of the first selected row; this overwrites mat_new from the previous cell.
mat_new = df2.iloc[0].pathw_new1

# Parse a text-encoded list as a Python literal; unlike eval(), this cannot execute code.
if isinstance(mat_new, str):
    mat_new = ast.literal_eval(mat_new)

print(len(mat_new))
print("\n".join(mat_new))

In [None]:
# vals0 of the first selected row.
mat0 = df2.iloc[0].vals0

# Parse a text-encoded list as a Python literal; unlike eval(), this cannot execute code.
if isinstance(mat0, str):
    mat0 = ast.literal_eval(mat0)

print(len(mat0))
print("\n".join(mat0))

In [None]:
# vals1 of the first selected row.
mat1 = df2.iloc[0].vals1

# Parse a text-encoded list as a Python literal; unlike eval(), this cannot execute code.
if isinstance(mat1, str):
    mat1 = ast.literal_eval(mat1)

print(len(mat1))
print("\n".join(mat1))

In [None]:
# pathw_default_tot of the first selected row.
enr_defa = df2.iloc[0].pathw_default_tot

# Parse a text-encoded list as a Python literal; unlike eval(), this cannot execute code.
if isinstance(enr_defa, str):
    enr_defa = ast.literal_eval(enr_defa)

print(len(enr_defa))
print("\n".join(enr_defa))

In [None]:
# NOTE(review): mat_new was last assigned from pathw_new1 (not pathw_new0) while mat0
# comes from vals0 — confirm the intended model is being compared here.
len(enr_defa), len(mat0), 'not all pathways are Yes for Gemini ->', len(mat_new)

### Inter-model statistics

In [None]:
verbose = True
force = False

# calc_stat_gemini_compare_2_models
df_stat = gem.calc_stat_inter_model_soft_consensus_venn(
    run_list=run_list,
    case_list=case_list,
    i_dfp_list=i_dfp_list,
    model0=model0,
    model1=model1,
    only_common_pathways=only_common_pathways,
    force=force,
    verbose=verbose,
)

# The 'text' column is left out of the display.
cols = [
    'run', 'model0', 'model1',
    'mean_all', 'std_all',
    'mean_enr_yes_no', 'std_enr_yes_no',
]
df_stat[cols]

In [None]:
# df_stat.columns

In [None]:
i_dfp_list

### Inter-models Venn Diagrams

In [None]:
verbose=False

run='run01'
print_plot=True
save=True
dpi=300
figsize=(12,8)

want_run=True
%matplotlib inline

if want_run:
    dfpiv0, model_name0, dfpiv1, model_name1, dff, fname = \
           gem.run_inter_model_soft_consensus_venn(run=run, case_list=case_list, 
                                              i_dfp_list=i_dfp_list, only_common_pathways=only_common_pathways,
                                              model0=model0, model1=model1, 
                                              force=force, verbose=verbose)

    for case in case_list:
        for i_dfp in i_dfp_list:
            for filter in ['Yes', 'No', 'Doubt']:
                print(">>>", case, filter, len(dfpiv0), len(dfpiv1))
                fig, text, perc_commons, commons, n0, n1, only0, only1 = \
                          gem.venn_diagram_between_2models(run=run, filter=filter, case=case, i_dfp=i_dfp,
                                                           model_name0=model_name0, df0=dfpiv0,
                                                           model_name1=model_name1, df1=dfpiv1,
                                                           only_common_pathways=only_common_pathways,
                                                           print_plot=print_plot, title_font_size=12,
                                                           dpi=dpi, save=save, figsize=figsize, verbose=verbose)
        
                # print(text)

In [None]:
gem.root_figure

### Check the data

In [None]:
verbose = False
force = False

# Build the per-run summary for all models; the return value is discarded.
for run in run_list:
    print(">>>", run)
    _ = gem.summary_stat_dfpiv_all_models(
        run=run,
        case_list=case_list,
        chosen_model_list=chosen_model_list,
        force=force,
        verbose=verbose,
    )

In [None]:
run = 'run01'

# Inspect the summary for one run; `force` is left at the method's default here.
dfsumm = gem.summary_stat_dfpiv_all_models(
    run=run,
    case_list=case_list,
    chosen_model_list=chosen_model_list,
    verbose=verbose,
)
print(len(dfsumm))
dfsumm.head(8)

#### Barplot

In [None]:
run = 'run01'

# Yes/No bar chart for run01.
fig = gem.barplot_yes_no_per_case_run(
    run=run, case_list=case_list,
    chosen_model_list=chosen_model_list, i_dfp_list=i_dfp_list,
    width=800, height=600,
    fontsize=12, fontcolor='black',
    xaxis_title="cases-idfp-models", yaxis_title='n answers',
    minus_y_yes_no=-3, minus_y_i_dfp=-6, minus_y_case=-9,
    annot_fontfamily="Arial, monospace", annot_fontsize=12,
    annot_fontcolor='black',
    savePlot=True, verbose=False,
)

# Only show the figure when one was returned.
if fig:
    fig.show()

In [None]:
run = 'run02'

# Yes/No bar chart for run02, with the same layout settings as the run01 chart.
fig = gem.barplot_yes_no_per_case_run(
    run=run, case_list=case_list,
    chosen_model_list=chosen_model_list, i_dfp_list=i_dfp_list,
    width=800, height=600,
    fontsize=12, fontcolor='black',
    xaxis_title="cases-idfp-models", yaxis_title='n answers',
    minus_y_yes_no=-3, minus_y_i_dfp=-6, minus_y_case=-9,
    annot_fontfamily="Arial, monospace", annot_fontsize=12,
    annot_fontcolor='black',
    savePlot=True, verbose=False,
)

# Only show the figure when one was returned.
if fig:
    fig.show()

### Comparing runs - summary gemini consensus - all models
  - for each run
  - for all models and i_dfp (0..3)
  - summarise total Yes, No, Doubts, unanimous, not_unanimous

In [None]:
verbose=True
force=False
# NOTE(review): save_files mirrors `force`, so it is False here — confirm that
# skipping the save is intended when force is off.
save_files=force

# Summary consensus statistics over all runs, models and cases; returns a text report and a table.
text, dfcons = gem.calc_gemini_summary_consensus_statitics(run_list=run_list, chosen_model_list=chosen_model_list, 
                                                           case_list=case_list, save_files=save_files,
                                                           force=force, verbose=verbose)
print(text)

In [None]:
# Reload the summary consensus statistics.
text, dfcons = gem.open_gemini_summary_consensus_statitics(chosen_model_list=chosen_model_list, verbose=True)
print(len(dfcons))

run='run01'
i=0
case=case_list[i]

# NOTE(review): the case is printed, but the filter below selects by run only
# (the case condition is commented out), so rows of every case are displayed.
print(">>>", case, '\n')
dfcons[ (dfcons.run==run) ] # & (dfcons.case==case)