## Analysis of the Distribution of the Correct Hits ##

In [None]:
import os
import pandas as pd
import numpy as np
import math

%matplotlib inline
import matplotlib.pyplot as plt
from plotly import __version__
print(__version__)

import plotly.express as px
import plotly.graph_objects as go
from plotly.colors import n_colors
from plotly.subplots import make_subplots


from plotly.offline import download_plotlyjs, init_notebook_mode,plot,iplot
init_notebook_mode(connected=True)  #connect with Javascript

import numpy as np
import scipy.special

# Protocol identifiers handled in this analysis. `list_protocols` stays a single
# multi-line string (one quoted name per line, comma-separated), not a Python
# list; it is assembled here from the individual names.
_protocol_names = (
    'mixed_ali-delim-delim',
    'single_pep-delim-delim',
    'unpaired_ali-delim-delim',
    'mixed_ali-fl-fl',
    'mixed_ali-delim-fl',
    'unpaired_ali-delim-fl',
    'mixed_ali-delim-200',
    'mixed_ali-delim-100',
    'single_pep-delim-100',
    'single_pep-delim-200',
)
list_protocols = "\n" + ",\n".join(f"'{name}'" for name in _protocol_names) + "\n"



### Recover the environment variables from the config file ###

In [None]:
import configparser

# Directory holding config.ini, given relative to the current working directory.
# It is hard-coded rather than derived from `__file__` (presumably because
# `__file__` is not available when this runs as a notebook).
script_dir = "../"
config_path = os.path.join(script_dir, 'config.ini')

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Fail here with an explicit message instead of
# with a KeyError on the first option lookup below.
if not config.read(config_path):
    raise FileNotFoundError(
        f"Configuration file not found or unreadable: {os.path.abspath(config_path)}"
    )
config_defaults = config['DEFAULT']

# WORKING_DIR is hard-coded two levels above the current directory; it is not
# read from config['DEFAULT']['WORKING_DIR'].
WORKING_DIR = os.path.abspath("../..")
DATA_DIR = os.path.abspath(os.path.join(WORKING_DIR, 'data'))

# All remaining locations are configured relative to DATA_DIR.
CUTMODELS_DIR = os.path.abspath(os.path.join(DATA_DIR, config_defaults['CUTMODELS_DIR']))
CAPRIEVAL_DIR = os.path.abspath(os.path.join(DATA_DIR, config_defaults['CAPRIEVAL_DIR']))
REFERENCE_STRUCT_DIR = os.path.abspath(os.path.join(DATA_DIR, config_defaults['REFERENCE_DIR']))

n_iterations = int(config_defaults['N_ITERATIONS'])

RESULTS_DIR = os.path.abspath(os.path.join(DATA_DIR, config_defaults['RESULTS_DIR']))
# Path of the global output table, located inside RESULTS_DIR.
fglobaloutput_path = os.path.abspath(os.path.join(RESULTS_DIR, config_defaults['OUTPUT_GLOBAL']))

In [None]:
path = RESULTS_DIR
path_figures = os.path.join(path, 'Figures')
# Create the figures directory if it is missing. os.makedirs avoids spawning a
# shell (which breaks on paths containing spaces and is not portable) and
# raises on failure instead of failing silently.
os.makedirs(path_figures, exist_ok=True)
# Set to True to also write the figures to disk.
DO_DUMP_FIGURES = False

# Number of leading lines in the global output file that describe the columns;
# the same number of lines is skipped when the table itself is loaded.
Ncol = 23
path2file = fglobaloutput_path

# Each of the first Ncol lines yields one column name: the second ':'-separated
# field of the line's last whitespace-separated token.
# Only those Ncol lines are read (zip stops once range(Ncol) is exhausted), so
# the whole results file is not pulled into memory just to parse its header.
with open(path2file) as fin:
    list_header = [
        line.split()[-1].split(':')[1]
        for _, line in zip(range(Ncol), fin)
    ]

df = pd.read_table(
    path2file,
    sep="\t",
    header=None,
    names=list_header,
    skiprows=Ncol,
    low_memory=False,
)


# Display the loaded table as the cell output.
df

In [None]:
#
# IF NEEDED: entries to exclude from the analysis.
#  Add elements to list_exclude as "<index>_<PDB>" strings; they are used as
#  labels of the "index_pdb" column when rows are dropped from df.
#

list_exclude = []

In [None]:

# Remove the excluded entries: index the table by "index_pdb", drop the labels
# listed in list_exclude, then turn "index_pdb" back into a regular column.
df_with_index = df.set_index("index_pdb")
df = df_with_index.drop(index=list_exclude).reset_index()


In [None]:
# Unique "index_pdb" entries, ordered by the integer before their first "_"
# and, for equal integers, by the full string.
list_entries_sorted = sorted(
    set(df['index_pdb']),
    key=lambda entry: (int(entry.split('_')[0]), entry),
)
print(list_entries_sorted)
print(len(list_entries_sorted))

# Distinct protocol names present in the table, in set iteration order (not sorted).
list_conditions = list(set(df['protocol']))
print(list_conditions)

In [None]:

# Box plots of the 'SizeModel' column for three protocols, restricted to the
# rows with AF2Rank == 1 and version == 1.
is_top_model = (df['AF2Rank'] == 1) & (df['version'] == 1)
dfsampledelim = df.loc[(df['protocol'] == 'mixed_ali-delim-delim') & is_top_model]
dfsampleFL = df.loc[(df['protocol'] == 'mixed_ali-fl-fl') & is_top_model]
dfsampledelFL = df.loc[(df['protocol'] == 'mixed_ali-delim-fl') & is_top_model]

fig = go.Figure()

# One horizontal box per protocol. The y tick labels below assume the traces
# sit at y = 0, 1, 2 in the order they are added here.
for sizes in (
    dfsampleFL['SizeModel'],
    dfsampledelFL['SizeModel'],
    dfsampledelim['SizeModel'],
):
    fig.add_trace(go.Box(x=sizes, line=dict(color='black')))

fig.update_layout(
    barmode='overlay',
    template='simple_white',
    autosize=False,
    width=700,
    height=300,
    font=dict(family="Arial", size=16),
    showlegend=False,
    yaxis=dict(
        ticktext=["Full-Length Protein Partners",
                  "Delimited Receptor / Full-Length Partner",
                  "Delimited Receptor / Peptide Ligand"],
        tickvals=[0, 1, 2],
        tickmode="array",
        tickfont=dict(size=16),
    ),
    xaxis=dict(
        # The axis title font is set through title.font; the legacy `titlefont`
        # attribute is deprecated and rejected by recent plotly releases.
        title=dict(text='Inputs Size', font=dict(size=16)),
        tickfont=dict(size=16),
    ),
)

# Draw the boxes fully opaque.
fig.update_traces(opacity=1.0)

if DO_DUMP_FIGURES:
    # NOTE(review): `extension` is not defined in the cells shown here; it must
    # be set before enabling DO_DUMP_FIGURES, otherwise this raises NameError.
    dump_dir = os.path.join(path, "AnalysesForFigures", "Figures")
    # write_image does not create missing directories.
    os.makedirs(dump_dir, exist_ok=True)
    fig.write_image(os.path.join(dump_dir, f"Dataset_properties{extension}_LengthBarplot.svg"))

fig.show()