# Results report

In [None]:
#Import all the needed python modules:

# Data Wrangling
import numpy as np
import pandas as pd

# Visualization
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import warnings
from plotly.subplots import make_subplots
from PIL import Image

# File string matching
import fnmatch

# Make 'notebook' the default plotly renderer used by the fig.show() calls below
pio.renderers.default='notebook'

## Summary

This is a report summarizing your results.

## Coverage

Here you can see which parts of your query sequence we found covered by experimentally resolved structures (in <span style="color:blue">blue</span>) and covered by models (in <span style="color:orange">orange</span>). If you have introduced your own structures or models, you will find them here too.

In [None]:
import pandas as pd
import os
from pathlib import Path, PurePosixPath



# Load one coverage table per file found in the coverage folder. Files with
# "composite" in their name live in the same folder but are skipped here.
coverage_dir = Path('./REPORT/COVERAGE/')

df_list = []         # coverage DataFrames, reused by later cells
structure_list = []  # file names, parallel to df_list (used as trace names)
# Sorted so that the subplot order is reproducible between runs
for child in sorted(coverage_dir.iterdir()):
    # Test the file name only, so that a "composite" somewhere in the parent
    # path cannot exclude every file.
    if child.is_file() and "composite" not in child.name:
        df_list.append(pd.read_csv(child))
        structure_list.append(child.name)

# make_subplots rejects rows=0; fall back to a single empty row when no
# coverage file was found so the report still renders.
fig = make_subplots(rows=max(len(df_list), 1), cols=1, shared_xaxes=True)

# One subplot row per coverage file; rows are 1-indexed in plotly
for i, (df, structure_name) in enumerate(zip(df_list, structure_list), start=1):
    # add_trace(..., row=, col=) replaces the deprecated append_trace
    fig.add_trace(go.Scatter(
        x=df[df.columns[0]], # ResID
        y=df[df.columns[1]],
        fill='tozeroy',
        name=structure_name
    ), row=i, col=1)

fig.update_layout(height=400, width=1000, title_text="Coverage")
fig.update_yaxes(showgrid=False, range=[0,1], nticks=2)

fig.show()

## Hinges and flexibility

Flexibility is an important feature of proteins, since they need to move to perform their function and interact with their substrates. In the following section, we provide you with two types of flexibility prediction: the Dynamic Flexibility Index and Hinge Prediction.

*Dynamic Flexibility Index*  
This is a per-residue index indicating the contribution of each residue to the overall flexibility of the protein. It uses a method based on an Elastic Network Model, which is a more lightweight (but less precise, obviously) alternative to Molecular Dynamics. For more info, [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3673471/) is the original paper.

*Hinge Prediction*  
Hinges are the regions of the protein that allow it to move and change conformations. Using [this tool](https://academic.oup.com/bioinformaticsadvances/advance-article/doi/10.1093/bioadv/vbac007/6525212?login=true), the predicted hinge regions are shown on top of the DFI plot, with the significant ones colored in green and the non-significant ones in red.

In [None]:
# A couple of function definitions to make things easier later
def read_hng_files(hng_dir):
    """
    Collect the hinge ranges listed in every PACKMAN ``*.hng`` file of a folder.

    Each file is read as a header-less, tab-separated table with the columns
    Chain, Classification and Start:End. Only the rows whose Classification
    starts with "H" are kept.

    Parameters
    ----------
    hng_dir : str or path-like
        Folder containing the .hng files. Sub-folders and files with another
        extension are ignored.

    Returns
    -------
    dict
        {first 6 characters of the file name : list of "start:end" strings}
    """
    hinges_dict = {}
    # Sorted so the dictionary order does not depend on the filesystem
    for child in sorted(Path(hng_dir).iterdir()):
        if not (child.is_file() and fnmatch.fnmatch(child.name, "*.hng")):
            continue
        hinges_domains_df = pd.read_csv(child, sep="\t", names=["Chain", "Classification", "Start:End"])
        # na=False: a row without a classification is never counted as a hinge
        is_hinge = hinges_domains_df['Classification'].str.match('^H.*', na=False)
        # Keyed by the first 6 characters, the same key read_DFI_csvs uses
        hinges_dict[child.name[0:6]] = hinges_domains_df.loc[is_hinge, "Start:End"].tolist()
    return hinges_dict

def read_DFI_csvs(dfi_csv_dir):
    """
    Load every Dynamic Flexibility Index (DFI) csv file found in a folder.

    Format of the csv:
        header: Chain,ResID,pctdfi
    Chain example:3a58_A

    Parameters
    ----------
    dfi_csv_dir : str or path-like
        Folder containing the csv files. Every regular file in it is read,
        whatever its extension; sub-folders are ignored.

    Returns
    -------
    dict
        {first 6 characters of the file name : DataFrame with the csv contents}
    """
    # Sorted so the dictionary order does not depend on the filesystem
    return {
        child.name[0:6]: pd.read_csv(child)
        for child in sorted(Path(dfi_csv_dir).iterdir())
        if child.is_file()
    }

def read_compsite_files(composite_dir):
    """
    Load every composite coverage csv file found in a folder.

    Only regular files whose name matches ``*composite_coverage.csv`` are read.

    Parameters
    ----------
    composite_dir : str or path-like
        Folder containing the composite .csv files.

    Returns
    -------
    dict
        {first 6 characters of the file name : DataFrame with the csv contents}
    """
    # Sorted so the dictionary order does not depend on the filesystem
    return {
        child.name[0:6]: pd.read_csv(child)
        for child in sorted(Path(composite_dir).iterdir())
        if child.is_file() and fnmatch.fnmatch(child.name, "*composite_coverage.csv")
    }

In [None]:
# Obtain DFI profiles from CSV files
dfi_dict = read_DFI_csvs('./REPORT/DFI/')

# Obtain hinges from .hng files
hng_dict = read_hng_files('./HINGES/')

# One subplot row per DFI profile. make_subplots rejects rows=0, so keep a
# single empty row when no profile was found.
fig = make_subplots(rows=max(len(dfi_dict), 1), cols=1, shared_xaxes=True)

for i, (file, df) in enumerate(dfi_dict.items(), start=1):
    # add_trace(..., row=, col=) replaces the deprecated append_trace
    fig.add_trace(go.Scatter(
        x=df["ResID"],
        y=df["pctdfi"],
        # Label the trace with its own key: the coverage file list is built
        # from another folder and need not match the DFI files in order or length.
        name=file
    ), row=i, col=1)
    # A profile without a matching .hng entry is simply drawn without hinges.
    # NOTE(review): every hinge is drawn in green; the hinge data loaded here
    # holds only "start:end" strings, no significance information.
    for j, hinge in enumerate(hng_dict.get(file, []), start=1):
        bounds = hinge.split(':')
        fig.add_vrect(
            # Numeric bounds, so that they are ordered as numbers
            # (as text, "95" would sort after "102").
            x0=float(bounds[0]),
            x1=float(bounds[1]),
            annotation_text=f"H{j}", annotation_position="top left",
            fillcolor="#52BE80", opacity=0.2,
            layer="below", line_width=0,
            row=i, col=1)

# Figure-wide settings, applied once after all rows are filled
fig.update_layout(height=600, width=1200, title_text="DFI profiles + Predicted hinges",
                  margin_pad=0, barmode="group")
fig.update_yaxes(showgrid=False, range=[0,1], nticks=2)

fig.show()

## Composite

The program automatically generates a composite of the structures found, and an IMP topology file corresponding to it. In the figure below, the selected structures are represented in the positions they provide structure for.

In [None]:
# Load the composite coverage tables: read_compsite_files picks up the files
# named "*composite_coverage.csv" in this folder.
composite_dir = "./REPORT/COVERAGE/"
comp_dict = read_compsite_files(composite_dir)

In [None]:
# Read the files (repeated here so this cell does not depend on a figure or
# variables left behind by another cell)
composite_dir = "./REPORT/COVERAGE/"
comp_dict = read_compsite_files(composite_dir)

if not comp_dict:
    print(f"No composite coverage files found in {composite_dir}")

# Plot: one figure per composite file, one subplot row per data column.
# The first column supplies the x values and is not plotted on its own.
for file, df in comp_dict.items():
    value_columns = list(df.columns[1:])
    if not value_columns:
        # Nothing to draw, and make_subplots rejects rows=0
        continue
    fig = make_subplots(rows=len(value_columns), cols=1, shared_xaxes=True)
    for i, column in enumerate(value_columns, start=1):
        # add_trace(..., row=, col=) replaces the deprecated append_trace
        fig.add_trace(go.Scatter(
            x=df.iloc[:,0],
            y=df[column],
            fill='tozeroy',
            name=str(column)
        ), row=i, col=1)
    # Only tag the title with the file key when several figures are produced
    if len(comp_dict) == 1:
        title = "Composite coverage"
    else:
        title = f"Composite coverage ({file})"
    fig.update_layout(height=300, width=1200, title_text=title,
                      margin_pad=0, barmode="overlay")
    fig.update_yaxes(showgrid=False, range=[0,1], nticks=2)
    # Shown inside the loop: each file gets its own figure, so showing only
    # after the loop would display the last one alone.
    fig.show()


In [None]:
# Collect data for the interactive Dashboard:
# the name of every column except the first one, for each coverage table.
all_filenames = [
    str(label)
    for table in df_list
    for label in table.columns[1:]
]

print(all_filenames)


In [None]:
## DASH

from jupyter_dash import JupyterDash

import dash
from dash import dcc
from dash import html

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
app.css.config.serve_locally = True
app.scripts.config.serve_locally = True


# Two side-by-side columns of controls.
# NOTE(review): the radio items, text input and slider look like placeholder
# widgets (city names, 'MTL') — confirm what they are meant to control.
app.layout = html.Div([
    html.Div(children=[
        html.Label('Multi-Select Custom fragments'),
        # Start with nothing selected: a pre-selected value has to be one of
        # the options, and the options are the collected column names.
        dcc.Dropdown(all_filenames,
                     [],
                     multi=True),

        html.Br(),
        html.Label('Radio Items'),
        dcc.RadioItems(['New York City', 'Montréal', 'San Francisco'], 'Montréal'),
    ], style={'padding': 10, 'flex': 1}),

    html.Div(children=[
        html.Label('Checkboxes'),
        dcc.Checklist(all_filenames,),

        html.Br(),
        html.Label('Text Input'),
        dcc.Input(value='MTL', type='text'),

        html.Br(),
        html.Label('Slider'),
        dcc.Slider(
            min=0,
            max=9,
            marks={i: f'Label {i}' if i == 1 else str(i) for i in range(1, 6)},
            value=5,
        ),
    ], style={'padding': 10, 'flex': 1})
# Style keys are written in camelCase (flexDirection, not flex-direction)
], style={'display': 'flex', 'flexDirection': 'row'})
app.run_server(mode="inline")
#app.run_server(mode="jupyterlab")

In [None]:
#!jupyter nbconvert --to html --no-input --no-prompt report_template.ipynb

In [None]:
#fuser -k 8050/tcp