# Results report

In [None]:
#Import all the needed python modules:

# Data Wrangling
import numpy as np
import pandas as pd

# Visualization
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import warnings
from plotly.subplots import make_subplots
from PIL import Image


# Make 'notebook' the default Plotly renderer, so every fig.show() call below
# uses it without having to pass a renderer explicitly.
pio.renderers.default='notebook'

## Summary

This is a report summarizing your results.

## Coverage

Here you can see which parts of your query sequence we found covered by experimentally resolved structures (in <span style="color:blue">blue</span>) and covered by models (in <span style="color:orange">orange</span>). If you have introduced your own structures or models, you will find them here too.

In [None]:
import pandas as pd
import os
from pathlib import Path, PurePosixPath



# Collect the coverage tables found in ./REPORT/COVERAGE/ (sub-directories are
# skipped). The entries are sorted because Path.iterdir() yields them in an
# arbitrary, OS-dependent order, which would otherwise shuffle the subplot
# rows (and the trace labels) from one run to the next.
coverage_files = sorted(
    child for child in Path('./REPORT/COVERAGE/').iterdir() if child.is_file()
)

# One DataFrame per file; each table is expected to provide the "ResID" and
# "Structure" columns that are plotted below.
df_list = [pd.read_csv(child) for child in coverage_files]

# File names (without the directory part) are used as trace labels.
structure_list = [child.name for child in coverage_files]

# One subplot row per coverage file, all of them sharing the same x axis.
fig = make_subplots(rows=len(df_list), cols=1, shared_xaxes=True)

for i, (df, structure_name) in enumerate(zip(df_list, structure_list), start=1):
    # add_trace(..., row=, col=) is the supported replacement for the
    # deprecated Figure.append_trace.
    fig.add_trace(go.Scatter(
        x=df["ResID"],
        y=df["Structure"],
        fill='tozeroy',
        name=structure_name
    ), row=i, col=1)

fig.update_layout(height=400, width=1000, title_text="Stacked Subplots")

fig.show()

## Hinges and flexibility

Flexibility is an important feature of proteins, since they need to move to perform their function and interact with their substrates. In the following section, we provide you with two types of flexibility prediction: the Dynamic Flexibility Index and Hinge Prediction.

*Dynamic Flexibility Index*  
This is a per-residue index indicating the contribution of each residue to the overall flexibility of the protein. It uses a method based on an Elastic Network Model, which is a more lightweight (but less precise, obviously) alternative to Molecular Dynamics. For more info, [here](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3673471/) is the original paper.

*Hinge Prediction*  
Hinges are the regions of the protein that allow it to move and change conformations. Using [this tool](https://academic.oup.com/bioinformaticsadvances/advance-article/doi/10.1093/bioadv/vbac007/6525212?login=true) the predicted hinge regions are shown on top of the DFI plot, with the significant ones colored in green, and the non-significant ones in red.

In [None]:
# A couple of function definitions to make things easier later
def read_hng_files(hng_dir):
    """
    Given a folder containing PACKMAN .hng files, return a dictionary with the format:
    {first 6 characters of the filename : list of "start:end" strings}

    Each file is read as a header-less, tab-separated table with the columns
    Chain, Classification and Start:End. Only the rows whose Classification
    starts with "H" are kept; their Start:End values form the list stored for
    that file.

    Sub-directories of hng_dir are ignored. Files are visited in sorted order
    so the key order of the returned dictionary is reproducible (Path.iterdir()
    alone gives an arbitrary order). Two files sharing the same first 6
    characters overwrite each other; the last one in sorted order wins.
    """
    hinges_dict = {}
    for child in sorted(Path(hng_dir).iterdir()):
        if not child.is_file():
            continue
        hinges_domains_df = pd.read_csv(child, sep="\t", names=["Chain", "Classification", "Start:End"])
        # na=False makes rows with a missing classification count as "not a
        # hinge" instead of propagating NaN into the boolean mask.
        is_hinge = hinges_domains_df['Classification'].str.startswith('H', na=False)
        hinges_dict[child.name[0:6]] = hinges_domains_df.loc[is_hinge, "Start:End"].tolist()
    return hinges_dict

def read_DFI_csvs(dfi_csv_dir):
    """
    Given a folder containing csv files containing Dynamic Flexibility Index info,
    return a dictionary with the format:
    {first 6 characters of the filename : pandas DataFrame with the file contents}

    Format of the csv:
        header: Chain,ResID,pctdfi
    Chain example:3a58_A

    Sub-directories of dfi_csv_dir are ignored. Files are visited in sorted
    order so the key order of the returned dictionary is reproducible
    (Path.iterdir() alone gives an arbitrary order). Two files sharing the same
    first 6 characters overwrite each other; the last one in sorted order wins.
    """
    df_dict = {}
    for child in sorted(Path(dfi_csv_dir).iterdir()):
        if child.is_file():
            df_dict[child.name[0:6]] = pd.read_csv(child)
    return df_dict


In [None]:
# Obtain DFI profiles from CSV files: {structure key : DataFrame}
dfi_dict = read_DFI_csvs('./REPORT/DFI/')

# Obtain hinges from .hng files: {structure key : ["start:end", ...]}
# NOTE(review): the other inputs live under ./REPORT/ while this one is read
# from ./HINGES/ -- confirm that this is the intended location.
hng_dict = read_hng_files('./HINGES/')

# One subplot row per structure with a DFI profile, all sharing the x axis.
fig = make_subplots(rows=len(dfi_dict), cols=1, shared_xaxes=True)

for i, (file, df) in enumerate(dfi_dict.items(), start=1):
    # Label the trace with the key of the structure being plotted. The list
    # built by the coverage cell is not used: it comes from a different folder
    # and need not match dfi_dict in either length or order.
    # add_trace(..., row=, col=) replaces the deprecated Figure.append_trace.
    fig.add_trace(go.Scatter(
        x=df["ResID"],
        y=df["pctdfi"],
        name=file
    ), row=i, col=1)

    # A structure without a matching .hng file simply gets no hinge overlay.
    # NOTE(review): the report text says non-significant hinges are drawn in
    # red, but read_hng_files keeps no significance information, so every
    # hinge is drawn in green here.
    for j, hinge in enumerate(hng_dict.get(file, []), start=1):
        bounds = hinge.split(':')
        # Convert the bounds to numbers so the rectangle and its annotation
        # are positioned on the numeric residue axis rather than compared as
        # strings.
        fig.add_vrect(
            x0=int(bounds[0]),
            x1=int(bounds[1]),
            annotation_text=f"H{j}", annotation_position="top left",
            fillcolor="#52BE80", opacity=0.2,
            layer="below", line_width=0,
            row=i, col=1)

# The layout does not depend on the loop, so it is applied once.
fig.update_layout(height=600, width=1200, title_text="Stacked Subplots", margin_pad=0, barmode="group")

fig.show()

In [None]:
%%capture
# The %%capture cell magic above captures this cell's output instead of
# displaying it in the notebook.
# Export this notebook as HTML: --no-input leaves the code cells' source out
# of the export and --no-prompt leaves out the input/output prompts.
!jupyter nbconvert --to html --no-input --no-prompt report.ipynb

# Clear the interactive namespace; -f skips the confirmation question.
%reset -f