# Description
In order to represent the gene alignments resulted of applying [Smith-Watermann]('../k_mers_rel_all.ipynb'), *dash-bio* package has a solution for it. Unfortunately, this function is created for nucleotides or aminoacid alignments read from *FASTA* or *Clustal* format files.  

In this notebook, we try to adapt or find possible solutions for using this function and representation.

*Reference: https://dash.plotly.com/dash-bio/alignmentchart* 
*Code repo: https://github.com/plotly/dash-bio/blob/master/dash_bio/AlignmentChart.py*

In [1]:
import dash
import dash_bio as dashbio
from dash import html
import urllib.request as urlreq
from dash.dependencies import Input, Output
import pandas as pd

import sys
# setting path
sys.path.append('../') 

# importing
from swco import preprocessing

In [2]:
human = pd.read_excel('C:/Users/bertr/LRZ Sync+Share/Thesis/Data/Raw/Tables_Filtered_IK.xlsx', 'Human')
human['Specie'] = 'Human'


In [3]:
human = preprocessing(human)

In [4]:
human.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 24726 entries, 1 to 25698
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   #Replicon Name      24726 non-null  object
 1   Replicon Accession  24726 non-null  object
 2   Start               24726 non-null  int64 
 3   Stop                24726 non-null  int64 
 4   Strand              24726 non-null  object
 5   GeneID              24726 non-null  int64 
 6   Locus               24726 non-null  object
 7   Protein product     24726 non-null  object
 8   Length              24726 non-null  int64 
 9   Protein name        24726 non-null  object
 10  Specie              24726 non-null  object
 11  Gene_non_or         24726 non-null  object
 12  Gene                24726 non-null  object
 13  Specie_Scaffold     24726 non-null  object
dtypes: int64(4), object(10)
memory usage: 2.8+ MB


In [5]:
human['Gene_non_or'].astype(str)

1         GPX
2         GPX
3        ZBED
4        TRIM
5         ZNF
         ... 
25694      ND
25695      ND
25696      ND
25697      ND
25698    CYTB
Name: Gene_non_or, Length: 24726, dtype: object

In [6]:
def dataframe_series_to_string(df, series_name):
    return df[series_name].astype(str).str.cat(sep='')

In [7]:
input_data = '>Sequence_1\n' + dataframe_series_to_string(human.loc[human['Replicon Accession'].str.contains('NC_000001.11')], 'Gene_non_or')

In [8]:
'''
alignment_data_no_quotes = [
    [BTG;FMOD;PRELP;OPTC;ATP;-;-;ZC;SNRPE;SOX;ETNK;-;REN;-;GOLT;PLEKHA;PPP;PIK;MDM;LRRN;NFASC;CNTN;-;CNTN;TMEM;RBBP;DSTYK;TMCC;NUAK;KLHDC;LEMD;CDK;MFSD;ELK;SLC;NUCKS;RAB;SLC;PM;SLC;-;-;RAB;CTSE;-;-;AVPR;FAM],
    [BTG;FMOD;PRELP;OPTC;ATP;LAX;ZBED;ZC;SNRPE;SOX;ETNK;ETNK;REN;KISS;GOLT;PLEKHA;PPP;PIK;MDM;LRRN;NFASC;NFASC;NFASC;CNTN;TMEM;RBBP;DSTYK;TMCC;NUAK;KLHDC;LEMD;CDK;MFSD;ELK;SLC;NUCKS;RAB;SLC;PM;SLC;SLC;SLC;RAB;CTSE;CTSE;C;AVPR;FAM]
]
'''


'\nalignment_data_no_quotes = [\n    [BTG;FMOD;PRELP;OPTC;ATP;-;-;ZC;SNRPE;SOX;ETNK;-;REN;-;GOLT;PLEKHA;PPP;PIK;MDM;LRRN;NFASC;CNTN;-;CNTN;TMEM;RBBP;DSTYK;TMCC;NUAK;KLHDC;LEMD;CDK;MFSD;ELK;SLC;NUCKS;RAB;SLC;PM;SLC;-;-;RAB;CTSE;-;-;AVPR;FAM],\n    [BTG;FMOD;PRELP;OPTC;ATP;LAX;ZBED;ZC;SNRPE;SOX;ETNK;ETNK;REN;KISS;GOLT;PLEKHA;PPP;PIK;MDM;LRRN;NFASC;NFASC;NFASC;CNTN;TMEM;RBBP;DSTYK;TMCC;NUAK;KLHDC;LEMD;CDK;MFSD;ELK;SLC;NUCKS;RAB;SLC;PM;SLC;SLC;SLC;RAB;CTSE;CTSE;C;AVPR;FAM]\n]\n'

In [9]:
'''
alignment_data = [    [        "BTG", "FMOD", "PRELP", "OPTC", "ATP", "-", "-", "ZC", "SNRPE", "SOX",        "ETNK", "-", "REN", "-", "GOLT", "PLEKHA", "PPP", "PIK", "MDM", "LRRN",        "NFASC", "CNTN", "-", "CNTN", "TMEM", "RBBP", "DSTYK", "TMCC", "NUAK",        "KLHDC", "LEMD", "CDK", "MFSD", "ELK", "SLC", "NUCKS", "RAB", "SLC", "PM",        "SLC", "-", "-", "RAB", "CTSE", "-", "-", "AVPR", "FAM"    ],
    [        "BTG", "FMOD", "PRELP", "OPTC", "ATP", "LAX", "ZBED", "ZC", "SNRPE",        "SOX", "ETNK", "ETNK", "REN", "KISS", "GOLT", "PLEKHA", "PPP", "PIK",        "MDM", "LRRN", "NFASC", "NFASC", "NFASC", "CNTN", "TMEM", "RBBP", "DSTYK",        "TMCC", "NUAK", "KLHDC", "LEMD", "CDK", "MFSD", "ELK", "SLC", "NUCKS",        "RAB", "SLC", "PM", "SLC", "SLC", "SLC", "RAB", "CTSE", "CTSE", "C",        "AVPR", "FAM"]]
'''

'\nalignment_data = [    [        "BTG", "FMOD", "PRELP", "OPTC", "ATP", "-", "-", "ZC", "SNRPE", "SOX",        "ETNK", "-", "REN", "-", "GOLT", "PLEKHA", "PPP", "PIK", "MDM", "LRRN",        "NFASC", "CNTN", "-", "CNTN", "TMEM", "RBBP", "DSTYK", "TMCC", "NUAK",        "KLHDC", "LEMD", "CDK", "MFSD", "ELK", "SLC", "NUCKS", "RAB", "SLC", "PM",        "SLC", "-", "-", "RAB", "CTSE", "-", "-", "AVPR", "FAM"    ],\n    [        "BTG", "FMOD", "PRELP", "OPTC", "ATP", "LAX", "ZBED", "ZC", "SNRPE",        "SOX", "ETNK", "ETNK", "REN", "KISS", "GOLT", "PLEKHA", "PPP", "PIK",        "MDM", "LRRN", "NFASC", "NFASC", "NFASC", "CNTN", "TMEM", "RBBP", "DSTYK",        "TMCC", "NUAK", "KLHDC", "LEMD", "CDK", "MFSD", "ELK", "SLC", "NUCKS",        "RAB", "SLC", "PM", "SLC", "SLC", "SLC", "RAB", "CTSE", "CTSE", "C",        "AVPR", "FAM"]]\n'

In [10]:
'''
import pandas as pd
data = pd.read_csv("../Data/Raw/alignment_test.csv")
'''

'\nimport pandas as pd\ndata = pd.read_csv("../Data/Raw/alignment_test.csv")\n'

In [11]:
'''
data=[{'id': 'Sequence 1', 'sequence': 'BTGFMODPRELPOPTCATP--ZCSNRPESOXETNK-REN-GOLTPLEKHAPPPPIKMDMLRRNNFASCCNTN-CNTNTMEMRBBPDSTYKTMCCNUAKKLHDCLEMDCDKMFSDELKSLCNUCKSRABSLCPMSLC--RABCTSE--AVPRFAM'},
      {'id': 'Sequence 2', 'sequence': 'BTGFMODPRELPOPTCATPLAXZBEDZCSNRPESOXETNKETNKRENKISSGOLTPLEKHAPPPPIKMDMLRRNNFASCNFASCNFASCCNTNTMEMRBBPDSTYKTMCCNUAKKLHDCLEMDCDKMFSDELKSLCNUCKSRABSLCPMSLCSLCSLCRABCTSECTSECAVPRFAM'}]
'''

"\ndata=[{'id': 'Sequence 1', 'sequence': 'BTGFMODPRELPOPTCATP--ZCSNRPESOXETNK-REN-GOLTPLEKHAPPPPIKMDMLRRNNFASCCNTN-CNTNTMEMRBBPDSTYKTMCCNUAKKLHDCLEMDCDKMFSDELKSLCNUCKSRABSLCPMSLC--RABCTSE--AVPRFAM'},\n      {'id': 'Sequence 2', 'sequence': 'BTGFMODPRELPOPTCATPLAXZBEDZCSNRPESOXETNKETNKRENKISSGOLTPLEKHAPPPPIKMDMLRRNNFASCNFASCNFASCCNTNTMEMRBBPDSTYKTMCCNUAKKLHDCLEMDCDKMFSDELKSLCNUCKSRABSLCPMSLCSLCSLCRABCTSECTSECAVPRFAM'}]\n"

In [12]:
# The file has to be closed!!

data = open("C:/Users/bertr/LRZ Sync+Share/Thesis/Data/Raw/test.fasta", "r")
record = data.read()
data.close()

In [13]:
record

'>Sequence_1\nBTGFMODPRELPOPTCATP-------ZCSNRPESOXETNK----REN----GOLTPLEKHAPPPPIKMDMLRRNNFASCCNTN-CNTNTMEMRBBPDSTYKTMCCNUAKKLHDCLEMDCDKMFSDELKSLCNUCKSRABSLCPMSLC--RABCTSE--AVPRFAM\n>Sequence_2\nBTGFMODPRELPOPTCATPLAXZBEDZCSNRPESOXETNKETNKRENKISSGOLTPLEKHAPPPPIKMDMLRRNNFASCNFASCNFASCCNTNTMEMRBBPDSTYKTMCCNUAKKLHDCLEMDCDKMFSDELKSLCNUCKSRABSLCPMSLCSLCSLCRABCTSECTSECAVPRFAM'

In [14]:
input_data

'>Sequence_1\nORORORSAMDNOCKLHLPLEKHNPLEKHNPERMHESISGAGRNRNFRNFCTTLLTTLLTNFRSFTNFRSFSDFBFAMUBESCNNACAPPUSLCPSFCPTPTASDVLMXRAAURKAIPCCNLCCNLMRPLANKRDTMEMVWAATADATADATADATADATADTMEMSSUCMIBMIBMMPCDKSLCCDKSLCNADKNADKGNBCALMLTMEMCFAPCFAPGABRDPRKCZPRKCZPRKCZPRKCZFAAPFAAPFAAPFAAPSKIMORNRERPEXPLCHPLCHPLCHPLCHPANKPANKHESTNFRSFTNFRSFTNFRSFTNFRSFFAMMMELTTCACTRTPRDMPRDMARHGEFMEGFMEGFTPRGWRAPTPTPCCDCCCDCSMIMLRRCCEPCEPDFFBDFFBCAJAPNPHPKCNABCHDRPLRNFICMTHESGPRACOTHESESPNTNFRSFPLEKHGPLEKHGNOLNOLTASTASZBTBZBTBKLHLPHFTHAPDNAJCCAMTACAMTAVAMPPERPERUTSUTSTNFRSFPARKERRFISLCREREREREENOCACASLCSLCGPRHSPSBSLCTMEMTMEMPIKCLSTNCTNNBIPLZICNMNATRBPUBEUBEKIFPGDAPITDAPITDDFFAPEXCASZCTARDBPMASPSRMEXOSCMTORANGPTLUBIADDISPFBXOFBXOFBXOMADDRAXINAGTRAPCMTHFRMTHFRCLCNNPPANPPBKIAAPLODMFNMIIPTNFRSFTNFRSFTNFRSFVPSDHRSDHRSAADACLAADACLCPRAMEFPRAMEFPRAMEFHNRNPCLPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFHNRNPCLPRAMEFHNRNPCLPRAMEFHNRNPCLPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFPRAMEFLRRCPDPNPRDMPRDMPRDMPRDMPRDMKAZNKAZNK

In [15]:
app = dash.Dash(__name__)

app.layout = html.Div([
    dashbio.AlignmentChart(
        id='my-default-alignment-viewer',
        data=record,
        height=900,
        tilewidth=30,
    ),
    html.Div(id='default-alignment-viewer-output')
])

@app.callback(
    Output('default-alignment-viewer-output', 'children'),
    Input('my-default-alignment-viewer', 'eventDatum')
)
def update_output(value):
    if value is None:
        return 'No data.'
    return str(value)

if __name__ == '__main__':
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: on


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
%tb