# Notebook com análise voltada ao SARS-CoV-2

#### Bibliotecas que devem ser importadas para fazer as análises

In [None]:
import sqlite3
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import numpy as np

#### Conexão com o BD

In [None]:
# Open the SQLite database that holds the molecular-properties table queried below.
# A plain sqlite3.connect(path) silently creates an empty database file when the path
# is wrong, and the mistake only shows up later as "no such table". Opening through a
# URI with mode=rw raises sqlite3.OperationalError right here if the file is missing.
# The connection is not closed in the cells shown; it stays open for the session.
con = sqlite3.connect("file:../data/propriedades.db?mode=rw", uri=True)

#### Query para carregar os dados

In [None]:
# Read every row and column of the `dados` table into a pandas DataFrame,
# using the connection opened above.
dataframe = pd.read_sql(sql="select * from dados", con=con)

#### Comando para mostrar os dados carregados do BD

In [None]:
# Show the loaded DataFrame as the cell output.
dataframe

#### Biblioteca com descritores em Python (RDKit) e outros módulos

In [None]:
from rdkit import Chem

In [None]:
# SMILES string of the reference (query) molecule. It is parsed with RDKit, looked up
# in the database and passed to calcula_tanimoto as the query in the cells below.
smiles = '[H]/N=C/[C@H](C[C@@H]1CCNC1=O)NC(=O)[C@@H]2[C@@H]3[C@@H](C3(C)C)CN2C(=O)[C@H](C(C)(C)C)NC(=O)C(F)(F)F'

In [None]:
# Parse the reference SMILES into an RDKit molecule object.
pf = Chem.MolFromSmiles(smiles)

# MolFromSmiles returns None (it does not raise) when the SMILES cannot be parsed.
# Fail here with a clear message instead of letting a later cell crash on None.
if pf is None:
    raise ValueError(f"RDKit could not parse the SMILES string: {smiles!r}")

In [None]:
# Show the parsed molecule object as the cell output.
pf

In [None]:
# Compute the RDKit fingerprint of the reference molecule.
# NOTE(review): `pf_finger` is not referenced by any other cell shown in this notebook.
pf_finger = Chem.RDKFingerprint(pf)

#### Busca no BD da molécula `smiles` acima

In [None]:
# Select the rows whose `canonical_smiles` column equals the `smiles` string
# (`@smiles` refers to the Python variable of that name).
# NOTE(review): this is an exact string comparison, so it only matches if the database
# stores the molecule with exactly this SMILES spelling. Confirm how the
# `canonical_smiles` column was generated before reading an empty result as "absent".
dataframe.query("canonical_smiles == @smiles")

#### Importando módulos desenvolvidos no laboratório

In [None]:
import sys

# Make the in-house modules under ../module/ importable. The membership check keeps
# re-running this cell from appending the same entry to sys.path again.
if "../module/" not in sys.path:
    sys.path.append("../module/")

# Import only the name used in the cells shown, instead of `import *`, so it is
# clear where `calcula_tanimoto` comes from and no other names are pulled in silently.
from calcula_tanimoto import calcula_tanimoto

#### Calculando índices de Tanimoto

In [None]:
%%time
# Run the lab's `calcula_tanimoto` helper with the reference `smiles` against the
# molecules loaded in `dataframe`; `%%time` reports how long the cell takes.
# The result is sorted by its `i_tanimoto` column in the next cell.
# NOTE(review): the meaning of the third argument (20) is defined in the
# calcula_tanimoto module, which is not visible here. Confirm it and consider
# giving it a named constant.
data_com_tanimoto = calcula_tanimoto(smiles, dataframe, 20)

#### Ordenando pelo maior índice de Tanimoto

In [None]:
# Show the results ordered from the highest to the lowest `i_tanimoto` value.
# sort_values returns a new DataFrame; `data_com_tanimoto` itself is left unchanged.
data_com_tanimoto.sort_values("i_tanimoto", ascending=False)

In [5]:
# Open the SQLite database with the ligand table, located relative to the notebook's
# working directory. A plain sqlite3.connect(path) silently creates an empty database
# file when the path is wrong; mode=rw raises sqlite3.OperationalError here instead.
# The connection is not closed in the cells shown.
con_2 = sqlite3.connect("file:ligantes_clauber.db?mode=rw", uri=True)

In [6]:
# Read every row and column of the `ligantes_clauber` table into a pandas DataFrame,
# using the second connection (`con_2`).
ligantes = pd.read_sql(sql="select * from ligantes_clauber", con=con_2)

In [7]:
# Show the ligand DataFrame as the cell output.
ligantes

Unnamed: 0,Smiles,logp,tpsa,molwt,qed,hba,hbd,active_probability
0,CCCC1NC(=O)C(C(C)C)NC(=O)C(C(C)O)N(C)C(=O)C(C(...,1.18150,131.08,509.357720,0.407688,6,4,0.025474
1,CNC(=O)c1ccc(CCNC(=O)C2CCCN2C(=O)CNC(=O)C2CC2c...,1.10810,127.84,506.252920,0.383918,5,4,0.004862
2,N=C1CC(N2CC(NC(=O)C(Cc3ccc4ccccc4c3)NS(=O)(=O)...,1.39557,131.46,505.178375,0.343820,5,4,0.026046
3,CCNC(=O)CNC(=O)C1(C(F)C2(C)CC2(Cl)Cl)CC(O)CN1C...,0.84950,136.04,497.149569,0.275038,5,4,0.000392
4,CCSC1CC2CNC(=O)N(C(=O)C(Cc3ccccc3)NC(=O)C(NC(N...,2.49660,133.63,517.272276,0.378408,5,4,0.003619
...,...,...,...,...,...,...,...,...
2202,CCC(C)(NCCCNCc1cccc(F)c1)C(=O)NC1C(=O)N2C(C(=O...,1.28510,131.08,503.254397,0.264350,6,4,0.002152
2203,C=CCNC(=O)CC1NC2C(C(=O)N3CCCC3C(=O)NC(CC)CC)CC...,1.33210,110.85,501.331505,0.413059,5,3,0.013842
2204,CCc1c(Cl)ccc(CC(NC(=O)C(N)Cc2c[nH]cn2)C(=O)N2C...,1.48900,133.21,506.220845,0.408699,5,4,0.018219
2205,O=C1CCCN(C(=O)C(=O)NC(Cc2ccccc2)C(O)CNC(=O)c2c...,1.02200,127.84,506.252920,0.417204,5,4,0.005340


In [14]:
%%time
# Run the lab's `calcula_tanimoto` helper with the reference `smiles` against the
# molecules in `ligantes`; `%%time` reports how long the cell takes.
# The recorded result carries an extra `i_tanimoto` column.
# NOTE(review): the meaning of the third argument (20) is defined in the
# calcula_tanimoto module, which is not visible here. Confirm it and consider
# giving it a named constant.
ligantes_com_tanimoto = calcula_tanimoto(smiles, ligantes, 20)

CPU times: user 7.55 s, sys: 495 ms, total: 8.04 s
Wall time: 8.07 s


In [11]:
# Show the ligand table with its `i_tanimoto` column as the cell output.
ligantes_com_tanimoto

Unnamed: 0,Smiles,logp,tpsa,molwt,qed,hba,hbd,active_probability,i_tanimoto
0,C=C(C)C1C(=O)N2C(CCC1O)C(=O)C2C(=O)NC(Cc1c[nH]...,1.85300,131.60,508.268570,0.303294,5,4,0.002141,0.45
1,CCN1CCN(C(=O)C(Cc2c[nH]c3ccccc23)NC(=O)C(N)Cc2...,1.94010,131.76,505.268905,0.370028,6,4,0.004460,0.39
2,CC(NC(=O)CNCCCC(=O)O)C(=O)N1CC2c3ccccc3CC2C1C(...,1.41720,127.84,510.284220,0.352686,5,4,0.214548,0.51
3,CN=c1ccc(CC(NC(=O)C(N)CC(C)C)C(=O)NC2CC(=O)N(C...,1.29420,132.68,508.316189,0.364081,5,4,0.044124,0.42
4,CNC(=O)C(CC(=O)O)NC(=O)C(C)NC(=O)C1N(C(=O)Cc2c...,0.85890,144.91,512.183791,0.385961,5,4,0.005586,0.49
...,...,...,...,...,...,...,...,...,...
106,CNC(=O)C(CNC1CC2(C)C(C)CC1C1CCC3(C)C1C23)NC(=O...,1.86490,130.39,502.261377,0.452182,5,4,0.022848,0.39
107,NC(=O)CC(CCc1ccccc1)NC(=O)C1C(F)C(O)CN1C(=O)C(...,1.11260,138.75,504.193961,0.383711,5,4,0.002473,0.44
108,CC(C)CNC(=O)C(NC1CC2C(=O)n3ccc(c32)C1=O)C(C)NC...,1.12470,135.32,497.243833,0.388640,7,4,0.008248,0.39
109,CNC(=O)C(Cc1ccccc1)NC(=O)C1CCCN1C(=O)C(Cc1cccc...,0.91570,133.63,507.284555,0.379482,5,4,0.006088,0.47


In [15]:
# Show the ligands ordered from the lowest to the highest `i_tanimoto` value.
# The ascending order (the sort_values default) is spelled out because the earlier
# sort of `data_com_tanimoto` in this notebook is descending.
ligantes_com_tanimoto.sort_values("i_tanimoto", ascending=True)

Unnamed: 0,Smiles,logp,tpsa,molwt,qed,hba,hbd,active_probability,i_tanimoto
4,CN(C)CCc1cccc(N=c2[nH]c(=Nc3ccc(Br)cc3)[nH]c(=...,1.10650,147.85,506.084805,0.396179,5,4,0.438547,0.20
19,CCN=c1[nH]c(=NCCNS(=O)(=O)c2cccc(C)c2)[nH]c(=N...,1.77402,130.62,505.089556,0.364303,5,4,0.001153,0.21
109,CNS(=O)(=O)CCN=c1[nH]c(=NCc2cccc(Cl)c2F)[nH]c(...,1.52480,130.62,501.096156,0.391028,5,4,0.002275,0.22
79,Cc1cc(CNC(=O)Nc2ccc(C(=O)NCC(F)(F)F)cc2)ccc1Cn...,1.33162,128.97,505.180564,0.361310,5,4,0.001158,0.22
25,CCN(CC)C(=O)c1ccc(N=c2[nH]c(=NCCNC(C)=O)[nH]c(...,1.66370,133.86,496.252207,0.414009,5,4,0.006457,0.23
...,...,...,...,...,...,...,...,...,...
15,CC1CNC(=O)C(C(C)C)CCNC(=O)C(C(C)C)NC(=O)C2CCCN...,1.79790,133.63,507.378455,0.406921,5,4,0.026337,0.63
51,C=CCNC(=O)C1C2CN(C(=O)C(N)C(CC(N)=O)Cc3ccc(F)c...,0.08850,147.62,499.259483,0.300990,5,4,0.041670,0.63
41,CC(C)CC1C(=O)N2CCCC2C(=O)NC(C(C)C)C(=O)NC1CC(C...,1.82830,127.84,508.362471,0.377822,5,4,0.005590,0.63
44,CCC(C)C(NC(=O)C(C)NC(=O)C1CC2CC2(N)C1)C(=O)N1C...,1.58930,133.63,503.347155,0.379166,5,4,0.007446,0.65
