In [6]:
import fpkit.similarity as fps
import fpkit.filters as filters

import pandas as pd

# FPKit (Fingerprint Kit) usage example
FPKit currently provides two tools:
- A set of 51 similarity measures (in _fpkit.similarity_), collected in the following work: [Todeschini et al., _J Chem Inf Model_ 52(11):2884-2901, **2012**](https://pubs.acs.org/doi/10.1021/ci300261r). This is entirely general and can be applied to any two Python lists, NumPy arrays, or pandas Series containing 0s and 1s, as well as to molecular fingerprint types implemented in the *cinfony* cheminformatics package, provided they are the same length (and the same type).
- Three filtering options for interaction fingerprints (in _fpkit.filters_). This currently requires the use of pandas DataFrames, and in some cases, Schrödinger-style column headers, as documented in _filters.py_.

# Citation
If you use FPKit in your research, please cite our work: [Rácz et al. _J. Cheminformatics_ 10:48, **2018**](https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0302-y)

In [14]:
# Read the example interaction fingerprints; the first CSV column becomes the row index.
ifp_csv = './IFP_example.csv'
df = pd.read_csv(ifp_csv, index_col=0)
df

Unnamed: 0,A85_contact,A85_backbone,A85_sidechain,A85_polar,A85_hydrophobic,A85_acceptor,A85_donor,A85_aromatic,A85_charged,A86_contact,...,A528_charged,A529_contact,A529_backbone,A529_sidechain,A529_polar,A529_hydrophobic,A529_acceptor,A529_donor,A529_aromatic,A529_charged
2H8H_ligand,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03939514,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815499,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815489,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03939513,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815240,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815491,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815514,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815490,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815501,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


After importing the necessary modules and reading the plain csv file containing the IFPs into a DataFrame, let's apply some filtering rules! First we will filter out the _Any contact_ bit definition (simply labeled _contact_ in the headers).

In [15]:
# Drop the "Any contact" bit definition, which is labeled "contact" in the column headers.
excluded_bits = ['contact']
df = filters.excludeBits(df, excluded_bits)
df

Unnamed: 0,A85_backbone,A85_sidechain,A85_polar,A85_hydrophobic,A85_acceptor,A85_donor,A85_aromatic,A85_charged,A86_backbone,A86_sidechain,...,A528_aromatic,A528_charged,A529_backbone,A529_sidechain,A529_polar,A529_hydrophobic,A529_acceptor,A529_donor,A529_aromatic,A529_charged
2H8H_ligand,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03939514,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815499,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815489,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03939513,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815240,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815491,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815514,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815490,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
ZINC03815501,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Next, we will get rid of the large blocks of zeros by applying the interaction-based filtering rule.

In [16]:
# Apply the interaction-based filtering rule and rebind `df` to the filtered table.
df = filters.filterInteractions(df)
df

Unnamed: 0,A273_backbone,A273_sidechain,A273_hydrophobic,A281_sidechain,A281_hydrophobic,A293_backbone,A293_sidechain,A293_hydrophobic,A294_backbone,A295_backbone,...,A404_backbone,A404_sidechain,A404_polar,A404_charged,A405_backbone,A405_sidechain,A405_hydrophobic,A405_aromatic,A407_sidechain,A407_hydrophobic
2H8H_ligand,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03939514,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,1,1,1,0,0
ZINC03815499,0,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03815489,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03939513,1,1,1,1,1,0,1,1,0,0,...,1,1,1,1,1,0,0,0,1,1
ZINC03815240,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03815491,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03815514,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03815490,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0
ZINC03815501,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,0,0,0,0,0,0


We can calculate the similarity of two fingerprints with any of the 51 metrics.
- The abbreviations and definitions of the similarity metrics are published in Table 1 of [Todeschini et al, _J Chem Inf Model_ 52(11):2884-2901, **2012**](https://pubs.acs.org/doi/10.1021/ci300261r) and as a supplementary table in our recent, open access work: [Rácz et al, _Metabolomics_ 14:29, **2018**](https://link.springer.com/article/10.1007/s11306-018-1327-y)

In [19]:
# Fingerprints can be selected by position (`iloc`) or by row label (`loc`).
# The `metric` argument takes one of the similarity-measure abbreviations.
reference_fp = df.iloc[0]
a = fps.similarity(reference_fp, df.iloc[1], metric='RG', scale=True)
b = fps.similarity(
    df.loc['2H8H_ligand'],
    df.loc['ZINC03815240'],
    metric='SM',
    scale=False,
)

[a, b]

[0.5571428571428572, 0.7741935483870968]

We can also iterate over all similarity measures. These are stored in the lists *metrics\_redundant* (all 51 measures from Todeschini et al., _JCIM_, 2012) and _metrics_ (44 measures; the remaining seven are omitted because each gives results identical to one of the 44).

In [25]:
# One similarity value per entry of fps.metrics, always comparing the first two rows of df.
first_fp, second_fp = df.iloc[0], df.iloc[1]
sim = [
    fps.similarity(first_fp, second_fp, metric=metric, scale=True)
    for metric in fps.metrics
]

Alternatively, we can iterate over both the similarity measures and the query molecules to build a summary table containing the similarity of each molecule to the reference ligand, according to each metric.

In [32]:
# The first row of df is the reference; every remaining row is a query.
queries = df.index[1:]
ref = df.iloc[0].values

# Collect one column of similarities per metric, then assemble the table in one step.
columns = {}
for metric in fps.metrics:
    columns[metric] = [
        fps.similarity(ref, df.loc[query].values, metric=metric, scale=True)
        for query in queries
    ]
results = pd.DataFrame(columns, index=queries)

results

Unnamed: 0,SM,RT,JT,Gle,RR,For,Sim,BB,DK,BUB,...,Pe1,Pe2,MP,HL,CT1,CT2,CT3,CT4,CT5,AC
ZINC03939514,0.612903,0.44186,0.555556,0.714286,0.483871,0.514874,0.789474,0.652174,0.717547,0.654636,...,0.561404,0.576087,0.567961,0.244624,0.884249,0.223082,0.828838,0.856926,0.545493,0.572502
ZINC03815499,0.741935,0.589744,0.659574,0.794872,0.5,0.632237,0.815789,0.775,0.795133,0.766641,...,0.720395,0.728409,0.72433,0.46669,0.929284,0.316167,0.836501,0.895261,0.644511,0.66077
ZINC03815489,0.758065,0.61039,0.680851,0.810127,0.516129,0.657253,0.842105,0.780488,0.810711,0.782321,...,0.733553,0.747387,0.740271,0.485844,0.934365,0.330799,0.843928,0.90321,0.657849,0.672628
ZINC03939513,0.677419,0.512195,0.607843,0.756098,0.5,0.574761,0.815789,0.704545,0.75813,0.71209,...,0.637061,0.657828,0.646714,0.345657,0.907815,0.265165,0.836501,0.877125,0.595576,0.615464
ZINC03815240,0.774194,0.631579,0.708333,0.829268,0.548387,0.691388,0.894737,0.772727,0.831497,0.799477,...,0.739035,0.775253,0.755869,0.492944,0.939342,0.346377,0.85813,0.913544,0.678624,0.684758
ZINC03815491,0.758065,0.61039,0.680851,0.810127,0.516129,0.657253,0.842105,0.780488,0.810711,0.782321,...,0.733553,0.747387,0.740271,0.485844,0.934365,0.330799,0.843928,0.90321,0.657849,0.672628
ZINC03815514,0.758065,0.61039,0.680851,0.810127,0.516129,0.657253,0.842105,0.780488,0.810711,0.782321,...,0.733553,0.747387,0.740271,0.485844,0.934365,0.330799,0.843928,0.90321,0.657849,0.672628
ZINC03815490,0.758065,0.61039,0.680851,0.810127,0.516129,0.657253,0.842105,0.780488,0.810711,0.782321,...,0.733553,0.747387,0.740271,0.485844,0.934365,0.330799,0.843928,0.90321,0.657849,0.672628
ZINC03815501,0.758065,0.61039,0.680851,0.810127,0.516129,0.657253,0.842105,0.780488,0.810711,0.782321,...,0.733553,0.747387,0.740271,0.485844,0.934365,0.330799,0.843928,0.90321,0.657849,0.672628
