In [1]:
#library
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.manifold import MDS
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import dendrogram
import networkx as nx
import seaborn as sns
import numpy as np

### Stage 1: Determining and generating clusters and sub-clusters based on Top-Hits and Tanimoto Similarity

In [3]:
# Step 1 - Load the Tanimoto coefficient matrix of the hit ligands
# Lift the display limits so the full matrix is rendered when a cell ends with a frame.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# NOTE(review): 'Unnamed: 47' is presumably an empty column created by a trailing ';'
# on each CSV line - confirm against the input file.
tanimotoHits = pd.read_csv('./input/similarityMatrix-hits.csv', sep=';', index_col=0).drop(columns='Unnamed: 47')
# Every entry is stored as 1 - (value read from the file); later cells feed this
# matrix to the clustering as a precomputed distance.
tanimoto_similarity = 1 - tanimotoHits
tanimoto_similarity

Unnamed: 0,21070,12593,12666,12974,15338,15339,15849,17294,17296,17456,17474,17498,20403,20497,20499,20500,20509,20525,20550,21148,21266,3206,5651,6315,ZINC100199761,ZINC100232131,ZINC142456176,(E)-Nerolidol,ZINC1676040,ZINC175245225,ZINC1846611,ZINC1849759,ZINC2018831,ZINC2600024,ZINC33841709,ZINC3861087,ZINC4098262,ZINC59206468,ZINC59586886,ZINC59778978,ZINC6071066,ZINC62237753,ZINC64634151,ZINC8220462,ZINC8234296,ZINC968471
21070,0.0,0.796992,0.799697,0.807229,0.839947,0.849754,0.704787,0.900247,0.901245,0.906327,0.895793,0.912347,0.339921,0.705036,0.655263,0.776549,0.489855,0.242915,0.269565,0.089947,0.766323,0.851449,0.62704,0.880488,0.587719,0.753351,0.748092,0.868794,0.590278,0.836066,0.877076,0.632653,0.689873,0.924171,0.824561,0.857724,0.501845,0.714777,0.960784,0.682927,0.938389,0.734043,0.836957,0.840731,0.858553,0.958791
12593,0.796992,0.0,0.236919,0.422914,0.4609,0.472973,0.647321,0.721049,0.727405,0.726943,0.727199,0.779241,0.783237,0.59562,0.621861,0.613087,0.69931,0.787535,0.799414,0.795385,0.842963,0.790507,0.570014,0.694815,0.889552,0.732857,0.809524,0.849379,0.735878,0.724812,0.806907,0.718507,0.713846,0.9616,0.741985,0.858766,0.716279,0.795764,0.912975,0.791541,0.836898,0.807044,0.742604,0.746744,0.822358,0.890582
12666,0.799697,0.236919,0.0,0.247227,0.369153,0.394517,0.646617,0.723886,0.730263,0.732558,0.728305,0.791827,0.787482,0.581845,0.606335,0.623932,0.702778,0.786533,0.800296,0.801858,0.835596,0.787375,0.517751,0.684766,0.882979,0.727536,0.795122,0.834395,0.726283,0.742129,0.796474,0.700159,0.709828,0.962723,0.73839,0.856672,0.712264,0.794793,0.906452,0.78681,0.842742,0.798752,0.737237,0.735988,0.81804,0.895397
12974,0.807229,0.422914,0.247227,0.0,0.318759,0.347884,0.665557,0.74981,0.753612,0.756798,0.746803,0.82008,0.806818,0.584437,0.611765,0.562914,0.727549,0.797753,0.809683,0.80776,0.85956,0.753452,0.532468,0.685908,0.890244,0.745981,0.813653,0.792381,0.748264,0.709059,0.732039,0.669759,0.684116,0.963671,0.684404,0.831373,0.732865,0.807958,0.875969,0.805172,0.861194,0.795699,0.705575,0.711375,0.789762,0.8896
15338,0.839947,0.4609,0.369153,0.318759,0.0,0.162821,0.707733,0.686963,0.689432,0.692857,0.701727,0.766418,0.821795,0.634691,0.669302,0.604775,0.725,0.812977,0.827451,0.844086,0.876963,0.813953,0.581486,0.705405,0.897987,0.759331,0.840112,0.836415,0.781965,0.77321,0.790462,0.748611,0.747599,0.96875,0.753804,0.866279,0.773713,0.834446,0.902299,0.832224,0.875297,0.824176,0.754032,0.737617,0.829876,0.905237
15339,0.849754,0.472973,0.394517,0.347884,0.162821,0.0,0.720245,0.675543,0.677918,0.676718,0.685868,0.750913,0.824096,0.648582,0.677819,0.620647,0.744755,0.825623,0.838002,0.85375,0.878676,0.828188,0.608748,0.72327,0.905237,0.76988,0.853437,0.839523,0.794486,0.724675,0.806409,0.759067,0.759591,0.971091,0.731126,0.87651,0.788413,0.846154,0.909695,0.845488,0.874439,0.836943,0.768461,0.756219,0.839332,0.90771
15849,0.704787,0.647321,0.646617,0.665557,0.707733,0.720245,0.0,0.841894,0.846216,0.848,0.848816,0.874332,0.761124,0.490698,0.534279,0.557734,0.545035,0.734266,0.722922,0.690808,0.761905,0.758017,0.511727,0.634615,0.768571,0.473404,0.651235,0.755814,0.528571,0.836957,0.641337,0.603825,0.623684,0.933333,0.747549,0.7,0.549296,0.591837,0.842105,0.604585,0.839583,0.566154,0.568,0.567358,0.682081,0.875576
17294,0.900247,0.721049,0.723886,0.74981,0.686963,0.675543,0.841894,0.0,0.067797,0.468185,0.519635,0.621816,0.883117,0.821119,0.836638,0.812746,0.839749,0.880837,0.895595,0.91067,0.909619,0.920271,0.796729,0.853323,0.939419,0.867729,0.919192,0.914894,0.884521,0.871837,0.913518,0.875828,0.867769,0.981051,0.875828,0.940925,0.883607,0.903545,0.949225,0.898431,0.834855,0.911277,0.879352,0.860862,0.914167,0.86383
17296,0.901245,0.727405,0.730263,0.753612,0.689432,0.677918,0.846216,0.067797,0.0,0.477941,0.51938,0.623853,0.885808,0.82157,0.838118,0.81689,0.839399,0.884398,0.898361,0.910833,0.910684,0.919589,0.798905,0.855863,0.943333,0.870296,0.921253,0.914163,0.886326,0.872638,0.914624,0.876667,0.869493,0.981771,0.876667,0.942241,0.884488,0.905473,0.948785,0.901245,0.837364,0.911445,0.880196,0.863451,0.913445,0.867521
17456,0.906327,0.726943,0.732558,0.756798,0.692857,0.676718,0.848,0.468185,0.477941,0.0,0.563054,0.623775,0.890057,0.839036,0.846336,0.818824,0.846635,0.886838,0.902518,0.914946,0.913907,0.912671,0.809266,0.857605,0.938385,0.871917,0.92893,0.910959,0.895122,0.876121,0.917854,0.885691,0.881342,0.981002,0.868003,0.936207,0.896916,0.916735,0.947277,0.90991,0.805627,0.917355,0.884491,0.879711,0.920266,0.840731


In [4]:
# Step 2 - Assign the hit ligands to their protein targets
# One list of ligand identifiers per target; a ligand may appear under several targets.
AChE = ['ZINC3861087','ZINC2600024','ZINC33841709','ZINC1676040','15849','20500','20499','20497','20525','20403']
EcdR = ['ZINC3861087','ZINC1676040','ZINC33841709','ZINC2600024','21070','20509','21148','17474','15849']
JHBP = ['ZINC3861087','ZINC2600024','ZINC33841709','ZINC2018831','ZINC100199761','ZINC175245225','ZINC968471','ZINC1849759','21266','20499']
MET = ['ZINC3861087','ZINC33841709','ZINC2600024','ZINC1849759','ZINC2018831','ZINC175245225','(E)-Nerolidol','ZINC968471','ZINC1846611','3206','20550','21266']
GABBAago = ['ZINC3861087','ZINC33841709','ZINC2600024','ZINC1676040','15339','15338','17498','20497','6315']
GABBAantago = ['17474','17498','17296','17456','17294','12666','20509','12593','21148']
OambRago = ['ZINC968471','(E)-Nerolidol','ZINC1846611','ZINC1849759','ZINC2018831','ZINC2600024','ZINC3861087','ZINC6071066','ZINC8234296','ZINC33841709','ZINC59586886','21266']
OambRantago = ['ZINC4098262','ZINC8220462','ZINC59206468','ZINC59778978','ZINC62237753','ZINC64634151','ZINC100232131','ZINC142456176','5651','12974']
# Distinct ligand identifiers across all targets, in order of first appearance.
final = pd.unique(np.array(AChE + EcdR + JHBP + MET + GABBAago + GABBAantago + OambRago + OambRantago))
# Reorder the matrix rows, then its columns, to follow that ligand order.
tnsm = tanimoto_similarity.reindex(index=final)[final]

In [6]:
# Step 3 - First clustering
cutoff = 0.5377615  # distance threshold used for the first clustering
try:
    # scikit-learn >= 1.2 calls this parameter `metric`; the old `affinity`
    # keyword was removed in 1.4 and raises TypeError there.
    ag = AgglomerativeClustering(n_clusters=None, metric='precomputed', linkage='single', distance_threshold=cutoff, compute_distances=True)
except TypeError:
    # Older scikit-learn releases only accept the `affinity` keyword.
    ag = AgglomerativeClustering(n_clusters=None, affinity='precomputed', linkage='single', distance_threshold=cutoff, compute_distances=True)
# Fit on the square ligand-by-ligand block only (columns selected by the row labels),
# so the cell still runs after a later step has added the text 'cluster' column
# to tanimoto_similarity.
cl = ag.fit_predict(tanimoto_similarity[tanimoto_similarity.index])
print('Number of Clusters formed:', len(pd.unique(cl)))

Number of Clusters formed: 10


In [8]:
# Step 4 - Sub-clustering preparation
# Map every integer label present in `cl` to a capital letter (0 -> 'A', 1 -> 'B', ...).
# The mapping is derived from the labels themselves, so a different cutoff that yields
# more (or fewer) than ten clusters no longer ends in a KeyError.
cluster_ids = sorted(int(c) for c in pd.unique(cl))
if cluster_ids and cluster_ids[-1] >= 26:
    raise ValueError('More than 26 clusters: single-letter cluster names are exhausted.')
cluster_letters = {c: chr(ord('A') + c) for c in cluster_ids}
cl_let = [cluster_letters[c] for c in cl]
tanimoto_similarity['cluster'] = cl_let
# Rows of cluster 'C', restricted to the columns of those same ligands: the square
# sub-matrix that the sub-clustering step works on.
tnsm = tanimoto_similarity.loc[tanimoto_similarity['cluster']=='C']
tnsm = tnsm[tnsm.index]
tnsm

Unnamed: 0,21070,12593,12666,12974,15338,15339,15849,20403,20497,20499,20500,20509,20525,20550,21148,5651,6315,ZINC100199761,ZINC100232131,ZINC142456176,ZINC1676040,ZINC1846611,ZINC1849759,ZINC2018831,ZINC4098262,ZINC59206468,ZINC59778978,ZINC62237753,ZINC64634151,ZINC8220462,ZINC8234296
21070,0.0,0.796992,0.799697,0.807229,0.839947,0.849754,0.704787,0.339921,0.705036,0.655263,0.776549,0.489855,0.242915,0.269565,0.089947,0.62704,0.880488,0.587719,0.753351,0.748092,0.590278,0.877076,0.632653,0.689873,0.501845,0.714777,0.682927,0.734043,0.836957,0.840731,0.858553
12593,0.796992,0.0,0.236919,0.422914,0.4609,0.472973,0.647321,0.783237,0.59562,0.621861,0.613087,0.69931,0.787535,0.799414,0.795385,0.570014,0.694815,0.889552,0.732857,0.809524,0.735878,0.806907,0.718507,0.713846,0.716279,0.795764,0.791541,0.807044,0.742604,0.746744,0.822358
12666,0.799697,0.236919,0.0,0.247227,0.369153,0.394517,0.646617,0.787482,0.581845,0.606335,0.623932,0.702778,0.786533,0.800296,0.801858,0.517751,0.684766,0.882979,0.727536,0.795122,0.726283,0.796474,0.700159,0.709828,0.712264,0.794793,0.78681,0.798752,0.737237,0.735988,0.81804
12974,0.807229,0.422914,0.247227,0.0,0.318759,0.347884,0.665557,0.806818,0.584437,0.611765,0.562914,0.727549,0.797753,0.809683,0.80776,0.532468,0.685908,0.890244,0.745981,0.813653,0.748264,0.732039,0.669759,0.684116,0.732865,0.807958,0.805172,0.795699,0.705575,0.711375,0.789762
15338,0.839947,0.4609,0.369153,0.318759,0.0,0.162821,0.707733,0.821795,0.634691,0.669302,0.604775,0.725,0.812977,0.827451,0.844086,0.581486,0.705405,0.897987,0.759331,0.840112,0.781965,0.790462,0.748611,0.747599,0.773713,0.834446,0.832224,0.824176,0.754032,0.737617,0.829876
15339,0.849754,0.472973,0.394517,0.347884,0.162821,0.0,0.720245,0.824096,0.648582,0.677819,0.620647,0.744755,0.825623,0.838002,0.85375,0.608748,0.72327,0.905237,0.76988,0.853437,0.794486,0.806409,0.759067,0.759591,0.788413,0.846154,0.845488,0.836943,0.768461,0.756219,0.839332
15849,0.704787,0.647321,0.646617,0.665557,0.707733,0.720245,0.0,0.761124,0.490698,0.534279,0.557734,0.545035,0.734266,0.722922,0.690808,0.511727,0.634615,0.768571,0.473404,0.651235,0.528571,0.641337,0.603825,0.623684,0.549296,0.591837,0.604585,0.566154,0.568,0.567358,0.682081
20403,0.339921,0.783237,0.787482,0.806818,0.821795,0.824096,0.761124,0.0,0.732026,0.710956,0.780738,0.599502,0.25,0.24,0.394422,0.685417,0.876682,0.566929,0.713198,0.778878,0.674556,0.888889,0.65861,0.700855,0.608696,0.754491,0.719512,0.794562,0.861985,0.851064,0.872464
20497,0.705036,0.59562,0.581845,0.584437,0.634691,0.648582,0.490698,0.732026,0.0,0.166213,0.34873,0.580042,0.729211,0.730159,0.702233,0.524558,0.689873,0.833735,0.58465,0.707124,0.614634,0.666667,0.535065,0.511628,0.58,0.666667,0.682927,0.644909,0.643678,0.638202,0.721106
20499,0.655263,0.621861,0.606335,0.611765,0.669302,0.677819,0.534279,0.710956,0.166213,0.0,0.430206,0.581345,0.690531,0.695332,0.650273,0.547284,0.690265,0.81201,0.625287,0.659884,0.543243,0.669492,0.561828,0.575521,0.494413,0.607143,0.651715,0.603989,0.654676,0.676606,0.746736


In [10]:
# Step 5 - Script to generate the sub-clustering (in this case the C cluster)
cutoff = 0.467  # distance threshold used for the sub-clustering (rebinds `cutoff`)
try:
    # scikit-learn >= 1.2 calls this parameter `metric`; the old `affinity`
    # keyword was removed in 1.4 and raises TypeError there.
    ag = AgglomerativeClustering(n_clusters=None, metric='precomputed', linkage='single', distance_threshold=cutoff, compute_distances=True)
except TypeError:
    # Older scikit-learn releases only accept the `affinity` keyword.
    ag = AgglomerativeClustering(n_clusters=None, affinity='precomputed', linkage='single', distance_threshold=cutoff, compute_distances=True)
cl2 = ag.fit_predict(tnsm)
print('Número de clusters =', len(pd.unique(cl2)))
nome_cluster = 'C'
# Sub-cluster names are the parent letter followed by a 1-based number, e.g. 'C1'.
cl_let2 = [nome_cluster + '{}'.format(c+1) for c in cl2]

Número de clusters = 11


In [11]:
# Step 6 - Joining the sub-clusters with the clusters
# Address the rows through tnsm.index (the ligands the sub-cluster labels were
# computed for) rather than through cluster == 'C': after the first run no row is
# labelled 'C' any more, so the label-based mask would make this cell non-re-runnable.
tanimoto_similarity.loc[tnsm.index, 'cluster'] = cl_let2
tanimoto_similarity

Unnamed: 0,21070,12593,12666,12974,15338,15339,15849,17294,17296,17456,17474,17498,20403,20497,20499,20500,20509,20525,20550,21148,21266,3206,5651,6315,ZINC100199761,ZINC100232131,ZINC142456176,(E)-Nerolidol,ZINC1676040,ZINC175245225,ZINC1846611,ZINC1849759,ZINC2018831,ZINC2600024,ZINC33841709,ZINC3861087,ZINC4098262,ZINC59206468,ZINC59586886,ZINC59778978,ZINC6071066,ZINC62237753,ZINC64634151,ZINC8220462,ZINC8234296,ZINC968471,cluster
21070,0.0,0.796992,0.799697,0.807229,0.839947,0.849754,0.704787,0.900247,0.901245,0.906327,0.895793,0.912347,0.339921,0.705036,0.655263,0.776549,0.489855,0.242915,0.269565,0.089947,0.766323,0.851449,0.62704,0.880488,0.587719,0.753351,0.748092,0.868794,0.590278,0.836066,0.877076,0.632653,0.689873,0.924171,0.824561,0.857724,0.501845,0.714777,0.960784,0.682927,0.938389,0.734043,0.836957,0.840731,0.858553,0.958791,C3
12593,0.796992,0.0,0.236919,0.422914,0.4609,0.472973,0.647321,0.721049,0.727405,0.726943,0.727199,0.779241,0.783237,0.59562,0.621861,0.613087,0.69931,0.787535,0.799414,0.795385,0.842963,0.790507,0.570014,0.694815,0.889552,0.732857,0.809524,0.849379,0.735878,0.724812,0.806907,0.718507,0.713846,0.9616,0.741985,0.858766,0.716279,0.795764,0.912975,0.791541,0.836898,0.807044,0.742604,0.746744,0.822358,0.890582,C6
12666,0.799697,0.236919,0.0,0.247227,0.369153,0.394517,0.646617,0.723886,0.730263,0.732558,0.728305,0.791827,0.787482,0.581845,0.606335,0.623932,0.702778,0.786533,0.800296,0.801858,0.835596,0.787375,0.517751,0.684766,0.882979,0.727536,0.795122,0.834395,0.726283,0.742129,0.796474,0.700159,0.709828,0.962723,0.73839,0.856672,0.712264,0.794793,0.906452,0.78681,0.842742,0.798752,0.737237,0.735988,0.81804,0.895397,C6
12974,0.807229,0.422914,0.247227,0.0,0.318759,0.347884,0.665557,0.74981,0.753612,0.756798,0.746803,0.82008,0.806818,0.584437,0.611765,0.562914,0.727549,0.797753,0.809683,0.80776,0.85956,0.753452,0.532468,0.685908,0.890244,0.745981,0.813653,0.792381,0.748264,0.709059,0.732039,0.669759,0.684116,0.963671,0.684404,0.831373,0.732865,0.807958,0.875969,0.805172,0.861194,0.795699,0.705575,0.711375,0.789762,0.8896,C6
15338,0.839947,0.4609,0.369153,0.318759,0.0,0.162821,0.707733,0.686963,0.689432,0.692857,0.701727,0.766418,0.821795,0.634691,0.669302,0.604775,0.725,0.812977,0.827451,0.844086,0.876963,0.813953,0.581486,0.705405,0.897987,0.759331,0.840112,0.836415,0.781965,0.77321,0.790462,0.748611,0.747599,0.96875,0.753804,0.866279,0.773713,0.834446,0.902299,0.832224,0.875297,0.824176,0.754032,0.737617,0.829876,0.905237,C6
15339,0.849754,0.472973,0.394517,0.347884,0.162821,0.0,0.720245,0.675543,0.677918,0.676718,0.685868,0.750913,0.824096,0.648582,0.677819,0.620647,0.744755,0.825623,0.838002,0.85375,0.878676,0.828188,0.608748,0.72327,0.905237,0.76988,0.853437,0.839523,0.794486,0.724675,0.806409,0.759067,0.759591,0.971091,0.731126,0.87651,0.788413,0.846154,0.909695,0.845488,0.874439,0.836943,0.768461,0.756219,0.839332,0.90771,C6
15849,0.704787,0.647321,0.646617,0.665557,0.707733,0.720245,0.0,0.841894,0.846216,0.848,0.848816,0.874332,0.761124,0.490698,0.534279,0.557734,0.545035,0.734266,0.722922,0.690808,0.761905,0.758017,0.511727,0.634615,0.768571,0.473404,0.651235,0.755814,0.528571,0.836957,0.641337,0.603825,0.623684,0.933333,0.747549,0.7,0.549296,0.591837,0.842105,0.604585,0.839583,0.566154,0.568,0.567358,0.682081,0.875576,C10
17294,0.900247,0.721049,0.723886,0.74981,0.686963,0.675543,0.841894,0.0,0.067797,0.468185,0.519635,0.621816,0.883117,0.821119,0.836638,0.812746,0.839749,0.880837,0.895595,0.91067,0.909619,0.920271,0.796729,0.853323,0.939419,0.867729,0.919192,0.914894,0.884521,0.871837,0.913518,0.875828,0.867769,0.981051,0.875828,0.940925,0.883607,0.903545,0.949225,0.898431,0.834855,0.911277,0.879352,0.860862,0.914167,0.86383,B
17296,0.901245,0.727405,0.730263,0.753612,0.689432,0.677918,0.846216,0.067797,0.0,0.477941,0.51938,0.623853,0.885808,0.82157,0.838118,0.81689,0.839399,0.884398,0.898361,0.910833,0.910684,0.919589,0.798905,0.855863,0.943333,0.870296,0.921253,0.914163,0.886326,0.872638,0.914624,0.876667,0.869493,0.981771,0.876667,0.942241,0.884488,0.905473,0.948785,0.901245,0.837364,0.911445,0.880196,0.863451,0.913445,0.867521,B
17456,0.906327,0.726943,0.732558,0.756798,0.692857,0.676718,0.848,0.468185,0.477941,0.0,0.563054,0.623775,0.890057,0.839036,0.846336,0.818824,0.846635,0.886838,0.902518,0.914946,0.913907,0.912671,0.809266,0.857605,0.938385,0.871917,0.92893,0.910959,0.895122,0.876121,0.917854,0.885691,0.881342,0.981002,0.868003,0.936207,0.896916,0.916735,0.947277,0.90991,0.805627,0.917355,0.884491,0.879711,0.920266,0.840731,B


### Stage 2: Calculating the Belonging Degree of Essential Oil compounds to each identified cluster.

In [16]:
# Step 1 - Load the Tanimoto file of hit ligands and essential-oil compounds
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# The file contains the Tanimoto values between hits and essential-oil (EO) compounds;
# what is kept is 1 - value for every entry.
oil_tanimoto = 1 - pd.read_csv('./input/OleosHits2.csv', sep=',', index_col=0)

# Compound-name lists used below to select rows of `oil_tanimoto`; every name must
# therefore match a row label of that matrix exactly.
# NOTE(review): each list presumably holds the composition of one essential oil and the
# variable names abbreviate the source plants - confirm with the authors.
# The same compound can occur in several lists (e.g. 'Alpha-Pinene').
bdrac = ['Alpha-Pinene', 'Sabinene', 'Beta-Pinene', 'Beta-Myrcene', 'Limonene','(E)-Caryophyllene','Germacrene','Bicyclogermacrene','delta-Cadinene','(E)-Nerolidol','Spathulenol','Caryophylene_Oxide']

bret = ['Alpha-Thujene','Alpha-Pinene','Beta-Pinene','Beta-Myrcene','(E)-Caryophyllene','Germacrene','delta-Cadinene','Caryophylene_Oxide']

disi = ['Alpha-Pinene','Beta-Myrcene','Limonene','(E)-beta-ocimene','(E)-Caryophyllene','alpha-Humulene','Germacrene','Bicyclogermacrene','delta-Cadinene']

# Some names below contain non-ASCII characters (Greek letters); keep them byte-identical.
calli = ['3-methylhexane','Alpha-Pinene','e-hex-2-enal','alilpenthylesteroxalyc-acid','3-7-7-trimethylbicyclo-[4-1-0]hepet-2-ene',
         '3-methylbutane-2-methylbutanoate','7-7-dimethyl-2-metenilbicyclo[2-2-1]Heptane','6-6-dimethyl-2-metenilbicyclo[2-2-1]heptan-3-one',
         '1-methyl-4-isopropilcyclohex-2-en-1-ol','α-terpineol','1-8-cineol','trans-p-mentha-6-8-dien-2-ol','e-pent-3-en-2-one',
         '1-2-metoxi-4-(2-propenil)-phenol','α-camfolenal','1-2-3-4-4a-5-6-8a-octahydro-7-methyl-4-metenil-1-isoproprilnaphthalene',
         'Z-3-7-dimethyl-2-6-octadien-1-ol','α-bisabolene-epoxide','Alpha-Selinene','Beta-Selinene','Delta-Selinene','Gamma-Selinene',
         'decahydro-1-1-7-trimethyl-4-metenil-[1aR-(1aα-4aα-7α-7aβ-7bα)]-1H-cycloprop[e]azulene','1-2-3-5-6-7-8-8a-octahydro-1-8a-dimethyl-7-(1-methyletenil)-[1R-(1α-7β-8aα)]-naphthalene',
         'cis-(-)-2-4a-5-6-9a-hexahydro-3-5-5-9-tetramethyl(1H)benzocycloheptene','Cubenol','guaia-1(10)-11-diene',
         '3-3-dimethylcyclohexanoacetaldehyde','3-4-7-trimethylbicyclo[4-3-0]non-3-ene','1-(5-hydroxi-4a-8-dimethyldecahydro-2-naftalenil)ethanone','5-methyl-2-(1-methyletilideno)-cyclohexanone',
         '2-methyl-5-isopropil-cyclohex-1-3-diene']

# One name below ('p-Menta-1-4(8)–dieno') contains an en dash rather than a hyphen.
myra = ['Alpha-Pinene','3-Carene','Camphene','alpha-Campholenal','Camfenol-6-','Limonene','m-Menta-6-8-dieno','p-Menta-1-4(8)–dieno','p-1-8-dieno-(S)','beta-Guaiene','(E)-Caryophyllene','Cedr-8(15)eno',
        'beta-Elemene','beta-Chamigrene','Guaia-1(5)-11-dieno','Patchoulenone','alpha-Himachalene','Ethylcaproate','cis-Geranylacetone','o-Anisic-acid-methyl-ester','Methyl-salicylate',
        'trans-Geranylacetone','Hexanoic-acid']

# Distinct compound names across the five lists, in order of first appearance.
# The list is wrapped in an ndarray (as the ligand list in Stage 1, Step 2 is) because
# recent pandas releases deprecate calling pd.unique on a plain Python list.
lista_final = pd.unique(np.array([*bdrac, *bret, *disi, *calli, *myra]))
# Label-based row selection: raises KeyError if a name is missing from oil_tanimoto.
essential_oils = oil_tanimoto.loc[lista_final]
essential_oils.to_excel('./output/MatrizOleos-HistsVsCompostos.xlsx', sheet_name='sheet1')
essential_oils # Matrix (m, n): rows m are the essential-oil compounds, columns n are the top-hit ligands; values are 1 - the Tanimoto value loaded from the file

Unnamed: 0,21070,12593,12666,12974,15338,15339,15849,17294,17296,17456,17474,17498,20403,20497,20499,20500,20509,20525,20550,21148,21266,3206,5651,6315,ZINC100199761,ZINC100232131,ZINC142456176,(E)-Nerolidol,ZINC1676040,ZINC175245225,ZINC1846611,ZINC1849759,ZINC2018831,ZINC2600024,ZINC33841709,ZINC3861087,ZINC4098262,ZINC59206468,ZINC59586886,ZINC59778978,ZINC6071066,ZINC62237753,ZINC64634151,ZINC8220462,ZINC8234296,ZINC968471
Alpha-Pinene,0.816024,0.756061,0.750769,0.753927,0.797858,0.81071,0.479042,0.888433,0.88843,0.900813,0.897844,0.909006,0.817694,0.527559,0.610966,0.68046,0.690821,0.815104,0.799427,0.80625,0.830769,0.775362,0.673913,0.651685,0.761733,0.369128,0.587045,0.754579,0.586093,0.88806,0.522634,0.59,0.601286,0.940171,0.78098,0.686957,0.586093,0.394309,0.788136,0.441406,0.8753,0.517647,0.508306,0.5,0.584615,0.879552
Sabinene,0.728435,0.804665,0.796736,0.807692,0.821288,0.830882,0.626016,0.893791,0.897457,0.903409,0.900842,0.910946,0.709677,0.691589,0.652284,0.773019,0.724057,0.656805,0.620462,0.758958,0.706485,0.869128,0.704255,0.746073,0.5,0.404605,0.446429,0.850847,0.5,0.906863,0.678967,0.736527,0.742029,0.921397,0.868633,0.771429,0.505263,0.330508,0.895349,0.334728,0.906542,0.473684,0.654655,0.655072,0.760135,0.909836
Beta-Pinene,0.759582,0.810398,0.809598,0.818662,0.846154,0.857143,0.62426,0.910299,0.911371,0.922634,0.917861,0.925996,0.786585,0.664948,0.602857,0.770833,0.73028,0.762763,0.742475,0.753676,0.774545,0.843137,0.711364,0.714706,0.660714,0.523333,0.438776,0.839844,0.419753,0.911051,0.60793,0.707483,0.714754,0.915344,0.855422,0.724138,0.432653,0.097297,0.885321,0.246305,0.902062,0.365854,0.609589,0.63871,0.705179,0.911585
Beta-Myrcene,0.963115,0.926868,0.920583,0.892788,0.917986,0.924202,0.865204,0.956859,0.955575,0.955844,0.964824,0.963746,0.964912,0.875339,0.881159,0.858667,0.918033,0.966555,0.965909,0.964912,0.96,0.769231,0.910798,0.815603,0.952128,0.855219,0.84,0.614286,0.893701,0.95122,0.660377,0.826271,0.839357,0.903226,0.768889,0.721311,0.893701,0.883408,0.157895,0.880531,0.862676,0.84,0.778226,0.787072,0.716763,0.818605
Limonene,0.87931,0.823622,0.813505,0.752437,0.809249,0.823765,0.647975,0.918987,0.918298,0.921519,0.928016,0.945402,0.891239,0.67663,0.698864,0.714653,0.796915,0.895652,0.890675,0.875912,0.870849,0.724638,0.770642,0.560284,0.864979,0.61745,0.560209,0.585106,0.70696,0.899705,0.087248,0.592742,0.616858,0.9125,0.702602,0.583851,0.70696,0.673554,0.586667,0.673469,0.868195,0.545455,0.517787,0.518939,0.522613,0.852113
(E)-Caryophyllene,0.778816,0.759146,0.755796,0.744227,0.787755,0.80454,0.547826,0.88687,0.887781,0.896637,0.898209,0.906367,0.777465,0.621891,0.603723,0.675234,0.701456,0.775956,0.766467,0.77451,0.79288,0.774074,0.686275,0.600592,0.707692,0.474522,0.459459,0.688976,0.521127,0.897243,0.442478,0.622517,0.632588,0.924107,0.739394,0.665158,0.551724,0.290749,0.726027,0.28821,0.862069,0.473251,0.491468,0.473333,0.595331,0.867435
Germacrene,0.866013,0.807752,0.793651,0.774074,0.815126,0.828794,0.646884,0.910535,0.910699,0.917569,0.922339,0.935545,0.878963,0.674479,0.699187,0.73913,0.75,0.883657,0.877676,0.862069,0.853147,0.728889,0.762749,0.654088,0.849802,0.630915,0.599057,0.62201,0.720137,0.902778,0.409326,0.615672,0.637011,0.923077,0.711806,0.603352,0.720137,0.695817,0.555556,0.695489,0.869919,0.618644,0.455253,0.503623,0.354167,0.855263
Bicyclogermacrene,0.803571,0.784661,0.776276,0.755208,0.775815,0.787342,0.588398,0.887797,0.88687,0.889344,0.89916,0.906221,0.74221,0.631068,0.665835,0.673563,0.693046,0.752044,0.740299,0.804348,0.787975,0.786477,0.704017,0.647059,0.684411,0.347973,0.625,0.695817,0.660436,0.902439,0.540323,0.612378,0.622642,0.922747,0.747059,0.668122,0.664596,0.608392,0.709821,0.627986,0.870813,0.594096,0.541935,0.528302,0.516,0.867978
delta-Cadinene,0.840731,0.746744,0.735988,0.711375,0.737617,0.756219,0.567358,0.860862,0.863451,0.879711,0.881868,0.895928,0.851064,0.638202,0.676606,0.636564,0.732181,0.847926,0.848635,0.836512,0.838798,0.806854,0.700397,0.561644,0.834835,0.541209,0.567273,0.729373,0.627907,0.896861,0.469697,0.631579,0.636364,0.938406,0.758621,0.728938,0.647564,0.625,0.741445,0.62963,0.865342,0.599338,0.342282,0.0,0.509025,0.861893
(E)-Nerolidol,0.868794,0.849379,0.834395,0.792381,0.836415,0.839523,0.755814,0.914894,0.914163,0.910959,0.935922,0.929619,0.878882,0.750649,0.754821,0.720207,0.812339,0.883929,0.873754,0.864662,0.913043,0.716418,0.784404,0.742138,0.926531,0.798817,0.804444,0.0,0.728938,0.881098,0.61194,0.616935,0.629344,0.936709,0.540426,0.638037,0.786713,0.841912,0.528571,0.84,0.869186,0.808,0.731959,0.729373,0.702703,0.853047


### Stage 3: Determining the BDSTFL matrix of the essential-oil compounds against the identified clusters

In [17]:
# Step 1 - Determine the cluster and sub-cluster centroids
# A centroid is the column-wise mean of all rows that share a 'cluster' label.
centroids = tanimoto_similarity.groupby(by='cluster').mean()
def get_cluster(oils, reference):
    """Assign every row of `oils` to its nearest row of `reference`.

    Nearness is the Euclidean norm of the difference between the two rows
    (pandas aligns the rows on their labels before subtracting).

    Parameters
    ----------
    oils : DataFrame whose rows are the items to assign.
    reference : DataFrame whose rows are the candidate centres.

    Returns
    -------
    list
        For each row of `oils`, in order, the index label of the closest
        row of `reference`; ties go to the first such row.
    """
    nearest_labels = []
    for row_pos in range(len(oils)):
        oil_row = oils.iloc[row_pos]
        distances = [
            np.linalg.norm(oil_row - reference.iloc[ref_pos])
            for ref_pos in range(len(reference))
        ]
        closest = distances.index(min(distances))
        nearest_labels.append(reference.index[closest])
    return nearest_labels

In [18]:
# Centroids: column-wise mean of all rows that share a 'cluster' label.
centroids = tanimoto_similarity.groupby(by='cluster').mean()
def get_cluster_fuzzy(oils, reference, m):
    """Compute fuzzy membership degrees of every row of `oils` to every row of `reference`.

    With d_j the Euclidean norm of (row - reference row j), the membership of a
    row to reference row j is::

        1 / sum_k (d_j / d_k) ** (2 / (m - 1))

    When a row has zero distance to one or more reference rows the formula
    degenerates to 0/0; in that case those reference rows share the membership
    equally and every other reference row gets 0.0.

    Parameters
    ----------
    oils : DataFrame whose rows are the items to score.
    reference : DataFrame whose rows are the centres. Rows are aligned on their
        labels before subtracting, so both frames should carry the same column labels.
    m : number used in the exponent 2 / (m - 1); must differ from 1, where that
        exponent is undefined.

    Returns
    -------
    dict
        Maps each index label of `oils` to a list of memberships, one per row of
        `reference`, in the row order of `reference`.
    """
    exponent = 2 / (m - 1)
    cluster = {}
    for i in range(len(oils)):
        oil_row = oils.iloc[i]
        # One norm per reference row, computed once instead of inside the double loop.
        dists = [np.linalg.norm(oil_row - reference.iloc[j]) for j in range(len(reference))]
        coincides = [d == 0 for d in dists]
        if any(coincides):
            # The row sits exactly on at least one centre: full membership there.
            share = 1.0 / sum(coincides)
            memberships = [share if hit else 0.0 for hit in coincides]
        else:
            # Terms are added left to right, the same order as a plain accumulation loop.
            memberships = [1 / sum((num / d) ** exponent for d in dists) for num in dists]
        cluster[oils.index[i]] = memberships

    return cluster

In [24]:
# Fuzzy memberships of every essential-oil compound to every cluster centroid.
# 1.496261586526824 is the `m` argument of get_cluster_fuzzy.
# NOTE(review): where this value comes from is not shown in the notebook - document it.
# The frame is built with clusters as rows and compounds as columns, then transposed
# so that compounds become the rows.
tanimoto_similarity_oils = pd.DataFrame(
    get_cluster_fuzzy(essential_oils, centroids, 1.496261586526824),
    index=centroids.index,
).T
tanimoto_similarity_oils.to_excel('./output/Tanimoto-Similarity_Oilcompound-to-ClusterCentroid.xlsx')
tanimoto_similarity_oils # BDSTFL matrix (m, n): rows m are the essential-oil compounds, columns n are the clusters; each value is the membership returned by get_cluster_fuzzy

cluster,A,B,C1,C10,C11,C2,C3,C4,C5,C6,C7,C8,C9,D,E,F,G,H,I,J
Alpha-Pinene,0.007976,0.002093,0.178422,0.116649,0.033253,0.140204,0.004369,0.380391,0.029474,0.00523,0.028554,0.016006,0.015403,0.018524,0.008102,0.002067,0.002351,0.005807,0.003408,0.001717
Sabinene,0.003865,0.001841,0.017247,0.024351,0.010116,0.690594,0.010345,0.164838,0.008977,0.003201,0.009316,0.010209,0.013552,0.008337,0.004117,0.001922,0.001915,0.010525,0.002953,0.001777
Beta-Pinene,0.001517,0.000632,0.008061,0.008253,0.003937,0.927001,0.002111,0.023264,0.003399,0.001091,0.003475,0.003192,0.003751,0.003382,0.001533,0.000659,0.000708,0.002387,0.000992,0.000654
Beta-Myrcene,0.792273,0.00651,0.015115,0.004535,0.014721,0.004465,0.003088,0.005378,0.005883,0.004197,0.004491,0.0037,0.004235,0.038881,0.02587,0.008815,0.018546,0.010472,0.012292,0.016534
Limonene,0.03871,0.002747,0.663109,0.020585,0.069277,0.01953,0.003065,0.025454,0.024038,0.005212,0.013642,0.00781,0.007505,0.061542,0.017231,0.002811,0.004068,0.005986,0.004744,0.002933
(E)-Caryophyllene,0.008565,0.002199,0.148546,0.070756,0.032144,0.42096,0.005501,0.184836,0.025379,0.005247,0.022587,0.015147,0.014916,0.018754,0.008055,0.002157,0.0025,0.006476,0.003402,0.001873
Germacrene,0.016746,0.000972,0.877865,0.007762,0.020296,0.006241,0.001099,0.009578,0.008619,0.001832,0.004831,0.002884,0.003023,0.023834,0.006923,0.001007,0.001444,0.002294,0.001744,0.001007
Bicyclogermacrene,0.014188,0.002497,0.301151,0.067326,0.047258,0.048052,0.005778,0.350589,0.036213,0.005984,0.024912,0.015725,0.017901,0.03007,0.012116,0.002493,0.002925,0.008454,0.004278,0.00209
delta-Cadinene,0.006792,0.001675,0.817681,0.023932,0.03632,0.015456,0.002015,0.028897,0.012591,0.004139,0.011468,0.007063,0.006015,0.010089,0.005582,0.0016,0.001838,0.003248,0.002357,0.001244
(E)-Nerolidol,0.8074,0.003197,0.017295,0.006235,0.014898,0.00466,0.002612,0.005744,0.013129,0.004161,0.006853,0.004953,0.005042,0.039529,0.039837,0.003537,0.005207,0.005225,0.006826,0.00366
