# Imports e inicialización del servidor

In [2]:
import mysql.connector
import os, getpass, re, requests
import gensim
import pandas as pd
import numpy as np
import cvxpy as cp
import tensorflow as tf
import matplotlib.pyplot as plt
from mysql.connector import Error
from itertools import combinations
from ast import literal_eval
from collections import defaultdict
from scipy.spatial import distance
from sklearn.manifold import TSNE
from bs4 import BeautifulSoup
import plotly.graph_objects as go
# Module-level constant. It is not referenced in the visible cells — TODO confirm where K is used.
K = 10

In [3]:
# Connection parameters for the local MySQL database.
# The password is requested interactively via getpass, so it is never stored in the notebook.
connection_params = {
    'host': 'localhost',
    'user': 'cmescobar',
    'database': 'foodb',
    'password': getpass.getpass(prompt='Introduzca la contraseña: ')
}

# Open the connection. On success `connection` (and `cursor`) become notebook-level names.
# NOTE(review): if connect() raises, the error is only printed and `connection` is not
# assigned by this cell, so later cells that use it will fail.
try:
    connection = mysql.connector.connect(**connection_params)

    if connection.is_connected():
        # Report the server version as a quick connectivity check
        db_Info = connection.get_server_info()
        print("Connected to MySQL Server version ", db_Info)
        # NOTE(review): `cursor` is not used in the visible cells — confirm it is needed
        cursor = connection.cursor()

except Error as e:
    print("Error while connecting to MySQL", e)

Introduzca la contraseña: ········
Connected to MySQL Server version  8.0.26


In [4]:
def cosine_similarity(A, b):
    """Cosine similarity between each row of ``A`` and the vector ``b``.

    Parameters
    ----------
    A : array
        Two-dimensional array; the reduction runs along axis 1, so one value
        is produced per row.
    b : array
        Vector that is broadcast against the rows of ``A``.

    Returns
    -------
    array
        ``(A_i . b) / (||A_i|| * ||b||)`` for every row ``A_i``.
    """
    # Row-wise dot products with b
    dot_products = (A * b).sum(axis=1)

    # Euclidean norms of the rows and of the query vector
    row_norms = np.sqrt((A ** 2).sum(axis=1))
    vector_norm = np.sqrt((b ** 2).sum())

    return dot_products / (row_norms * vector_norm)


def find_Nprox_words(terms_to_rev, N_words, metric='euclidean'):
    """Find, for each query term, its ``N_words`` closest vocabulary words.

    The function reads three notebook-level globals: ``embeddings_index``
    (word -> vector lookup), ``embedding_matrix`` (one vector per row) and
    ``embedding_words`` (row index -> word).
    NOTE(review): the loading cell visible in this notebook defines
    ``embeddings_matrix`` and ``idx2word`` instead — confirm that
    ``embedding_matrix`` / ``embedding_words`` are defined elsewhere.

    Parameters
    ----------
    terms_to_rev : iterable of str
        Query terms. Terms without an entry in ``embeddings_index`` are skipped.
    N_words : int
        Number of neighbours to return per term.
    metric : {'euclidean', 'cosine'}
        'euclidean' ranks by ascending squared distance; 'cosine' ranks by
        descending cosine similarity rescaled to ``(cos + 1) / 2``.

    Returns
    -------
    dict
        ``term -> [(word, row_index, score), ...]`` where ``score`` is the
        squared distance or the rescaled similarity, depending on ``metric``.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported values.
    """
    # Fail early on an unknown metric instead of hitting an unbound local later
    if metric not in ('euclidean', 'cosine'):
        raise ValueError('Parámetro "metric" no válido. Intente nuevamente.')

    word_dict_out = dict()

    for term in terms_to_rev:
        # Embedding vector of the query term
        embed_term = embeddings_index.get(term)

        if embed_term is None:
            continue

        if metric == 'euclidean':
            # Squared Euclidean distance, smallest first
            scores = np.sum((embedding_matrix - embed_term) ** 2, axis=1)
            order = np.argsort(scores, kind='stable')
        else:
            # Cosine similarity mapped to [0, 1], largest first
            scores = (cosine_similarity(embedding_matrix, embed_term) + 1) / 2
            order = np.argsort(-scores, kind='stable')

        # Position 0 is skipped: it is assumed to be the query term itself
        word_dict_out[term] = [(embedding_words[int(i)], int(i), scores[i])
                               for i in order[1:1 + N_words]]

    return word_dict_out


def find_Nprox_words_joint(terms_to_rev, N_words, metric='euclidean'):
    """Find the vocabulary words closest to the mean embedding of several terms.

    The function reads three notebook-level globals: ``embeddings_index``
    (word -> vector lookup), ``embedding_matrix`` (one vector per row) and
    ``embedding_words`` (row index -> word).
    NOTE(review): the loading cell visible in this notebook defines
    ``embeddings_matrix`` and ``idx2word`` instead — confirm that
    ``embedding_matrix`` / ``embedding_words`` are defined elsewhere.

    Parameters
    ----------
    terms_to_rev : sequence of str
        Query terms. Terms without an entry in ``embeddings_index`` are left
        out of the average.
    N_words : int
        Number of neighbours to return.
    metric : {'euclidean', 'cosine'}
        'euclidean' ranks by ascending squared distance; 'cosine' ranks by
        descending cosine similarity rescaled to ``(cos + 1) / 2``.

    Returns
    -------
    dict
        A single entry keyed by the space-joined terms, holding
        ``[(word, row_index, score), ...]``. The dict is empty when none of
        the terms has an embedding.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported values.
    """
    # Fail early on an unknown metric instead of hitting an unbound local later
    if metric not in ('euclidean', 'cosine'):
        raise ValueError('Parámetro "metric" no válido. Intente nuevamente.')

    word_dict_out = dict()

    # Collect the embedding vectors of the terms that are in the vocabulary
    embed_total = list()
    for term in terms_to_rev:
        embed_term = embeddings_index.get(term)
        if embed_term is not None:
            embed_total.append(embed_term)

    # Without any known term the mean would be NaN and the ranking meaningless
    if not embed_total:
        return word_dict_out

    # Mean of the embedded vectors
    embed_total = np.array(embed_total).mean(axis=0)

    if metric == 'euclidean':
        # Squared Euclidean distance, smallest first
        scores = np.sum((embedding_matrix - embed_total) ** 2, axis=1)
        order = np.argsort(scores, kind='stable')
    else:
        # Cosine similarity mapped to [0, 1], largest first
        scores = (cosine_similarity(embedding_matrix, embed_total) + 1) / 2
        order = np.argsort(-scores, kind='stable')

    # NOTE(review): position 0 is skipped as in the per-term search, but the mean
    # vector is generally not a vocabulary row, so this drops the closest word —
    # confirm this is intended.
    word_dict_out[' '.join(terms_to_rev)] = [(embedding_words[int(i)], int(i), scores[i])
                                             for i in order[1:1 + N_words]]

    return word_dict_out

# Obteniendo los datos

In [7]:
# SQL statement: id, name, description and the three chebi_* columns of health_effects
query = \
'''SELECT he.id, he.name, he.description, he.chebi_name, he.chebi_id, he.chebi_definition 
   FROM health_effects he 
'''

# Run the query over the open connection and load the result into a DataFrame
df = pd.read_sql(query, connection)
# Bare expression: shows the DataFrame as the cell output
df

Unnamed: 0,id,name,description,chebi_name,chebi_id,chebi_definition
0,1,(+)-inotropic,An agent that alters the force or energy of mu...,,,
1,2,(-)-chronotropic,An agent that may change the heart rate by aff...,,,
2,3,(-)-inotropic,An agent that alters the force or energy of mu...,,,
3,4,11beta-hydroxysteroid-dehydrogenase inhibitor,,enzyme inhibitor,23924,A compound or agent that combines with an enzy...
4,5,12-lipoxygenase inhibitor,,enzyme inhibitor,23924,A compound or agent that combines with an enzy...
...,...,...,...,...,...,...
1430,1431,xanthine oxidase inhibitor,,xanthine oxidase inhibitor,35634,An EC 1.17.3.* (oxidoreductase acting on CH or...
1431,1432,inhibitor,,inhibitor,35222,A substance that diminishes the rate of a chem...
1432,1433,prostaglandin antagonist,,prostaglandin antagonist,49023,A compound that inhibits the action of prostag...
1433,1434,platelet aggregation inhibitor,,platelet aggregation inhibitor,50427,A drug or agent which antagonizes or impairs a...


# Abriendo el modelo GloVe entrenado en Wikipedia

In [23]:
# Word -> embedding vector lookup
embeddings_index = dict()

# Embedding rows in file order (converted to an array after the loop)
embeddings_matrix = list()

# Row index -> word decoding dictionary
idx2word = dict()

# Read the embedding text file: each line is split on whitespace, the first token
# is taken as the word and the remaining tokens are parsed as float32 coefficients.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
# NOTE(review): other cells reference globals named `embedding_matrix` and
# `embedding_words`, which differ from the names defined here — confirm.
with open('C:/Users/Chris-Brota/Desktop/glove.6b/glove.6B.100d.txt', 'r', encoding='utf8') as file:
    for num, line in enumerate(file):
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
        embeddings_matrix.append(coefs)
        idx2word[num] = word

# Stack the collected rows into a single array
embeddings_matrix = np.array(embeddings_matrix)

print(f'Vector de {len(embeddings_index)} palabras cargadas.')

Vector de 400001 palabras cargadas.


In [6]:
# Spot check: print the vector stored for 'citric' (prints None if the word is absent)
print(embeddings_index.get('citric'))

[ 0.28123   0.19583   0.06453  -0.13524   0.99726  -0.19329  -0.48548
  0.43083  -0.16015  -0.12496  -0.5319    0.57561  -0.5881    0.085808
 -0.0859    0.75721  -1.1515   -0.084785  0.65576   0.064682  0.067194
 -1.1096    0.92557   0.57395  -0.34474   0.37906  -0.82575  -0.55303
 -0.58417   0.044774  0.76227   0.75636  -0.45388  -0.8378   -0.39259
 -0.3134    0.29243   0.025914  1.0914   -0.13309   0.57329  -0.12037
 -0.90361  -0.91887   1.013     1.276    -0.31055  -0.86031  -0.45764
 -0.35792  -0.34129  -0.64878  -0.21554  -0.32376  -0.75714   0.80563
 -1.143     0.128    -0.34669  -0.40518  -0.031299 -0.63768  -0.35624
 -0.28367   0.73029  -0.47432   0.84953  -1.1061    0.046679 -1.1738
 -0.4851    0.45272   0.16722   0.35547  -0.27191  -0.26114   0.21498
 -0.79119  -0.75023  -0.63608   0.068707 -0.46095  -0.44356   0.52749
 -0.32422   0.77409  -0.81332   0.1544   -0.40902   0.4267   -0.59445
 -0.10446  -1.2722   -0.76292   0.6785   -0.43202  -0.19278  -0.26015
  0.93397   0.54029

# Procesando las etiquetas

In [22]:
def preprocessing_text(text):
    """Normalize a label into lowercase, space-separated alphabetic words.

    Steps, in order: remove URLs, remove e-mail-like tokens, remove every
    character that is not an ASCII letter, a hyphen or whitespace (digits and
    punctuation are dropped), turn hyphens into spaces, collapse whitespace
    runs, trim the ends and lowercase.

    Parameters
    ----------
    text : str
        Raw text to clean.

    Returns
    -------
    str
        The cleaned text, without leading or trailing whitespace.
    """
    # All patterns are raw strings so that `\S`, `\s` and `\-` reach the regex
    # engine untouched instead of being invalid string escapes.

    # Remove URLs
    text = re.sub(r'https?://\S+|www\.\S+', '', text)

    # Remove e-mail-like tokens
    text = re.sub(r'\S*@\S*\s?', '', text)

    # Remove non-alphabetic characters (hyphens and whitespace are kept for now)
    text = re.sub(r'[^A-Za-z\-\s]+', '', text)

    # Turn hyphen runs into a space
    text = re.sub(r'-+', ' ', text)

    # Collapse whitespace runs; strip the space left behind when a leading
    # prefix such as "(+)-" or "12-" was removed
    text = re.sub(r'\s+', ' ', text).strip()

    # Lowercase everything
    return text.lower()


# List that collects the cleaned version of every health-effect name
names = list()
for text in df['name']:
    preprocessed_text = preprocessing_text(text)
    names.append(preprocessed_text)
    # Show the raw and the cleaned name side by side for visual inspection
    print(text, '\t', preprocessed_text)

(+)-inotropic 	  inotropic
(-)-chronotropic 	  chronotropic
(-)-inotropic 	  inotropic
11beta-hydroxysteroid-dehydrogenase inhibitor 	 beta hydroxysteroid dehydrogenase inhibitor
12-lipoxygenase inhibitor 	  lipoxygenase inhibitor
17beta-hydroxysteroid-dehydrogenase inhibitor 	 beta hydroxysteroid dehydrogenase inhibitor
5alpha-reductase inhibitor 	 alpha reductase inhibitor
5-hydroxyeicosatetraenoic-acid inhibitor 	  hydroxyeicosatetraenoic acid inhibitor
5-hydroxytryptamine inhibitor 	  hydroxytryptamine inhibitor
5-lipoxygenase inhibitor 	  lipoxygenase inhibitor
8-hydroxyeicosatetraenoic-acid inhibitor 	  hydroxyeicosatetraenoic acid inhibitor
beta inhibitor 	 beta inhibitor
abortifacient 	 abortifacient
absorbent 	 absorbent
acantholytic 	 acantholytic
acaricide 	 acaricide
acarifuge 	 acarifuge
angiotensin converting enzyme inhibitor 	 angiotensin converting enzyme inhibitor
acetyl-CoA-carboxylase inhibitor 	 acetyl coa carboxylase inhibitor
acetylcholinergic 	 acetylcholinergic


pediculicide 	 pediculicide
pemphigenic 	 pemphigenic
percutaneostimulant 	 percutaneostimulant
perfume 	 perfume
perfumery 	 perfumery
peristaltic 	 peristaltic
peroxidase inhibitor 	 peroxidase inhibitor
peroxynitrite scavenger 	 peroxynitrite scavenger
pesticide 	 pesticide
prostaglandin-E2 inhibitor 	 prostaglandin e inhibitor
phagocytotic 	 phagocytotic
pheromonal 	 pheromonal
pheromone 	 pheromone
phosphodiesterase inhibitor 	 phosphodiesterase inhibitor
phospholipase-A2 inhibitor 	 phospholipase a inhibitor
phospholipase inhibitor 	 phospholipase inhibitor
photoactive 	 photoactive
photocarcinogenic 	 photocarcinogenic
photodermatitigenic 	 photodermatitigenic
photosensitizer 	 photosensitizer
phototoxic 	 phototoxic
phytoalexin 	 phytoalexin
phytohormonal 	 phytohormonal
phytohormone 	 phytohormone
phytotoxic 	 phytotoxic
pigment 	 pigment
piscicide 	 piscicide
pituitary sensitizer 	 pituitary sensitizer
pituitary stimulant 	 pituitary stimulant
protein kinase C inhibitor 	 pro

# Crear un diccionario de vectores embebidos para cada nombre 

In [40]:
# Raw name -> list of embedding vectors, one per word found in embeddings_index
embedded_names = dict()
# Words (longer than two characters) that have no entry in embeddings_index
rare_words = list()

# Build the per-name list of embedding vectors
for text in df['name']:
    # Clean the text
    preprocessed_text = preprocessing_text(text)

    # Split into words
    preprocessed_text = preprocessed_text.split()

    # Embedding vectors collected for this name
    embedded_vects_i = list()

    for i in preprocessed_text:
        # Words of one or two characters are ignored
        if len(i) > 2:
            # Look up the embedding vector
            embedded_i = embeddings_index.get(i)
            if embedded_i is not None:
                embedded_vects_i.append(embedded_i)
            else:
                rare_words.append(i)

    # Names without any embedded word are reported and left out of the dictionary
    if embedded_vects_i:
        embedded_names[text] = embedded_vects_i
    else:
        print(f'Nombre {text} no tiene vectores embebidos')

Nombre (-)-chronotropic no tiene vectores embebidos
Nombre acantholytic no tiene vectores embebidos
Nombre acaricide no tiene vectores embebidos
Nombre acarifuge no tiene vectores embebidos
Nombre acetylcholinergic no tiene vectores embebidos
Nombre acidifier no tiene vectores embebidos
Nombre acidulant no tiene vectores embebidos
Nombre acnegenic no tiene vectores embebidos
Nombre adaptogenic no tiene vectores embebidos
Nombre aedifuge no tiene vectores embebidos
Nombre aggregant no tiene vectores embebidos
Nombre albuminurigenic no tiene vectores embebidos
Nombre allelochemic no tiene vectores embebidos
Nombre amblyopic no tiene vectores embebidos
Nombre amebicide no tiene vectores embebidos
Nombre amnesigenic no tiene vectores embebidos
Nombre amphetaminagenic no tiene vectores embebidos
Nombre amphiestrogenic no tiene vectores embebidos
Nombre amphiglycemic no tiene vectores embebidos
Nombre amphitensive no tiene vectores embebidos
Nombre analeptic no tiene vectores embebidos
Nombr

# Obtener vectores representativos por concepto

In [54]:
# key_list[k] is the name whose representative vector is row k of embedded_main_names
key_list = list()
embedded_main_names = list()

for key in embedded_names.keys():
    # Representative vector: mean of the word vectors of the name
    repr_vect = np.array(embedded_names[key]).mean(axis=0)
    embedded_main_names.append(repr_vect)
    key_list.append(key)

# Convert to an array (one row per name)
embedded_main_names = np.array(embedded_main_names)
embedded_main_names.shape

(1050, 100)

# Definición de funciones alimenticias objetivo

In [50]:
def find_Nprox_concepts(concept, embedding_matrix, index2word, N_words,
                        metric='euclidean', skip_first=True):
    """Rank the rows of ``embedding_matrix`` by proximity to a concept word.

    The concept vector is looked up in the notebook-level ``embeddings_index``.

    Parameters
    ----------
    concept : str
        Word whose embedding is used as the query.
    embedding_matrix : array
        Two-dimensional array with one candidate vector per row.
    index2word : list or dict
        Maps a row index of ``embedding_matrix`` to its label.
    N_words : int
        Number of results to return.
    metric : {'euclidean', 'cosine'}
        'euclidean' ranks by ascending distance; 'cosine' ranks by descending
        cosine similarity.
    skip_first : bool, default True
        When True (the historical behaviour) the top-ranked row is dropped, on
        the assumption that it is the concept itself. Pass False when
        ``embedding_matrix`` does not contain the concept vector; otherwise
        the best match is discarded.

    Returns
    -------
    list of tuple or None
        ``[(label, score), ...]`` where ``score`` is the Euclidean distance or
        the cosine similarity multiplied by 100. ``None`` (after printing a
        message) when the concept has no entry in ``embeddings_index``.

    Raises
    ------
    ValueError
        If ``metric`` is not one of the supported values.
    """
    # Validate the metric before doing any work
    if metric not in ('euclidean', 'cosine'):
        raise ValueError('Parámetro "metric" no válido. Intente nuevamente.')

    # Embedding vector of the concept
    embedded_concept = embeddings_index.get(concept)

    # Without an embedding there is nothing to compare against
    if embedded_concept is None:
        print(f'Concept {concept} not found.')
        return

    if metric == 'euclidean':
        # Squared Euclidean distance, smallest first
        scores = np.sum((embedding_matrix - embedded_concept) ** 2, axis=1)
        order = np.argsort(scores, kind='stable')
    else:
        # Cosine similarity, largest first
        scores = cosine_similarity(embedding_matrix, embedded_concept)
        order = np.argsort(-scores, kind='stable')

    start = 1 if skip_first else 0
    selected = order[start:start + N_words]

    # The final score is computed per selected element only
    if metric == 'euclidean':
        return [(index2word[int(i)], np.sqrt(scores[i])) for i in selected]

    return [(index2word[int(i)], scores[i] * 100) for i in selected]


def find_concepts_by_radius(concept, embedding_matrix, index2word, radius):
    """Return the rows of ``embedding_matrix`` within ``radius`` of a concept.

    The concept vector is looked up in the notebook-level ``embeddings_index``.
    Each row is compared by Euclidean distance; rows whose distance is less
    than or equal to ``radius`` are returned as ``(label, distance)`` pairs,
    closest first, with labels taken from ``index2word``. When the concept has
    no embedding a message is printed and ``None`` is returned.
    """
    query_vector = embeddings_index.get(concept)

    # Nothing to compare against: report it and stop
    if query_vector is None:
        print(f'Concept {concept} not found.')
        return

    # Euclidean distance from every row to the query vector
    distances = np.sqrt(((embedding_matrix - query_vector) ** 2).sum(axis=1))

    # Keep the rows inside the radius and order them from nearest to farthest
    inside = sorted(
        ((row, dist) for row, dist in enumerate(distances) if dist <= radius),
        key=lambda pair: pair[1],
    )

    return [(index2word[row], dist) for row, dist in inside]

In [58]:
# Target concepts to look up in the embedding space
objective_concepts = ['digestion', 'antioxidant', 'nutrition',  'vitamins', 
                      'nutrients', 'minerals', 'energetic', 'energy']

# For each concept, rank the health-effect names (rows of embedded_main_names,
# labelled by key_list) by cosine similarity to the concept's word vector.
# NOTE(review): find_Nprox_concepts drops the top-ranked row; here the matrix holds
# name vectors rather than the vocabulary, so that row may be a real match — confirm.
for concept in objective_concepts:
    result = find_Nprox_concepts(concept, embedding_matrix=embedded_main_names, 
                                 index2word=key_list, N_words=20, 
                                 metric='cosine')
    
    # Disabled alternative: radius search over the full embedding vocabulary
#     result = find_concepts_by_radius(concept, embedding_matrix=embeddings_matrix, 
#                                      index2word=idx2word, radius=5)
    
    print(concept)
    # `display` is not imported in the visible cells; presumably the IPython built-in
    display(result)

1050
digestion


[('insulin-sparing', 54.072123765945435),
 ('choline-sparing', 52.624547481536865),
 ('calcium-sparing', 52.448850870132446),
 ('collagen-sparing', 49.449992179870605),
 ('proteolytic', 49.178722500801086),
 ('heme-sparing', 49.05781447887421),
 ('trypsin enhancer', 46.5628981590271),
 ('dopamine-adenylate-cyclase inhibitor', 46.34324014186859),
 ('potassium-sparing', 46.31853401660919),
 ('angiotensin converting enzyme inhibitor', 46.15551233291626),
 ('thyroid-peroxidase inhibitor', 45.89885473251343),
 ('5-hydroxyeicosatetraenoic-acid inhibitor', 45.3142374753952),
 ('8-hydroxyeicosatetraenoic-acid inhibitor', 45.3142374753952),
 ('cytochrome-p450 inducer', 45.15664279460907),
 ('cytochrome-p488 inducer', 45.15664279460907),
 ('cytochrome-P21 inducer', 45.15664279460907),
 ('cytochrome-P450-1A inducer', 45.15664279460907),
 ('cytochrome-P450-2B inducer', 45.15664279460907),
 ('urinary antiseptic', 44.86902952194214),
 ('sodium-sparing', 44.7851836681366)]

1050
antioxidant


[('vitamin-A activity', 59.71183776855469),
 ('lipase inhibitor', 59.33906435966492),
 ('thyroid-peroxidase inhibitor', 59.17971730232239),
 ('TNF inhibitor', 58.8148832321167),
 ('aryl-hydrocarbon-hydroxylase inhibitor', 58.10437798500061),
 ('estrogen agonist', 57.83643126487732),
 ('arylamine-N-acetyltransferase inhibitor', 56.27970099449158),
 ('peroxidase inhibitor', 56.04261755943298),
 ('antitumor', 55.810028314590454),
 ('prostaglandin-E2 inhibitor', 55.529069900512695),
 ('prostaglandin inhibitor', 55.529069900512695),
 ('estrogenic', 55.29184341430664),
 ('platelet aggregation inhibitor', 55.205899477005005),
 ('protein kinase C inhibitor', 55.070918798446655),
 ('protein kinase D inhibitor', 55.070918798446655),
 ('protein kinase inhibitor', 55.070918798446655),
 ('activator-protein-1 inhibitor', 55.06500005722046),
 ('interleukin-6 inhibitor', 54.894739389419556),
 ('protein-tyrosine-kinase inhibitor', 54.82253432273865),
 ('platelet aggregation factor inhibitor', 54.546511

1050
nutrition


[('asthma preventive', 54.996609687805176),
 ('cancer preventive', 54.4893741607666),
 ('vitamin-A activity', 49.89945888519287),
 ('anti obesity', 46.63013517856598),
 ('digestive', 44.99504566192627),
 ('diagnostic', 43.874791264534),
 ('cold preventive', 43.81129741668701),
 ('essential', 43.39466691017151),
 ('anti reproductive', 42.708319425582886),
 ('anti metabolic', 40.96790850162506),
 ('anti glucose-tolerance-factor', 40.55205583572388),
 ('contraceptive', 39.57436382770538),
 ('pesticide', 38.7987494468689),
 ('fistula preventive', 37.2603714466095),
 ('hormone', 36.21225357055664),
 ('antibiotic', 35.92291176319122),
 ('hormonal', 34.808188676834106),
 ('anti infertility', 34.651872515678406),
 ('anti fertility', 34.42915380001068),
 ('anti AIDS', 34.23459529876709)]

1050
vitamins


[('calcium-sparing', 61.17306351661682),
 ('antioxidant', 58.32422971725464),
 ('5-hydroxyeicosatetraenoic-acid inhibitor', 55.44595718383789),
 ('8-hydroxyeicosatetraenoic-acid inhibitor', 55.44595718383789),
 ('potassium-sparing', 53.406333923339844),
 ('calcium antagonist', 53.03217172622681),
 ('sodium-sparing', 50.38707256317139),
 ('protein kinase C inhibitor', 50.38687586784363),
 ('protein kinase D inhibitor', 50.38687586784363),
 ('protein kinase inhibitor', 50.38687586784363),
 ('protease inhibitor', 49.08213019371033),
 ('estrogen receptor beta binder', 48.93285036087036),
 ('nitric-oxide inhibitor', 48.89238774776459),
 ('protein-tyrosine-kinase inhibitor', 48.55432212352753),
 ('hydrogen-peroxide inhibitor', 47.78055250644684),
 ('calcium channel blocker', 47.3043292760849),
 ('hormone', 47.08269238471985),
 ('mineral-corticoid', 46.808645129203796),
 ('nitric-oxide-synthase inhibitor', 46.80411517620087),
 ('activator-protein-1 inhibitor', 46.6132789850235)]

1050
nutrients


[('potassium-sparing', 55.94268441200256),
 ('vitamin-A activity', 52.7692973613739),
 ('sodium-sparing', 52.0516574382782),
 ('digestive', 51.123279333114624),
 ('calcium antagonist', 49.359700083732605),
 ('toxic', 48.60750138759613),
 ('nitric-oxide-genic', 48.24086129665375),
 ('mineral-corticoid', 47.80844748020172),
 ('hydrogen-peroxide inhibitor', 47.23955690860748),
 ('calcium channel blocker', 46.51098549365997),
 ('5-hydroxyeicosatetraenoic-acid inhibitor', 46.3395893573761),
 ('8-hydroxyeicosatetraenoic-acid inhibitor', 46.3395893573761),
 ('essential', 46.33876383304596),
 ('nitric-oxide scavenger', 45.601606369018555),
 ('anti C-reactive-protein', 45.15191614627838),
 ('nitric-oxide inhibitor', 44.987866282463074),
 ('choline-sparing', 44.97024118900299),
 ('antioxidant', 43.904879689216614),
 ('insulin-sparing', 43.584269285202026),
 ('mitogen-activated-protein-kinase inhibitor', 43.063175678253174)]

1050
minerals


[('copper chelator', 64.11359906196594),
 ('copper antagonist', 48.56516420841217),
 ('toxic', 42.69481003284454),
 ('metal chelator', 41.26313924789429),
 ('vitamin-A activity', 39.95558023452759),
 ('essential', 39.075592160224915),
 ('calcium-sparing', 38.076549768447876),
 ('nitric-oxide-genic', 37.851086258888245),
 ('hydrogen-peroxide inhibitor', 35.860249400138855),
 ('calcium antagonist', 35.249823331832886),
 ('nitric-oxide inhibitor', 35.0784033536911),
 ('nitric-oxide scavenger', 34.64736342430115),
 ('anti alkali', 33.56099724769592),
 ('aryl-hydrocarbon-hydroxylase inhibitor', 33.355653285980225),
 ('pigment', 32.944321632385254),
 ('advanced glycation end products inhibitor', 32.855841517448425),
 ('nitric-oxide-synthase inhibitor', 32.788315415382385),
 ('5-hydroxyeicosatetraenoic-acid inhibitor', 31.530770659446716),
 ('8-hydroxyeicosatetraenoic-acid inhibitor', 31.530770659446716),
 ('corrosive', 31.36352002620697)]

1050
energetic


[('anti neurotic', 45.398905873298645),
 ('anti flatulent', 41.1248654127121),
 ('anti anemic', 40.52497446537018),
 ('anti manic', 40.13121724128723),
 ('anti myopic', 39.95591104030609),
 ('hypnotic', 39.79558050632477),
 ('anti complementary', 39.374127984046936),
 ('anti C-reactive-protein', 38.94692659378052),
 ('anti hyperkinetic', 38.33085298538208),
 ('anti complement', 37.657591700553894),
 ('anti schizophrenic', 36.97980344295502),
 ('anti autistic', 36.81725263595581),
 ('anti dyspeptic', 36.45451068878174),
 ('cathartic', 36.02980673313141),
 ('anti apoplectic', 35.83267033100128),
 ('central nervous system active', 35.631194710731506),
 ('anti spasmodic', 35.071054100990295),
 ('anti testosterone', 34.92199778556824),
 ('anti shock', 34.55324470996857),
 ('anti asthmatic', 34.54141616821289)]

1050
energy


[('anti advanced glycation end products', 52.70897150039673),
 ('central nervous system active', 52.622777223587036),
 ('mineral-corticoid', 52.41909623146057),
 ('essential', 50.332945585250854),
 ('central nervous system sedative', 48.432111740112305),
 ('anti radiation', 47.29127585887909),
 ('anti X-radiation', 47.29127585887909),
 ('central nervous system inhibitor', 46.23767137527466),
 ('central nervous system paralytic', 45.88586688041687),
 ('advanced glycation end products inhibitor', 45.72388827800751),
 ('central nervous system stimulant', 45.366331934928894),
 ('anti glucose-tolerance-factor', 44.847172498703),
 ('central nervous system depressant', 44.558992981910706),
 ('vitamin-A activity', 43.76234710216522),
 ('anti metabolic', 43.13865602016449),
 ('nodulation signal', 42.86608695983887),
 ('anti spare-tire', 42.50875413417816),
 ('copper chelator', 42.35895276069641),
 ('anti aging', 42.104506492614746),
 ('anti hot-flash', 42.08964705467224)]

In [57]:
# Number of names that received a representative vector
len(key_list)

1050