# SPECTER API

In [None]:

from typing import Dict, List
import json
import numpy as np

import requests


# Endpoint that batches of papers are POSTed to in order to obtain embeddings.
URL = "https://model-apis.semanticscholar.org/specter/v1/invoke"
# Default number of items per batch; used as the default ``chunk_size`` of chunks().
MAX_BATCH_SIZE = 16


def chunks(lst, chunk_size=MAX_BATCH_SIZE):
    """Split a sequence into consecutive slices of at most ``chunk_size`` items.

    Args:
        lst: A sequence that supports ``len()`` and slicing (e.g. a list).
        chunk_size: Maximum number of items per slice; must be at least 1.

    Yields:
        Slices of ``lst`` in their original order. The final slice may hold
        fewer than ``chunk_size`` items. An empty ``lst`` yields nothing.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # and a zero step fails with an unrelated-looking range() error, so both
    # are rejected explicitly.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be >= 1, got {chunk_size!r}")

    for start in range(0, len(lst), chunk_size):
        yield lst[start : start + chunk_size]


# Two example paper records, each a dict with "paper_id", "title" and "abstract" keys.
# NOTE(review): presumably this is the input shape expected by embed() — confirm
# against the service's request schema.
SAMPLE_PAPERS = [
    {
        "paper_id": "A",
        "title": "Angiotensin-converting enzyme 2 is a functional receptor for the SARS coronavirus",
        "abstract": "Spike (S) proteins of coronaviruses ...",
    },
    {
        "paper_id": "B",
        "title": "Hospital outbreak of Middle East respiratory syndrome coronavirus",
        "abstract": "Between April 1 and May 23, 2013, a total of 23 cases of MERS-CoV ...",
    },
]


def embed(papers: List[dict], timeout: float = 60) -> Dict[str, List[float]]:
    """Fetch an embedding for every paper by POSTing batches to ``URL``.

    The papers are split with ``chunks()`` and each batch is sent as a JSON
    body in its own request. The ``"preds"`` entries of every response are
    collected into a single mapping.

    Args:
        papers: Paper records to embed; sent to the service unchanged.
        timeout: Value passed to ``requests.post`` as ``timeout`` (seconds),
            so that an unresponsive server cannot block the call forever.

    Returns:
        A dict mapping each returned ``"paper_id"`` to its ``"embedding"``.
        An empty ``papers`` sends no request and returns an empty dict.

    Raises:
        RuntimeError: If any batch gets a non-200 response.
        requests.exceptions.RequestException: Connection errors and timeouts
            raised by ``requests`` propagate unchanged.
    """
    embeddings_by_paper_id: Dict[str, List[float]] = {}

    for chunk in chunks(papers):
        # Passing ``json=`` lets requests serialise the batch to JSON itself.
        response = requests.post(URL, json=chunk, timeout=timeout)

        if response.status_code != 200:
            raise RuntimeError("Sorry, something went wrong, please try later!")

        for paper in response.json()["preds"]:
            embeddings_by_paper_id[paper["paper_id"]] = paper["embedding"]

    return embeddings_by_paper_id


In [1]:
%load_ext autoreload
%autoreload 2
from cord.vectors import get_embeddings, papers_similar_to

D:\Projects\cord19\data\cord-cache\SpectorSimilarity.ann True


In [14]:
# Embed a passage of WHO interim-guidance text with get_embeddings().
# `title` and `abstract` contain the same words and differ only in where the
# line breaks fall.
# NOTE(review): `abstract` is assigned but never used in this cell; only `title`
# is passed (with None as the second argument) — confirm whether that is intended.
title = """
WHO has developed this interim guidance to meet the need
for recommendations on safe home care for patients with
suspected COVID-19 who present with mild symptoms
a and on public health measures related to the management of their
contacts.
"""
abstract = """
WHO has developed this interim guidance to meet the need
for recommendations on safe home care for patients with
suspected COVID-19 who present with mild symptoms
a
and
on public health measures related to the management of their
contacts.
"""
get_embeddings(title,None)

[0.1810917854309082,
 -5.966172695159912,
 -0.8498125672340393,
 3.4571444988250732,
 -2.1879444122314453,
 -0.9650493264198303,
 -5.966675758361816,
 3.884913444519043,
 1.7972986698150635,
 0.43913620710372925,
 -0.7171406745910645,
 -1.0080366134643555,
 -2.1366326808929443,
 -0.45451119542121887,
 0.7047880291938782,
 -2.8447251319885254,
 -4.737700462341309,
 1.9914917945861816,
 2.0576870441436768,
 -1.5358411073684692,
 1.692751407623291,
 3.5170416831970215,
 4.101092338562012,
 2.9593515396118164,
 0.1448926031589508,
 -2.315516233444214,
 2.444979190826416,
 -0.09897439181804657,
 1.6457053422927856,
 1.4612030982971191,
 -6.6194610595703125,
 -0.5585726499557495,
 0.4118262529373169,
 0.5100516676902771,
 1.1775476932525635,
 -1.9656606912612915,
 1.96274733543396,
 0.4392041563987732,
 -3.0268056392669678,
 0.4562227129936218,
 2.320300817489624,
 -1.0061898231506348,
 2.01444149017334,
 1.4776651859283447,
 4.570910930633545,
 3.736983060836792,
 -1.1181901693344116,
 3.31

In [11]:
embeddings = load_specter_embeddings()

# Check vector similarity

In [18]:
mother_to_child = get_embeddings('Mother to child transmission')
mother_to_son = get_embeddings('Mother to son transmission')

In [2]:
papers_similar_to('Mother to child transmission')

['lkyvok5t',
 'bv7udg9k',
 'w6lkzth1',
 'b39672xw',
 'ix6r2nij',
 'q7gosbvn',
 'mhn3ror0',
 'dlsduvbr',
 '7a6xau9l',
 'uqbry4r8']

In [26]:
# Compare the two phrase embeddings; each vector is reshaped to a single row
# (1, -1) before being passed to cosine_similarity.
# NOTE(review): cosine_similarity is not imported in any cell visible here —
# presumably sklearn.metrics.pairwise.cosine_similarity; confirm.
cosine_similarity(np.array(mother_to_child).reshape(1, -1), np.array(mother_to_son).reshape(1, -1))

array([[0.92047202]])

In [25]:
np.array(mother_to_son)

array([ 1.26422548e+00, -2.25139284e+00,  3.08235717e+00,  1.19616842e+00,
        2.98021674e-01, -2.16234994e+00,  7.23419189e-01, -1.89618862e+00,
        2.76547694e+00, -3.18535161e+00,  1.10630178e+00,  1.81092310e+00,
       -7.16883421e-01, -7.58839697e-02,  2.77980471e+00,  7.44045973e-01,
       -1.43979359e+00,  9.39333439e-01, -2.47916055e+00,  4.25459921e-01,
       -1.82960737e+00, -1.21104920e+00, -7.91671634e-01,  5.37474990e-01,
        5.77619362e+00, -2.03433573e-01,  1.14582002e+00, -5.87071896e-01,
        1.42275012e+00, -1.19842291e-01, -1.76777339e+00,  2.97579944e-01,
       -1.25426531e+00, -1.58306003e+00,  2.77100062e+00, -2.71885228e+00,
       -1.96867466e+00,  6.30351782e-01, -1.29415357e+00,  2.15326881e+00,
       -4.60218668e+00, -1.82290804e+00, -3.39613199e+00, -2.53870273e+00,
       -2.28756219e-02,  3.21628809e+00, -3.96986437e+00,  1.37358916e+00,
        6.74912453e-01, -1.04426527e+00, -9.15506333e-02,  1.10488069e+00,
       -1.89492130e+00, -