# Update cited-by in the database

In [None]:
from pathlib import Path
from tqdm import tqdm
from embedding_search.crossref import query_crossref
from embedding_search.vector_store import get_author

def authors_from_checkpoint(files, checkpoint):
    """Return the files at and after *checkpoint*, in sorted path order.

    ``Path.glob`` yields entries in an OS-dependent order, so resuming from
    "the last processed file" is only meaningful once the order is fixed.
    The files are therefore sorted before looking for the checkpoint.

    Args:
        files: Iterable of ``Path`` objects (consumed once).
        checkpoint: Stem (file name without suffix) of the file to resume
            from; that file itself is included in the result.

    Returns:
        A list of paths starting at the first file whose stem equals
        *checkpoint*; an empty list when no file matches.
    """
    ordered = sorted(files)
    for position, candidate in enumerate(ordered):
        if candidate.stem == checkpoint:
            return ordered[position:]
    return []


authors_path = Path("./authors/").glob("*.json")
# Stem of the author file to resume from (inclusive).
last_processed = "0000-0002-5769-7094"

short_listed_authors = authors_from_checkpoint(authors_path, last_processed)
# True once the checkpoint file was found, i.e. when anything was short-listed.
start = bool(short_listed_authors)

In [None]:
# Show the author files that are queued for processing.
short_listed_authors

In [None]:
# For every short-listed author file: load the author by file stem, query
# Crossref for each article's DOI, store any truthy cited-by value on the
# article, then save the author back to the same path.
for path in tqdm(short_listed_authors):
    print(path)
    author = get_author(path.stem)

    for paper in author.articles:
        _, citations = query_crossref(paper.doi)
        if not citations:
            # Falsy result: leave the article's existing value untouched.
            continue
        paper.cited_by = citations

    author.save(path)

In [1]:
from embedding_search.vector_store import MiniStore
import logging

# Configure the root logger to emit INFO-level records.
logging.basicConfig(level=logging.INFO)
# Create the store and build it; the search cells below use this `store`.
# NOTE(review): what build() loads is not visible here — presumably the
# author/article embeddings; confirm against MiniStore.
store = MiniStore()
store.build()

63it [00:04, 14.37it/s]


# Feature 1: Search related articles

In [None]:
# Search for articles matching a free-text query and print their titles.
articles = store.search("pandemic resilience", type="article")
# Plain loop rather than a comprehension: the comprehension was used only for
# its print() side effect and left a list of None as the cell's output.
for article in articles:
    print(article.title)

In [None]:
# This search is based on the centroid of each author's articles, i.e., the
# average of all of that author's article embeddings. It therefore reflects
# relevancy only and is not weighted by the number of articles.
authors = store.search("higgs boson", type="author")
# Plain loop rather than a comprehension: the comprehension was used only for
# its print() side effect and left a list of None as the cell's output.
for author in authors:
    print(author)

In [None]:
# This search is weighted by the author's articles that are related to the query.
authors = store.weighted_search_author("Higgs field", n_pool=100)
# Plain loop rather than a comprehension: the comprehension was used only for
# its print() side effect and left a list of None as the cell's output.
for author in authors:
    print(author)

# Network graph
The purpose of this network graph prototype is to visualize an author and their works.

In [None]:
from embedding_search.visualize import EmbeddingsProcessor, QueryPlotter

In [None]:
# Module-level reference to the existing store; plot() below reads it on
# every call.
STORE = store  # cache this...


def plot(query: str):
    """Plot *query* using the module-level ``STORE``.

    A fresh ``EmbeddingsProcessor`` (wrapping ``STORE``) and a fresh
    ``QueryPlotter`` are created on every call.

    Args:
        query: Text passed straight through to ``QueryPlotter.plot``.

    Returns:
        Whatever ``QueryPlotter.plot`` returns for *query*.
    """
    return QueryPlotter(EmbeddingsProcessor(STORE)).plot(query)

In [None]:
from pathlib import Path

# Plot a sample query and write the result to plots/<query>.html.
query = "higgs boson"
p = plot(query)

# Create the output directory up front so saving does not fail on a fresh
# checkout where plots/ does not exist yet.
# NOTE(review): assumes p.save() does not create parent directories itself.
Path("plots").mkdir(parents=True, exist_ok=True)
p.save(f"plots/{query}.html")