In [148]:
import dotenv
import os
import requests
import pandas as pd
import xmltodict
import cohere
import numpy as np

from pathlib import Path

In [99]:
# The project root is taken to be the parent of the current working directory.
PROJ_DIR = Path.cwd().parent
DOTENV_PATH = PROJ_DIR.joinpath('.env')

# Load environment variables from the .env file; as the cell's last expression,
# the boolean returned by load_dotenv is shown as the output.
dotenv.load_dotenv(dotenv_path=DOTENV_PATH)

True

In [189]:
def _format_authors(author):
    """Return the author name(s) of one feed entry as a comma-separated string."""
    # xmltodict yields a list for several <author> elements and a dict for one.
    if isinstance(author, list):
        return ", ".join(item['name'] for item in author)
    return author['name']


def retrieve_arxiv_articles_df(num_articles=10, search_query='all:electron', timeout=60):
    """Query the arXiv export API and return the matching articles as a DataFrame.

    Parameters
    ----------
    num_articles : int, default 10
        Sent to the API as ``max_results``.
    search_query : str, default 'all:electron'
        Sent to the API as ``search_query``; URL-encoded by ``requests``.
    timeout : float, default 60
        Timeout in seconds handed to ``requests.get`` so a stalled connection
        cannot hang the notebook indefinitely.

    Returns
    -------
    pandas.DataFrame
        One row per feed entry with the columns ``link``, ``updated_ts``,
        ``published_ts``, ``title``, ``summary`` and ``author`` (author names
        joined with ", "). When the feed has no entries the frame is empty but
        still carries these columns.

    Raises
    ------
    requests.HTTPError
        If the API answers with an HTTP error status.
    """
    columns = ['link', 'updated_ts', 'published_ts', 'title', 'summary', 'author']

    response = requests.get(
        "http://export.arxiv.org/api/query",
        params={
            'search_query': search_query,
            'start': 0,
            'max_results': num_articles,
        },
        timeout=timeout,
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    parsed_xml = xmltodict.parse(response.text)

    # An empty <feed/> parses to None, and a feed without results has no 'entry' key.
    feed = parsed_xml['feed'] or {}
    entries = feed.get('entry', [])
    # xmltodict returns a single dict (not a list) when there is exactly one
    # <entry>; normalise so the loop below always sees a list of entries.
    if isinstance(entries, dict):
        entries = [entries]

    records = [
        {
            'link': article['id'],
            'updated_ts': article['updated'],
            'published_ts': article['published'],
            'title': article['title'],
            'summary': article['summary'],
            'author': _format_authors(article['author']),
        }
        for article in entries
    ]

    # Passing the columns explicitly keeps the schema stable even with no records.
    return pd.DataFrame(records, columns=columns)

In [191]:
%%time
# Request 1000 articles from the arXiv API; %%time reports how long the fetch takes.
articles_df = retrieve_arxiv_articles_df(num_articles=1000)

CPU times: user 133 ms, sys: 10.2 ms, total: 143 ms
Wall time: 6.5 s


In [192]:
# Show the retrieved articles table as the cell's output.
articles_df

Unnamed: 0,link,updated_ts,published_ts,title,summary,author
0,http://arxiv.org/abs/cond-mat/0102536v1,2001-02-28T20:12:09Z,2001-02-28T20:12:09Z,Impact of Electron-Electron Cusp on Configurat...,The effect of the electron-electron cusp on th...,"David Prendergast, M. Nolan, Claudia Filippi, ..."
1,http://arxiv.org/abs/astro-ph/0608371v1,2006-08-17T14:05:46Z,2006-08-17T14:05:46Z,Electron thermal conductivity owing to collisi...,We calculate the thermal conductivity of elect...,"P. S. Shternin, D. G. Yakovlev"
2,http://arxiv.org/abs/1802.06593v1,2018-02-19T11:51:42Z,2018-02-19T11:51:42Z,Electron pairing: from metastable electron pai...,Starting from the shell structure in atoms and...,"Guo-Qiang Hai, Ladir Cândido, Braulio G. A. Br..."
3,http://arxiv.org/abs/2010.01066v1,2020-10-02T15:46:56Z,2020-10-02T15:46:56Z,Electron Temperature Anisotropy and Electron B...,Electron temperature anisotropies and electron...,"Heyu Sun, Jinsong Zhao, Wen Liu, Huasheng Xie,..."
4,http://arxiv.org/abs/1501.04914v1,2015-01-20T18:48:22Z,2015-01-20T18:48:22Z,Hamiltonian of a many-electron system with sin...,Based on the metastable electron-pair energy b...,"G. -Q. Hai, F. M. Peeters"
...,...,...,...,...,...,...
995,http://arxiv.org/abs/1001.3371v2,2013-01-18T19:46:59Z,2010-01-19T17:40:39Z,Theory of the Anomalous Magnetic Moment of the...,It is shown that it follows from our model of ...,E. L. Koschmieder
996,http://arxiv.org/abs/1002.3930v1,2010-02-20T18:43:33Z,2010-02-20T18:43:33Z,Transport properties of a molecule embedded in...,We theoretically investigate the transport pro...,"Jong Soo Lim, Rosa Lopez, Gloria Platero, Pasc..."
997,http://arxiv.org/abs/1003.0583v1,2010-03-02T13:22:57Z,2010-03-02T13:22:57Z,Hyperfine Effects in Ionic Orbital Electron Ca...,The K-orbital electron capture in ions with on...,M. A. Goñi
998,http://arxiv.org/abs/1003.3294v1,2010-03-17T05:24:22Z,2010-03-17T05:24:22Z,Prominent 5d-orbital contribution to the condu...,We have examined the valence-band electronic s...,"A. Sekiyama, J. Yamaguchi, A. Higashiya, M. Ob..."


## Get embeddings from Cohere API

In [193]:
# The Cohere API key is read from the COHERE_API_KEY environment variable
# (e.g. supplied through the .env file); never hard-code it or share it publicly.
cohere_api_key = os.getenv('COHERE_API_KEY')

# API keys can be created and retrieved at dashboard.cohere.ai/welcome/register
cohere_client = cohere.Client(cohere_api_key)

In [194]:
# Build the text to embed: "<title>. <summary>" with newlines turned into
# spaces and everything lower-cased.
articles_df['combined_text'] = (
    articles_df['title']
    .str.cat(articles_df['summary'], sep='. ')
    .str.replace('\n', ' ')
    .str.lower()
)

In [195]:
# Preview the combined text of the first article.
articles_df.iloc[0]['combined_text']

'impact of electron-electron cusp on configuration interaction energies. the effect of the electron-electron cusp on the convergence of configuration interaction (ci) wave functions is examined. by analogy with the pseudopotential approach for electron-ion interactions, an effective electron-electron interaction is developed which closely reproduces the scattering of the coulomb interaction but is smooth and finite at zero electron-electron separation. the exact many-electron wave function for this smooth effective interaction has no cusp at zero electron-electron separation. we perform ci and quantum monte carlo calculations for he and be atoms, both with the coulomb electron-electron interaction and with the smooth effective electron-electron interaction. we find that convergence of the ci expansion of the wave function for the smooth electron-electron interaction is not significantly improved compared with that for the divergent coulomb interaction for energy differences on the orde

In [196]:
# Show the table again, now including the combined_text column.
articles_df

Unnamed: 0,link,updated_ts,published_ts,title,summary,author,combined_text
0,http://arxiv.org/abs/cond-mat/0102536v1,2001-02-28T20:12:09Z,2001-02-28T20:12:09Z,Impact of Electron-Electron Cusp on Configurat...,The effect of the electron-electron cusp on th...,"David Prendergast, M. Nolan, Claudia Filippi, ...",impact of electron-electron cusp on configurat...
1,http://arxiv.org/abs/astro-ph/0608371v1,2006-08-17T14:05:46Z,2006-08-17T14:05:46Z,Electron thermal conductivity owing to collisi...,We calculate the thermal conductivity of elect...,"P. S. Shternin, D. G. Yakovlev",electron thermal conductivity owing to collisi...
2,http://arxiv.org/abs/1802.06593v1,2018-02-19T11:51:42Z,2018-02-19T11:51:42Z,Electron pairing: from metastable electron pai...,Starting from the shell structure in atoms and...,"Guo-Qiang Hai, Ladir Cândido, Braulio G. A. Br...",electron pairing: from metastable electron pai...
3,http://arxiv.org/abs/2010.01066v1,2020-10-02T15:46:56Z,2020-10-02T15:46:56Z,Electron Temperature Anisotropy and Electron B...,Electron temperature anisotropies and electron...,"Heyu Sun, Jinsong Zhao, Wen Liu, Huasheng Xie,...",electron temperature anisotropy and electron b...
4,http://arxiv.org/abs/1501.04914v1,2015-01-20T18:48:22Z,2015-01-20T18:48:22Z,Hamiltonian of a many-electron system with sin...,Based on the metastable electron-pair energy b...,"G. -Q. Hai, F. M. Peeters",hamiltonian of a many-electron system with sin...
...,...,...,...,...,...,...,...
995,http://arxiv.org/abs/1001.3371v2,2013-01-18T19:46:59Z,2010-01-19T17:40:39Z,Theory of the Anomalous Magnetic Moment of the...,It is shown that it follows from our model of ...,E. L. Koschmieder,theory of the anomalous magnetic moment of the...
996,http://arxiv.org/abs/1002.3930v1,2010-02-20T18:43:33Z,2010-02-20T18:43:33Z,Transport properties of a molecule embedded in...,We theoretically investigate the transport pro...,"Jong Soo Lim, Rosa Lopez, Gloria Platero, Pasc...",transport properties of a molecule embedded in...
997,http://arxiv.org/abs/1003.0583v1,2010-03-02T13:22:57Z,2010-03-02T13:22:57Z,Hyperfine Effects in Ionic Orbital Electron Ca...,The K-orbital electron capture in ions with on...,M. A. Goñi,hyperfine effects in ionic orbital electron ca...
998,http://arxiv.org/abs/1003.3294v1,2010-03-17T05:24:22Z,2010-03-17T05:24:22Z,Prominent 5d-orbital contribution to the condu...,We have examined the valence-band electronic s...,"A. Sekiyama, J. Yamaguchi, A. Higashiya, M. Ob...",prominent 5d-orbital contribution to the condu...


In [199]:
%%time

article_embeddings = cohere_client.embed(texts=list(articles_df['combined_text']),
                                          model='small',
                                          truncate='LEFT').embeddings

CPU times: user 546 ms, sys: 125 ms, total: 671 ms
Wall time: 2.74 s


In [200]:
# Sanity check: (number of texts, embedding dimension).
np.shape(article_embeddings)

(1000, 1024)