In [5]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported modules
# (e.g. the project's `src` package) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [6]:
# To ensure our src module can be found and imported
import os
import sys
# Resolve the parent of the current working directory and append it to the import
# search path, unless it is already listed there.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

import dotenv
import os
import requests
import pandas as pd
import xmltodict
import cohere
import numpy as np

from pathlib import Path

from src.data.arxiv_downloader import ArxivDownloader
from src.models.cohere import CohereModel
from src.utils import (
    compute_top_k,
)

In [7]:
# The project directory is taken to be one level above the notebook's working directory;
# the .env file located there is loaded via python-dotenv. The call is left as the
# cell's final expression so its boolean result is displayed.
PROJ_DIR = Path.cwd().parent
DOTENV_PATH = PROJ_DIR.joinpath('.env')
dotenv.load_dotenv(dotenv_path=DOTENV_PATH)

True

In [8]:
# The arXiv download logic lives in the ArxivDownloader class, which also provides
# simple caching.
# NOTE(review): download_refresh_interval_days=1 presumably means cached results are
# refreshed once they are more than a day old — confirm in src/data/arxiv_downloader.py.
downloader = ArxivDownloader(download_refresh_interval_days=1)

In [9]:
%%time
# Retrieve the article metadata table through the downloader; %%time (which must stay
# on the first line of the cell) reports how long the retrieval took.
articles_df = downloader.retrieve_arxiv_articles_df()

CPU times: user 164 ms, sys: 11.2 ms, total: 176 ms
Wall time: 5.96 s


In [10]:
# Display the retrieved articles table (the rendered output elides the middle rows).
articles_df

Unnamed: 0,link,updated_ts,published_ts,title,summary,author,category,combined_text
0,http://arxiv.org/abs/1709.06620v1,2017-09-19T19:26:20Z,2017-09-19T19:26:20Z,Learning of Coordination Policies for Robotic ...,"Inspired by biological swarms, robotic swarms ...","Qiyang Li, Xintong Du, Yizhou Huang, Quinlan S...",cs.RO,learning of coordination policies for robotic ...
1,http://arxiv.org/abs/2011.05605v2,2020-11-20T18:19:32Z,2020-11-11T07:35:21Z,Decentralized Motion Planning for Multi-Robot ...,This work presents a decentralized motion plan...,"Sivanathan Kandhasamy, Vinayagam Babu Kuppusam...",cs.RO,decentralized motion planning for multi-robot ...
2,http://arxiv.org/abs/2209.14745v2,2022-12-29T08:48:05Z,2022-09-29T13:02:58Z,A Multiagent Framework for the Asynchronous an...,The traditional ML development methodology doe...,Andrea Gesmundo,cs.LG,a multiagent framework for the asynchronous an...
3,http://arxiv.org/abs/2003.08376v3,2020-11-07T02:48:22Z,2020-03-18T17:54:28Z,Inverting the Pose Forecasting Pipeline with S...,Many autonomous systems forecast aspects of th...,"Xinshuo Weng, Jianren Wang, Sergey Levine, Kri...",cs.CV,inverting the pose forecasting pipeline with s...
4,http://arxiv.org/abs/2008.12760v1,2020-08-28T17:35:22Z,2020-08-28T17:35:22Z,AllenAct: A Framework for Embodied AI Research,"The domain of Embodied AI, in which agents lea...","Luca Weihs, Jordi Salvador, Klemen Kotar, Unna...",cs.CV,allenact: a framework for embodied ai research...
...,...,...,...,...,...,...,...,...
995,http://arxiv.org/abs/1912.07521v3,2022-03-31T21:56:45Z,2019-12-12T03:48:40Z,Exploration and Coordination of Complementary ...,The hunter and gatherer approach copes with th...,"Mehdi Dadvar, Saeed Moazami, Harley R. Myler, ...",cs.MA,exploration and coordination of complementary ...
996,http://arxiv.org/abs/2001.05994v2,2020-10-07T20:41:11Z,2020-01-16T18:51:42Z,Adversarially Guided Self-Play for Adopting So...,Robotic agents must adopt existing social conv...,"Mycal Tucker, Yilun Zhou, Julie Shah",cs.AI,adversarially guided self-play for adopting so...
997,http://arxiv.org/abs/2002.06417v1,2020-02-15T17:36:00Z,2020-02-15T17:36:00Z,Designing Interaction for Multi-agent Cooperat...,Future intelligent system will involve very va...,"Chao Wang, Stephan Hasler, Manuel Muehlig, Fra...",cs.HC,designing interaction for multi-agent cooperat...
998,http://arxiv.org/abs/2004.03053v3,2022-11-14T03:27:14Z,2020-04-07T00:34:36Z,Scenario-Transferable Semantic Graph Reasoning...,Accurately predicting the possible behaviors o...,"Yeping Hu, Wei Zhan, Masayoshi Tomizuka",cs.RO,scenario-transferable semantic graph reasoning...


## Get embeddings from Cohere API

In [11]:
# Inspect the first row's `combined_text`, the field that is sent for embedding below.
# In the rendered output it is the lower-cased title followed by the abstract.
articles_df['combined_text'].iloc[0]

'learning of coordination policies for robotic swarms. inspired by biological swarms, robotic swarms are envisioned to solve real-world problems that are difficult for individual agents. biological swarms can achieve collective intelligence based on local interactions and simple rules; however, designing effective distributed policies for large-scale robotic swarms to achieve a global objective can be challenging. although it is often possible to design an optimal centralized strategy for smaller numbers of agents, those methods can fail as the number of agents increases. motivated by the growing success of machine learning, we develop a deep learning approach that learns distributed coordination policies from centralized policies. in contrast to traditional distributed control approaches, which are usually based on human-designed policies for relatively simple tasks, this learning-based approach can be adapted to more difficult tasks. we demonstrate the efficacy of our proposed approa

In [12]:
# The Cohere API logic lives in the CohereModel class.
# NOTE(review): presumably it reads its API key from the environment populated by the
# .env file loaded earlier — confirm in src/models/cohere.py.
cohere_model = CohereModel()

In [13]:
%%time
query = "speech to text whisper wav2vec"

res_embeddings = cohere_model.get_embeddings(texts=[query]+list(articles_df['combined_text']))

CPU times: user 532 ms, sys: 116 ms, total: 649 ms
Wall time: 2.31 s


In [14]:
# res_embeddings was requested for [query] + article texts, so position 0 holds the
# query embedding and the remaining entries follow the row order of articles_df.
query_embedding = res_embeddings[0]
article_embeddings = res_embeddings[1:]

# The indices computed below are applied positionally via .iloc, so the embeddings must
# line up one-to-one with the rows of articles_df. Fail fast if the counts differ
# instead of silently displaying the wrong articles.
assert len(article_embeddings) == len(articles_df), (
    f"expected {len(articles_df)} article embeddings, got {len(article_embeddings)}"
)

# Rank the articles against the query and show the k best-matching rows.
top_k_indices = compute_top_k(query_embedding, article_embeddings, k=20)
articles_df.iloc[top_k_indices]

Unnamed: 0,link,updated_ts,published_ts,title,summary,author,category,combined_text
814,http://arxiv.org/abs/2106.04283v1,2021-06-08T12:22:29Z,2021-06-08T12:22:29Z,NWT: Towards natural audio-to-video generation...,"In this work we introduce NWT, an expressive s...","Rayhane Mama, Marc S. Tyndel, Hashiam Kadhim, ...",cs.SD,nwt: towards natural audio-to-video generation...
216,http://arxiv.org/abs/2106.09296v3,2022-01-14T16:43:19Z,2021-06-17T07:59:15Z,Voice2Series: Reprogramming Acoustic Models fo...,Learning to classify time series with limited ...,"Chao-Han Huck Yang, Yun-Yun Tsai, Pin-Yu Chen",cs.LG,voice2series: reprogramming acoustic models fo...
810,http://arxiv.org/abs/2202.08509v1,2022-02-17T08:26:25Z,2022-02-17T08:26:25Z,A Study of Designing Compact Audio-Visual Wake...,Audio-only-based wake word spotting (WWS) is c...,"Hengshun Zhou, Jun Du, Chao-Han Huck Yang, Shi...",cs.SD,a study of designing compact audio-visual wake...
212,http://arxiv.org/abs/1910.10942v2,2020-02-10T09:36:23Z,2019-10-24T06:54:36Z,A Recurrent Variational Autoencoder for Speech...,This paper presents a generative approach to s...,"Simon Leglaive, Xavier Alameda-Pineda, Laurent...",cs.LG,a recurrent variational autoencoder for speech...
817,http://arxiv.org/abs/2110.08791v1,2021-10-17T11:14:00Z,2021-10-17T11:14:00Z,Taming Visually Guided Sound Generation,Recent advances in visually-induced audio gene...,"Vladimir Iashin, Esa Rahtu",cs.CV,taming visually guided sound generation. recen...
223,http://arxiv.org/abs/2301.07851v1,2023-01-19T02:37:56Z,2023-01-19T02:37:56Z,From English to More Languages: Parameter-Effi...,"In this work, we propose a new parameter-effic...","Chao-Han Huck Yang, Bo Li, Yu Zhang, Nanxin Ch...",cs.SD,from english to more languages: parameter-effi...
219,http://arxiv.org/abs/2202.08532v1,2022-02-17T09:17:58Z,2022-02-17T09:17:58Z,Mitigating Closed-model Adversarial Examples w...,"In this work, we aim to enhance the system rob...","Chao-Han Huck Yang, Zeeshan Ahmed, Yile Gu, Jo...",eess.AS,mitigating closed-model adversarial examples w...
222,http://arxiv.org/abs/2211.01839v1,2022-11-03T14:20:32Z,2022-11-03T14:20:32Z,HyperSound: Generating Implicit Neural Represe...,Implicit neural representations (INRs) are a r...,"Filip Szatkowski, Karol J. Piczak, Przemysław ...",cs.SD,hypersound: generating implicit neural represe...
429,http://arxiv.org/abs/2206.02211v3,2022-12-04T08:24:02Z,2022-06-05T16:18:27Z,Variable-rate hierarchical CPC leads to acoust...,The success of deep learning comes from its ab...,"Santiago Cuervo, Adrian Łańcucki, Ricard Marxe...",cs.SD,variable-rate hierarchical cpc leads to acoust...
412,http://arxiv.org/abs/2206.00393v1,2022-06-01T11:00:07Z,2022-06-01T11:00:07Z,Towards Generalisable Audio Representations fo...,"In audio-visual navigation (AVN), an intellige...","Shunqi Mao, Chaoyi Zhang, Heng Wang, Weidong Cai",cs.SD,towards generalisable audio representations fo...


In [22]:
# Turn the top-k rows into a list of plain dicts (one per article), keeping only the
# metadata fields; the `Unnamed: 0` and `combined_text` columns are left out.
result_columns = ['link', 'updated_ts', 'published_ts', 'title', 'summary', 'author', 'category']
top_articles = articles_df.iloc[top_k_indices]
top_articles[result_columns].to_dict(orient='records')

[{'link': 'http://arxiv.org/abs/2106.04283v1',
  'updated_ts': '2021-06-08T12:22:29Z',
  'published_ts': '2021-06-08T12:22:29Z',
  'title': 'NWT: Towards natural audio-to-video generation with representation\n  learning',
  'summary': "In this work we introduce NWT, an expressive speech-to-video model. Unlike\napproaches that use domain-specific intermediate representations such as pose\nkeypoints, NWT learns its own latent representations, with minimal assumptions\nabout the audio and video content. To this end, we propose a novel discrete\nvariational autoencoder with adversarial loss, dVAE-Adv, which learns a new\ndiscrete latent representation we call Memcodes. Memcodes are straightforward\nto implement, require no additional loss terms, are stable to train compared\nwith other approaches, and show evidence of interpretability. To predict on the\nMemcode space, we use an autoregressive encoder-decoder model conditioned on\naudio. Additionally, our model can control latent attribute