In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# To ensure our src module can be found and imported
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import dotenv
import os
import requests
import pandas as pd
import xmltodict
import cohere
import numpy as np

from pathlib import Path

from src.data.arxiv_downloader import ArxivDownloader
from src.models.cohere import CohereModel
from src.utils import (
    compute_top_k,
)

In [3]:
PROJ_DIR = Path.cwd().parent
DOTENV_PATH = PROJ_DIR / '.env'
dotenv.load_dotenv(DOTENV_PATH)

True

In [30]:
# Abstracted Arxiv downloading logic into ArxivDownloader class that has simple caching functionality
downloader = ArxivDownloader(download_refresh_interval_days=1)

In [40]:
%%time
articles_df, is_from_cache = downloader.retrieve_arxiv_articles_df()

CPU times: user 17 µs, sys: 1e+03 ns, total: 18 µs
Wall time: 20 µs


In [41]:
# True if the articles_df was retrieved from cache, False otherwise. 
# This can be passed to CohereModel to return a cached version of document embeddings for fast response times
is_from_cache

True

In [33]:
articles_df

Unnamed: 0,link,updated_ts,published_ts,title,summary,author,category,combined_text
0,http://arxiv.org/abs/1709.06620v1,2017-09-19T19:26:20Z,2017-09-19T19:26:20Z,Learning of Coordination Policies for Robotic ...,"Inspired by biological swarms, robotic swarms ...","Qiyang Li, Xintong Du, Yizhou Huang, Quinlan S...",cs.RO,learning of coordination policies for robotic ...
1,http://arxiv.org/abs/2011.05605v2,2020-11-20T18:19:32Z,2020-11-11T07:35:21Z,Decentralized Motion Planning for Multi-Robot ...,This work presents a decentralized motion plan...,"Sivanathan Kandhasamy, Vinayagam Babu Kuppusam...",cs.RO,decentralized motion planning for multi-robot ...
2,http://arxiv.org/abs/2209.14745v2,2022-12-29T08:48:05Z,2022-09-29T13:02:58Z,A Multiagent Framework for the Asynchronous an...,The traditional ML development methodology doe...,Andrea Gesmundo,cs.LG,a multiagent framework for the asynchronous an...
3,http://arxiv.org/abs/2003.08376v3,2020-11-07T02:48:22Z,2020-03-18T17:54:28Z,Inverting the Pose Forecasting Pipeline with S...,Many autonomous systems forecast aspects of th...,"Xinshuo Weng, Jianren Wang, Sergey Levine, Kri...",cs.CV,inverting the pose forecasting pipeline with s...
4,http://arxiv.org/abs/2008.12760v1,2020-08-28T17:35:22Z,2020-08-28T17:35:22Z,AllenAct: A Framework for Embodied AI Research,"The domain of Embodied AI, in which agents lea...","Luca Weihs, Jordi Salvador, Klemen Kotar, Unna...",cs.CV,allenact: a framework for embodied ai research...
...,...,...,...,...,...,...,...,...
95,http://arxiv.org/abs/2109.15266v3,2022-05-31T12:56:48Z,2021-09-30T17:06:39Z,Modeling Interactions of Autonomous Vehicles a...,Reliable pedestrian crash avoidance mitigation...,"Raphael Trumpp, Harald Bayerlein, David Gesbert",cs.RO,modeling interactions of autonomous vehicles a...
96,http://arxiv.org/abs/2110.08229v1,2021-10-05T16:46:04Z,2021-10-05T16:46:04Z,Influencing Towards Stable Multi-Agent Interac...,Learning in multi-agent environments is diffic...,"Woodrow Z. Wang, Andy Shih, Annie Xie, Dorsa S...",cs.RO,influencing towards stable multi-agent interac...
97,http://arxiv.org/abs/2112.09012v1,2021-12-16T16:47:00Z,2021-12-16T16:47:00Z,Centralizing State-Values in Dueling Networks ...,We study the problem of multi-robot mapless na...,"Enrico Marchesini, Alessandro Farinelli",cs.MA,centralizing state-values in dueling networks ...
98,http://arxiv.org/abs/2201.08484v4,2022-06-24T20:24:35Z,2022-01-20T22:54:32Z,Iterated Reasoning with Mutual Information in ...,Information sharing is key in building team co...,"Sachin Konan, Esmaeil Seraj, Matthew Gombolay",cs.MA,iterated reasoning with mutual information in ...


## Get embeddings from Cohere API

In [15]:
articles_df['combined_text'].iloc[0]

'learning of coordination policies for robotic swarms. inspired by biological swarms, robotic swarms are envisioned to solve real-world problems that are difficult for individual agents. biological swarms can achieve collective intelligence based on local interactions and simple rules; however, designing effective distributed policies for large-scale robotic swarms to achieve a global objective can be challenging. although it is often possible to design an optimal centralized strategy for smaller numbers of agents, those methods can fail as the number of agents increases. motivated by the growing success of machine learning, we develop a deep learning approach that learns distributed coordination policies from centralized policies. in contrast to traditional distributed control approaches, which are usually based on human-designed policies for relatively simple tasks, this learning-based approach can be adapted to more difficult tasks. we demonstrate the efficacy of our proposed approa

In [38]:
# Abstracted Cohere API logic into CohereModel class
cohere_model = CohereModel()

In [49]:
%%time
query = "speech to text whisper wav2vec"

res_embeddings = cohere_model.get_embeddings(texts=[query]+list(articles_df['combined_text']), from_cache=is_from_cache)

CPU times: user 17.3 ms, sys: 1.91 ms, total: 19.2 ms
Wall time: 139 ms


In [50]:
query_embedding = res_embeddings[0]
article_embeddings = res_embeddings[1:]

top_k_indices, similarity_scores = compute_top_k(query_embedding, article_embeddings, k=20)
results_df = articles_df.iloc[top_k_indices]
results_df['similarity'] = similarity_scores

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  results_df['similarity'] = similarity_scores


In [51]:
results_df

Unnamed: 0,link,updated_ts,published_ts,title,summary,author,category,combined_text,similarity
26,http://arxiv.org/abs/2008.09622v3,2021-02-11T18:36:45Z,2020-08-21T18:00:33Z,Learning to Set Waypoints for Audio-Visual Nav...,"In audio-visual navigation, an agent intellige...","Changan Chen, Sagnik Majumder, Ziad Al-Halah, ...",cs.CV,learning to set waypoints for audio-visual nav...,0.364
42,http://arxiv.org/abs/2209.15007v2,2022-11-02T17:59:47Z,2022-09-29T17:59:55Z,Understanding Collapse in Non-Contrastive Siam...,Contrastive methods have led a recent surge in...,"Alexander C. Li, Alexei A. Efros, Deepak Pathak",cs.LG,understanding collapse in non-contrastive siam...,0.303
10,http://arxiv.org/abs/2103.14023v3,2021-10-07T05:04:39Z,2021-03-25T17:59:01Z,AgentFormer: Agent-Aware Transformers for Soci...,Predicting accurate future trajectories of mul...,"Ye Yuan, Xinshuo Weng, Yanglan Ou, Kris Kitani",cs.AI,agentformer: agent-aware transformers for soci...,0.292
33,http://arxiv.org/abs/2102.02886v3,2021-04-05T17:59:16Z,2021-02-04T20:58:37Z,Ivy: Templated Deep Learning for Inter-Framewo...,"We introduce Ivy, a templated Deep Learning (D...","Daniel Lenton, Fabio Pardo, Fabian Falck, Step...",cs.LG,ivy: templated deep learning for inter-framewo...,0.289
30,http://arxiv.org/abs/1602.00991v2,2016-03-08T22:09:05Z,2016-02-02T16:10:16Z,Deep Tracking: Seeing Beyond Seeing Using Recu...,This paper presents to the best of our knowled...,"Peter Ondruska, Ingmar Posner",cs.LG,deep tracking: seeing beyond seeing using recu...,0.273
29,http://arxiv.org/abs/1609.06666v2,2017-03-05T15:29:45Z,2016-09-21T18:32:11Z,Vote3Deep: Fast Object Detection in 3D Point C...,This paper proposes a computationally efficien...,"Martin Engelcke, Dushyant Rao, Dominic Zeng Wa...",cs.RO,vote3deep: fast object detection in 3d point c...,0.273
34,http://arxiv.org/abs/2112.03257v1,2021-12-06T18:59:52Z,2021-12-06T18:59:52Z,Functional Regularization for Reinforcement Le...,We propose a simple architecture for deep rein...,"Alexander C. Li, Deepak Pathak",cs.LG,functional regularization for reinforcement le...,0.271
94,http://arxiv.org/abs/2109.06514v1,2021-09-14T08:18:47Z,2021-09-14T08:18:47Z,Vision Transformer for Learning Driving Polici...,Driving in a complex urban environment is a di...,"Eshagh Kargar, Ville Kyrki",cs.LG,vision transformer for learning driving polici...,0.266
11,http://arxiv.org/abs/2104.00563v3,2022-02-11T04:59:43Z,2021-02-19T18:53:26Z,Latent Variable Sequential Set Transformers Fo...,Robust multi-agent trajectory prediction is es...,"Roger Girgis, Florian Golemo, Felipe Codevilla...",cs.RO,latent variable sequential set transformers fo...,0.265
63,http://arxiv.org/abs/2007.13729v1,2020-07-27T17:59:08Z,2020-07-27T17:59:08Z,Noisy Agents: Self-supervised Exploration by P...,Humans integrate multiple sensory modalities (...,"Chuang Gan, Xiaoyu Chen, Phillip Isola, Antoni...",cs.CV,noisy agents: self-supervised exploration by p...,0.264


In [22]:
results_df[['link', 'updated_ts', 'published_ts', 'title', 'summary', 'author', 'category', 'similarity']].to_dict(orient='records')

[{'link': 'http://arxiv.org/abs/2106.04283v1',
  'updated_ts': '2021-06-08T12:22:29Z',
  'published_ts': '2021-06-08T12:22:29Z',
  'title': 'NWT: Towards natural audio-to-video generation with representation\n  learning',
  'summary': "In this work we introduce NWT, an expressive speech-to-video model. Unlike approaches that use domain-specific intermediate representations such as pose keypoints, NWT learns its own latent representations, with minimal assumptions about the audio and video content. To this end, we propose a novel discrete variational autoencoder with adversarial loss, dVAE-Adv, which learns a new discrete latent representation we call Memcodes. Memcodes are straightforward to implement, require no additional loss terms, are stable to train compared with other approaches, and show evidence of interpretability. To predict on the Memcode space, we use an autoregressive encoder-decoder model conditioned on audio. Additionally, our model can control latent attributes in the 